aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--doc/usage.md18
-rw-r--r--gen/Makefile13
-rw-r--r--gen/avx_type.hpp214
-rw-r--r--gen/avx_type_def.h55
-rw-r--r--gen/gen_avx512.cpp169
-rw-r--r--gen/gen_code.cpp692
-rw-r--r--test/Makefile31
-rw-r--r--test/apx.cpp1777
-rw-r--r--test/cvt_test.cpp4
-rw-r--r--test/test_all.bat6
-rw-r--r--test/test_misc.bat4
-rw-r--r--xbyak/xbyak.h738
-rw-r--r--xbyak/xbyak_mnemonic.h2428
13 files changed, 4223 insertions, 1926 deletions
diff --git a/doc/usage.md b/doc/usage.md
index 7b5678e..e0ecccc 100644
--- a/doc/usage.md
+++ b/doc/usage.md
@@ -128,6 +128,24 @@ vpdpbusd(xm0, xm1, xm2); // VEX encoding
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
+## APX
+[Advanced Performance Extensions (APX) Architecture Specification](https://www.intel.com/content/www/us/en/content-details/786223/intel-advanced-performance-extensions-intel-apx-architecture-specification.html)
+- Support 64-bit 16 additional GPRs (general-purpose registers) r16, ..., r31
+ - 32-bit regs are r16d, ..., r31d
+ - 16-bit regs are r16w, ..., r31w
+ - 8-bit regs are r16b, ..., r31b
+ - `add(r20, r21);`
+ - `lea(r30, ptr[r29+r31]);`
+- Support three-operand instruction
+ - `add(r20, r21, r23);`
+ - `add(r20, ptr[rax + rcx * 8 + 0x1234], r23);`
+- Support T_nf for NF=1 (status flags update suppression)
+ - `add(r20|T_nf, r21, r23);` // Set EVEX.NF=1
+- Support T_zu for NF=ZU (zero upper) for imul and setcc
+ - `imul(ax|T_zu, cx, 0x1234);` // Set ND=ZU
+ - `imul(ax|T_zu|T_nf, cx, 0x1234);` // Set ND=ZU and EVEX.NF=1
+ - `setb(r31b|T_zu);` // same as set(r31b); movzx(r31, r31b);
+
## Label
Two kinds of Label are supported. (String literal and Label class).
diff --git a/gen/Makefile b/gen/Makefile
index f254d71..12bea51 100644
--- a/gen/Makefile
+++ b/gen/Makefile
@@ -1,12 +1,17 @@
TARGET=../xbyak/xbyak_mnemonic.h
BIN=sortline gen_code gen_avx512
-CFLAGS=-I../ -O2 -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
+CFLAGS=-I../ -I ./ -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
all: $(TARGET) ../CMakeLists.txt ../meson.build ../readme.md ../readme.txt
+
+avx_type_def.h: ../xbyak/xbyak.h
+ sed -n '/@@@begin of avx_type_def.h/,/@@@end of avx_type_def.h/p' $< > $@
+
+avx_type.hpp: avx_type_def.h
sortline: sortline.cpp
$(CXX) $(CFLAGS) $< -o $@
-gen_code: gen_code.cpp ../xbyak/xbyak.h avx_type.hpp
+gen_code: gen_code.cpp avx_type.hpp
$(CXX) $(CFLAGS) $< -o $@
-gen_avx512: gen_avx512.cpp ../xbyak/xbyak.h avx_type.hpp
+gen_avx512: gen_avx512.cpp avx_type.hpp
$(CXX) $(CFLAGS) $< -o $@
$(TARGET): $(BIN)
@@ -36,4 +41,4 @@ VER=$(shell head -n 1 ../xbyak/xbyak_mnemonic.h|grep -o "[0-9.]*")
sed -l 2 -i -e "s/Xbyak [0-9.]*/Xbyak $(VER)/" $@
clean:
- $(RM) $(BIN) $(TARGET)
+ $(RM) $(BIN) $(TARGET) avx_type_def.h
diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp
index 4652d98..5ec0229 100644
--- a/gen/avx_type.hpp
+++ b/gen/avx_type.hpp
@@ -1,190 +1,74 @@
#include <assert.h>
-// copy CodeGenerator::AVXtype
- enum AVXtype {
- // low 3 bit
- T_N1 = 1,
- T_N2 = 2,
- T_N4 = 3,
- T_N8 = 4,
- T_N16 = 5,
- T_N32 = 6,
- T_NX_MASK = 7,
- //
- T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
- T_DUP = 1 << 4, // N = (8, 32, 64)
- T_66 = 1 << 5, // pp = 1
- T_F3 = 1 << 6, // pp = 2
- T_F2 = T_66 | T_F3, // pp = 3
- T_ER_R = 1 << 7, // reg{er}
- T_0F = 1 << 8,
- T_0F38 = 1 << 9,
- T_0F3A = 1 << 10,
- T_L0 = 1 << 11,
- T_L1 = 1 << 12,
- T_W0 = 1 << 13,
- T_W1 = 1 << 14,
- T_EW0 = 1 << 15,
- T_EW1 = 1 << 16,
- T_YMM = 1 << 17, // support YMM, ZMM
- T_EVEX = 1 << 18,
- T_ER_X = 1 << 19, // xmm{er}
- T_ER_Y = 1 << 20, // ymm{er}
- T_ER_Z = 1 << 21, // zmm{er}
- T_SAE_X = 1 << 22, // xmm{sae}
- T_SAE_Y = 1 << 23, // ymm{sae}
- T_SAE_Z = 1 << 24, // zmm{sae}
- T_MUST_EVEX = 1 << 25, // contains T_EVEX
- T_B32 = 1 << 26, // m32bcst
- T_B64 = 1 << 27, // m64bcst
- T_B16 = T_B32 | T_B64, // m16bcst
- T_M_K = 1 << 28, // mem{k}
- T_VSIB = 1 << 29,
- T_MEM_EVEX = 1 << 30, // use evex if mem
- T_FP16 = 1 << 31,
- T_MAP5 = T_FP16 | T_0F,
- T_MAP6 = T_FP16 | T_0F38,
- T_XXX
- };
- // T_66 = 1, T_F3 = 2, T_F2 = 3
- uint32_t getPP(int type) { return (type >> 5) & 3; }
-
+#include "avx_type_def.h"
const int NONE = 256; // same as Xbyak::CodeGenerator::NONE
-std::string type2String(int type)
+std::string type2String(uint64_t type)
{
+ if (type == 0) return "T_NONE";
std::string str;
int low = type & T_NX_MASK;
- if (0 < low) {
+ if (0 < low && low < 7) {
const char *tbl[8] = {
"T_N1", "T_N2", "T_N4", "T_N8", "T_N16", "T_N32"
};
assert(low < int(sizeof(tbl) / sizeof(tbl[0])));
str = tbl[low - 1];
}
- if (type & T_N_VL) {
- if (!str.empty()) str += " | ";
- str += "T_N_VL";
- }
- if (type & T_DUP) {
- if (!str.empty()) str += " | ";
- str += "T_DUP";
- }
- if (type & T_F2) {
- if (!str.empty()) str += " | ";
- switch (type & T_F2) {
- case T_66: str += "T_66"; break;
- case T_F3: str += "T_F3"; break;
- case T_F2: str += "T_F2"; break;
- default: break;
- }
- }
+ if (type & T_N_VL) str += "|T_N_VL";
+ if (type & T_APX) str += "|T_APX";
+ if ((type & T_NX_MASK) == T_DUP) str += "|T_DUP";
+ if (type & T_66) str += "|T_66";
+ if (type & T_F3) str += "|T_F3";
+ if (type & T_F2) str += "|T_F2";
if (type & T_0F) {
- if (!str.empty()) str += " | ";
if (type & T_FP16) {
- str += "T_MAP5";
+ str += "|T_MAP5";
} else {
- str += "T_0F";
+ str += "|T_0F";
}
}
if (type & T_0F38) {
- if (!str.empty()) str += " | ";
if (type & T_FP16) {
- str += "T_MAP6";
- } else {
- str += "T_0F38";
- }
- }
- if (type & T_0F3A) {
- if (!str.empty()) str += " | ";
- str += "T_0F3A";
- }
- if (type & T_L0) {
- if (!str.empty()) str += " | ";
- str += "VEZ_L0";
- }
- if (type & T_L1) {
- if (!str.empty()) str += " | ";
- str += "VEZ_L1";
- }
- if (type & T_W0) {
- if (!str.empty()) str += " | ";
- str += "T_W0";
- }
- if (type & T_W1) {
- if (!str.empty()) str += " | ";
- str += "T_W1";
- }
- if (type & T_EW0) {
- if (!str.empty()) str += " | ";
- str += "T_EW0";
- }
- if (type & T_EW1) {
- if (!str.empty()) str += " | ";
- str += "T_EW1";
- }
- if (type & T_YMM) {
- if (!str.empty()) str += " | ";
- str += "T_YMM";
- }
- if (type & T_EVEX) {
- if (!str.empty()) str += " | ";
- str += "T_EVEX";
- }
- if (type & T_ER_X) {
- if (!str.empty()) str += " | ";
- str += "T_ER_X";
- }
- if (type & T_ER_Y) {
- if (!str.empty()) str += " | ";
- str += "T_ER_Y";
- }
- if (type & T_ER_Z) {
- if (!str.empty()) str += " | ";
- str += "T_ER_Z";
- }
- if (type & T_ER_R) {
- if (!str.empty()) str += " | ";
- str += "T_ER_R";
- }
- if (type & T_SAE_X) {
- if (!str.empty()) str += " | ";
- str += "T_SAE_X";
- }
- if (type & T_SAE_Y) {
- if (!str.empty()) str += " | ";
- str += "T_SAE_Y";
- }
- if (type & T_SAE_Z) {
- if (!str.empty()) str += " | ";
- str += "T_SAE_Z";
- }
- if (type & T_MUST_EVEX) {
- if (!str.empty()) str += " | ";
- str += "T_MUST_EVEX";
- }
- if (type & T_B32) {
- if (!str.empty()) str += " | ";
- if (type & T_B64) {
- str += "T_B16"; // T_B16 = T_B32 | T_B64
+ str += "|T_MAP6";
} else {
- str += "T_B32";
+ str += "|T_0F38";
}
- } else if (type & T_B64) {
- if (!str.empty()) str += " | ";
- str += "T_B64";
- }
- if (type & T_M_K) {
- if (!str.empty()) str += " | ";
- str += "T_M_K";
- }
- if (type & T_VSIB) {
- if (!str.empty()) str += " | ";
- str += "T_VSIB";
- }
- if (type & T_MEM_EVEX) {
- if (!str.empty()) str += " | ";
- str += "T_MEM_EVEX";
}
+ if (type & T_0F3A) str += "|T_0F3A";
+ if (type & T_L0) str += "|T_L0";
+ if (type & T_L1) str += "|T_L1";
+ if (type & T_W0) str += "|T_W0";
+ if (type & T_W1) str += "|T_W1";
+ if (type & T_EW0) str += "|T_EW0";
+ if (type & T_EW1) str += "|T_EW1";
+ if (type & T_YMM) str += "|T_YMM";
+ if (type & T_EVEX) str += "|T_EVEX";
+ if (type & T_ER_X) str += "|T_ER_X";
+ if (type & T_ER_Y) str += "|T_ER_Y";
+ if (type & T_ER_Z) str += "|T_ER_Z";
+ if (type & T_ER_R) str += "|T_ER_R";
+ if (type & T_SAE_X) str += "|T_SAE_X";
+ if (type & T_SAE_Y) str += "|T_SAE_Y";
+ if (type & T_SAE_Z) str += "|T_SAE_Z";
+ if (type & T_MUST_EVEX) str += "|T_MUST_EVEX";
+
+ switch (type & T_B16) { // T_B16 = T_B32 | T_B64
+ case T_B16: str += "|T_B16"; break;
+ case T_B32: str += "|T_B32"; break;
+ case T_B64: str += "|T_B64"; break;
+ default: break;
+ }
+ if (type & T_M_K) str += "|T_M_K";
+ if (type & T_VSIB) str += "|T_VSIB";
+ if (type & T_MEM_EVEX) str += "|T_MEM_EVEX";
+ if (type & T_NF) str += "|T_NF";
+ if (type & T_CODE1_IF1) str += "|T_CODE1_IF1";
+ if (type & T_MAP3) str += "|T_MAP3";
+ if (type & T_ND1) str += "|T_ND1";
+ if (type & T_ZU) str += "|T_ZU";
+ if (type & T_MAP1) str += "|T_MAP1";
+
+ if (str[0] == '|') str = str.substr(1);
return str;
}
diff --git a/gen/avx_type_def.h b/gen/avx_type_def.h
new file mode 100644
index 0000000..81052c4
--- /dev/null
+++ b/gen/avx_type_def.h
@@ -0,0 +1,55 @@
+ // @@@begin of avx_type_def.h
+ static const uint64_t T_NONE = 0ull;
+ // low 3 bit
+ static const uint64_t T_N1 = 1ull;
+ static const uint64_t T_N2 = 2ull;
+ static const uint64_t T_N4 = 3ull;
+ static const uint64_t T_N8 = 4ull;
+ static const uint64_t T_N16 = 5ull;
+ static const uint64_t T_N32 = 6ull;
+ static const uint64_t T_NX_MASK = 7ull;
+ static const uint64_t T_DUP = T_NX_MASK;//1 << 4, // N = (8, 32, 64)
+ static const uint64_t T_N_VL = 1ull << 3; // N * (1, 2, 4) for VL
+ static const uint64_t T_APX = 1ull << 4;
+ static const uint64_t T_66 = 1ull << 5; // pp = 1
+ static const uint64_t T_F3 = 1ull << 6; // pp = 2
+ static const uint64_t T_ER_R = 1ull << 7; // reg{er}
+ static const uint64_t T_0F = 1ull << 8;
+ static const uint64_t T_0F38 = 1ull << 9;
+ static const uint64_t T_0F3A = 1ull << 10;
+ static const uint64_t T_L0 = 1ull << 11;
+ static const uint64_t T_L1 = 1ull << 12;
+ static const uint64_t T_W0 = 1ull << 13;
+ static const uint64_t T_W1 = 1ull << 14;
+ static const uint64_t T_EW0 = 1ull << 15;
+ static const uint64_t T_EW1 = 1ull << 16;
+ static const uint64_t T_YMM = 1ull << 17; // support YMM, ZMM
+ static const uint64_t T_EVEX = 1ull << 18;
+ static const uint64_t T_ER_X = 1ull << 19; // xmm{er}
+ static const uint64_t T_ER_Y = 1ull << 20; // ymm{er}
+ static const uint64_t T_ER_Z = 1ull << 21; // zmm{er}
+ static const uint64_t T_SAE_X = 1ull << 22; // xmm{sae}
+ static const uint64_t T_SAE_Y = 1ull << 23; // ymm{sae}
+ static const uint64_t T_SAE_Z = 1ull << 24; // zmm{sae}
+ static const uint64_t T_MUST_EVEX = 1ull << 25; // contains T_EVEX
+ static const uint64_t T_B32 = 1ull << 26; // m32bcst
+ static const uint64_t T_B64 = 1ull << 27; // m64bcst
+ static const uint64_t T_B16 = T_B32 | T_B64; // m16bcst (Be careful)
+ static const uint64_t T_M_K = 1ull << 28; // mem{k}
+ static const uint64_t T_VSIB = 1ull << 29;
+ static const uint64_t T_MEM_EVEX = 1ull << 30; // use evex if mem
+ static const uint64_t T_FP16 = 1ull << 31; // avx512-fp16
+ static const uint64_t T_MAP5 = T_FP16 | T_0F;
+ static const uint64_t T_MAP6 = T_FP16 | T_0F38;
+ static const uint64_t T_NF = 1ull << 32; // T_nf
+ static const uint64_t T_CODE1_IF1 = 1ull << 33; // code|=1 if !r.isBit(8)
+ static const uint64_t T_MAP3 = 1ull << 34; // rorx only
+ static const uint64_t T_ND1 = 1ull << 35; // ND=1
+ static const uint64_t T_ZU = 1ull << 36; // ND=ZU
+ static const uint64_t T_F2 = 1ull << 37; // pp = 3
+ static const uint64_t T_MAP1 = 1ull << 38; // kmov
+ // T_66 = 1, T_F3 = 2, T_F2 = 3
+ static inline uint32_t getPP(uint64_t type) { return (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; }
+ static inline uint32_t getMMM(uint64_t type) { return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; }
+
+ // @@@end of avx_type_def.h
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 8283a54..499db28 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -15,8 +15,7 @@ using namespace Xbyak;
void putOpmask(bool only64bit)
{
if (only64bit) {
- puts("void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }");
- puts("void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }");
+ puts("void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }");
return;
}
@@ -76,22 +75,14 @@ void putOpmask(bool only64bit)
printf("void %sd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1);
}
}
- puts("void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }");
- puts("void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }");
- puts("void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }");
- puts("void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }");
-
- puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }");
- puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }");
- puts("void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }");
- puts("void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }");
-
- puts("void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }");
- puts("void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }");
- puts("void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }");
- puts("void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }");
- puts("void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }");
- puts("void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }");
+ for (int i = 0; i < 4; i++) {
+ const char tbl[] = "bwdq";
+ const int bitTbl[] = { 8, 16, 32, 64 };
+ int bit = bitTbl[i];
+ printf("void kmov%c(const Opmask& k, const Operand& op) { opKmov(k, op, false, %d); }\n", tbl[i], bit);
+ printf("void kmov%c(const Address& addr, const Opmask& k) { opKmov(k, addr, true, %d); }\n", tbl[i], bit);
+ if (i < 3) printf("void kmov%c(const Reg32& r, const Opmask& k) { opKmov(k, r, true, %d); }\n", tbl[i], bit);
+ }
}
// vcmppd(k, x, op)
@@ -100,7 +91,7 @@ void putVcmp()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
bool hasIMM;
} tbl[] = {
{ 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66 | T_B64, true },
@@ -142,9 +133,9 @@ void putVcmp()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- std::string type = type2String(p->type);
+ std::string s = type2String(p->type);
printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n"
- , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
+ , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
puts("void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }");
puts("void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }");
@@ -173,7 +164,7 @@ void putX_XM()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
} tbl[] = {
{ 0x6F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x6F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
@@ -210,8 +201,8 @@ void putX_XM()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- std::string type = type2String(p->type);
- printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
+ std::string s = type2String(p->type);
+ printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
}
puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }");
@@ -229,7 +220,7 @@ void putM_X()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
} tbl[] = {
{ 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
@@ -242,8 +233,8 @@ void putM_X()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- std::string type = type2String(p->type);
- printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
+ std::string s = type2String(p->type);
+ printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
}
}
@@ -252,7 +243,7 @@ void putXM_X()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
} tbl[] = {
{ 0x8A, "vcompresspd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
{ 0x8A, "vcompressps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
@@ -265,8 +256,8 @@ void putXM_X()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- std::string type = type2String(p->type);
- printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
+ std::string s = type2String(p->type);
+ printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
}
}
@@ -275,7 +266,7 @@ void putX_X_XM_IMM()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
bool hasIMM;
} tbl[] = {
{ 0x03, "valignd", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM, true },
@@ -413,9 +404,9 @@ void putX_X_XM_IMM()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- std::string type = type2String(p->type);
+ std::string s = type2String(p->type);
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
- , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
+ , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
}
@@ -425,7 +416,7 @@ void putShift()
const char *name;
uint8_t code;
int idx;
- int type;
+ uint64_t type;
} tbl[] = {
{ "vpsraq", 0x72, 4, T_0F | T_66 | T_YMM | T_MUST_EVEX |T_EW1 | T_B64 },
{ "vprold", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 },
@@ -435,8 +426,8 @@ void putShift()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- std::string type = type2String(p.type);
- printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code);
+ std::string s = type2String(p.type);
+ printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, s.c_str(), p.code);
}
}
@@ -446,7 +437,7 @@ void putExtractInsert()
const struct Tbl {
const char *name;
uint8_t code;
- int type;
+ uint64_t type;
bool isZMM;
} tbl[] = {
{ "vextractf32x4", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
@@ -461,16 +452,16 @@ void putExtractInsert()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- std::string type = type2String(p.type);
+ std::string s = type2String(p.type);
const char *kind = p.isZMM ? "Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM";
- printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code);
+ printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, s.c_str(), p.code);
}
}
{
const struct Tbl {
const char *name;
uint8_t code;
- int type;
+ uint64_t type;
bool isZMM;
} tbl[] = {
{ "vinsertf32x4", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
@@ -485,12 +476,12 @@ void putExtractInsert()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- std::string type = type2String(p.type);
+ std::string s = type2String(p.type);
const char *x = p.isZMM ? "Zmm" : "Ymm";
const char *cond = p.isZMM ? "op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))";
printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8_t imm) {"
"if (!%s) XBYAK_THROW(ERR_BAD_COMBINATION) "
- "opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, type.c_str(), p.code);
+ "opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, s.c_str(), p.code);
}
}
}
@@ -501,7 +492,7 @@ void putBroadcast(bool only64bit)
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
int reg;
} tbl[] = {
{ 0x7A, "vpbroadcastb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 8 },
@@ -511,9 +502,9 @@ void putBroadcast(bool only64bit)
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- std::string type = type2String(p.type);
+ std::string s = type2String(p.type);
if ((only64bit && p.reg == 64) || (!only64bit && p.reg != 64)) {
- printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, type.c_str(), p.code);
+ printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, s.c_str(), p.code);
}
}
}
@@ -536,7 +527,7 @@ void putCvt()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
int ptn;
} tbl[] = {
{ 0x79, "vcvtsd2usi", T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X, 0 },
@@ -583,28 +574,28 @@ void putCvt()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- std::string type = type2String(p.type);
+ std::string s = type2String(p.type);
switch (p.ptn) {
case 0:
- printf("void %s(const Reg32e& r, const Operand& op) { int type = (%s) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code);
+ printf("void %s(const Reg32e& r, const Operand& op) { uint64_t type = (%s) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
case 1:
- printf("void %s(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
+ printf("void %s(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
case 2:
- printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
+ printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
case 3:
- printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
+ printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
case 4:
- printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
+ printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
case 5:
- printf("void %s(const Xmm& x, const Operand& op) { opCvt5(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
+ printf("void %s(const Xmm& x, const Operand& op) { opCvt5(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
case 6:
- printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) int type = (%s) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code);
+ printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) uint64_t type = (%s) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
}
}
@@ -621,7 +612,7 @@ void putGather()
{
const struct Tbl {
const char *name;
- int type;
+ uint64_t type;
uint8_t code;
int mode;
} tbl[] = {
@@ -636,15 +627,15 @@ void putGather()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- std::string type = type2String(p.type | T_VSIB);
- printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
+ std::string s = type2String(p.type | T_VSIB);
+ printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, s.c_str(), p.code, p.mode);
}
}
void putScatter()
{
const struct Tbl {
const char *name;
- int type;
+ uint64_t type;
uint8_t code;
int mode; // reverse of gather
} tbl[] = {
@@ -660,8 +651,8 @@ void putScatter()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- std::string type = type2String(p.type | T_VSIB);
- printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
+ std::string s = type2String(p.type | T_VSIB);
+ printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, s.c_str(), p.code, p.mode);
}
}
@@ -689,7 +680,7 @@ void putMov()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
int mode;
} tbl[] = {
{ 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
@@ -718,8 +709,8 @@ void putMov()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- std::string type = type2String(p.type);
- printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, type.c_str(), p.code, p.mode ? "true" : "false");
+ std::string s = type2String(p.type);
+ printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, s.c_str(), p.code, p.mode ? "true" : "false");
}
}
}
@@ -729,7 +720,7 @@ void putX_XM_IMM()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
bool hasIMM;
} tbl[] = {
{ 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
@@ -770,9 +761,9 @@ void putX_XM_IMM()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- std::string type = type2String(p->type);
+ std::string s = type2String(p->type);
printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n"
- , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
+ , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
}
@@ -784,7 +775,7 @@ void putMisc()
const struct Tbl {
const char *name;
int zm;
- int type;
+ uint64_t type;
uint8_t code;
bool isZmm;
} tbl[] = {
@@ -810,9 +801,9 @@ void putMisc()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- std::string type = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB);
+ std::string s = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB);
printf("void %s(const Address& addr) { opGatherFetch(addr, zm%d, %s, 0x%2X, Operand::%s); }\n"
- , p.name, p.zm, type.c_str(), p.code, p.isZmm ? "ZMM" : "YMM");
+ , p.name, p.zm, s.c_str(), p.code, p.isZmm ? "ZMM" : "YMM");
}
}
@@ -887,18 +878,18 @@ void putFP16_FMA()
{ "213", 0xA0 },
{ "231", 0xB0 },
};
- int t = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX;
+ uint64_t type = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX;
const char *suf = 0;
if (tbl[i].isPH) {
- t |= T_ER_Z | T_YMM | T_B16;
+ type |= T_ER_Z | T_YMM | T_B16;
suf = "ph";
} else {
- t |= T_ER_X | T_N2;
+ type |= T_ER_X | T_N2;
suf = "sh";
}
- std::string type = type2String(t);
+ std::string s = type2String(type);
printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
- , tbl[i].name, ord[k].str, suf, type.c_str(), tbl[i].code | ord[k].code);
+ , tbl[i].name, ord[k].str, suf, s.c_str(), tbl[i].code | ord[k].code);
}
}
}
@@ -914,23 +905,23 @@ void putFP16_FMA2()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
for (int j = 0; j < 2; j++) {
- int t = T_MAP6 | T_EW0 | T_MUST_EVEX;
+ uint64_t type = T_MAP6 | T_EW0 | T_MUST_EVEX;
if (j == 0) {
- t |= T_F2;
+ type |= T_F2;
} else {
- t |= T_F3;
+ type |= T_F3;
}
const char *suf = 0;
if (tbl[i].isPH) {
- t |= T_ER_Z | T_YMM | T_B32;
+ type |= T_ER_Z | T_YMM | T_B32;
suf = "ph";
} else {
- t |= T_ER_X | T_N2;
+ type |= T_ER_X | T_N2;
suf = "sh";
}
- std::string type = type2String(t);
+ std::string s = type2String(type);
printf("void vf%s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
- , j == 0 ? "c" : "", tbl[i].name, suf, type.c_str(), tbl[i].code);
+ , j == 0 ? "c" : "", tbl[i].name, suf, s.c_str(), tbl[i].code);
}
}
}
@@ -938,16 +929,16 @@ void putFP16_FMA2()
void putFP16_2()
{
{
- int t = T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2;
- std::string type = type2String(t);
- printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", type.c_str());
- printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", type.c_str());
+ uint64_t type = T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2;
+ std::string s = type2String(type);
+ printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", s.c_str());
+ printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", s.c_str());
}
{
- int t = T_66 | T_MAP5 | T_MUST_EVEX | T_N2;
- std::string type = type2String(t);
- printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", type.c_str());
- printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", type.c_str());
+ uint64_t type = T_66 | T_MAP5 | T_MUST_EVEX | T_N2;
+ std::string s = type2String(type);
+ printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", s.c_str());
+ printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", s.c_str());
}
}
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 0c71cca..6a73f42 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -48,7 +48,7 @@ void putX_X_XM(bool omitOnly)
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
bool hasIMM;
bool enableOmit;
int mode; // 1 : sse, 2 : avx, 3 : sse + avx
@@ -216,7 +216,7 @@ void putX_X_XM(bool omitOnly)
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- std::string type = type2String(p->type);
+ std::string s = type2String(p->type);
if (omitOnly) {
if (p->enableOmit) {
printf("void v%s(const Xmm& x, const Operand& op%s) { v%s(x, x, op%s); }\n", p->name, p->hasIMM ? ", uint8_t imm" : "", p->name, p->hasIMM ? ", imm" : "");
@@ -224,35 +224,32 @@ void putX_X_XM(bool omitOnly)
} else {
if (p->mode & 1) {
if (p->hasIMM) {
- printf("void %s(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }\n", p->name, p->code);
+ printf("void %s(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x%02X, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }\n", p->name, p->code);
} else {
- printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, NONE, 0x38); }\n", p->name, p->code);
+ printf("void %s(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x%02X, isXMM_XMMorMEM); }\n", p->name, p->code);
}
}
if (p->mode & 2) {
printf("void v%s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
- , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
+ , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
}
}
}
}
-void putMemOp(const char *name, uint8_t prefix, uint8_t ext, uint8_t code1, int code2, int bit = 32)
+void putMemOp(const char *name, const char *type, uint8_t ext, uint8_t code, int bit, int fwait = false)
{
- printf("void %s(const Address& addr) { ", name);
- if (prefix) printf("db(0x%02X); ", prefix);
- printf("opModM(addr, Reg%d(%d), 0x%02X, 0x%02X); }\n", bit, ext, code1, code2);
+ printf("void %s(const Address& addr) { %sopMR(addr, Reg%d(%d), %s, 0x%02X); }\n", name, fwait ? "db(0x9B); " : "", bit, ext, type, code);
}
-void putLoadSeg(const char *name, uint8_t code1, int code2 = NONE)
+void putLoadSeg(const char *name, uint64_t type, uint8_t code)
{
- printf("void %s(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x%02X, 0x%02X); }\n", name, code1, code2);
+ printf("void %s(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, %s, 0x%02X); }\n", name, type ? "T_0F" : "T_NONE", code);
}
void put()
{
- const int NO = CodeGenerator::NONE;
{
char buf[16];
unsigned int v = VERSION;
@@ -383,43 +380,40 @@ void put()
{
const struct Tbl {
- uint8_t code;
- uint8_t pref;
+ const char *pref;
const char *name;
} tbl[] = {
- { 0x70, 0, "pshufw" },
- { 0x70, 0xF2, "pshuflw" },
- { 0x70, 0xF3, "pshufhw" },
- { 0x70, 0x66, "pshufd" },
+ { "T_NONE", "pshufw" },
+ { "T_F2", "pshuflw" },
+ { "T_F3", "pshufhw" },
+ { "T_66", "pshufd" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void %s(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x%02X, 0x%02X, imm8); }\n", p->name, p->code, p->pref);
+ printf("void %s(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, T_0F, %s, imm8); }\n", p->name, p->pref);
}
}
{
const struct MmxTbl6 {
uint8_t code; // for (reg, reg/[mem])
uint8_t code2; // for ([mem], reg)
- int pref;
+ const char *pref;
const char *name;
} mmxTbl6[] = {
- { 0x6F, 0x7F, 0x66, "movdqa" },
- { 0x6F, 0x7F, 0xF3, "movdqu" },
+ { 0x6F, 0x7F, "T_66", "movdqa" },
+ { 0x6F, 0x7F, "T_F3", "movdqu" },
// SSE2
- { 0x28, 0x29, NO, "movaps" },
- { 0x10, 0x11, 0xF3, "movss" },
- { 0x10, 0x11, NO, "movups" },
- { 0x28, 0x29, 0x66, "movapd" },
- { 0x10, 0x11, 0xF2, "movsd" },
- { 0x10, 0x11, 0x66, "movupd" },
+ { 0x28, 0x29, "T_NONE", "movaps" },
+ { 0x10, 0x11, "T_F3", "movss" },
+ { 0x10, 0x11, "T_NONE", "movups" },
+ { 0x28, 0x29, "T_66", "movapd" },
+ { 0x10, 0x11, "T_F2", "movsd" },
+ { 0x10, 0x11, "T_66", "movupd" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(mmxTbl6); i++) {
const MmxTbl6 *p = &mmxTbl6[i];
- printf("void %s(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x%02X, 0x%02X); }\n", p->name, p->code, p->pref);
- printf("void %s(const Address& addr, const Xmm& xmm) { ", p->name);
- if (p->pref != NO) printf("db(0x%02X); ", p->pref);
- printf("opModM(addr, xmm, 0x0F, 0x%02X); }\n", p->code2);
+ printf("void %s(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x%02X, T_0F, %s); }\n", p->name, p->code, p->pref);
+ printf("void %s(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|%s, 0x%02X); }\n", p->name, p->pref, p->code2);
}
}
{
@@ -430,13 +424,13 @@ void put()
SD = 1 << 3
};
const struct {
- int code;
+ const char *type;
const char *name;
} sufTbl[] = {
- { NO, "ps" },
- { 0xF3, "ss" },
- { 0x66, "pd" },
- { 0xF2, "sd" },
+ { "T_0F", "ps" },
+ { "T_0F | T_F3", "ss" },
+ { "T_0F | T_66", "pd" },
+ { "T_0F | T_F2", "sd" },
};
const struct Tbl {
uint8_t code;
@@ -469,9 +463,9 @@ void put()
if (!(p->mode & (1 << j))) continue;
if (p->hasImm) {
// don't change uint8_t to int because NO is not in byte
- printf("void %s%s(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM, imm8); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code);
+ printf("void %s%s(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM, imm8); }\n", p->name, sufTbl[j].name, sufTbl[j].type, p->code);
} else {
- printf("void %s%s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code);
+ printf("void %s%s(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM); }\n", p->name, sufTbl[j].name, sufTbl[j].type, p->code);
}
}
}
@@ -479,77 +473,77 @@ void put()
{
// (XMM, XMM)
const struct Tbl {
+ uint64_t type;
uint8_t code;
- uint8_t pref;
const char *name;
} tbl[] = {
- { 0xF7, 0x66, "maskmovdqu" },
- { 0x12, 0 , "movhlps" },
- { 0x16, 0 , "movlhps" },
+ { T_66 | T_0F, 0xF7, "maskmovdqu" },
+ { T_0F, 0x12, "movhlps" },
+ { T_0F, 0x16, "movlhps" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void %s(const Xmm& reg1, const Xmm& reg2) { ", p->name);
- if (p->pref) printf("db(0x%02X); ", p->pref);
- printf(" opModR(reg1, reg2, 0x0F, 0x%02X); }\n", p->code);
+ std::string s = type2String(p->type);
+ printf("void %s(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
}
}
{
// (XMM, XMM|MEM)
const struct Tbl {
uint8_t code;
- int pref;
+ uint64_t type;
const char *name;
} tbl[] = {
- { 0x6D, 0x66, "punpckhqdq" },
- { 0x6C, 0x66, "punpcklqdq" },
-
- { 0x2F, NO , "comiss" },
- { 0x2E, NO , "ucomiss" },
- { 0x2F, 0x66, "comisd" },
- { 0x2E, 0x66, "ucomisd" },
-
- { 0x5A, 0x66, "cvtpd2ps" },
- { 0x5A, NO , "cvtps2pd" },
- { 0x5A, 0xF2, "cvtsd2ss" },
- { 0x5A, 0xF3, "cvtss2sd" },
- { 0xE6, 0xF2, "cvtpd2dq" },
- { 0xE6, 0x66, "cvttpd2dq" },
- { 0xE6, 0xF3, "cvtdq2pd" },
- { 0x5B, 0x66, "cvtps2dq" },
- { 0x5B, 0xF3, "cvttps2dq" },
- { 0x5B, NO , "cvtdq2ps" },
+ { 0x6D, T_66, "punpckhqdq" },
+ { 0x6C, T_66, "punpcklqdq" },
+
+ { 0x2F, T_NONE, "comiss" },
+ { 0x2E, T_NONE, "ucomiss" },
+ { 0x2F, T_66, "comisd" },
+ { 0x2E, T_66, "ucomisd" },
+
+ { 0x5A, T_66, "cvtpd2ps" },
+ { 0x5A, T_NONE, "cvtps2pd" },
+ { 0x5A, T_F2, "cvtsd2ss" },
+ { 0x5A, T_F3, "cvtss2sd" },
+ { 0xE6, T_F2, "cvtpd2dq" },
+ { 0xE6, T_66, "cvttpd2dq" },
+ { 0xE6, T_F3, "cvtdq2pd" },
+ { 0x5B, T_66, "cvtps2dq" },
+ { 0x5B, T_F3, "cvttps2dq" },
+ { 0x5B, T_NONE, "cvtdq2ps" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, p->code, p->pref);
+ std::string s = type2String(p->type | T_0F);
+ printf("void %s(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM); }\n", p->name, s.c_str(), p->code);
}
}
-
{
// special type
const struct Tbl {
uint8_t code;
- int pref;
+ uint64_t type;
const char *name;
const char *cond;
} tbl[] = {
- { 0x2A, NO , "cvtpi2ps", "isXMM_MMXorMEM" },
- { 0x2D, NO , "cvtps2pi", "isMMX_XMMorMEM" },
- { 0x2A, 0xF3, "cvtsi2ss", "isXMM_REG32orMEM" },
- { 0x2D, 0xF3, "cvtss2si", "isREG32_XMMorMEM" },
- { 0x2C, NO , "cvttps2pi", "isMMX_XMMorMEM" },
- { 0x2C, 0xF3, "cvttss2si", "isREG32_XMMorMEM" },
- { 0x2A, 0x66, "cvtpi2pd", "isXMM_MMXorMEM" },
- { 0x2D, 0x66, "cvtpd2pi", "isMMX_XMMorMEM" },
- { 0x2A, 0xF2, "cvtsi2sd", "isXMM_REG32orMEM" },
- { 0x2D, 0xF2, "cvtsd2si", "isREG32_XMMorMEM" },
- { 0x2C, 0x66, "cvttpd2pi", "isMMX_XMMorMEM" },
- { 0x2C, 0xF2, "cvttsd2si", "isREG32_XMMorMEM" },
+ { 0x2A, T_NONE , "cvtpi2ps", "isXMM_MMXorMEM" },
+ { 0x2D, T_NONE , "cvtps2pi", "isMMX_XMMorMEM" },
+ { 0x2A, T_F3, "cvtsi2ss", "isXMM_REG32orMEM" },
+ { 0x2D, T_F3, "cvtss2si", "isREG32_XMMorMEM" },
+ { 0x2C, T_NONE , "cvttps2pi", "isMMX_XMMorMEM" },
+ { 0x2C, T_F3, "cvttss2si", "isREG32_XMMorMEM" },
+ { 0x2A, T_66, "cvtpi2pd", "isXMM_MMXorMEM" },
+ { 0x2D, T_66, "cvtpd2pi", "isMMX_XMMorMEM" },
+ { 0x2A, T_F2, "cvtsi2sd", "isXMM_REG32orMEM" },
+ { 0x2D, T_F2, "cvtsd2si", "isREG32_XMMorMEM" },
+ { 0x2C, T_66, "cvttpd2pi", "isMMX_XMMorMEM" },
+ { 0x2C, T_F2, "cvttsd2si", "isREG32_XMMorMEM" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void %s(const Operand& reg, const Operand& op) { opGen(reg, op, 0x%02X, 0x%02X, %s); }\n", p->name, p->code, p->pref, p->cond);
+ std::string s = type2String(p->type | T_0F);
+ printf("void %s(const Reg& reg, const Operand& op) { opSSE(reg, op, %s, 0x%02X, %s); }\n", p->name, s.c_str(), p->code, p->cond);
}
}
{
@@ -570,23 +564,24 @@ void put()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void prefetch%s(const Address& addr) { opModM(addr, Reg32(%d), 0x0F, 0x%02X); }\n", p->name, p->ext, p->code);
+ printf("void prefetch%s(const Address& addr) { opMR(addr, Reg32(%d), T_0F, 0x%02X); }\n", p->name, p->ext, p->code);
}
}
{
const struct Tbl {
uint8_t code;
- int pref;
const char *name;
+ uint64_t type;
} tbl[] = {
- { 0x16, NO, "movhps" },
- { 0x12, NO, "movlps" },
- { 0x16, 0x66, "movhpd" },
- { 0x12, 0x66, "movlpd" },
+ { 0x16, "movhps", T_0F },
+ { 0x12, "movlps", T_0F },
+ { 0x16, "movhpd", T_0F | T_66 },
+ { 0x12, "movlpd", T_0F | T_66 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void %s(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x%02X, 0x%02X); }\n", p->name, p->code, p->pref);
+ std::string s = type2String(p->type);
+ printf("void %s(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
}
}
{
@@ -629,12 +624,50 @@ void put()
const char *msg = "//-V524"; // disable warning of PVS-Studio
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void cmov%s(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | %d); }%s\n", p->name, p->ext, msg);
+ printf("void cmov%s(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | %d, op.isREG(16|i32e)); }%s\n", p->name, p->ext, msg);
+ printf("void cmov%s(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | %d); }%s\n", p->name, p->ext, msg);
printf("void j%s(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg);
printf("void j%s(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg);
printf("void j%s(const char *label, LabelType type = T_AUTO) { j%s(std::string(label), type); }%s\n", p->name, p->name, msg);
printf("void j%s(const void *addr) { opJmpAbs(addr, T_NEAR, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg);
- printf("void set%s(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | %d); }%s\n", p->name, p->ext, msg);
+ printf("void set%s(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | %d)) return; opRext(op, 8, 0, T_0F, 0x90 | %d); }%s\n", p->name, p->ext, p->ext, msg);
+
+ // ccmpscc
+ // true if SCC = 0b1010, false if SCC = 0b1011 (see APX Architecture Specification p.266)
+ const char *s = p->ext == 10 ? "t" : p->ext == 11 ? "f" : p->name;
+ printf("void ccmp%s(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, %d); }\n", s, p->ext);
+ printf("void ccmp%s(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, %d); }\n", s, p->ext);
+ printf("void ctest%s(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, %d); }\n", s, p->ext);
+ printf("void ctest%s(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, %d); }\n", s, p->ext);
+ }
+ }
+ {
+ // cfcmov
+ const struct Tbl {
+ uint8_t code;
+ const char *suf;
+ } tbl[] = {
+ {0x40, "o"},
+ {0x41, "no"},
+ {0x42, "b"},
+ {0x43, "nb"},
+ {0x44, "z"},
+ {0x45, "nz"},
+ {0x46, "be"},
+ {0x47, "nbe"},
+ {0x48, "s"},
+ {0x49, "ns"},
+ {0x4A, "p"},
+ {0x4B, "np"},
+ {0x4C, "l"},
+ {0x4D, "nl"},
+ {0x4E, "le"},
+ {0x4F, "nle"},
+ };
+ for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+ const Tbl *p = &tbl[i];
+ printf("void cfcmov%s(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x%02X); }\n", p->suf, p->code);
+ printf("void cfcmov%s(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x%02X); }\n", p->suf, p->code);
}
}
{
@@ -790,61 +823,66 @@ void put()
putGeneric(tbl, NUM_OF_ARRAY(tbl));
puts("void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); }");
puts("void int_(uint8_t x) { db(0xCD); db(x); }");
- putLoadSeg("lss", 0x0F, 0xB2);
- putLoadSeg("lfs", 0x0F, 0xB4);
- putLoadSeg("lgs", 0x0F, 0xB5);
+ putLoadSeg("lss", T_0F, 0xB2);
+ putLoadSeg("lfs", T_0F, 0xB4);
+ putLoadSeg("lgs", T_0F, 0xB5);
}
{
const struct Tbl {
uint8_t code; // (reg, reg)
uint8_t ext; // (reg, imm)
const char *name;
+ bool support3op;
+ uint64_t type;
} tbl[] = {
- { 0x10, 2, "adc" },
- { 0x00, 0, "add" },
- { 0x20, 4, "and_" },
- { 0x38, 7, "cmp" },
- { 0x08, 1, "or_" },
- { 0x18, 3, "sbb" },
- { 0x28, 5, "sub" },
- { 0x30, 6, "xor_" },
+ { 0x10, 2, "adc", true, T_NONE },
+ { 0x00, 0, "add", true, T_NF | T_CODE1_IF1 },
+ { 0x20, 4, "and_", true, T_NF | T_CODE1_IF1 },
+ { 0x38, 7, "cmp", false, T_NONE },
+ { 0x08, 1, "or_", true, T_NF | T_CODE1_IF1 },
+ { 0x18, 3, "sbb", true, T_NONE },
+ { 0x28, 5, "sub", true, T_NF | T_CODE1_IF1 },
+ { 0x30, 6, "xor_", true, T_NF | T_CODE1_IF1 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void %s(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x%02X); }\n", p->name, p->code);
- printf("void %s(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext);
+ printf("void %s(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x%02X); }\n", p->name, p->code);
+ printf("void %s(const Operand& op, uint32_t imm) { opOI(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext);
+ if (!p->support3op) continue;
+ std::string s = type2String(p->type);
+ printf("void %s(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
+ printf("void %s(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, %s, %d); }\n", p->name, s.c_str(), p->ext);
}
}
{
const struct Tbl {
const char *name;
- uint8_t prefix;
+ const char *prefix;
} tbl[] = {
- { "aadd", 0 },
- { "aand", 0x66 },
- { "aor", 0xF2 },
- { "axor", 0xF3 },
+ { "aadd", "" },
+ { "aand", " | T_66" },
+ { "aor", " | T_F2" },
+ { "axor", " | T_F3" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
printf("void %s(const Address& addr, const Reg32e &reg) { ", p->name);
- if (p->prefix) printf("db(0x%02X); ", p->prefix);
- printf("opModM(addr, reg, 0x0F, 0x38, 0x0FC); }\n");
+ printf("opMR(addr, reg, T_0F38%s, 0x0FC); }\n", p->prefix);
}
}
{
const struct Tbl {
- uint8_t code;
uint8_t ext;
const char *name;
} tbl[] = {
- { 0x48, 1, "dec" },
- { 0x40, 0, "inc" },
+ { 1, "dec" },
+ { 0, "inc" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void %s(const Operand& op) { opIncDec(op, 0x%02X, %d); }\n", p->name, p->code, p->ext);
+ printf("void %s(const Operand& op) { opIncDec(Reg(), op, %d); }\n", p->name, p->ext);
+ printf("void %s(const Reg& d, const Operand& op) { opIncDec(d, op, %d); }\n", p->name, p->ext);
}
}
{
@@ -860,8 +898,8 @@ void put()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void %s(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0x%02X); }\n", p->name, p->code);
- printf("void %s(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, %d, 0x0f, 0xba, NONE, false, 1); db(imm); }\n", p->name, p->ext);
+ printf("void %s(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0x%02X, op.isREG(16|i32e) && op.getBit() == reg.getBit()); }\n", p->name, p->code);
+ printf("void %s(const Operand& op, uint8_t imm) { opRext(op, 16|i32e, %d, T_0F, 0xba, false, 1); db(imm); }\n", p->name, p->ext);
}
}
{
@@ -869,53 +907,70 @@ void put()
uint8_t code;
uint8_t ext;
const char *name;
+ bool NF;
+ int n; // # of op
} tbl[] = {
- { 0xF6, 6, "div" },
- { 0xF6, 7, "idiv" },
- { 0xF6, 5, "imul" },
- { 0xF6, 4, "mul" },
- { 0xF6, 3, "neg" },
- { 0xF6, 2, "not_" },
+ { 0xF6, 6, "div", true, 1 },
+ { 0xF6, 7, "idiv", true, 1 },
+ { 0xF6, 5, "imul", true ,3 },
+ { 0xF6, 4, "mul", true, 1 },
+ { 0xF6, 3, "neg", true, 2 },
+ { 0xF6, 2, "not_", false, 2 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
const std::string name = p->name;
- printf("void %s(const Operand& op) { opR_ModM(op, 0, %d, 0x%02X); }\n", p->name, p->ext, p->code);
+ uint64_t type = T_APX|T_CODE1_IF1;
+ if (p->NF) type |= T_NF;
+ std::string s = type2String(type);
+ printf("void %s(const Operand& op) { opRext(op, 0, %d, %s, 0x%02X); }\n", p->name, p->ext, s.c_str(), p->code);
+ if (p->n == 2) {
+ type = T_APX|T_ND1|T_CODE1_IF1;
+ if (p->NF) type |= T_NF;
+ s = type2String(type);
+ printf("void %s(const Reg& d, const Operand& op) { opROO(d, op, Reg(%d, Operand::REG, d.getBit()), %s, 0xF6); }\n", p->name, p->ext, s.c_str());
+ }
}
+ printf("void imul(const Reg& reg, const Operand& op) { if (opROO(Reg(), op, reg, T_APX|T_NF, 0xAF)) return; opRO(reg, op, T_0F, 0xAF, reg.getKind() == op.getKind()); }\n");
}
{
const struct Tbl {
const char *name;
- uint8_t ext;
+ uint8_t ext; // |8 means supporting NF=1
} tbl[] = {
- { "rcl", 2 },
- { "rcr", 3 },
- { "rol", 0 },
- { "ror", 1 },
- { "sar", 7 },
- { "shl", 4 },
- { "shr", 5 },
-
- { "sal", 4 },
+ { "rcl", 2|0 },
+ { "rcr", 3|0 },
+ { "rol", 0|8 },
+ { "ror", 1|8 },
+ { "sar", 7|8 },
+ { "shl", 4|8 },
+ { "shr", 5|8 },
+
+ { "sal", 4|8 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
printf("void %s(const Operand& op, int imm) { opShift(op, imm, %d); }\n", p->name, p->ext);
printf("void %s(const Operand& op, const Reg8& _cl) { opShift(op, _cl, %d); }\n", p->name, p->ext);
+ printf("void %s(const Reg& d, const Operand& op, int imm) { opShift(op, imm, %d, &d); }\n", p->name, p->ext);
+ printf("void %s(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, %d, &d); }\n", p->name, p->ext);
}
}
{
const struct Tbl {
const char *name;
uint8_t code;
+ uint8_t code2;
} tbl[] = {
- { "shld", 0xA4 },
- { "shrd", 0xAC },
+ { "shld", 0xA4, 0x24 },
+ { "shrd", 0xAC, 0x2C },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void %s(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0x%02X); }\n", p->name, p->code);
- printf("void %s(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0x%02X, &_cl); }\n", p->name, p->code);
+ printf("void %s(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(Reg(), op, reg, imm, 0x%02X, 0x%02X); }\n", p->name, p->code, p->code2);
+ printf("void %s(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(Reg(), op, reg, 0, 0x%02X, 0x%02X, &_cl); }\n", p->name, p->code, p->code2);
+ printf("void %s(const Reg& d, const Operand& op, const Reg& reg, uint8_t imm) { opShxd(d, op, reg, imm, 0x%02X, 0x%02X); }\n", p->name, p->code, p->code2);
+ printf("void %s(const Reg& d, const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(d, op, reg, 0, 0x%02X, 0x%02X, &_cl); }\n", p->name, p->code, p->code2);
}
}
{
@@ -928,21 +983,26 @@ void put()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void %s(const Reg&reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x%02X); }\n", p->name, p->code);
+ printf("void %s(const Reg&reg, const Operand& op) { opRO(reg, op, T_0F, 0x%02X, op.isREG(16|i32e)); }\n", p->name, p->code);
}
}
{
const struct Tbl {
const char *name;
uint8_t code;
+ uint8_t code2;
} tbl[] = {
- { "popcnt", 0xB8 },
- { "tzcnt", 0xBC },
- { "lzcnt", 0xBD },
+ { "popcnt", 0xB8, 0 },
+ { "tzcnt", 0xBC, 0xF4 },
+ { "lzcnt", 0xBD, 0xF5 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void %s(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0x%02X); }\n", p->name, p->code);
+ if (p->code2) {
+ printf("void %s(const Reg&reg, const Operand& op) { if (opROO(Reg(), op, reg, T_APX|T_NF, 0x%02X)) return; opCnt(reg, op, 0x%02X); }\n", p->name, p->code2, p->code);
+ } else {
+ printf("void %s(const Reg&reg, const Operand& op) { opCnt(reg, op, 0x%02X); }\n", p->name, p->code);
+ }
}
}
// SSSE3
@@ -969,9 +1029,9 @@ void put()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X, 0x66, NONE, 0x38); }\n", p->name, p->code);
+ printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X, T_0F38, T_66); }\n", p->name, p->code);
}
- printf("void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast<uint8_t>(imm), 0x3a); }\n");
+ printf("void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0F, T_0F3A, T_66, static_cast<uint8_t>(imm)); }\n");
}
{
const struct Tbl {
@@ -991,34 +1051,34 @@ void put()
}
{
const struct Tbl {
- uint8_t code1;
- int code2;
+ const char *type;
+ uint8_t code;
uint8_t ext;
const char *name;
- uint8_t prefix;
+ bool fwait;
} tbl[] = {
- { 0x0F, 0xAE, 2, "ldmxcsr", 0 },
- { 0x0F, 0xAE, 3, "stmxcsr", 0 },
- { 0x0F, 0xAE, 7, "clflush", 0 },
- { 0x0F, 0xAE, 7, "clflushopt", 0x66 },
- { 0xDF, NONE, 4, "fbld", 0 },
- { 0xDF, NONE, 6, "fbstp", 0 },
- { 0xD9, NONE, 5, "fldcw", 0 },
- { 0xD9, NONE, 4, "fldenv", 0 },
- { 0xDD, NONE, 4, "frstor", 0 },
- { 0xDD, NONE, 6, "fsave", 0x9B },
- { 0xDD, NONE, 6, "fnsave", 0 },
- { 0xD9, NONE, 7, "fstcw", 0x9B },
- { 0xD9, NONE, 7, "fnstcw", 0 },
- { 0xD9, NONE, 6, "fstenv", 0x9B },
- { 0xD9, NONE, 6, "fnstenv", 0 },
- { 0xDD, NONE, 7, "fstsw", 0x9B },
- { 0xDD, NONE, 7, "fnstsw", 0 },
- { 0x0F, 0xAE, 1, "fxrstor", 0 },
+ { "T_0F", 0xAE, 2, "ldmxcsr", false },
+ { "T_0F", 0xAE, 3, "stmxcsr", false },
+ { "T_0F", 0xAE, 7, "clflush", false },
+ { "T_66 | T_0F", 0xAE, 7, "clflushopt", false},
+ { "0", 0xDF, 4, "fbld", false },
+ { "0", 0xDF, 6, "fbstp", false },
+ { "0", 0xD9, 5, "fldcw", false },
+ { "0", 0xD9, 4, "fldenv", false },
+ { "0", 0xDD, 4, "frstor", false },
+ { "0", 0xDD, 6, "fsave", true },
+ { "0", 0xDD, 6, "fnsave", false },
+ { "0", 0xD9, 7, "fstcw", true },
+ { "0", 0xD9, 7, "fnstcw", false },
+ { "0", 0xD9, 6, "fstenv", true },
+ { "0", 0xD9, 6, "fnstenv", false },
+ { "0", 0xDD, 7, "fstsw", true },
+ { "0", 0xDD, 7, "fnstsw", false },
+ { "T_0F", 0xAE, 1, "fxrstor", false },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- putMemOp(p->name, p->prefix, p->ext, p->code1, p->code2);
+ putMemOp(p->name, p->type, p->ext, p->code, 32, p->fwait);
}
puts("void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x9B); db(0xDF); db(0xE0); }");
puts("void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xDF); db(0xE0); }");
@@ -1034,7 +1094,7 @@ void put()
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
// cast xmm register to 16bit register to put 0x66
- printf("void %s(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x%02X); }\n", p->name, p->code);
+ printf("void %s(const Address& addr, const Xmm& reg) { opMR(addr, Reg16(reg.getIdx()), T_0F, 0x%02X); }\n", p->name, p->code);
}
}
{
@@ -1050,6 +1110,20 @@ void put()
printf("void %s(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0x%02X); }\n", p->name, p->code);
}
}
+ {
+ const struct Tbl {
+ uint8_t prefix;
+ const char *name;
+ } tbl[] = {
+ { 0x66, "adcx" },
+ { 0xF3, "adox" },
+ };
+ for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+ const Tbl *p = &tbl[i];
+ printf("void %s(const Reg32e& reg, const Operand& op) { if (!reg.isREG(16|i32e) && reg.getBit() == op.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) if (opROO(Reg(), op, reg, T_%02X, 0x66)) return; opRO(reg, op, T_%02X | T_0F38, 0xF6); }\n", p->name, p->prefix, p->prefix);
+ printf("void %s(const Reg32e& d, const Reg32e& reg, const Operand& op) { opROO(d, op, reg, T_%02X, 0x66); }\n", p->name, p->prefix);
+ }
+ }
{ // in/out
puts("void in_(const Reg& a, uint8_t v) { opInOut(a, 0xE4, v); }");
puts("void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); }");
@@ -1058,67 +1132,65 @@ void put()
}
// mpx
{
- puts("void bndcl(const BoundsReg& bnd, const Operand& op) { db(0xF3); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }");
- puts("void bndcu(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }");
- puts("void bndcn(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1B, NONE, !op.isMEM()); }");
- puts("void bndldx(const BoundsReg& bnd, const Address& addr) { opMIB(addr, bnd, 0x0F, 0x1A); }");
- puts("void bndmk(const BoundsReg& bnd, const Address& addr) { db(0xF3); opModM(addr, bnd, 0x0F, 0x1B); }");
- puts("void bndmov(const BoundsReg& bnd, const Operand& op) { db(0x66); opModRM(bnd, op, op.isBNDREG(), op.isMEM(), 0x0F, 0x1A); }");
- puts("void bndmov(const Address& addr, const BoundsReg& bnd) { db(0x66); opModM(addr, bnd, 0x0F, 0x1B); }");
- puts("void bndstx(const Address& addr, const BoundsReg& bnd) { opMIB(addr, bnd, 0x0F, 0x1B); }");
+ puts("void bndcl(const BoundsReg& bnd, const Operand& op) { opRext(op, i32e, bnd.getIdx(), T_F3 | T_0F, 0x1A, !op.isMEM()); }");
+ puts("void bndcu(const BoundsReg& bnd, const Operand& op) { opRext(op, i32e, bnd.getIdx(), T_F2 | T_0F, 0x1A, !op.isMEM()); }");
+ puts("void bndcn(const BoundsReg& bnd, const Operand& op) { opRext(op, i32e, bnd.getIdx(), T_F2 | T_0F, 0x1B, !op.isMEM()); }");
+ puts("void bndldx(const BoundsReg& bnd, const Address& addr) { opMIB(addr, bnd, T_0F, 0x1A); }");
+ puts("void bndmk(const BoundsReg& bnd, const Address& addr) { opMR(addr, bnd, T_F3 | T_0F, 0x1B); }");
+ puts("void bndmov(const BoundsReg& bnd, const Operand& op) { opRO(bnd, op, T_66 | T_0F, 0x1A, op.isBNDREG()); }");
+ puts("void bndmov(const Address& addr, const BoundsReg& bnd) { opMR(addr, bnd, T_66 | T_0F, 0x1B); }");
+ puts("void bndstx(const Address& addr, const BoundsReg& bnd) { opMIB(addr, bnd, T_0F, 0x1B); }");
}
// misc
{
- puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); }");
- puts("void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }");
+ puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opMR(addr, reg, 0, 0x8D); }");
+ puts("void bswap(const Reg32e& reg) { opRR(Reg32(1), reg, 0, 0x0F); }");
puts("void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }");
puts("void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } }");
- puts("void xadd(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); }");
- puts("void cmpxchg(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xB0 | (reg.isBit(8) ? 0 : 1)); }");
- puts("void movbe(const Reg& reg, const Address& addr) { opModM(addr, reg, 0x0F, 0x38, 0xF0); }");
- puts("void movbe(const Address& addr, const Reg& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF1); }");
- puts("void movdiri(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF9); }");
- puts("void movdir64b(const Reg& reg, const Address& addr) { db(0x66); opModM(addr, reg.cvt32(), 0x0F, 0x38, 0xF8); }");
- puts("void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }");
- puts("void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); }");
- puts("void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xC7); }");
+ puts("void xadd(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xC0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); }");
+ puts("void cmpxchg(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xB0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); }");
+ puts("void movbe(const Reg& reg, const Address& addr) { if (opROO(Reg(), addr, reg, T_APX, 0x60)) return; opMR(addr, reg, T_0F38, 0xF0); }");
+ puts("void movbe(const Address& addr, const Reg& reg) { if (opROO(Reg(), addr, reg, T_APX, 0x61)) return; opMR(addr, reg, T_0F38, 0xF1); }");
+ puts("void movdiri(const Address& addr, const Reg32e& reg) { if (opROO(Reg(), addr, reg, T_APX, 0xF9)) return; opMR(addr, reg, T_0F38, 0xF9); }");
+ puts("void movdir64b(const Reg& reg, const Address& addr) { if (opROO(Reg(), addr, reg.cvt32(), T_APX|T_66, 0xF8)) return; opMR(addr, reg.cvt32(), T_66 | T_0F38, 0xF8); }");
+ puts("void cmpxchg8b(const Address& addr) { opMR(addr, Reg32(1), T_0F, 0xC7); }");
puts("void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); }");
puts("void pextrb(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x14, imm); }");
puts("void pextrd(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x16, imm); }");
puts("void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); }");
- puts("void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(mmx, op, 0xC4, mmx.isXMM() ? 0x66 : NONE, 0, imm); }");
- puts("void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }");
- puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); }");
- puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); }");
-
- puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(reg, mmx, 0x0F, 0xD7); }");
- puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModR(reg1, reg2, 0x0F, 0xF7); }");
- puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opModR(reg, xmm, 0x0F, 0x50); }");
+ puts("void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(mmx, op, T_0F | (mmx.isXMM() ? T_66 : T_NONE), 0xC4, 0, imm); }");
+ puts("void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x21, isXMM_XMMorMEM, imm); }");
+ puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x20, isXMM_REG32orMEM, imm); }");
+ puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x22, isXMM_REG32orMEM, imm); }");
+
+ puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(reg, mmx, T_0F, 0xD7); }");
+ puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opRR(reg1, reg2, T_0F, 0xF7); }");
+ puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opRR(reg, xmm, T_0F, 0x50); }");
puts("void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }");
- puts("void movntps(const Address& addr, const Xmm& xmm) { opModM(addr, Mmx(xmm.getIdx()), 0x0F, 0x2B); }");
- puts("void movntdqa(const Xmm& xmm, const Address& addr) { db(0x66); opModM(addr, xmm, 0x0F, 0x38, 0x2A); }");
- puts("void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); }");
- puts("void movnti(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0xC3); }");
- puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModM(addr, mmx, 0x0F, 0xE7); }");
-
- puts("void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x7E); }");
- puts("void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }");
- puts("void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x6E); }");
- puts("void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }");
- puts("void movq2dq(const Xmm& xmm, const Mmx& mmx) { db(0xF3); opModR(xmm, mmx, 0x0F, 0xD6); }");
- puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); }");
- puts("void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opModRM(mmx, op, (mmx.getKind() == op.getKind()), op.isMEM(), 0x0F, mmx.isXMM() ? 0x7E : 0x6F); }");
- puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, mmx.isXMM() ? 0xD6 : 0x7F); }");
- puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }");
- puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }");
- puts("void crc32(const Reg32e& reg, const Operand& op) { if (reg.isBit(32) && op.isBit(16)) db(0x66); db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1)); }");
+ puts("void movntps(const Address& addr, const Xmm& xmm) { opMR(addr, Mmx(xmm.getIdx()), T_0F, 0x2B); }");
+ puts("void movntdqa(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_66 | T_0F38, 0x2A); }");
+ puts("void lddqu(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_F2 | T_0F, 0xF0); }");
+ puts("void movnti(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F, 0xC3); }");
+ puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opMR(addr, mmx, T_0F, 0xE7); }");
+
+ puts("void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x7E); }");
+ puts("void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }");
+ puts("void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x6E); }");
+ puts("void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }");
+ puts("void movq2dq(const Xmm& xmm, const Mmx& mmx) { opRR(xmm, mmx, T_F3 | T_0F, 0xD6); }");
+ puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { opRR(mmx, xmm, T_F2 | T_0F, 0xD6); }");
+ puts("void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opRO(mmx, op, T_0F, mmx.isXMM() ? 0x7E : 0x6F, mmx.getKind() == op.getKind()); }");
+ puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, mmx.isXMM() ? 0xD6 : 0x7F); }");
+ puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(6, Operand::REG, r.getBit()), r, T_0F, 0xC7); }");
+ puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(7, Operand::REG, r.getBit()), r, T_0F, 0xC7); }");
+ puts("void crc32(const Reg32e& r, const Operand& op) { if (!((r.isBit(32) && op.isBit(8|16|32)) || (r.isBit(64) && op.isBit(8|64)))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) int code = 0xF0 | (op.isBit(8) ? 0 : 1); uint64_t type = op.isBit(16) ? T_66:0; if (opROO(Reg(), op, static_cast<const Reg&>(r), T_APX|type, code)) return; opRO(r, op, T_F2|T_0F38|type, code); }");
puts("void tpause(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x66); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
puts("void umonitor(const Reg& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) int bit = r.getBit(); if (BIT != bit) { if ((BIT == 32 && bit == 16) || (BIT == 64 && bit == 32)) { db(0x67); } else { XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) } } db(0xF3); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
puts("void umwait(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xF2); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
- puts("void clwb(const Address& addr) { db(0x66); opMIB(addr, esi, 0x0F, 0xAE); }");
- puts("void cldemote(const Address& addr) { opMIB(addr, eax, 0x0F, 0x1C); }");
+ puts("void clwb(const Address& addr) { opMR(addr, esi, T_66 | T_0F, 0xAE); }");
+ puts("void cldemote(const Address& addr) { opMR(addr, eax, T_0F, 0x1C); }");
puts("void xabort(uint8_t imm) { db(0xC6); db(0xF8); db(imm); }");
puts("void xbegin(uint32_t rel) { db(0xC7); db(0xF8); dd(rel); }");
@@ -1264,7 +1336,7 @@ void put()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
bool hasIMM;
int mode; // 1 : SSE, 2 : AVX, 3 : SSE + AVX
} tbl[] = {
@@ -1341,18 +1413,15 @@ void put()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- std::string type = type2String(p->type);
+ std::string s = type2String(p->type);
if (p->mode & 1) {
const char *immS1 = p->hasIMM ? ", uint8_t imm" : "";
const char *immS2 = p->hasIMM ? ", imm" : ", NONE";
- const char *prefTbl[5] = { "NONE", "0x66", "0xF3", "0xF2" };
- const char *pref = prefTbl[getPP(p->type)];
- const char *suf = p->type & T_0F38 ? "0x38" : p->type & T_0F3A ? "0x3A" : "NONE";
- printf("void %s(const Xmm& xmm, const Operand& op%s) { opGen(xmm, op, 0x%02X, %s, isXMM_XMMorMEM%s, %s); }\n", p->name, immS1, p->code, pref, immS2, suf);
+ printf("void %s(const Xmm& xmm, const Operand& op%s) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, immS1, s.c_str(), p->code, immS2);
}
if (p->mode & 2) {
printf("void v%s(const Xmm& xm, const Operand& op%s) { opAVX_X_XM_IMM(xm, op, %s, 0x%02X%s); }\n"
- , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
+ , p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
}
}
@@ -1361,7 +1430,7 @@ void put()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
} tbl[] = {
{ 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_M_K },
{ 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 | T_M_K },
@@ -1372,9 +1441,9 @@ void put()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- std::string type = type2String(p->type);
+ std::string s = type2String(p->type);
printf("void v%s(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, %s, 0x%02X); }\n"
- , p->name, type.c_str(), p->code);
+ , p->name, s.c_str(), p->code);
}
}
// (x, x/m), (y, y/m), (x, x, x/m), (y, y, y/m)
@@ -1382,7 +1451,7 @@ void put()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
int mode; // 1 : sse, 2 : avx, 3 : sse + avx
} tbl[] = {
{ 0xD0, "addsubpd", T_0F | T_66 | T_YMM, 3 },
@@ -1397,17 +1466,15 @@ void put()
{ 0xDE, "aesdec", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
{ 0xDF, "aesdeclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
};
- const uint8_t ppTbl[] = { 0, 0x66, 0xf3, 0xf2 };
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- std::string type = type2String(p->type);
+ std::string s = type2String(p->type);
if (p->mode & 1) {
- uint8_t pref = ppTbl[getPP(p->type)];
- printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, p->code, pref, p->type & T_0F38 ? ", NONE, 0x38" : "");
+ printf("void %s(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, %s, 0x%02X, isXMM_XMMorMEM); }\n", p->name, s.c_str(), p->code);
}
if (p->mode & 2) {
printf("void v%s(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, %s, 0x%02X); }\n"
- , p->name, type.c_str(), p->code);
+ , p->name, s.c_str(), p->code);
}
}
}
@@ -1432,7 +1499,7 @@ void put()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
} tbl[] = {
{ 0x36, "vpermd", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 },
{ 0x36, "vpermq", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64 },
@@ -1441,8 +1508,8 @@ void put()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- std::string type = type2String(p.type);
- printf("void %s(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
+ std::string s = type2String(p.type);
+ printf("void %s(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
}
}
// vpermq, vpermpd
@@ -1450,15 +1517,15 @@ void put()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
} tbl[] = {
{ 0x00, "vpermq", T_0F3A | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 },
{ 0x01, "vpermpd", T_0F3A | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- std::string type = type2String(p.type);
- printf("void %s(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, %s, 0x%02X, imm); }\n", p.name, type.c_str(), p.code);
+ std::string s = type2String(p.type);
+ printf("void %s(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, %s, 0x%02X, imm); }\n", p.name, s.c_str(), p.code);
}
}
// vcmpeqps
@@ -1496,11 +1563,11 @@ void put()
const Tbl& p = tbl[i];
char c = p.isH ? 'h' : 'l';
const char *suf = p.isPd ? "pd" : "ps";
- const char *type = p.isPd ? "T_0F | T_66 | T_EVEX | T_EW1 | T_N8" : "T_0F | T_EVEX | T_EW0 | T_N8";
+ std::string s = type2String(p.isPd ? (T_0F | T_66 | T_EVEX | T_EW1 | T_N8) : (T_0F | T_EVEX | T_EW0 | T_N8));
printf("void vmov%c%s(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, %s, 0x%02X); }\n"
- , c, suf, type, p.code);
+ , c, suf, s.c_str(), p.code);
printf("void vmov%c%s(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s, 0x%02X); }\n"
- , c, suf, type, p.code + 1);
+ , c, suf, s.c_str(), p.code + 1);
}
}
// FMA
@@ -1549,9 +1616,9 @@ void put()
} else { // ss
t |= T_ER_X | T_N4;
}
- std::string type = type2String(t);
+ std::string s = type2String(t);
printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
- , tbl[i].name, ord[k].str, suf.c_str(), type.c_str(), tbl[i].code + ord[k].code);
+ , tbl[i].name, ord[k].str, suf.c_str(), s.c_str(), tbl[i].code + ord[k].code);
}
}
}
@@ -1564,7 +1631,7 @@ void put()
const struct Tbl {
const char *name;
uint8_t code;
- int type;
+ uint64_t type;
bool ew1;
} tbl[] = {
{ "vbroadcastss", 0x18, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N4 },
@@ -1575,8 +1642,8 @@ void put()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- std::string type = type2String(p.type);
- printf("void %s(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
+ std::string s = type2String(p.type);
+ printf("void %s(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
}
puts("void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }");
@@ -1611,7 +1678,7 @@ void put()
const char *name;
uint8_t code;
int idx;
- int type;
+ uint64_t type;
} tbl[] = {
{ "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
{ "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
@@ -1626,8 +1693,8 @@ void put()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- std::string type = type2String(p.type);
- printf("void v%s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code);
+ std::string s = type2String(p.type);
+ printf("void v%s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, s.c_str(), p.code);
}
}
// 4-op
@@ -1650,7 +1717,7 @@ void put()
printf("void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }\n");
printf("void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }\n");
- printf("void vmovq(const Xmm& x, const Address& addr) { int type, code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }\n");
+ printf("void vmovq(const Xmm& x, const Address& addr) { uint64_t type; uint8_t code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }\n");
printf("void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 0xD6 : 0x7E); }\n");
printf("void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }\n");
@@ -1668,7 +1735,7 @@ void put()
// vmovsd, vmovss
for (int i = 0; i < 2; i++) {
char c1 = i == 0 ? 'd' : 's';
- int type = T_0F | T_EVEX;
+ uint64_t type = T_0F | T_EVEX;
type |= i == 0 ? (T_F2 | T_EW1 | T_N8) : (T_F3 | T_EW0 | T_N4);
std::string s = type2String(type);
printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, %s, 0x10); }\n", c1, s.c_str());
@@ -1702,7 +1769,7 @@ void put()
{
const struct Tbl {
const char *name;
- int type;
+ uint64_t type;
uint8_t code;
} tbl[] = {
{ "vbcstnebf162ps", T_F3 | T_0F38 | T_W0 | T_B16 | T_YMM, 0xB1 },
@@ -1716,59 +1783,59 @@ void put()
const Tbl& p = tbl[i];
printf("void %s(const Xmm& x, const Address& addr) { opVex(x, 0, addr, %s, 0x%02X); }\n", p.name, type2String(p.type).c_str(), p.code);
}
- puts("void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32 | orEvexIf(encoding), 0x72); }");
+ printf("void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, %s|orEvexIf(encoding), 0x72); }\n", type2String(T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32).c_str());
}
// haswell gpr(reg, reg, r/m)
{
const struct Tbl {
const char *name;
- int type;
+ uint64_t type;
uint8_t code;
} tbl[] = {
- { "andn", T_0F38, 0xF2 },
- { "mulx", T_F2 | T_0F38, 0xF6 },
- { "pdep", T_F2 | T_0F38, 0xF5 },
- { "pext", T_F3 | T_0F38, 0xF5 },
+ { "andn", T_0F38 | T_APX | T_NF, 0xF2 },
+ { "mulx", T_F2 | T_0F38 | T_APX , 0xF6 },
+ { "pdep", T_F2 | T_0F38 | T_APX, 0xF5 },
+ { "pext", T_F3 | T_0F38 | T_APX, 0xF5 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- printf("void %s(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, %s, 0x%x, true); }\n", p.name, type2String(p.type).c_str(), p.code);
+ printf("void %s(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opRRO(r1, r2, op, %s, 0x%x); }\n", p.name, type2String(p.type).c_str(), p.code);
}
}
// gpr(reg, r/m, reg)
{
const struct Tbl {
const char *name;
- int type;
+ uint64_t type;
uint8_t code;
} tbl[] = {
- { "bextr", T_0F38, 0xF7 },
- { "bzhi", T_0F38, 0xF5 },
- { "sarx", T_0F38 | T_F3, 0xF7 },
- { "shlx", T_0F38 | T_66, 0xF7 },
- { "shrx", T_0F38 | T_F2, 0xF7 },
+ { "bextr", T_0F38 | T_APX | T_NF, 0xF7 },
+ { "bzhi", T_0F38 | T_APX | T_NF, 0xF5 },
+ { "sarx", T_0F38 | T_F3 | T_APX, 0xF7 },
+ { "shlx", T_0F38 | T_66 | T_APX, 0xF7 },
+ { "shrx", T_0F38 | T_F2 | T_APX, 0xF7 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- printf("void %s(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, %s, 0x%x, false); }\n", p.name, type2String(p.type).c_str(), p.code);
+ printf("void %s(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, %s, 0x%x); }\n", p.name, type2String(p.type).c_str(), p.code);
}
- puts("void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); }");
+ puts("void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opRRO(r, Reg32e(0, r.getBit()), op, T_0F3A|T_F2|T_APX|T_MAP3, 0xF0, imm); }");
}
// gpr(reg, r/m)
{
const struct Tbl {
const char *name;
- int type;
+ uint64_t type;
uint8_t code;
uint8_t idx;
} tbl[] = {
- { "blsi", T_0F38, 0xF3, 3 },
- { "blsmsk", T_0F38, 0xF3, 2 },
- { "blsr", T_0F38, 0xF3, 1 },
+ { "blsi", T_0F38 | T_APX|T_NF, 0xF3, 3 },
+ { "blsmsk", T_0F38 | T_APX|T_NF, 0xF3, 2 },
+ { "blsr", T_0F38 | T_APX|T_NF, 0xF3, 1 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- printf("void %s(const Reg32e& r, const Operand& op) { opGpr(Reg32e(%d, r.getBit()), op, r, %s, 0x%x, false); }\n", p.name, p.idx, type2String(p.type).c_str(), p.code);
+ printf("void %s(const Reg32e& r, const Operand& op) { opRRO(Reg32e(%d, r.getBit()), r, op, %s, 0x%x); }\n", p.name, p.idx, type2String(p.type).c_str(), p.code);
}
}
// gather
@@ -1801,7 +1868,7 @@ void put()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
} tbl[] = {
{ 0x50, "vpdpbusd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
{ 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
@@ -1812,8 +1879,8 @@ void put()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- std::string type = type2String(p->type);
- printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, type.c_str(), p->code);
+ std::string s = type2String(p->type);
+ printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, s.c_str(), p->code);
}
}
// avx-vnni-int8
@@ -1822,7 +1889,7 @@ void put()
const struct Tbl {
uint8_t code;
const char *name;
- int type;
+ uint64_t type;
} tbl[] = {
{ 0x50, "vpdpbssd", T_F2 | T_0F38 | T_W0 | T_YMM },
{ 0x51, "vpdpbssds", T_F2 | T_0F38 | T_W0 | T_YMM },
@@ -1840,8 +1907,8 @@ void put()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- std::string type = type2String(p->type);
- printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
+ std::string s = type2String(p->type);
+ printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
}
}
}
@@ -1867,8 +1934,8 @@ void put32()
{ "popa", 0x61 },
};
putGeneric(tbl, NUM_OF_ARRAY(tbl));
- putLoadSeg("lds", 0xC5, NONE);
- putLoadSeg("les", 0xC4, NONE);
+ putLoadSeg("lds", 0, 0xC5);
+ putLoadSeg("les", 0, 0xC4);
}
void put64()
@@ -1876,33 +1943,35 @@ void put64()
put_jREGz("ecx", true);
put_jREGz("rcx", false);
- const GenericTbl tbl[] = {
- { "cdqe", 0x48, 0x98 },
- { "cqo", 0x48, 0x99 },
- { "cmpsq", 0x48, 0xA7 },
- { "popfq", 0x9D },
- { "pushfq", 0x9C },
- { "lodsq", 0x48, 0xAD },
- { "movsq", 0x48, 0xA5 },
- { "scasq", 0x48, 0xAF },
- { "stosq", 0x48, 0xAB },
- { "syscall", 0x0F, 0x05 },
- { "sysret", 0x0F, 0x07 },
- { "clui", 0xF3, 0x0F, 0x01, 0xEE },
- { "stui", 0xF3, 0x0F, 0x01, 0xEF },
- { "testui", 0xF3, 0x0F, 0x01, 0xED },
- { "uiret", 0xF3, 0x0F, 0x01, 0xEC },
- };
- putGeneric(tbl, NUM_OF_ARRAY(tbl));
+ {
+ const GenericTbl tbl[] = {
+ { "cdqe", 0x48, 0x98 },
+ { "cqo", 0x48, 0x99 },
+ { "cmpsq", 0x48, 0xA7 },
+ { "popfq", 0x9D },
+ { "pushfq", 0x9C },
+ { "lodsq", 0x48, 0xAD },
+ { "movsq", 0x48, 0xA5 },
+ { "scasq", 0x48, 0xAF },
+ { "stosq", 0x48, 0xAB },
+ { "syscall", 0x0F, 0x05 },
+ { "sysret", 0x0F, 0x07 },
+ { "clui", 0xF3, 0x0F, 0x01, 0xEE },
+ { "stui", 0xF3, 0x0F, 0x01, 0xEF },
+ { "testui", 0xF3, 0x0F, 0x01, 0xED },
+ { "uiret", 0xF3, 0x0F, 0x01, 0xEC },
+ };
+ putGeneric(tbl, NUM_OF_ARRAY(tbl));
+ }
- putMemOp("cmpxchg16b", 0, 1, 0x0F, 0xC7, 64);
- putMemOp("fxrstor64", 0, 1, 0x0F, 0xAE, 64);
- puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }");
- puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }");
- puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }");
- puts("void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }");
- puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }");
- puts("void senduipi(const Reg64& r) { db(0xF3); opModR(Reg32(6), r.cvt32(), 0x0F, 0xC7); }");
+ putMemOp("cmpxchg16b", "T_0F", 1, 0xC7, 64);
+ putMemOp("fxrstor64", "T_0F", 1, 0xAE, 64);
+ puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }");
+ puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }");
+ puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opRO(reg, op, 0, 0x63); }");
+ puts("void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x16, 0, imm); }");
+ puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x22, 0, imm); }");
+ puts("void senduipi(const Reg64& r) { opRR(Reg32(6), r.cvt32(), T_F3 | T_0F, 0xC7); }");
puts("void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); }");
puts("void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); }");
@@ -1911,6 +1980,11 @@ void put64()
puts("void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }");
puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }");
+ puts("void jmpabs(uint64_t addr) { db(0xD5); db(0x00); db(0xA1); dq(addr); }");
+ puts("void push2(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(6), T_APX|T_ND1|T_W0, 0xFF); }");
+ puts("void push2p(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(6), T_APX|T_ND1|T_W1, 0xFF); }");
+ puts("void pop2(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(0), T_APX|T_ND1|T_W0, 0x8F); }");
+ puts("void pop2p(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(0), T_APX|T_ND1|T_W1, 0x8F); }");
// CMPccXADD
{
const struct Tbl {
@@ -1936,19 +2010,19 @@ void put64()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- printf("void cmp%sxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0x%02X, false); }\n", p->name, p->code);
+ printf("void cmp%sxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0x%02X); }\n", p->name, p->code);
}
}
}
void putAMX_TILE()
{
- puts("void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }");
- puts("void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }");
- puts("void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }");
- puts("void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); }");
+ puts("void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }");
+ puts("void sttilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_66|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_66|T_0F38 | T_W0, 0x49); }");
+ puts("void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4B); }");
+ puts("void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66|T_0F38|T_W0, 0x4B); }");
puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }");
- puts("void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }");
+ puts("void tilestored(const Address& addr, const Tmm& tm) { if (opROO(Reg(), addr, tm, T_APX|T_F3|T_0F38|T_W0, 0x4B)) return; opVex(tm, &tmm0, addr, T_F3|T_0F38|T_W0, 0x4B); }");
puts("void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }");
}
void putAMX_INT8()
diff --git a/test/Makefile b/test/Makefile
index 28d8e6f..6d7cfc8 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -1,5 +1,5 @@
-TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32 detect_x32
-XBYAK_INC=../xbyak/xbyak.h
+TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32 detect_x32 apx
+XBYAK_INC=../xbyak/xbyak.h ../xbyak/xbyak_mnemonic.h
UNAME_S=$(shell uname -s)
ifeq ($(shell ./detect_x32),x32)
X32?=1
@@ -31,30 +31,32 @@ CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wwrite-strings -Wfloat-equal -
CFLAGS=-O2 -Wall -I.. -I. $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
make_nm:
$(CXX) $(CFLAGS) make_nm.cpp -o $@
-normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
+normalize_prefix: normalize_prefix.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) normalize_prefix.cpp -o $@
-test_mmx: test_mmx.cpp ../xbyak/xbyak.h
+test_mmx: test_mmx.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) test_mmx.cpp -o $@ -lpthread
-jmp: jmp.cpp ../xbyak/xbyak.h
+jmp: jmp.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) jmp.cpp -o $@ -m32
-jmp64: jmp.cpp ../xbyak/xbyak.h
+jmp64: jmp.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) jmp.cpp -o $@ -m64
-address: address.cpp ../xbyak/xbyak.h
+address: address.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) address.cpp -o $@ -m32
-address64: address.cpp ../xbyak/xbyak.h
+address64: address.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) address.cpp -o $@ -m64
-bad_address: bad_address.cpp ../xbyak/xbyak.h
+bad_address: bad_address.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) bad_address.cpp -o $@
-misc: misc.cpp ../xbyak/xbyak.h
+misc: misc.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) misc.cpp -o $@
-misc32: misc.cpp ../xbyak/xbyak.h
+misc32: misc.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) misc.cpp -o $@ -DXBYAK32
-cvt_test: cvt_test.cpp ../xbyak/xbyak.h
+cvt_test: cvt_test.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) $< -o $@
-cvt_test32: cvt_test.cpp ../xbyak/xbyak.h
+cvt_test32: cvt_test.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) $< -o $@ -DXBYAK32
-noexception: noexception.cpp ../xbyak/xbyak.h
+noexception: noexception.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) $< -o $@ -fno-exceptions
+apx: apx.cpp $(XBYAK_INC)
+ $(CXX) $(CFLAGS) apx.cpp -o $@
test_nm: normalize_prefix $(TARGET)
$(MAKE) -C ../gen
@@ -71,6 +73,7 @@ endif
./misc
./misc32
./cvt_test
+ ./apx
ifeq ($(BIT),64)
CXX=$(CXX) ./test_address.sh 64
ifneq ($(X32),1)
diff --git a/test/apx.cpp b/test/apx.cpp
new file mode 100644
index 0000000..f03b032
--- /dev/null
+++ b/test/apx.cpp
@@ -0,0 +1,1777 @@
+#include <stdio.h>
+#include <string.h>
+#include <xbyak/xbyak.h>
+#include <xbyak/xbyak_util.h>
+#include <cybozu/test.hpp>
+
+#ifndef XBYAK64
+ #error "only 64-bit mode"
+#endif
+
+using namespace Xbyak;
+
+CYBOZU_TEST_AUTO(reg_rm)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ adc(r17, ptr [rax]);
+ adc(ptr [r18], rdx);
+ adc(r30, rcx);
+ add(r17, ptr [rax]);
+ add(ptr [r18], rdx);
+ add(r30, rcx);
+ and_(r17, ptr [rax]);
+ and_(ptr [r18], rdx);
+ and_(r30, rcx);
+ cmp(r17, ptr [rax]);
+ cmp(ptr [r18], rdx);
+ cmp(r30, rcx);
+ or_(r17, ptr [rax]);
+ or_(ptr [r18], rdx);
+ or_(r30, rcx);
+ sbb(r17, ptr [rax]);
+ sbb(ptr [r18], rdx);
+ sbb(r30, rcx);
+ sub(r17, ptr [rax]);
+ sub(ptr [r18], rdx);
+ sub(r30, rcx);
+ xor_(r17, ptr [rax]);
+ xor_(ptr [r18], rdx);
+ xor_(r30, rcx);
+
+ add(r30, ptr [rbx+rcx*4]);
+ add(rax, ptr [r30+rcx*4]);
+ add(rax, ptr [rbx+r30*4]);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0xd5, 0x48, 0x13, 0x08, 0xd5, 0x18, 0x11, 0x12, 0xd5, 0x19, 0x11, 0xce, 0xd5, 0x48, 0x03, 0x08,
+ 0xd5, 0x18, 0x01, 0x12, 0xd5, 0x19, 0x01, 0xce, 0xd5, 0x48, 0x23, 0x08, 0xd5, 0x18, 0x21, 0x12,
+ 0xd5, 0x19, 0x21, 0xce, 0xd5, 0x48, 0x3b, 0x08, 0xd5, 0x18, 0x39, 0x12, 0xd5, 0x19, 0x39, 0xce,
+ 0xd5, 0x48, 0x0b, 0x08, 0xd5, 0x18, 0x09, 0x12, 0xd5, 0x19, 0x09, 0xce, 0xd5, 0x48, 0x1b, 0x08,
+ 0xd5, 0x18, 0x19, 0x12, 0xd5, 0x19, 0x19, 0xce, 0xd5, 0x48, 0x2b, 0x08, 0xd5, 0x18, 0x29, 0x12,
+ 0xd5, 0x19, 0x29, 0xce, 0xd5, 0x48, 0x33, 0x08, 0xd5, 0x18, 0x31, 0x12, 0xd5, 0x19, 0x31, 0xce,
+ 0xd5, 0x4c, 0x03, 0x34, 0x8b, 0xd5, 0x19, 0x03, 0x04, 0x8e, 0xd5, 0x2a, 0x03, 0x04, 0xb3,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(reg64)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ adc(r30, rax); adc(r30, rcx); adc(r30, rdx); adc(r30, rbx); adc(r30, rsp); adc(r30, rbp); adc(r30, rsi); adc(r30, rdi);
+ adc(r30, r8); adc(r30, r9); adc(r30, r10); adc(r30, r11); adc(r30, r12); adc(r30, r13); adc(r30, r14); adc(r30, r15);
+ adc(r30, r16); adc(r30, r17); adc(r30, r18); adc(r30, r19); adc(r30, r20); adc(r30, r21); adc(r30, r22); adc(r30, r23);
+ adc(r30, r24); adc(r30, r25); adc(r30, r26); adc(r30, r27); adc(r30, r28); adc(r30, r29); adc(r30, r30); adc(r30, r31);
+ adc(rax, r30); adc(rcx, r30); adc(rdx, r30); adc(rbx, r30); adc(rsp, r30); adc(rbp, r30); adc(rsi, r30); adc(rdi, r30);
+ adc(r8, r30); adc(r9, r30); adc(r10, r30); adc(r11, r30); adc(r12, r30); adc(r13, r30); adc(r14, r30); adc(r15, r30);
+ adc(r16, r30); adc(r17, r30); adc(r18, r30); adc(r19, r30); adc(r20, r30); adc(r21, r30); adc(r22, r30); adc(r23, r30);
+ adc(r24, r30); adc(r25, r30); adc(r26, r30); adc(r27, r30); adc(r28, r30); adc(r29, r30); adc(r30, r30); adc(r31, r30);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0xd5, 0x19, 0x11, 0xc6, 0xd5, 0x19, 0x11, 0xce, 0xd5, 0x19, 0x11, 0xd6, 0xd5, 0x19, 0x11, 0xde,
+ 0xd5, 0x19, 0x11, 0xe6, 0xd5, 0x19, 0x11, 0xee, 0xd5, 0x19, 0x11, 0xf6, 0xd5, 0x19, 0x11, 0xfe,
+ 0xd5, 0x1d, 0x11, 0xc6, 0xd5, 0x1d, 0x11, 0xce, 0xd5, 0x1d, 0x11, 0xd6, 0xd5, 0x1d, 0x11, 0xde,
+ 0xd5, 0x1d, 0x11, 0xe6, 0xd5, 0x1d, 0x11, 0xee, 0xd5, 0x1d, 0x11, 0xf6, 0xd5, 0x1d, 0x11, 0xfe,
+ 0xd5, 0x59, 0x11, 0xc6, 0xd5, 0x59, 0x11, 0xce, 0xd5, 0x59, 0x11, 0xd6, 0xd5, 0x59, 0x11, 0xde,
+ 0xd5, 0x59, 0x11, 0xe6, 0xd5, 0x59, 0x11, 0xee, 0xd5, 0x59, 0x11, 0xf6, 0xd5, 0x59, 0x11, 0xfe,
+ 0xd5, 0x5d, 0x11, 0xc6, 0xd5, 0x5d, 0x11, 0xce, 0xd5, 0x5d, 0x11, 0xd6, 0xd5, 0x5d, 0x11, 0xde,
+ 0xd5, 0x5d, 0x11, 0xe6, 0xd5, 0x5d, 0x11, 0xee, 0xd5, 0x5d, 0x11, 0xf6, 0xd5, 0x5d, 0x11, 0xfe,
+ 0xd5, 0x4c, 0x11, 0xf0, 0xd5, 0x4c, 0x11, 0xf1, 0xd5, 0x4c, 0x11, 0xf2, 0xd5, 0x4c, 0x11, 0xf3,
+ 0xd5, 0x4c, 0x11, 0xf4, 0xd5, 0x4c, 0x11, 0xf5, 0xd5, 0x4c, 0x11, 0xf6, 0xd5, 0x4c, 0x11, 0xf7,
+ 0xd5, 0x4d, 0x11, 0xf0, 0xd5, 0x4d, 0x11, 0xf1, 0xd5, 0x4d, 0x11, 0xf2, 0xd5, 0x4d, 0x11, 0xf3,
+ 0xd5, 0x4d, 0x11, 0xf4, 0xd5, 0x4d, 0x11, 0xf5, 0xd5, 0x4d, 0x11, 0xf6, 0xd5, 0x4d, 0x11, 0xf7,
+ 0xd5, 0x5c, 0x11, 0xf0, 0xd5, 0x5c, 0x11, 0xf1, 0xd5, 0x5c, 0x11, 0xf2, 0xd5, 0x5c, 0x11, 0xf3,
+ 0xd5, 0x5c, 0x11, 0xf4, 0xd5, 0x5c, 0x11, 0xf5, 0xd5, 0x5c, 0x11, 0xf6, 0xd5, 0x5c, 0x11, 0xf7,
+ 0xd5, 0x5d, 0x11, 0xf0, 0xd5, 0x5d, 0x11, 0xf1, 0xd5, 0x5d, 0x11, 0xf2, 0xd5, 0x5d, 0x11, 0xf3,
+ 0xd5, 0x5d, 0x11, 0xf4, 0xd5, 0x5d, 0x11, 0xf5, 0xd5, 0x5d, 0x11, 0xf6, 0xd5, 0x5d, 0x11, 0xf7,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(reg32)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ adc(r30d, eax); adc(r30d, ecx); adc(r30d, edx); adc(r30d, ebx); adc(r30d, esp); adc(r30d, ebp); adc(r30d, esi); adc(r30d, edi);
+ adc(r30d, r8d); adc(r30d, r9d); adc(r30d, r10d); adc(r30d, r11d); adc(r30d, r12d); adc(r30d, r13d); adc(r30d, r14d); adc(r30d, r15d);
+ adc(r30d, r16d); adc(r30d, r17d); adc(r30d, r18d); adc(r30d, r19d); adc(r30d, r20d); adc(r30d, r21d); adc(r30d, r22d); adc(r30d, r23d);
+ adc(r30d, r24d); adc(r30d, r25d); adc(r30d, r26d); adc(r30d, r27d); adc(r30d, r28d); adc(r30d, r29d); adc(r30d, r30d); adc(r30d, r31d);
+ adc(eax, r29d); adc(ecx, r29d); adc(edx, r29d); adc(ebx, r29d); adc(esp, r29d); adc(ebp, r29d); adc(esi, r29d); adc(edi, r29d);
+ adc(r8d, r29d); adc(r9d, r29d); adc(r10d, r29d); adc(r11d, r29d); adc(r12d, r29d); adc(r13d, r29d); adc(r14d, r29d); adc(r15d, r29d);
+ adc(r16d, r29d); adc(r17d, r29d); adc(r18d, r29d); adc(r19d, r29d); adc(r20d, r29d); adc(r21d, r29d); adc(r22d, r29d); adc(r23d, r29d);
+ adc(r24d, r29d); adc(r25d, r29d); adc(r26d, r29d); adc(r27d, r29d); adc(r28d, r29d); adc(r29d, r29d); adc(r30d, r29d); adc(r31d, r29d);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0xd5, 0x11, 0x11, 0xc6, 0xd5, 0x11, 0x11, 0xce, 0xd5, 0x11, 0x11, 0xd6, 0xd5, 0x11, 0x11, 0xde,
+ 0xd5, 0x11, 0x11, 0xe6, 0xd5, 0x11, 0x11, 0xee, 0xd5, 0x11, 0x11, 0xf6, 0xd5, 0x11, 0x11, 0xfe,
+ 0xd5, 0x15, 0x11, 0xc6, 0xd5, 0x15, 0x11, 0xce, 0xd5, 0x15, 0x11, 0xd6, 0xd5, 0x15, 0x11, 0xde,
+ 0xd5, 0x15, 0x11, 0xe6, 0xd5, 0x15, 0x11, 0xee, 0xd5, 0x15, 0x11, 0xf6, 0xd5, 0x15, 0x11, 0xfe,
+ 0xd5, 0x51, 0x11, 0xc6, 0xd5, 0x51, 0x11, 0xce, 0xd5, 0x51, 0x11, 0xd6, 0xd5, 0x51, 0x11, 0xde,
+ 0xd5, 0x51, 0x11, 0xe6, 0xd5, 0x51, 0x11, 0xee, 0xd5, 0x51, 0x11, 0xf6, 0xd5, 0x51, 0x11, 0xfe,
+ 0xd5, 0x55, 0x11, 0xc6, 0xd5, 0x55, 0x11, 0xce, 0xd5, 0x55, 0x11, 0xd6, 0xd5, 0x55, 0x11, 0xde,
+ 0xd5, 0x55, 0x11, 0xe6, 0xd5, 0x55, 0x11, 0xee, 0xd5, 0x55, 0x11, 0xf6, 0xd5, 0x55, 0x11, 0xfe,
+ 0xd5, 0x44, 0x11, 0xe8, 0xd5, 0x44, 0x11, 0xe9, 0xd5, 0x44, 0x11, 0xea, 0xd5, 0x44, 0x11, 0xeb,
+ 0xd5, 0x44, 0x11, 0xec, 0xd5, 0x44, 0x11, 0xed, 0xd5, 0x44, 0x11, 0xee, 0xd5, 0x44, 0x11, 0xef,
+ 0xd5, 0x45, 0x11, 0xe8, 0xd5, 0x45, 0x11, 0xe9, 0xd5, 0x45, 0x11, 0xea, 0xd5, 0x45, 0x11, 0xeb,
+ 0xd5, 0x45, 0x11, 0xec, 0xd5, 0x45, 0x11, 0xed, 0xd5, 0x45, 0x11, 0xee, 0xd5, 0x45, 0x11, 0xef,
+ 0xd5, 0x54, 0x11, 0xe8, 0xd5, 0x54, 0x11, 0xe9, 0xd5, 0x54, 0x11, 0xea, 0xd5, 0x54, 0x11, 0xeb,
+ 0xd5, 0x54, 0x11, 0xec, 0xd5, 0x54, 0x11, 0xed, 0xd5, 0x54, 0x11, 0xee, 0xd5, 0x54, 0x11, 0xef,
+ 0xd5, 0x55, 0x11, 0xe8, 0xd5, 0x55, 0x11, 0xe9, 0xd5, 0x55, 0x11, 0xea, 0xd5, 0x55, 0x11, 0xeb,
+ 0xd5, 0x55, 0x11, 0xec, 0xd5, 0x55, 0x11, 0xed, 0xd5, 0x55, 0x11, 0xee, 0xd5, 0x55, 0x11, 0xef,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(reg16)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ adc(r30w, ax); adc(r30w, cx); adc(r30w, dx); adc(r30w, bx); adc(r30w, sp); adc(r30w, bp); adc(r30w, si); adc(r30w, di);
+ adc(r30w, r8w); adc(r30w, r9w); adc(r30w, r10w); adc(r30w, r11w); adc(r30w, r12w); adc(r30w, r13w); adc(r30w, r14w); adc(r30w, r15w);
+ adc(r30w, r16w); adc(r30w, r17w); adc(r30w, r18w); adc(r30w, r19w); adc(r30w, r20w); adc(r30w, r21w); adc(r30w, r22w); adc(r30w, r23w);
+ adc(r30w, r24w); adc(r30w, r25w); adc(r30w, r26w); adc(r30w, r27w); adc(r30w, r28w); adc(r30w, r29w); adc(r30w, r30w); adc(r30w, r31w);
+ adc(ax, r29w); adc(cx, r29w); adc(dx, r29w); adc(bx, r29w); adc(sp, r29w); adc(bp, r29w); adc(si, r29w); adc(di, r29w);
+ adc(r8w, r29w); adc(r9w, r29w); adc(r10w, r29w); adc(r11w, r29w); adc(r12w, r29w); adc(r13w, r29w); adc(r14w, r29w); adc(r15w, r29w);
+ adc(r16w, r29w); adc(r17w, r29w); adc(r18w, r29w); adc(r19w, r29w); adc(r20w, r29w); adc(r21w, r29w); adc(r22w, r29w); adc(r23w, r29w);
+ adc(r24w, r29w); adc(r25w, r29w); adc(r26w, r29w); adc(r27w, r29w); adc(r28w, r29w); adc(r29w, r29w); adc(r30w, r29w); adc(r31w, r29w);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x66, 0xd5, 0x11, 0x11, 0xc6, 0x66, 0xd5, 0x11, 0x11, 0xce, 0x66, 0xd5, 0x11, 0x11, 0xd6, 0x66,
+ 0xd5, 0x11, 0x11, 0xde, 0x66, 0xd5, 0x11, 0x11, 0xe6, 0x66, 0xd5, 0x11, 0x11, 0xee, 0x66, 0xd5,
+ 0x11, 0x11, 0xf6, 0x66, 0xd5, 0x11, 0x11, 0xfe, 0x66, 0xd5, 0x15, 0x11, 0xc6, 0x66, 0xd5, 0x15,
+ 0x11, 0xce, 0x66, 0xd5, 0x15, 0x11, 0xd6, 0x66, 0xd5, 0x15, 0x11, 0xde, 0x66, 0xd5, 0x15, 0x11,
+ 0xe6, 0x66, 0xd5, 0x15, 0x11, 0xee, 0x66, 0xd5, 0x15, 0x11, 0xf6, 0x66, 0xd5, 0x15, 0x11, 0xfe,
+ 0x66, 0xd5, 0x51, 0x11, 0xc6, 0x66, 0xd5, 0x51, 0x11, 0xce, 0x66, 0xd5, 0x51, 0x11, 0xd6, 0x66,
+ 0xd5, 0x51, 0x11, 0xde, 0x66, 0xd5, 0x51, 0x11, 0xe6, 0x66, 0xd5, 0x51, 0x11, 0xee, 0x66, 0xd5,
+ 0x51, 0x11, 0xf6, 0x66, 0xd5, 0x51, 0x11, 0xfe, 0x66, 0xd5, 0x55, 0x11, 0xc6, 0x66, 0xd5, 0x55,
+ 0x11, 0xce, 0x66, 0xd5, 0x55, 0x11, 0xd6, 0x66, 0xd5, 0x55, 0x11, 0xde, 0x66, 0xd5, 0x55, 0x11,
+ 0xe6, 0x66, 0xd5, 0x55, 0x11, 0xee, 0x66, 0xd5, 0x55, 0x11, 0xf6, 0x66, 0xd5, 0x55, 0x11, 0xfe,
+ 0x66, 0xd5, 0x44, 0x11, 0xe8, 0x66, 0xd5, 0x44, 0x11, 0xe9, 0x66, 0xd5, 0x44, 0x11, 0xea, 0x66,
+ 0xd5, 0x44, 0x11, 0xeb, 0x66, 0xd5, 0x44, 0x11, 0xec, 0x66, 0xd5, 0x44, 0x11, 0xed, 0x66, 0xd5,
+ 0x44, 0x11, 0xee, 0x66, 0xd5, 0x44, 0x11, 0xef, 0x66, 0xd5, 0x45, 0x11, 0xe8, 0x66, 0xd5, 0x45,
+ 0x11, 0xe9, 0x66, 0xd5, 0x45, 0x11, 0xea, 0x66, 0xd5, 0x45, 0x11, 0xeb, 0x66, 0xd5, 0x45, 0x11,
+ 0xec, 0x66, 0xd5, 0x45, 0x11, 0xed, 0x66, 0xd5, 0x45, 0x11, 0xee, 0x66, 0xd5, 0x45, 0x11, 0xef,
+ 0x66, 0xd5, 0x54, 0x11, 0xe8, 0x66, 0xd5, 0x54, 0x11, 0xe9, 0x66, 0xd5, 0x54, 0x11, 0xea, 0x66,
+ 0xd5, 0x54, 0x11, 0xeb, 0x66, 0xd5, 0x54, 0x11, 0xec, 0x66, 0xd5, 0x54, 0x11, 0xed, 0x66, 0xd5,
+ 0x54, 0x11, 0xee, 0x66, 0xd5, 0x54, 0x11, 0xef, 0x66, 0xd5, 0x55, 0x11, 0xe8, 0x66, 0xd5, 0x55,
+ 0x11, 0xe9, 0x66, 0xd5, 0x55, 0x11, 0xea, 0x66, 0xd5, 0x55, 0x11, 0xeb, 0x66, 0xd5, 0x55, 0x11,
+ 0xec, 0x66, 0xd5, 0x55, 0x11, 0xed, 0x66, 0xd5, 0x55, 0x11, 0xee, 0x66, 0xd5, 0x55, 0x11, 0xef,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(reg8)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ adc(r17b, al); adc(r17b, cl); adc(r17b, dl); adc(r17b, bl); adc(r17b, spl); adc(r17b, bpl); adc(r17b, sil); adc(r17b, dil);
+ adc(r17b, r8b); adc(r17b, r9b); adc(r17b, r10b); adc(r17b, r11b); adc(r17b, r12b); adc(r17b, r13b); adc(r17b, r14b); adc(r17b, r15b);
+ adc(r17b, r16b); adc(r17b, r17b); adc(r17b, r18b); adc(r17b, r19b); adc(r17b, r20b); adc(r17b, r21b); adc(r17b, r22b); adc(r17b, r23b);
+ adc(r17b, r24b); adc(r17b, r25b); adc(r17b, r26b); adc(r17b, r27b); adc(r17b, r28b); adc(r17b, r29b); adc(r17b, r30b); adc(r17b, r31b);
+ adc(al, r20b); adc(cl, r20b); adc(dl, r20b); adc(bl, r20b); adc(spl, r20b); adc(bpl, r20b); adc(sil, r20b); adc(dil, r20b);
+ adc(r8b, r20b); adc(r9b, r20b); adc(r10b, r20b); adc(r11b, r20b); adc(r12b, r20b); adc(r13b, r20b); adc(r14b, r20b); adc(r15b, r20b);
+ adc(r16b, r20b); adc(r17b, r20b); adc(r18b, r20b); adc(r19b, r20b); adc(r20b, r20b); adc(r21b, r20b); adc(r22b, r20b); adc(r23b, r20b);
+ adc(r24b, r20b); adc(r25b, r20b); adc(r26b, r20b); adc(r27b, r20b); adc(r28b, r20b); adc(r29b, r20b); adc(r30b, r20b); adc(r31b, r20b);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0xd5, 0x10, 0x10, 0xc1, 0xd5, 0x10, 0x10, 0xc9, 0xd5, 0x10, 0x10, 0xd1, 0xd5, 0x10, 0x10, 0xd9,
+ 0xd5, 0x10, 0x10, 0xe1, 0xd5, 0x10, 0x10, 0xe9, 0xd5, 0x10, 0x10, 0xf1, 0xd5, 0x10, 0x10, 0xf9,
+ 0xd5, 0x14, 0x10, 0xc1, 0xd5, 0x14, 0x10, 0xc9, 0xd5, 0x14, 0x10, 0xd1, 0xd5, 0x14, 0x10, 0xd9,
+ 0xd5, 0x14, 0x10, 0xe1, 0xd5, 0x14, 0x10, 0xe9, 0xd5, 0x14, 0x10, 0xf1, 0xd5, 0x14, 0x10, 0xf9,
+ 0xd5, 0x50, 0x10, 0xc1, 0xd5, 0x50, 0x10, 0xc9, 0xd5, 0x50, 0x10, 0xd1, 0xd5, 0x50, 0x10, 0xd9,
+ 0xd5, 0x50, 0x10, 0xe1, 0xd5, 0x50, 0x10, 0xe9, 0xd5, 0x50, 0x10, 0xf1, 0xd5, 0x50, 0x10, 0xf9,
+ 0xd5, 0x54, 0x10, 0xc1, 0xd5, 0x54, 0x10, 0xc9, 0xd5, 0x54, 0x10, 0xd1, 0xd5, 0x54, 0x10, 0xd9,
+ 0xd5, 0x54, 0x10, 0xe1, 0xd5, 0x54, 0x10, 0xe9, 0xd5, 0x54, 0x10, 0xf1, 0xd5, 0x54, 0x10, 0xf9,
+ 0xd5, 0x40, 0x10, 0xe0, 0xd5, 0x40, 0x10, 0xe1, 0xd5, 0x40, 0x10, 0xe2, 0xd5, 0x40, 0x10, 0xe3,
+ 0xd5, 0x40, 0x10, 0xe4, 0xd5, 0x40, 0x10, 0xe5, 0xd5, 0x40, 0x10, 0xe6, 0xd5, 0x40, 0x10, 0xe7,
+ 0xd5, 0x41, 0x10, 0xe0, 0xd5, 0x41, 0x10, 0xe1, 0xd5, 0x41, 0x10, 0xe2, 0xd5, 0x41, 0x10, 0xe3,
+ 0xd5, 0x41, 0x10, 0xe4, 0xd5, 0x41, 0x10, 0xe5, 0xd5, 0x41, 0x10, 0xe6, 0xd5, 0x41, 0x10, 0xe7,
+ 0xd5, 0x50, 0x10, 0xe0, 0xd5, 0x50, 0x10, 0xe1, 0xd5, 0x50, 0x10, 0xe2, 0xd5, 0x50, 0x10, 0xe3,
+ 0xd5, 0x50, 0x10, 0xe4, 0xd5, 0x50, 0x10, 0xe5, 0xd5, 0x50, 0x10, 0xe6, 0xd5, 0x50, 0x10, 0xe7,
+ 0xd5, 0x51, 0x10, 0xe0, 0xd5, 0x51, 0x10, 0xe1, 0xd5, 0x51, 0x10, 0xe2, 0xd5, 0x51, 0x10, 0xe3,
+ 0xd5, 0x51, 0x10, 0xe4, 0xd5, 0x51, 0x10, 0xe5, 0xd5, 0x51, 0x10, 0xe6, 0xd5, 0x51, 0x10, 0xe7,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(rm)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ adc(r16, ptr [r17+0x40]);
+ adc(ptr [r17+0x40], r16);
+ adc(r16d, ptr [r17+0x40]);
+ adc(ptr [r17+0x40], r16d);
+ adc(r16w, ptr [r17+0x40]);
+ adc(ptr [r17+0x40], r16w);
+ adc(r16b, ptr [r17+0x40]);
+ adc(ptr [r17+0x40], r16b);
+ adc(r16, ptr [r18*4+0x40]);
+ adc(ptr [r18*4+0x40], r16);
+ adc(r16d, ptr [r18*4+0x40]);
+ adc(ptr [r18*4+0x40], r16d);
+ adc(r16w, ptr [r18*4+0x40]);
+ adc(ptr [r18*4+0x40], r16w);
+ adc(r16b, ptr [r18*4+0x40]);
+ adc(ptr [r18*4+0x40], r16b);
+ adc(r16, ptr [r17+r18*4+0x40]);
+ adc(ptr [r17+r18*4+0x40], r16);
+ adc(r16d, ptr [r17+r18*4+0x40]);
+ adc(ptr [r17+r18*4+0x40], r16d);
+ adc(r16w, ptr [r17+r18*4+0x40]);
+ adc(ptr [r17+r18*4+0x40], r16w);
+ adc(r16b, ptr [r17+r18*4+0x40]);
+ adc(ptr [r17+r18*4+0x40], r16b);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0xd5, 0x58, 0x13, 0x41, 0x40, 0xd5, 0x58, 0x11, 0x41, 0x40, 0xd5, 0x50, 0x13, 0x41, 0x40, 0xd5,
+ 0x50, 0x11, 0x41, 0x40, 0x66, 0xd5, 0x50, 0x13, 0x41, 0x40, 0x66, 0xd5, 0x50, 0x11, 0x41, 0x40,
+ 0xd5, 0x50, 0x12, 0x41, 0x40, 0xd5, 0x50, 0x10, 0x41, 0x40, 0xd5, 0x68, 0x13, 0x04, 0x95, 0x40,
+ 0x00, 0x00, 0x00, 0xd5, 0x68, 0x11, 0x04, 0x95, 0x40, 0x00, 0x00, 0x00, 0xd5, 0x60, 0x13, 0x04,
+ 0x95, 0x40, 0x00, 0x00, 0x00, 0xd5, 0x60, 0x11, 0x04, 0x95, 0x40, 0x00, 0x00, 0x00, 0x66, 0xd5,
+ 0x60, 0x13, 0x04, 0x95, 0x40, 0x00, 0x00, 0x00, 0x66, 0xd5, 0x60, 0x11, 0x04, 0x95, 0x40, 0x00,
+ 0x00, 0x00, 0xd5, 0x60, 0x12, 0x04, 0x95, 0x40, 0x00, 0x00, 0x00, 0xd5, 0x60, 0x10, 0x04, 0x95,
+ 0x40, 0x00, 0x00, 0x00, 0xd5, 0x78, 0x13, 0x44, 0x91, 0x40, 0xd5, 0x78, 0x11, 0x44, 0x91, 0x40,
+ 0xd5, 0x70, 0x13, 0x44, 0x91, 0x40, 0xd5, 0x70, 0x11, 0x44, 0x91, 0x40, 0x66, 0xd5, 0x70, 0x13,
+ 0x44, 0x91, 0x40, 0x66, 0xd5, 0x70, 0x11, 0x44, 0x91, 0x40, 0xd5, 0x70, 0x12, 0x44, 0x91, 0x40,
+ 0xd5, 0x70, 0x10, 0x44, 0x91, 0x40,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(r3)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ adc(r20b, r21b, r23b);
+ adc(r20w, r21w, r23w);
+ adc(r20d, r21d, r23d);
+ adc(r20, r21, r23);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x62, 0xec, 0x5c, 0x10, 0x10, 0xfd, 0x62, 0xec, 0x5d, 0x10, 0x11, 0xfd, 0x62, 0xec, 0x5c, 0x10,
+ 0x11, 0xfd, 0x62, 0xec, 0xdc, 0x10, 0x11, 0xfd,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(rm3)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ adc(rax, r18, ptr [rbx+rcx*4+0x123]);
+ adc(rax, ptr [rbx+rcx*4+0x123], r20);
+ adc(rax, ptr [r30], r29);
+ adc(r11, r13, ptr [r10]);
+ adc(r11, r13, ptr [r10*4]);
+ adc(r11, ptr [r10*8], r9);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x62, 0xe4, 0xfc, 0x18, 0x13, 0x94, 0x8b, 0x23, 0x01, 0x00, 0x00, 0x62, 0xe4, 0xfc, 0x18, 0x11,
+ 0xa4, 0x8b, 0x23, 0x01, 0x00, 0x00, 0x62, 0x4c, 0xfc, 0x18, 0x11, 0x2e, 0x62, 0x54, 0xa4, 0x18,
+ 0x13, 0x2a, 0x62, 0x34, 0xa4, 0x18, 0x13, 0x2c, 0x95, 0x00, 0x00, 0x00, 0x00, 0x62, 0x34, 0xa4,
+ 0x18, 0x11, 0x0c, 0xd5, 0x00, 0x00, 0x00, 0x00,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(rm3_2)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ adc(r20b, r21b, r23b);
+ adc(r20w, r21w, r23w);
+ adc(r20d, r21d, r23d);
+ adc(r20, r21, r23);
+ adc(r20b, ptr [rax+rcx*4+0x7fffffff], 0x12);
+ adc(r20w, ptr [rax+rcx*4+0x7fffffff], 0x1234);
+ adc(r20d, ptr [rax+rcx*4+0x7fffffff], 0x12345678);
+ adc(r20, ptr [rax+rcx*4+0x7fffffff], 0x12345678);
+ adc(r20b, al, 0x12);
+ adc(r20w, ax, 0x1234);
+ adc(r20d, eax, 0x12345678);
+ adc(r20, rax, 0x12345678);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x62, 0xec, 0x5c, 0x10, 0x10, 0xfd, 0x62, 0xec, 0x5d, 0x10, 0x11, 0xfd, 0x62, 0xec, 0x5c, 0x10,
+ 0x11, 0xfd, 0x62, 0xec, 0xdc, 0x10, 0x11, 0xfd, 0x62, 0xf4, 0x5c, 0x10, 0x80, 0x94, 0x88, 0xff,
+ 0xff, 0xff, 0x7f, 0x12, 0x62, 0xf4, 0x5d, 0x10, 0x81, 0x94, 0x88, 0xff, 0xff, 0xff, 0x7f, 0x34,
+ 0x12, 0x62, 0xf4, 0x5c, 0x10, 0x81, 0x94, 0x88, 0xff, 0xff, 0xff, 0x7f, 0x78, 0x56, 0x34, 0x12,
+ 0x62, 0xf4, 0xdc, 0x10, 0x81, 0x94, 0x88, 0xff, 0xff, 0xff, 0x7f, 0x78, 0x56, 0x34, 0x12, 0x62,
+ 0xf4, 0x5c, 0x10, 0x80, 0xd0, 0x12, 0x62, 0xf4, 0x5d, 0x10, 0x81, 0xd0, 0x34, 0x12, 0x62, 0xf4,
+ 0x5c, 0x10, 0x81, 0xd0, 0x78, 0x56, 0x34, 0x12, 0x62, 0xf4, 0xdc, 0x10, 0x81, 0xd0, 0x78, 0x56,
+ 0x34, 0x12,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(adcx_adox)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ adcx(rax, r30);
+ adcx(ecx, r20d);
+ adcx(ecx, ptr [r31+r29*4]);
+ adcx(r20d, ptr [rax]);
+ adcx(r16, ptr [r31+r29*4]);
+ adcx(r17, ptr [rax]);
+ adcx(rax, rcx, rdx);
+
+ adox(rax, r30);
+ adox(ecx, r20d);
+ adox(ecx, ptr [r31+r29*4]);
+ adox(r20d, ptr [rax]);
+ adox(r16, ptr [r31+r29*4]);
+ adox(r17, ptr [rax]);
+ adox(rax, rcx, rdx);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x62, 0xdc, 0xfd, 0x08, 0x66, 0xc6, 0x62, 0xfc, 0x7d, 0x08, 0x66, 0xcc, 0x62, 0x9c, 0x79, 0x08,
+ 0x66, 0x0c, 0xaf, 0x62, 0xe4, 0x7d, 0x08, 0x66, 0x20, 0x62, 0x8c, 0xf9, 0x08, 0x66, 0x04, 0xaf,
+ 0x62, 0xe4, 0xfd, 0x08, 0x66, 0x08, 0x62, 0xf4, 0xfd, 0x18, 0x66, 0xca,
+
+ 0x62, 0xdc, 0xfe, 0x08, 0x66, 0xc6, 0x62, 0xfc, 0x7e, 0x08, 0x66, 0xcc, 0x62, 0x9c, 0x7a, 0x08,
+ 0x66, 0x0c, 0xaf, 0x62, 0xe4, 0x7e, 0x08, 0x66, 0x20, 0x62, 0x8c, 0xfa, 0x08, 0x66, 0x04, 0xaf,
+ 0x62, 0xe4, 0xfe, 0x08, 0x66, 0x08, 0x62, 0xf4, 0xfe, 0x18, 0x66, 0xca,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(r3_2)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ add(rax, rcx, rdx);
+ adc(rax, rcx, rdx);
+ and_(rax, rcx, rdx);
+ or_(rax, rcx, rdx);
+ sbb(rax, rcx, rdx);
+ sub(rax, rcx, rdx);
+ xor_(rax, rcx, rdx);
+ add(r30, ptr [r20], r9);
+ adc(r30, ptr [r20], r9);
+ and_(r30, ptr [r20], r9);
+ or_(r30, ptr [r20], r9);
+ sbb(r30, ptr [r20], r9);
+ sub(r30, ptr [r20], r9);
+ xor_(r30, ptr [r20], r9);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x62, 0xf4, 0xfc, 0x18, 0x01, 0xd1, 0x62, 0xf4, 0xfc, 0x18, 0x11, 0xd1, 0x62, 0xf4, 0xfc, 0x18,
+ 0x21, 0xd1, 0x62, 0xf4, 0xfc, 0x18, 0x09, 0xd1, 0x62, 0xf4, 0xfc, 0x18, 0x19, 0xd1, 0x62, 0xf4,
+ 0xfc, 0x18, 0x29, 0xd1, 0x62, 0xf4, 0xfc, 0x18, 0x31, 0xd1, 0x62, 0x7c, 0x8c, 0x10, 0x01, 0x0c,
+ 0x24, 0x62, 0x7c, 0x8c, 0x10, 0x11, 0x0c, 0x24, 0x62, 0x7c, 0x8c, 0x10, 0x21, 0x0c, 0x24, 0x62,
+ 0x7c, 0x8c, 0x10, 0x09, 0x0c, 0x24, 0x62, 0x7c, 0x8c, 0x10, 0x19, 0x0c, 0x24, 0x62, 0x7c, 0x8c,
+ 0x10, 0x29, 0x0c, 0x24, 0x62, 0x7c, 0x8c, 0x10, 0x31, 0x0c, 0x24,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(NF)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ add(rax, rcx, rdx);
+ add(rax|T_nf, rcx, rdx);
+ and_(rax, rcx, rdx);
+ and_(rax|T_nf, rcx, rdx);
+ or_(rax, rcx, rdx);
+ or_(rax|T_nf, rcx, rdx);
+ sub(rax, rcx, rdx);
+ sub(rax|T_nf, rcx, rdx);
+ xor_(rax, rcx, rdx);
+ xor_(rax|T_nf, rcx, rdx);
+
+ add(rax, rcx, 3);
+ add(rax|T_nf, rcx, 3);
+ and_(rax, rcx, 3);
+ and_(rax|T_nf, rcx, 3);
+ or_(rax, rcx, 3);
+ or_(rax|T_nf, rcx, 3);
+ sub(rax, rcx, 3);
+ sub(rax|T_nf, rcx, 3);
+ xor_(rax, rcx, 3);
+ xor_(rax|T_nf, rcx, 3);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x62, 0xf4, 0xfc, 0x18, 0x01, 0xd1,
+ 0x62, 0xf4, 0xfc, 0x1c, 0x01, 0xd1,
+
+ 0x62, 0xf4, 0xfc, 0x18, 0x21, 0xd1,
+ 0x62, 0xf4, 0xfc, 0x1c, 0x21, 0xd1,
+
+ 0x62, 0xf4, 0xfc, 0x18, 0x09, 0xd1,
+ 0x62, 0xf4, 0xfc, 0x1c, 0x09, 0xd1,
+
+ 0x62, 0xf4, 0xfc, 0x18, 0x29, 0xd1,
+ 0x62, 0xf4, 0xfc, 0x1c, 0x29, 0xd1,
+
+ 0x62, 0xf4, 0xfc, 0x18, 0x31, 0xd1,
+ 0x62, 0xf4, 0xfc, 0x1c, 0x31, 0xd1,
+
+ 0x62, 0xf4, 0xfc, 0x18, 0x83, 0xc1, 0x03,
+ 0x62, 0xf4, 0xfc, 0x1c, 0x83, 0xc1, 0x03,
+
+ 0x62, 0xf4, 0xfc, 0x18, 0x83, 0xe1, 0x03,
+ 0x62, 0xf4, 0xfc, 0x1c, 0x83, 0xe1, 0x03,
+
+ 0x62, 0xf4, 0xfc, 0x18, 0x83, 0xc9, 0x03,
+ 0x62, 0xf4, 0xfc, 0x1c, 0x83, 0xc9, 0x03,
+
+ 0x62, 0xf4, 0xfc, 0x18, 0x83, 0xe9, 0x03,
+ 0x62, 0xf4, 0xfc, 0x1c, 0x83, 0xe9, 0x03,
+
+ 0x62, 0xf4, 0xfc, 0x18, 0x83, 0xf1, 0x03,
+ 0x62, 0xf4, 0xfc, 0x1c, 0x83, 0xf1, 0x03,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(andn_etc)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ andn(r29, r30, r31);
+ andn(r29|T_nf, r30, r31);
+ andn(eax, ecx, r17d); // 32bit
+ andn(r29, r30, ptr [r31+r20*4]);
+
+ mulx(eax, ecx, r17d);
+ mulx(r29, r30, r31);
+ mulx(r29, r30, ptr [r31+r20*4]);
+ pdep(eax, ecx, r17d);
+ pdep(r29, r30, r31);
+ pdep(r29, r30, ptr [r31+r20*4]);
+ pext(eax, ecx, r17d);
+ pext(r29, r30, r31);
+ pext(r29, r30, ptr [r31+r20*4]);
+
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // andn
+ 0x62, 0x4a, 0x8c, 0x00, 0xf2, 0xef,
+ 0x62, 0x4a, 0x8c, 0x04, 0xf2, 0xef,
+ 0x62, 0xfa, 0x74, 0x08, 0xf2, 0xc1,
+ 0x62, 0x4a, 0x88, 0x00, 0xf2, 0x2c, 0xa7,
+
+ // mulx, pdep, pext
+ 0x62, 0xfa, 0x77, 0x08, 0xf6, 0xc1, 0x62, 0x4a, 0x8f, 0x00, 0xf6, 0xef, 0x62, 0x4a, 0x8b, 0x00, 0xf6, 0x2c, 0xa7,
+ 0x62, 0xfa, 0x77, 0x08, 0xf5, 0xc1, 0x62, 0x4a, 0x8f, 0x00, 0xf5, 0xef, 0x62, 0x4a, 0x8b, 0x00, 0xf5, 0x2c, 0xa7,
+ 0x62, 0xfa, 0x76, 0x08, 0xf5, 0xc1, 0x62, 0x4a, 0x8e, 0x00, 0xf5, 0xef, 0x62, 0x4a, 0x8a, 0x00, 0xf5, 0x2c, 0xa7,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(bextr_etc)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ bextr(r29, r30, r31);
+ bextr(r29|T_nf, r30, r31);
+ bextr(eax, ecx, r17d);
+ bextr(r29, ptr [r31+r20*4], r30);
+
+ bzhi(r29, r30, r31);
+ bzhi(r29|T_nf, r30, r31);
+ bzhi(eax, ecx, r17d);
+ bzhi(r29, ptr [r31+r20*4], r30);
+
+ sarx(r29, r30, r31);
+ sarx(eax, ecx, r17d);
+ sarx(r29, ptr [r31+r20*4], r30);
+
+ shlx(r29, r30, r31);
+ shlx(eax, ecx, r17d);
+ shlx(r29, ptr [r31+r20*4], r30);
+
+ shrx(r29, r30, r31);
+ shrx(eax, ecx, r17d);
+ shrx(r29, ptr [r31+r20*4], r30);
+
+ blsi(r30, r31);
+ blsi(r30|T_nf, r31);
+ blsi(ecx, r17d);
+ blsi(r30, ptr [r31+r20*4]);
+
+ blsmsk(r30, r31);
+ blsmsk(r30|T_nf, r31);
+ blsmsk(ecx, r17d);
+ blsmsk(r30, ptr [r31+r20*4]);
+
+ blsr(r30, r31);
+ blsr(r30|T_nf, r31);
+ blsr(ecx, r17d);
+ blsr(r30, ptr [r31+r20*4]);
+
+ rorx(r30, r31, 3);
+ rorx(ecx, r17d, 5);
+ rorx(r30, ptr [r31+r20*4], 4);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // bextr
+ 0x62, 0x4a, 0x84, 0x00, 0xf7, 0xee,
+ 0x62, 0x4a, 0x84, 0x04, 0xf7, 0xee,
+ 0x62, 0xf2, 0x74, 0x00, 0xf7, 0xc1,
+ 0x62, 0x4a, 0x88, 0x00, 0xf7, 0x2c, 0xa7,
+
+ // bzhi
+ 0x62, 0x4a, 0x84, 0x00, 0xf5, 0xee,
+ 0x62, 0x4a, 0x84, 0x04, 0xf5, 0xee,
+ 0x62, 0xf2, 0x74, 0x00, 0xf5, 0xc1,
+ 0x62, 0x4a, 0x88, 0x00, 0xf5, 0x2c, 0xa7,
+
+ // sarx
+ 0x62, 0x4a, 0x86, 0x00, 0xf7, 0xee,
+ 0x62, 0xf2, 0x76, 0x00, 0xf7, 0xc1,
+ 0x62, 0x4a, 0x8a, 0x00, 0xf7, 0x2c, 0xa7,
+
+ // shlx
+ 0x62, 0x4a, 0x85, 0x00, 0xf7, 0xee,
+ 0x62, 0xf2, 0x75, 0x00, 0xf7, 0xc1,
+ 0x62, 0x4a, 0x89, 0x00, 0xf7, 0x2c, 0xa7,
+
+ // shrx
+ 0x62, 0x4a, 0x87, 0x00, 0xf7, 0xee,
+ 0x62, 0xf2, 0x77, 0x00, 0xf7, 0xc1,
+ 0x62, 0x4a, 0x8b, 0x00, 0xf7, 0x2c, 0xa7,
+
+ // blsi
+ 0x62, 0xda, 0x8c, 0x00, 0xf3, 0xdf,
+ 0x62, 0xda, 0x8c, 0x04, 0xf3, 0xdf,
+ 0x62, 0xfa, 0x74, 0x08, 0xf3, 0xd9,
+ 0x62, 0xda, 0x88, 0x00, 0xf3, 0x1c, 0xa7,
+
+ // blsmsk
+ 0x62, 0xda, 0x8c, 0x00, 0xf3, 0xd7,
+ 0x62, 0xda, 0x8c, 0x04, 0xf3, 0xd7,
+ 0x62, 0xfa, 0x74, 0x08, 0xf3, 0xd1,
+ 0x62, 0xda, 0x88, 0x00, 0xf3, 0x14, 0xa7,
+
+ // blsr
+ 0x62, 0xda, 0x8c, 0x00, 0xf3, 0xcf,
+ 0x62, 0xda, 0x8c, 0x04, 0xf3, 0xcf,
+ 0x62, 0xfa, 0x74, 0x08, 0xf3, 0xc9,
+ 0x62, 0xda, 0x88, 0x00, 0xf3, 0x0c, 0xa7,
+
+ // rorx
+ 0x62, 0x4b, 0xff, 0x08, 0xf0, 0xf7, 0x03,
+ 0x62, 0xfb, 0x7f, 0x08, 0xf0, 0xc9, 0x05,
+ 0x62, 0x4b, 0xfb, 0x08, 0xf0, 0x34, 0xa7, 0x04,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(bit)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ // adc
+ adc(r20b, r21b, r22b);
+ adc(r20w, r21w, r22w);
+ adc(r20d, r21d, r22d);
+ adc(r20, r21, r22);
+
+ adc(r20b, r21b);
+ adc(r20w, r21w);
+ adc(r20d, r21d);
+ adc(r20, r21);
+
+ adc(r20b, r21b, 0x3);
+ adc(r20w, r21w, 0x3);
+ adc(r20d, r21d, 0x3);
+ adc(r20, r21, 0x3);
+
+ adc(r20b, 0x3);
+ adc(r20w, 0x3);
+ adc(r20d, 0x3);
+ adc(r20, 0x3);
+
+ // add
+ add(r20b, r21b, r22b);
+ add(r20w, r21w, r22w);
+ add(r20d, r21d, r22d);
+ add(r20, r21, r22);
+ add(r20b, r21b);
+ add(r20w, r21w);
+ add(r20d, r21d);
+ add(r20, r21);
+ add(r20b, r21b, 0x3);
+ add(r20w, r21w, 0x3);
+ add(r20d, r21d, 0x3);
+ add(r20, r21, 0x3);
+ add(r20b, 0x3);
+ add(r20w, 0x3);
+ add(r20d, 0x3);
+ add(r20, 0x3);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // adc
+ 0x62, 0xec, 0x5c, 0x10, 0x10, 0xf5, 0x62, 0xec, 0x5d, 0x10, 0x11, 0xf5, 0x62, 0xec, 0x5c, 0x10,
+ 0x11, 0xf5, 0x62, 0xec, 0xdc, 0x10, 0x11, 0xf5, 0xd5, 0x50, 0x10, 0xec, 0x66, 0xd5, 0x50, 0x11,
+ 0xec, 0xd5, 0x50, 0x11, 0xec, 0xd5, 0x58, 0x11, 0xec, 0x62, 0xfc, 0x5c, 0x10, 0x80, 0xd5, 0x03,
+ 0x62, 0xfc, 0x5d, 0x10, 0x83, 0xd5, 0x03, 0x62, 0xfc, 0x5c, 0x10, 0x83, 0xd5, 0x03, 0x62, 0xfc,
+ 0xdc, 0x10, 0x83, 0xd5, 0x03, 0xd5, 0x10, 0x80, 0xd4, 0x03, 0x66, 0xd5, 0x10, 0x83, 0xd4, 0x03,
+ 0xd5, 0x10, 0x83, 0xd4, 0x03, 0xd5, 0x18, 0x83, 0xd4, 0x03,
+
+ // add
+ 0x62, 0xec, 0x5c, 0x10, 0x00, 0xf5, 0x62, 0xec, 0x5d, 0x10, 0x01, 0xf5, 0x62, 0xec, 0x5c, 0x10,
+ 0x01, 0xf5, 0x62, 0xec, 0xdc, 0x10, 0x01, 0xf5, 0xd5, 0x50, 0x00, 0xec, 0x66, 0xd5, 0x50, 0x01,
+ 0xec, 0xd5, 0x50, 0x01, 0xec, 0xd5, 0x58, 0x01, 0xec, 0x62, 0xfc, 0x5c, 0x10, 0x80, 0xc5, 0x03,
+ 0x62, 0xfc, 0x5d, 0x10, 0x83, 0xc5, 0x03, 0x62, 0xfc, 0x5c, 0x10, 0x83, 0xc5, 0x03, 0x62, 0xfc,
+ 0xdc, 0x10, 0x83, 0xc5, 0x03, 0xd5, 0x10, 0x80, 0xc4, 0x03, 0x66, 0xd5, 0x10, 0x83, 0xc4, 0x03,
+ 0xd5, 0x10, 0x83, 0xc4, 0x03, 0xd5, 0x18, 0x83, 0xc4, 0x03,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(inc_dec)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ inc(r30b);
+ inc(r30w);
+ inc(r30d);
+ inc(r30);
+ inc(r30b, r31b);
+ inc(r30w, r31w);
+ inc(r30w|T_nf, r31w);
+ inc(r30d, r31d);
+ inc(r30, r31);
+ inc(r30, ptr [r31]);
+
+ dec(r30b);
+ dec(r30w);
+ dec(r30d);
+ dec(r30);
+ dec(r30b, r31b);
+ dec(r30w, r31w);
+ dec(r30w|T_nf, r31w);
+ dec(r30d, r31d);
+ dec(r30, r31);
+ dec(r30, ptr [r31]);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // inc
+ 0xd5, 0x11, 0xfe, 0xc6, 0x66, 0xd5, 0x11, 0xff, 0xc6, 0xd5, 0x11, 0xff, 0xc6, 0xd5, 0x19, 0xff,
+ 0xc6, 0x62, 0xdc, 0x0c, 0x10, 0xfe, 0xc7,
+ 0x62, 0xdc, 0x0d, 0x10, 0xff, 0xc7,
+ 0x62, 0xdc, 0x0d, 0x14, 0xff, 0xc7, // T_nf
+ 0x62, 0xdc, 0x0c, 0x10, 0xff, 0xc7, 0x62, 0xdc, 0x8c, 0x10, 0xff, 0xc7, 0x62,
+ 0xdc, 0x8c, 0x10, 0xff, 0x07,
+ // decA
+ 0xd5, 0x11, 0xfe, 0xce, 0x66, 0xd5, 0x11, 0xff, 0xce, 0xd5, 0x11, 0xff, 0xce, 0xd5, 0x19, 0xff,
+ 0xce, 0x62, 0xdc, 0x0c, 0x10, 0xfe, 0xcf,
+ 0x62, 0xdc, 0x0d, 0x10, 0xff, 0xcf,
+ 0x62, 0xdc, 0x0d, 0x14, 0xff, 0xcf, // T_nf
+ 0x62, 0xdc, 0x0c, 0x10, 0xff, 0xcf, 0x62, 0xdc, 0x8c, 0x10, 0xff, 0xcf, 0x62,
+ 0xdc, 0x8c, 0x10, 0xff, 0x0f,
+
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(div_op1)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ div(r20b);
+ div(r20d);
+ div(r20w);
+ div(r20);
+ div(r20|T_nf);
+ div(eax|T_nf);
+ div(byte [r20+r30*1]);
+ div(word [r20+r30*1]);
+ div(dword [r20+r30*1]);
+ div(qword [r20+r30*1]);
+
+ idiv(r20b);
+ idiv(r20d);
+ idiv(r20w);
+ idiv(r20);
+ idiv(r20|T_nf);
+ idiv(eax|T_nf);
+ idiv(byte [r20+r30*1]);
+ idiv(word [r20+r30*1]);
+ idiv(dword [r20+r30*1]);
+ idiv(qword [r20+r30*1]);
+
+ imul(r20b);
+ imul(r20d);
+ imul(r20w);
+ imul(r20);
+ imul(r20|T_nf);
+ imul(eax|T_nf);
+ imul(byte [r20+r30*1]);
+ imul(word [r20+r30*1]);
+ imul(dword [r20+r30*1]);
+ imul(qword [r20+r30*1]);
+
+ mul(r20b);
+ mul(r20d);
+ mul(r20w);
+ mul(r20);
+ mul(r20|T_nf);
+ mul(eax|T_nf);
+ mul(byte [r20+r30*1]);
+ mul(word [r20+r30*1]);
+ mul(dword [r20+r30*1]);
+ mul(qword [r20+r30*1]);
+
+ neg(r20b);
+ neg(r20d);
+ neg(r20w);
+ neg(r20);
+ neg(r20|T_nf);
+ neg(eax|T_nf);
+ neg(byte [r20+r30*1]);
+ neg(word [r20+r30*1]);
+ neg(dword [r20+r30*1]);
+ neg(qword [r20+r30*1]);
+
+ // not_ does not have NF=1
+ not_(r20b);
+ not_(r20d);
+ not_(r20w);
+ not_(r20);
+ not_(byte [r20+r30*1]);
+ not_(word [r20+r30*1]);
+ not_(dword [r20+r30*1]);
+ not_(qword [r20+r30*1]);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x62, 0xfc, 0x7c, 0x08, 0xf6, 0xf4,
+ 0x62, 0xfc, 0x7c, 0x08, 0xf7, 0xf4,
+ 0x62, 0xfc, 0x7d, 0x08, 0xf7, 0xf4,
+ 0x62, 0xfc, 0xfc, 0x08, 0xf7, 0xf4,
+ 0x62, 0xfc, 0xfc, 0x0c, 0xf7, 0xf4, // r20|T_nf
+ 0x62, 0xf4, 0x7c, 0x0c, 0xf7, 0xf0, // eax|T_nf
+ 0x62, 0xbc, 0x78, 0x08, 0xf6, 0x34, 0x34,
+ 0x62, 0xbc, 0x79, 0x08, 0xf7, 0x34, 0x34,
+ 0x62, 0xbc, 0x78, 0x08, 0xf7, 0x34, 0x34,
+ 0x62, 0xbc, 0xf8, 0x08, 0xf7, 0x34, 0x34,
+
+ 0x62, 0xfc, 0x7c, 0x08, 0xf6, 0xfc, 0x62, 0xfc, 0x7c, 0x08, 0xf7, 0xfc, 0x62, 0xfc, 0x7d, 0x08,
+ 0xf7, 0xfc, 0x62, 0xfc, 0xfc, 0x08, 0xf7, 0xfc, 0x62, 0xfc, 0xfc, 0x0c, 0xf7, 0xfc, 0x62, 0xf4,
+ 0x7c, 0x0c, 0xf7, 0xf8, 0x62, 0xbc, 0x78, 0x08, 0xf6, 0x3c, 0x34, 0x62, 0xbc, 0x79, 0x08, 0xf7,
+ 0x3c, 0x34, 0x62, 0xbc, 0x78, 0x08, 0xf7, 0x3c, 0x34, 0x62, 0xbc, 0xf8, 0x08, 0xf7, 0x3c, 0x34,
+
+ 0x62, 0xfc, 0x7c, 0x08, 0xf6, 0xec, 0x62, 0xfc, 0x7c, 0x08, 0xf7, 0xec, 0x62, 0xfc, 0x7d, 0x08,
+ 0xf7, 0xec, 0x62, 0xfc, 0xfc, 0x08, 0xf7, 0xec, 0x62, 0xfc, 0xfc, 0x0c, 0xf7, 0xec, 0x62, 0xf4,
+ 0x7c, 0x0c, 0xf7, 0xe8, 0x62, 0xbc, 0x78, 0x08, 0xf6, 0x2c, 0x34, 0x62, 0xbc, 0x79, 0x08, 0xf7,
+ 0x2c, 0x34, 0x62, 0xbc, 0x78, 0x08, 0xf7, 0x2c, 0x34, 0x62, 0xbc, 0xf8, 0x08, 0xf7, 0x2c, 0x34,
+ 0x62, 0xfc, 0x7c, 0x08, 0xf6, 0xe4, 0x62, 0xfc, 0x7c, 0x08, 0xf7, 0xe4, 0x62, 0xfc, 0x7d, 0x08,
+ 0xf7, 0xe4, 0x62, 0xfc, 0xfc, 0x08, 0xf7, 0xe4, 0x62, 0xfc, 0xfc, 0x0c, 0xf7, 0xe4, 0x62, 0xf4,
+ 0x7c, 0x0c, 0xf7, 0xe0, 0x62, 0xbc, 0x78, 0x08, 0xf6, 0x24, 0x34, 0x62, 0xbc, 0x79, 0x08, 0xf7,
+ 0x24, 0x34, 0x62, 0xbc, 0x78, 0x08, 0xf7, 0x24, 0x34, 0x62, 0xbc, 0xf8, 0x08, 0xf7, 0x24, 0x34,
+ 0x62, 0xfc, 0x7c, 0x08, 0xf6, 0xdc, 0x62, 0xfc, 0x7c, 0x08, 0xf7, 0xdc, 0x62, 0xfc, 0x7d, 0x08,
+ 0xf7, 0xdc, 0x62, 0xfc, 0xfc, 0x08, 0xf7, 0xdc, 0x62, 0xfc, 0xfc, 0x0c, 0xf7, 0xdc, 0x62, 0xf4,
+ 0x7c, 0x0c, 0xf7, 0xd8, 0x62, 0xbc, 0x78, 0x08, 0xf6, 0x1c, 0x34, 0x62, 0xbc, 0x79, 0x08, 0xf7,
+ 0x1c, 0x34, 0x62, 0xbc, 0x78, 0x08, 0xf7, 0x1c, 0x34, 0x62, 0xbc, 0xf8, 0x08, 0xf7, 0x1c, 0x34,
+ 0x62, 0xfc, 0x7c, 0x08, 0xf6, 0xd4, 0x62, 0xfc, 0x7c, 0x08, 0xf7, 0xd4, 0x62, 0xfc, 0x7d, 0x08,
+ 0xf7, 0xd4, 0x62, 0xfc, 0xfc, 0x08, 0xf7, 0xd4, 0x62, 0xbc, 0x78, 0x08, 0xf6, 0x14, 0x34, 0x62,
+ 0xbc, 0x79, 0x08, 0xf7, 0x14, 0x34, 0x62, 0xbc, 0x78, 0x08, 0xf7, 0x14, 0x34, 0x62, 0xbc, 0xf8,
+ 0x08, 0xf7, 0x14, 0x34,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(imul_2op)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ // imul(r30b, al); // QQQ : not supported?
+ imul(r30w, ax);
+ imul(r30d, eax);
+ imul(r30, rax);
+ imul(r30|T_nf, rax);
+ imul(rcx|T_nf, rax);
+ imul(rcx, ptr [r30]);
+
+ neg(r30b, al);
+ neg(r30w, ax);
+ neg(r30d, eax);
+ neg(r30, rax);
+ neg(r30|T_nf, rax);
+ neg(rcx|T_nf, rax);
+ neg(rcx, ptr [r30]);
+
+ not_(r30b, al);
+ not_(r30w, ax);
+ not_(r30d, eax);
+ not_(r30, rax);
+ not_(rcx, ptr [r30]);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // imul
+ 0x62, 0x64, 0x7d, 0x08, 0xaf, 0xf0,
+ 0x62, 0x64, 0x7c, 0x08, 0xaf, 0xf0,
+ 0x62, 0x64, 0xfc, 0x08, 0xaf, 0xf0,
+ 0x62, 0x64, 0xfc, 0x0c, 0xaf, 0xf0,
+ 0x62, 0xf4, 0xfc, 0x0c, 0xaf, 0xc8,
+ 0x62, 0xdc, 0xfc, 0x08, 0xaf, 0x0e,
+
+ // neg
+ 0x62, 0xf4, 0x0c, 0x10, 0xf6, 0xd8, 0x62, 0xf4, 0x0d, 0x10, 0xf7, 0xd8, 0x62, 0xf4, 0x0c, 0x10,
+ 0xf7, 0xd8, 0x62, 0xf4, 0x8c, 0x10, 0xf7, 0xd8, 0x62, 0xf4, 0x8c, 0x14, 0xf7, 0xd8, 0x62, 0xf4,
+ 0xf4, 0x1c, 0xf7, 0xd8, 0x62, 0xdc, 0xf4, 0x18, 0xf7, 0x1e,
+
+ // not
+ 0x62, 0xf4, 0x0c, 0x10, 0xf6, 0xd0, 0x62, 0xf4, 0x0d, 0x10, 0xf7, 0xd0, 0x62, 0xf4, 0x0c, 0x10,
+ 0xf7, 0xd0, 0x62, 0xf4, 0x8c, 0x10, 0xf7, 0xd0, 0x62, 0xdc, 0xf4, 0x18, 0xf7, 0x16,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(imul_zu)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ imul(ax|T_zu, cx, 0x1234);
+ imul(ax|T_nf, cx, 0x1234);
+ imul(ax|T_zu|T_nf, cx, 0x1234);
+ imul(r30w, ax, 0x1234);
+ imul(r30d, eax, 0x12345678);
+ imul(r30, rax, 0x12345678);
+ imul(r30|T_zu, rax, 0x12345678);
+ imul(r30|T_nf, rax, 0x12345678);
+ imul(r30|T_nf|T_zu, rax, 0x12345678);
+ imul(rcx, ptr [r30], 0x12345678);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x62, 0xf4, 0x7d, 0x18, 0x69, 0xc1, 0x34, 0x12, // T_zu
+ 0x62, 0xf4, 0x7d, 0x0c, 0x69, 0xc1, 0x34, 0x12, // T_nf
+ 0x62, 0xf4, 0x7d, 0x1c, 0x69, 0xc1, 0x34, 0x12, // T_zu|T_nf
+ 0x62, 0x64, 0x7d, 0x08, 0x69, 0xf0, 0x34, 0x12, // w
+ 0x62, 0x64, 0x7c, 0x08, 0x69, 0xf0, 0x78, 0x56, 0x34, 0x12, // d
+ 0x62, 0x64, 0xfc, 0x08, 0x69, 0xf0, 0x78, 0x56, 0x34, 0x12,
+ 0x62, 0x64, 0xfc, 0x18, 0x69, 0xf0, 0x78, 0x56, 0x34, 0x12, // T_zu
+ 0x62, 0x64, 0xfc, 0x0c, 0x69, 0xf0, 0x78, 0x56, 0x34, 0x12, // T_nf
+ 0x62, 0x64, 0xfc, 0x1c, 0x69, 0xf0, 0x78, 0x56, 0x34, 0x12, // T_nf|T_zu
+ 0x62, 0xdc, 0xfc, 0x08, 0x69, 0x0e, 0x78, 0x56, 0x34, 0x12,
+
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(lzcnt)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ lzcnt(r16w, r17w);
+ lzcnt(r16d, r17d);
+ lzcnt(r16, r17);
+ lzcnt(r16|T_nf, r17);
+ lzcnt(rax|T_nf, rcx);
+ lzcnt(rax, ptr [r18]);
+
+ tzcnt(r16w, r17w);
+ tzcnt(r16d, r17d);
+ tzcnt(r16, r17);
+ tzcnt(r16|T_nf, r17);
+ tzcnt(rax|T_nf, rcx);
+ tzcnt(rax, ptr [r18]);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // lzcnt
+ 0x62, 0xec, 0x7d, 0x08, 0xf5, 0xc1, 0x62, 0xec, 0x7c, 0x08, 0xf5, 0xc1, 0x62, 0xec, 0xfc, 0x08,
+ 0xf5, 0xc1, 0x62, 0xec, 0xfc, 0x0c, 0xf5, 0xc1, 0x62, 0xf4, 0xfc, 0x0c, 0xf5, 0xc1, 0x62, 0xfc,
+ 0xfc, 0x08, 0xf5, 0x02,
+ // tzcnt
+ 0x62, 0xec, 0x7d, 0x08, 0xf4, 0xc1, 0x62, 0xec, 0x7c, 0x08, 0xf4, 0xc1, 0x62, 0xec, 0xfc, 0x08,
+ 0xf4, 0xc1, 0x62, 0xec, 0xfc, 0x0c, 0xf4, 0xc1, 0x62, 0xf4, 0xfc, 0x0c, 0xf4, 0xc1, 0x62, 0xfc,
+ 0xfc, 0x08, 0xf4, 0x02,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(shld)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ shld(rax|T_nf, rcx, cl);
+ shld(r16, rcx, cl);
+ shld(r16, rcx, 0x9);
+ shld(r16|T_nf, rcx, 0x9);
+ shld(r20, r16, rcx, cl);
+ shld(r20|T_nf, r16, rcx, cl);
+ shld(r20, r16, rcx, 0x9);
+ shld(r20|T_nf, r16, rcx, 0x9);
+ shld(r20, ptr [r21], rcx, 0x9);
+
+ shrd(rax|T_nf, rcx, cl);
+ shrd(r16, rcx, cl);
+ shrd(r16, rcx, 0x9);
+ shrd(r16|T_nf, rcx, 0x9);
+ shrd(r20, r16, rcx, cl);
+ shrd(r20|T_nf, r16, rcx, cl);
+ shrd(r20, r16, rcx, 0x9);
+ shrd(r20|T_nf, r16, rcx, 0x9);
+ shrd(r20, ptr [r21], rcx, 0x9);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // shld
+ 0x62, 0xf4, 0xfc, 0x0c, 0xa5, 0xc8, 0x62, 0xfc, 0xfc, 0x08, 0xa5, 0xc8, 0x62, 0xfc, 0xfc, 0x08,
+ 0x24, 0xc8, 0x09, 0x62, 0xfc, 0xfc, 0x0c, 0x24, 0xc8, 0x09, 0x62, 0xfc, 0xdc, 0x10, 0xa5, 0xc8,
+ 0x62, 0xfc, 0xdc, 0x14, 0xa5, 0xc8, 0x62, 0xfc, 0xdc, 0x10, 0x24, 0xc8, 0x09, 0x62, 0xfc, 0xdc,
+ 0x14, 0x24, 0xc8, 0x09, 0x62, 0xfc, 0xdc, 0x10, 0x24, 0x4d, 0x00, 0x09,
+ // shrd
+ 0x62, 0xf4, 0xfc, 0x0c, 0xad, 0xc8, 0x62, 0xfc, 0xfc, 0x08, 0xad, 0xc8, 0x62, 0xfc, 0xfc, 0x08,
+ 0x2c, 0xc8, 0x09, 0x62, 0xfc, 0xfc, 0x0c, 0x2c, 0xc8, 0x09, 0x62, 0xfc, 0xdc, 0x10, 0xad, 0xc8,
+ 0x62, 0xfc, 0xdc, 0x14, 0xad, 0xc8, 0x62, 0xfc, 0xdc, 0x10, 0x2c, 0xc8, 0x09, 0x62, 0xfc, 0xdc,
+ 0x14, 0x2c, 0xc8, 0x09, 0x62, 0xfc, 0xdc, 0x10, 0x2c, 0x4d, 0x00, 0x09,
+
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(base)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ lea(r30, ptr[r20+r21]);
+ add(r30, r20);
+ add(r30, ptr[r20]);
+ cmp(r30, ptr[r20]);
+ push(r16);
+ pop(r16);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // lea
+ 0xd5, 0x7c, 0x8d, 0x34, 0x2c,
+ // add
+ 0xd5, 0x59, 0x01, 0xe6,
+ 0xd5, 0x5c, 0x03, 0x34, 0x24,
+ // cmp
+ 0xd5, 0x5c, 0x3b, 0x34, 0x24,
+ // push
+ 0xd5, 0x10, 0x50,
+ // pop
+ 0xd5, 0x10, 0x58,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(mov_misc)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ movdir64b(r16, ptr [r20+r21*8+0x4]);
+ movdiri(ptr [r20+r21*8+0x4], r16);
+
+ movbe(ptr [r16], r30w);
+ movbe(ptr [r16], r30d);
+ movbe(ptr [r16], r30);
+ movbe(r30w, ptr [r16]);
+ movbe(r30d, ptr [r16]);
+ movbe(r30, ptr [r16]);
+
+ crc32(r30d, r8b);
+ crc32(r30d, r8w);
+ crc32(r30d, r8d);
+ crc32(r30, r8b);
+ crc32(r30, r8);
+
+ jmpabs(0x12345678aabbccdd);
+
+ cmpbexadd(ptr [r20+r30*8], r21, r22);
+ cmpbexadd(ptr [r20+r30*8], r21d, r22d);
+
+ cmovb(r8, r9, r10);
+ cmovb(r8d, r9d, r10d);
+
+ setb(r31b);
+ setb(r31b|T_zu);
+ setb(r15b|T_zu);
+ setb(ptr [r30]);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // movdir64b
+ 0x62, 0xec, 0x79, 0x08, 0xf8, 0x44, 0xec, 0x04,
+ // movdiri
+ 0x62, 0xec, 0xf8, 0x08, 0xf9, 0x44, 0xec, 0x04,
+ // movbe
+ 0x62, 0x6c, 0x7d, 0x08, 0x61, 0x30, 0x62, 0x6c, 0x7c, 0x08, 0x61, 0x30, 0x62, 0x6c, 0xfc, 0x08,
+ 0x61, 0x30, 0x62, 0x6c, 0x7d, 0x08, 0x60, 0x30, 0x62, 0x6c, 0x7c, 0x08, 0x60, 0x30, 0x62, 0x6c,
+ 0xfc, 0x08, 0x60, 0x30,
+ // crc32
+ 0x62, 0x44, 0x7c, 0x08, 0xf0, 0xf0, 0x62, 0x44, 0x7d, 0x08, 0xf1, 0xf0, 0x62, 0x44, 0x7c, 0x08,
+ 0xf1, 0xf0, 0x62, 0x44, 0xfc, 0x08, 0xf0, 0xf0, 0x62, 0x44, 0xfc, 0x08, 0xf1, 0xf0,
+ // jmpabs
+ 0xd5, 0x00, 0xa1, 0xdd, 0xcc, 0xbb, 0xaa, 0x78, 0x56, 0x34, 0x12,
+ //cmpbexadd
+ 0x62, 0xaa, 0xc9, 0x00, 0xe6, 0x2c, 0xf4,
+ 0x62, 0xaa, 0x49, 0x00, 0xe6, 0x2c, 0xf4,
+ // cmovb
+ 0x62, 0x54, 0xbc, 0x18, 0x42, 0xca,
+ 0x62, 0x54, 0x3c, 0x18, 0x42, 0xca,
+ // setb
+ 0x62, 0xdc, 0x7f, 0x08, 0x42, 0xc7,
+ 0x62, 0xdc, 0x7f, 0x18, 0x42, 0xc7,
+ 0x62, 0xd4, 0x7f, 0x18, 0x42, 0xc7,
+ 0x62, 0xdc, 0x7f, 0x08, 0x42, 0x06,
+
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(shift_2op)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ shl(r16b, cl);
+ shl(r16w, cl);
+ shl(r16d, cl);
+ shl(r16, cl);
+ shl(r16|T_nf, cl);
+ shl(r16b, 0x3);
+ shl(r16w, 0x5);
+ shl(r16d, 0x7);
+ shl(r16, 0x9);
+ shl(byte [r30], 0x3);
+ shl(word [r30], 0x5);
+ shl(dword [r30], 0x7);
+ shl(qword [r30], 0x9);
+
+ shr(r16b, cl);
+ shr(r16w, cl);
+ shr(r16d, cl);
+ shr(r16, cl);
+ shr(r16|T_nf, cl);
+ shr(r16b, 0x3);
+ shr(r16w, 0x5);
+ shr(r16d, 0x7);
+ shr(r16, 0x9);
+ shr(byte [r30], 0x3);
+ shr(word [r30], 0x5);
+ shr(dword [r30], 0x7);
+ shr(qword [r30], 0x9);
+
+ sar(r16b, cl);
+ sar(r16w, cl);
+ sar(r16d, cl);
+ sar(r16, cl);
+ sar(r16|T_nf, cl);
+ sar(r16b, 0x3);
+ sar(r16w, 0x5);
+ sar(r16d, 0x7);
+ sar(r16, 0x9);
+ sar(byte [r30], 0x3);
+ sar(word [r30], 0x5);
+ sar(dword [r30], 0x7);
+ sar(qword [r30], 0x9);
+
+ ror(r16b, cl);
+ ror(r16w, cl);
+ ror(r16d, cl);
+ ror(r16, cl);
+ ror(r16|T_nf, cl);
+ ror(r16b, 0x3);
+ ror(r16w, 0x5);
+ ror(r16d, 0x7);
+ ror(r16, 0x9);
+ ror(byte [r30], 0x3);
+ ror(word [r30], 0x5);
+ ror(dword [r30], 0x7);
+ ror(qword [r30], 0x9);
+
+ rol(r16b, cl);
+ rol(r16w, cl);
+ rol(r16d, cl);
+ rol(r16, cl);
+ rol(r16|T_nf, cl);
+ rol(r16b, 0x3);
+ rol(r16w, 0x5);
+ rol(r16d, 0x7);
+ rol(r16, 0x9);
+ rol(byte [r30], 0x3);
+ rol(word [r30], 0x5);
+ rol(dword [r30], 0x7);
+ rol(qword [r30], 0x9);
+
+ rcl(r16b, cl);
+ rcl(r16w, cl);
+ rcl(r16d, cl);
+ rcl(r16, cl);
+ rcl(r16b, 0x3);
+ rcl(r16w, 0x5);
+ rcl(r16d, 0x7);
+ rcl(r16, 0x9);
+ rcl(byte [r30], 0x3);
+ rcl(word [r30], 0x5);
+ rcl(dword [r30], 0x7);
+ rcl(qword [r30], 0x9);
+
+ rcr(r16b, cl);
+ rcr(r16w, cl);
+ rcr(r16d, cl);
+ rcr(r16, cl);
+ rcr(r16b, 0x3);
+ rcr(r16w, 0x5);
+ rcr(r16d, 0x7);
+ rcr(r16, 0x9);
+ rcr(byte [r30], 0x3);
+ rcr(word [r30], 0x5);
+ rcr(dword [r30], 0x7);
+ rcr(qword [r30], 0x9);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // shl
+ 0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xe0, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xe0, 0x62, 0xfc, 0x7c, 0x08,
+ 0xd3, 0xe0, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xe0, 0x62, 0xfc, 0xfc, 0x0c, 0xd3, 0xe0, 0x62, 0xfc,
+ 0x7c, 0x08, 0xc0, 0xe0, 0x03, 0x62, 0xfc, 0x7d, 0x08, 0xc1, 0xe0, 0x05, 0x62, 0xfc, 0x7c, 0x08,
+ 0xc1, 0xe0, 0x07, 0x62, 0xfc, 0xfc, 0x08, 0xc1, 0xe0, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x26,
+ 0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1, 0x26, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x26, 0x07, 0x62,
+ 0xdc, 0xfc, 0x08, 0xc1, 0x26, 0x09,
+ // shr
+ 0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xe8, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xe8, 0x62, 0xfc, 0x7c, 0x08,
+ 0xd3, 0xe8, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xe8, 0x62, 0xfc, 0xfc, 0x0c, 0xd3, 0xe8, 0x62, 0xfc,
+ 0x7c, 0x08, 0xc0, 0xe8, 0x03, 0x62, 0xfc, 0x7d, 0x08, 0xc1, 0xe8, 0x05, 0x62, 0xfc, 0x7c, 0x08,
+ 0xc1, 0xe8, 0x07, 0x62, 0xfc, 0xfc, 0x08, 0xc1, 0xe8, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x2e,
+ 0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1, 0x2e, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x2e, 0x07, 0x62,
+ 0xdc, 0xfc, 0x08, 0xc1, 0x2e, 0x09,
+ // sar
+ 0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xf8, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xf8, 0x62, 0xfc, 0x7c, 0x08,
+ 0xd3, 0xf8, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xf8, 0x62, 0xfc, 0xfc, 0x0c, 0xd3, 0xf8, 0x62, 0xfc,
+ 0x7c, 0x08, 0xc0, 0xf8, 0x03, 0x62, 0xfc, 0x7d, 0x08, 0xc1, 0xf8, 0x05, 0x62, 0xfc, 0x7c, 0x08,
+ 0xc1, 0xf8, 0x07, 0x62, 0xfc, 0xfc, 0x08, 0xc1, 0xf8, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x3e,
+ 0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1, 0x3e, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x3e, 0x07, 0x62,
+ 0xdc, 0xfc, 0x08, 0xc1, 0x3e, 0x09,
+ // ror
+ 0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xc8, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xc8, 0x62, 0xfc, 0x7c, 0x08,
+ 0xd3, 0xc8, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xc8, 0x62, 0xfc, 0xfc, 0x0c, 0xd3, 0xc8, 0x62, 0xfc,
+ 0x7c, 0x08, 0xc0, 0xc8, 0x03, 0x62, 0xfc, 0x7d, 0x08, 0xc1, 0xc8, 0x05, 0x62, 0xfc, 0x7c, 0x08,
+ 0xc1, 0xc8, 0x07, 0x62, 0xfc, 0xfc, 0x08, 0xc1, 0xc8, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x0e,
+ 0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1, 0x0e, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x0e, 0x07, 0x62,
+ 0xdc, 0xfc, 0x08, 0xc1, 0x0e, 0x09,
+ // rol
+ 0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xc0, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xc0, 0x62, 0xfc, 0x7c, 0x08,
+ 0xd3, 0xc0, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xc0, 0x62, 0xfc, 0xfc, 0x0c, 0xd3, 0xc0, 0x62, 0xfc,
+ 0x7c, 0x08, 0xc0, 0xc0, 0x03, 0x62, 0xfc, 0x7d, 0x08, 0xc1, 0xc0, 0x05, 0x62, 0xfc, 0x7c, 0x08,
+ 0xc1, 0xc0, 0x07, 0x62, 0xfc, 0xfc, 0x08, 0xc1, 0xc0, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x06,
+ 0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1, 0x06, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x06, 0x07, 0x62,
+ 0xdc, 0xfc, 0x08, 0xc1, 0x06, 0x09,
+ // rcl
+ 0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xd0, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xd0, 0x62, 0xfc, 0x7c, 0x08,
+ 0xd3, 0xd0, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xd0, 0x62, 0xfc, 0x7c, 0x08, 0xc0, 0xd0, 0x03, 0x62,
+ 0xfc, 0x7d, 0x08, 0xc1, 0xd0, 0x05, 0x62, 0xfc, 0x7c, 0x08, 0xc1, 0xd0, 0x07, 0x62, 0xfc, 0xfc,
+ 0x08, 0xc1, 0xd0, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x16, 0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1,
+ 0x16, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x16, 0x07, 0x62, 0xdc, 0xfc, 0x08, 0xc1, 0x16, 0x09,
+ // rcr
+ 0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xd8, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xd8, 0x62, 0xfc, 0x7c, 0x08,
+ 0xd3, 0xd8, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xd8, 0x62, 0xfc, 0x7c, 0x08, 0xc0, 0xd8, 0x03, 0x62,
+ 0xfc, 0x7d, 0x08, 0xc1, 0xd8, 0x05, 0x62, 0xfc, 0x7c, 0x08, 0xc1, 0xd8, 0x07, 0x62, 0xfc, 0xfc,
+ 0x08, 0xc1, 0xd8, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x1e, 0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1,
+ 0x1e, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x1e, 0x07, 0x62, 0xdc, 0xfc, 0x08, 0xc1, 0x1e, 0x09,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(shift_3op)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ rcl(r20b, r16b, cl);
+ rcl(r20w, r16w, cl);
+ rcl(r20d, r16d, cl);
+ rcl(r20, r16, cl);
+ rcl(r20b, ptr [r16], cl);
+ rcl(r20w, ptr [r16], cl);
+ rcl(r20d, ptr [r16], cl);
+ rcl(r20, ptr [r16], cl);
+ rcl(r20b, r16b, 0x2);
+ rcl(r20w, r16w, 0x4);
+ rcl(r20d, r16d, 0x6);
+ rcl(r20, r16, 0x8);
+ rcl(r20b, ptr [r16], 0x2);
+ rcl(r20w, ptr [r16], 0x4);
+ rcl(r20d, ptr [r16], 0x6);
+ rcl(r20, ptr [r16], 0x8);
+
+ rcr(r20b, r16b, cl);
+ rcr(r20w, r16w, cl);
+ rcr(r20d, r16d, cl);
+ rcr(r20, r16, cl);
+ rcr(r20b, ptr [r16], cl);
+ rcr(r20w, ptr [r16], cl);
+ rcr(r20d, ptr [r16], cl);
+ rcr(r20, ptr [r16], cl);
+ rcr(r20b, r16b, 0x2);
+ rcr(r20w, r16w, 0x4);
+ rcr(r20d, r16d, 0x6);
+ rcr(r20, r16, 0x8);
+ rcr(r20b, ptr [r16], 0x2);
+ rcr(r20w, ptr [r16], 0x4);
+ rcr(r20d, ptr [r16], 0x6);
+ rcr(r20, ptr [r16], 0x8);
+
+ rol(r20b, r16b, cl);
+ rol(r20w, r16w, cl);
+ rol(r20d, r16d, cl);
+ rol(r20, r16, cl);
+ rol(r20b, ptr [r16], cl);
+ rol(r20w, ptr [r16], cl);
+ rol(r20d, ptr [r16], cl);
+ rol(r20, ptr [r16], cl);
+ rol(r20b, r16b, 0x2);
+ rol(r20w, r16w, 0x4);
+ rol(r20d, r16d, 0x6);
+ rol(r20, r16, 0x8);
+ rol(r20b, ptr [r16], 0x2);
+ rol(r20w, ptr [r16], 0x4);
+ rol(r20d, ptr [r16], 0x6);
+ rol(r20, ptr [r16], 0x8);
+
+ shl(r20b, r16b, cl);
+ shl(r20w, r16w, cl);
+ shl(r20d, r16d, cl);
+ shl(r20, r16, cl);
+ shl(r20b, ptr [r16], cl);
+ shl(r20w, ptr [r16], cl);
+ shl(r20d, ptr [r16], cl);
+ shl(r20, ptr [r16], cl);
+ shl(r20b, r16b, 0x2);
+ shl(r20w, r16w, 0x4);
+ shl(r20d, r16d, 0x6);
+ shl(r20, r16, 0x8);
+ shl(r20b, ptr [r16], 0x2);
+ shl(r20w, ptr [r16], 0x4);
+ shl(r20d, ptr [r16], 0x6);
+ shl(r20, ptr [r16], 0x8);
+
+ shr(r20b, r16b, cl);
+ shr(r20w, r16w, cl);
+ shr(r20d, r16d, cl);
+ shr(r20, r16, cl);
+ shr(r20b, ptr [r16], cl);
+ shr(r20w, ptr [r16], cl);
+ shr(r20d, ptr [r16], cl);
+ shr(r20, ptr [r16], cl);
+ shr(r20b, r16b, 0x2);
+ shr(r20w, r16w, 0x4);
+ shr(r20d, r16d, 0x6);
+ shr(r20, r16, 0x8);
+ shr(r20b, ptr [r16], 0x2);
+ shr(r20w, ptr [r16], 0x4);
+ shr(r20d, ptr [r16], 0x6);
+ shr(r20, ptr [r16], 0x8);
+
+ sar(r20b, r16b, cl);
+ sar(r20w, r16w, cl);
+ sar(r20d, r16d, cl);
+ sar(r20, r16, cl);
+ sar(r20b, ptr [r16], cl);
+ sar(r20w, ptr [r16], cl);
+ sar(r20d, ptr [r16], cl);
+ sar(r20, ptr [r16], cl);
+ sar(r20b, r16b, 0x2);
+ sar(r20w, r16w, 0x4);
+ sar(r20d, r16d, 0x6);
+ sar(r20, r16, 0x8);
+ sar(r20b, ptr [r16], 0x2);
+ sar(r20w, ptr [r16], 0x4);
+ sar(r20d, ptr [r16], 0x6);
+ sar(r20, ptr [r16], 0x8);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // rcl
+ 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0xd0, 0x62, 0xfc, 0x5d, 0x10, 0xd3, 0xd0, 0x62, 0xfc, 0x5c, 0x10,
+ 0xd3, 0xd0, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0xd0, 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0x10, 0x62, 0xfc,
+ 0x5d, 0x10, 0xd3, 0x10, 0x62, 0xfc, 0x5c, 0x10, 0xd3, 0x10, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0x10,
+ 0x62, 0xfc, 0x5c, 0x10, 0xc0, 0xd0, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0xd0, 0x04, 0x62, 0xfc,
+ 0x5c, 0x10, 0xc1, 0xd0, 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0xd0, 0x08, 0x62, 0xfc, 0x5c, 0x10,
+ 0xc0, 0x10, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0x10, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0x10,
+ 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0x10, 0x08,
+ // rcr
+ 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0xd8, 0x62, 0xfc, 0x5d, 0x10, 0xd3, 0xd8, 0x62, 0xfc, 0x5c, 0x10,
+ 0xd3, 0xd8, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0xd8, 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0x18, 0x62, 0xfc,
+ 0x5d, 0x10, 0xd3, 0x18, 0x62, 0xfc, 0x5c, 0x10, 0xd3, 0x18, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0x18,
+ 0x62, 0xfc, 0x5c, 0x10, 0xc0, 0xd8, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0xd8, 0x04, 0x62, 0xfc,
+ 0x5c, 0x10, 0xc1, 0xd8, 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0xd8, 0x08, 0x62, 0xfc, 0x5c, 0x10,
+ 0xc0, 0x18, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0x18, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0x18,
+ 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0x18, 0x08,
+ // rol
+ 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0xc0, 0x62, 0xfc,
+ 0x5d, 0x10, 0xd3, 0xc0, 0x62, 0xfc, 0x5c, 0x10, 0xd3, 0xc0, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0xc0,
+ 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0x00, 0x62, 0xfc, 0x5d, 0x10, 0xd3, 0x00, 0x62, 0xfc, 0x5c, 0x10,
+ 0xd3, 0x00, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0x00, 0x62, 0xfc, 0x5c, 0x10, 0xc0, 0xc0, 0x02, 0x62,
+ 0xfc, 0x5d, 0x10, 0xc1, 0xc0, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0xc0, 0x06, 0x62, 0xfc, 0xdc,
+ 0x10, 0xc1, 0xc0, 0x08, 0x62, 0xfc, 0x5c, 0x10, 0xc0, 0x00, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1,
+ 0x00, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0x00, 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0x00, 0x08,
+ // shl
+ 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0xe0, 0x62, 0xfc, 0x5d, 0x10, 0xd3, 0xe0, 0x62, 0xfc, 0x5c, 0x10,
+ 0xd3, 0xe0, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0xe0, 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0x20, 0x62, 0xfc,
+ 0x5d, 0x10, 0xd3, 0x20, 0x62, 0xfc, 0x5c, 0x10, 0xd3, 0x20, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0x20,
+ 0x62, 0xfc, 0x5c, 0x10, 0xc0, 0xe0, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0xe0, 0x04, 0x62, 0xfc,
+ 0x5c, 0x10, 0xc1, 0xe0, 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0xe0, 0x08, 0x62, 0xfc, 0x5c, 0x10,
+ 0xc0, 0x20, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0x20, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0x20,
+ 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0x20, 0x08,
+ // shr
+ 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0xe8, 0x62, 0xfc,
+ 0x5d, 0x10, 0xd3, 0xe8, 0x62, 0xfc, 0x5c, 0x10, 0xd3, 0xe8, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0xe8,
+ 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0x28, 0x62, 0xfc, 0x5d, 0x10, 0xd3, 0x28, 0x62, 0xfc, 0x5c, 0x10,
+ 0xd3, 0x28, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0x28, 0x62, 0xfc, 0x5c, 0x10, 0xc0, 0xe8, 0x02, 0x62,
+ 0xfc, 0x5d, 0x10, 0xc1, 0xe8, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0xe8, 0x06, 0x62, 0xfc, 0xdc,
+ 0x10, 0xc1, 0xe8, 0x08, 0x62, 0xfc, 0x5c, 0x10, 0xc0, 0x28, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1,
+ 0x28, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0x28, 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0x28, 0x08,
+ // sar
+ 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0xf8, 0x62, 0xfc, 0x5d, 0x10, 0xd3, 0xf8, 0x62, 0xfc, 0x5c, 0x10,
+ 0xd3, 0xf8, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0xf8, 0x62, 0xfc, 0x5c, 0x10, 0xd2, 0x38, 0x62, 0xfc,
+ 0x5d, 0x10, 0xd3, 0x38, 0x62, 0xfc, 0x5c, 0x10, 0xd3, 0x38, 0x62, 0xfc, 0xdc, 0x10, 0xd3, 0x38,
+ 0x62, 0xfc, 0x5c, 0x10, 0xc0, 0xf8, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0xf8, 0x04, 0x62, 0xfc,
+ 0x5c, 0x10, 0xc1, 0xf8, 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0xf8, 0x08, 0x62, 0xfc, 0x5c, 0x10,
+ 0xc0, 0x38, 0x02, 0x62, 0xfc, 0x5d, 0x10, 0xc1, 0x38, 0x04, 0x62, 0xfc, 0x5c, 0x10, 0xc1, 0x38,
+ 0x06, 0x62, 0xfc, 0xdc, 0x10, 0xc1, 0x38, 0x08,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(push2_pop2)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ push2(r20, r30);
+ push2(rax, rcx);
+ push2p(r20, r30);
+ push2p(rdx, r8);
+
+ pop2(rax, rcx);
+ pop2(r20, r30);
+ pop2p(rax, rcx);
+ pop2p(r20, r30);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // push2
+ 0x62, 0xdc, 0x5c, 0x10, 0xff, 0xf6,
+ 0x62, 0xf4, 0x7c, 0x18, 0xff, 0xf1,
+ // push2p (What is this?)
+ 0x62, 0xdc, 0xdc, 0x10, 0xff, 0xf6,
+ 0x62, 0xd4, 0xec, 0x18, 0xff, 0xf0,
+ // pop2
+ 0x62, 0xf4, 0x7c, 0x18, 0x8f, 0xc1,
+ 0x62, 0xdc, 0x5c, 0x10, 0x8f, 0xc6,
+ // pop2p
+ 0x62, 0xf4, 0xfc, 0x18, 0x8f, 0xc1,
+ 0x62, 0xdc, 0xdc, 0x10, 0x8f, 0xc6,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(ccmp)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ ccmpb(rax, rbx, 0);
+ ccmpb(r30b, r31b, 1);
+ ccmpb(r30w, r31w, 2);
+ ccmpb(r30d, r31d, 3);
+ ccmpb(r30, r31, 4);
+ ccmpb(ptr [r30], r31b, 5);
+ ccmpb(ptr [r30], r31w, 6);
+ ccmpb(ptr [r30], r31d, 7);
+ ccmpb(ptr [r30], r31, 8);
+ ccmpb(r31b, ptr [r30], 9);
+ ccmpb(r31w, ptr [r30], 10);
+ ccmpb(r31d, ptr [r30], 11);
+ ccmpb(r31, ptr [r30], 12);
+
+ ccmpb(r20b, 0x12, 9);
+ ccmpb(r20w, 0x1234, 9);
+ ccmpb(r20d, 0x12345678, 9);
+ ccmpb(r20, 0x12345678, 9);
+ ccmpb(byte [r20], 0x12, 9);
+ ccmpb(word [r20], 0x1234, 9);
+ ccmpb(dword [r20], 0x12345678, 9);
+ ccmpb(qword [r20], 0x12345678, 9);
+
+ ccmpo(rax, rcx, 0);
+ ccmpno(rax, rcx, 1);
+ ccmpb(rax, rcx, 2);
+ ccmpnb(rax, rcx, 3);
+ ccmpz(rax, rcx, 4);
+ ccmpnz(rax, rcx, 5);
+ ccmpbe(rax, rcx, 6);
+ ccmpnbe(rax, rcx, 7);
+ ccmps(rax, rcx, 8);
+ ccmpns(rax, rcx, 9);
+ ccmpt(rax, rcx, 10);
+ ccmpf(rax, rcx, 11);
+ ccmpl(rax, rcx, 12);
+ ccmpnl(rax, rcx, 13);
+ ccmple(rax, rcx, 14);
+ ccmpnle(rax, rcx, 15);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // ccmpb
+ 0x62, 0xf4, 0x84, 0x02, 0x39, 0xd8, 0x62, 0x4c, 0x0c, 0x02, 0x38, 0xfe, 0x62, 0x4c, 0x15, 0x02,
+ 0x39, 0xfe, 0x62, 0x4c, 0x1c, 0x02, 0x39, 0xfe, 0x62, 0x4c, 0xa4, 0x02, 0x39, 0xfe, 0x62, 0x4c,
+ 0x2c, 0x02, 0x38, 0x3e, 0x62, 0x4c, 0x35, 0x02, 0x39, 0x3e, 0x62, 0x4c, 0x3c, 0x02, 0x39, 0x3e,
+ 0x62, 0x4c, 0xc4, 0x02, 0x39, 0x3e, 0x62, 0x4c, 0x4c, 0x02, 0x3a, 0x3e, 0x62, 0x4c, 0x55, 0x02,
+ 0x3b, 0x3e, 0x62, 0x4c, 0x5c, 0x02, 0x3b, 0x3e, 0x62, 0x4c, 0xe4, 0x02, 0x3b, 0x3e,
+ // ccmpb imm
+ 0x62, 0x7c, 0x4c, 0x02, 0x80, 0xfc, 0x12, 0x62, 0x7c, 0x4d, 0x02, 0x81, 0xfc, 0x34, 0x12, 0x62,
+ 0x7c, 0x4c, 0x02, 0x81, 0xfc, 0x78, 0x56, 0x34, 0x12, 0x62, 0x7c, 0xcc, 0x02, 0x81, 0xfc, 0x78,
+ 0x56, 0x34, 0x12, 0x62, 0x7c, 0x4c, 0x02, 0x80, 0x3c, 0x24, 0x12, 0x62, 0x7c, 0x4d, 0x02, 0x81,
+ 0x3c, 0x24, 0x34, 0x12, 0x62, 0x7c, 0x4c, 0x02, 0x81, 0x3c, 0x24, 0x78, 0x56, 0x34, 0x12, 0x62,
+ 0x7c, 0xcc, 0x02, 0x81, 0x3c, 0x24, 0x78, 0x56, 0x34, 0x12,
+ // all
+ 0x62, 0xf4, 0x84, 0x00, 0x39, 0xc8, 0x62, 0xf4, 0x8c, 0x01, 0x39, 0xc8, 0x62, 0xf4, 0x94, 0x02,
+ 0x39, 0xc8, 0x62, 0xf4, 0x9c, 0x03, 0x39, 0xc8, 0x62, 0xf4, 0xa4, 0x04, 0x39, 0xc8, 0x62, 0xf4,
+ 0xac, 0x05, 0x39, 0xc8, 0x62, 0xf4, 0xb4, 0x06, 0x39, 0xc8, 0x62, 0xf4, 0xbc, 0x07, 0x39, 0xc8,
+ 0x62, 0xf4, 0xc4, 0x08, 0x39, 0xc8, 0x62, 0xf4, 0xcc, 0x09, 0x39, 0xc8, 0x62, 0xf4, 0xd4, 0x0a,
+ 0x39, 0xc8, 0x62, 0xf4, 0xdc, 0x0b, 0x39, 0xc8, 0x62, 0xf4, 0xe4, 0x0c, 0x39, 0xc8, 0x62, 0xf4,
+ 0xec, 0x0d, 0x39, 0xc8, 0x62, 0xf4, 0xf4, 0x0e, 0x39, 0xc8, 0x62, 0xf4, 0xfc, 0x0f, 0x39, 0xc8,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(ctestb)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ ctestb(r30b, r31b, 0);
+ ctestb(r30w, r31w, 1);
+ ctestb(r30d, r31d, 2);
+ ctestb(r30, r31, 3);
+
+ ctestb(ptr [r30], r31b, 4);
+ ctestb(ptr [r30], r31w, 5);
+ ctestb(ptr [r30], r31d, 6);
+ ctestb(ptr [r30], r31, 7);
+
+ ctestb(r30b, 0x12, 8);
+ ctestb(r30w, 0x1234, 9);
+ ctestb(r30d, 0x12345678, 10);
+ ctestb(r30, 0x12345678, 11);
+
+ ctestb(byte [r30], 0x12, 12);
+ ctestb(word [r30], 0x1234, 13);
+ ctestb(dword [r30], 0x12345678, 14);
+ ctestb(qword [r30], 0x12345678, 15);
+
+ // all
+ ctesto(rax, rcx, 0);
+ ctestno(rax, rcx, 1);
+ ctestb(rax, rcx, 2);
+ ctestnb(rax, rcx, 3);
+ ctestz(rax, rcx, 4);
+ ctestnz(rax, rcx, 5);
+ ctestbe(rax, rcx, 6);
+ ctestnbe(rax, rcx, 7);
+ ctests(rax, rcx, 8);
+ ctestns(rax, rcx, 9);
+ ctestt(rax, rcx, 10);
+ ctestf(rax, rcx, 11);
+ ctestl(rax, rcx, 12);
+ ctestnl(rax, rcx, 13);
+ ctestle(rax, rcx, 14);
+ ctestnle(rax, rcx, 15);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ // ctestb
+ 0x62, 0x4c, 0x04, 0x02, 0x84, 0xfe, 0x62, 0x4c, 0x0d, 0x02, 0x85, 0xfe, 0x62, 0x4c, 0x14, 0x02,
+ 0x85, 0xfe, 0x62, 0x4c, 0x9c, 0x02, 0x85, 0xfe, 0x62, 0x4c, 0x24, 0x02, 0x84, 0x3e, 0x62, 0x4c,
+ 0x2d, 0x02, 0x85, 0x3e, 0x62, 0x4c, 0x34, 0x02, 0x85, 0x3e, 0x62, 0x4c, 0xbc, 0x02, 0x85, 0x3e,
+ 0x62, 0xdc, 0x44, 0x02, 0xf6, 0xc6, 0x12, 0x62, 0xdc, 0x4d, 0x02, 0xf7, 0xc6, 0x34, 0x12, 0x62,
+ 0xdc, 0x54, 0x02, 0xf7, 0xc6, 0x78, 0x56, 0x34, 0x12, 0x62, 0xdc, 0xdc, 0x02, 0xf7, 0xc6, 0x78,
+ 0x56, 0x34, 0x12, 0x62, 0xdc, 0x64, 0x02, 0xf6, 0x06, 0x12, 0x62, 0xdc, 0x6d, 0x02, 0xf7, 0x06,
+ 0x34, 0x12, 0x62, 0xdc, 0x74, 0x02, 0xf7, 0x06, 0x78, 0x56, 0x34, 0x12, 0x62, 0xdc, 0xfc, 0x02,
+ 0xf7, 0x06, 0x78, 0x56, 0x34, 0x12,
+ // all
+ 0x62, 0xf4, 0x84, 0x00, 0x85, 0xc8, 0x62, 0xf4, 0x8c, 0x01, 0x85, 0xc8, 0x62, 0xf4, 0x94, 0x02,
+ 0x85, 0xc8, 0x62, 0xf4, 0x9c, 0x03, 0x85, 0xc8, 0x62, 0xf4, 0xa4, 0x04, 0x85, 0xc8, 0x62, 0xf4,
+ 0xac, 0x05, 0x85, 0xc8, 0x62, 0xf4, 0xb4, 0x06, 0x85, 0xc8, 0x62, 0xf4, 0xbc, 0x07, 0x85, 0xc8,
+ 0x62, 0xf4, 0xc4, 0x08, 0x85, 0xc8, 0x62, 0xf4, 0xcc, 0x09, 0x85, 0xc8, 0x62, 0xf4, 0xd4, 0x0a,
+ 0x85, 0xc8, 0x62, 0xf4, 0xdc, 0x0b, 0x85, 0xc8, 0x62, 0xf4, 0xe4, 0x0c, 0x85, 0xc8, 0x62, 0xf4,
+ 0xec, 0x0d, 0x85, 0xc8, 0x62, 0xf4, 0xf4, 0x0e, 0x85, 0xc8, 0x62, 0xf4, 0xfc, 0x0f, 0x85, 0xc8,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(cfcmov)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ cfcmovb(r30w, r31w);
+ cfcmovb(r30d, r31d);
+ cfcmovb(r30, r31);
+ cfcmovb(ptr [r8+r20*4+0x3], r19w);
+ cfcmovb(ptr [r8+r20*4+0x3], r19d);
+ cfcmovb(ptr [r8+r20*4+0x3], r19);
+ cfcmovb(r30w, ptr [r9]);
+ cfcmovb(r30d, ptr [r9]);
+ cfcmovb(r30, ptr [r9]);
+ cfcmovb(r20w, r30w, r31w);
+ cfcmovb(r20d, r30d, r31d);
+ cfcmovb(r20, r30, r31);
+ cfcmovb(r20w, r30w, ptr [r9]);
+ cfcmovb(r20d, r30d, ptr [r9]);
+ cfcmovb(r20, r30, ptr [r9]);
+
+ // all
+ cfcmovo(r20, r21, r22);
+ cfcmovo(r20, r21, ptr [r22]);
+ cfcmovno(r20, r21, r22);
+ cfcmovno(r20, r21, ptr [r22]);
+ cfcmovb(r20, r21, r22);
+ cfcmovb(r20, r21, ptr [r22]);
+ cfcmovnb(r20, r21, r22);
+ cfcmovnb(r20, r21, ptr [r22]);
+ cfcmovz(r20, r21, r22);
+ cfcmovz(r20, r21, ptr [r22]);
+ cfcmovnz(r20, r21, r22);
+ cfcmovnz(r20, r21, ptr [r22]);
+ cfcmovbe(r20, r21, r22);
+ cfcmovbe(r20, r21, ptr [r22]);
+ cfcmovnbe(r20, r21, r22);
+ cfcmovnbe(r20, r21, ptr [r22]);
+ cfcmovs(r20, r21, r22);
+ cfcmovs(r20, r21, ptr [r22]);
+ cfcmovns(r20, r21, r22);
+ cfcmovns(r20, r21, ptr [r22]);
+ cfcmovp(r20, r21, r22);
+ cfcmovp(r20, r21, ptr [r22]);
+ cfcmovnp(r20, r21, r22);
+ cfcmovnp(r20, r21, ptr [r22]);
+ cfcmovl(r20, r21, r22);
+ cfcmovl(r20, r21, ptr [r22]);
+ cfcmovnl(r20, r21, r22);
+ cfcmovnl(r20, r21, ptr [r22]);
+ cfcmovle(r20, r21, r22);
+ cfcmovle(r20, r21, ptr [r22]);
+ cfcmovnle(r20, r21, r22);
+ cfcmovnle(r20, r21, ptr [r22]);
+
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x62, 0x4c, 0x7d, 0x0c, 0x42, 0xfe, 0x62, 0x4c, 0x7c, 0x0c, 0x42, 0xfe, 0x62, 0x4c, 0xfc, 0x0c,
+ 0x42, 0xfe, 0x62, 0xc4, 0x79, 0x0c, 0x42, 0x5c, 0xa0, 0x03, 0x62, 0xc4, 0x78, 0x0c, 0x42, 0x5c,
+ 0xa0, 0x03, 0x62, 0xc4, 0xf8, 0x0c, 0x42, 0x5c, 0xa0, 0x03, 0x62, 0x44, 0x7d, 0x08, 0x42, 0x31,
+ 0x62, 0x44, 0x7c, 0x08, 0x42, 0x31, 0x62, 0x44, 0xfc, 0x08, 0x42, 0x31, 0x62, 0x4c, 0x5d, 0x14,
+ 0x42, 0xf7, 0x62, 0x4c, 0x5c, 0x14, 0x42, 0xf7, 0x62, 0x4c, 0xdc, 0x14, 0x42, 0xf7, 0x62, 0x44,
+ 0x5d, 0x14, 0x42, 0x31, 0x62, 0x44, 0x5c, 0x14, 0x42, 0x31, 0x62, 0x44, 0xdc, 0x14, 0x42, 0x31,
+ // all
+ 0x62, 0xec, 0xdc, 0x14, 0x40, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x40, 0x2e, 0x62, 0xec, 0xdc, 0x14,
+ 0x41, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x41, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x42, 0xee, 0x62, 0xec,
+ 0xdc, 0x14, 0x42, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x43, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x43, 0x2e,
+ 0x62, 0xec, 0xdc, 0x14, 0x44, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x44, 0x2e, 0x62, 0xec, 0xdc, 0x14,
+ 0x45, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x45, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x46, 0xee, 0x62, 0xec,
+ 0xdc, 0x14, 0x46, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x47, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x47, 0x2e,
+ 0x62, 0xec, 0xdc, 0x14, 0x48, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x48, 0x2e, 0x62, 0xec, 0xdc, 0x14,
+ 0x49, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x49, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x4a, 0xee, 0x62, 0xec,
+ 0xdc, 0x14, 0x4a, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x4b, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x4b, 0x2e,
+ 0x62, 0xec, 0xdc, 0x14, 0x4c, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x4c, 0x2e, 0x62, 0xec, 0xdc, 0x14,
+ 0x4d, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x4d, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x4e, 0xee, 0x62, 0xec,
+ 0xdc, 0x14, 0x4e, 0x2e, 0x62, 0xec, 0xdc, 0x14, 0x4f, 0xee, 0x62, 0xec, 0xdc, 0x14, 0x4f, 0x2e,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(evex_misc)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ vmovaps(xmm31, ptr [r30+r26*8+0x40]);
+ vaddps(zmm30, zmm21, ptr [r20+r30*1]);
+ vcvtsd2si(r30d, ptr [r17+r31*4]);
+
+ test(ptr[r30], r31);
+ test(byte[r30], 0x12);
+ call(r20);
+ call(ptr[r20]);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x62, 0x09, 0x78, 0x08, 0x28, 0x7c, 0xd6, 0x04,
+ 0x62, 0x29, 0x50, 0x40, 0x58, 0x34, 0x34, 0x62,
+ 0x29, 0x7b, 0x08, 0x2d, 0x34, 0xb9,
+
+ 0xd5, 0x5d, 0x85, 0x3e,
+ 0xd5, 0x11, 0xf6, 0x06, 0x12,
+ 0xd5, 0x10, 0xff, 0xd4,
+ 0xd5, 0x10, 0xff, 0x14, 0x24,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(kmov)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ kmovb(k1, ptr [r20]);
+ kmovb(k2, r21d);
+ kmovb(ptr [r22], k3);
+ kmovb(r23d, k4);
+
+ kmovw(k1, ptr [r20]);
+ kmovw(k2, r21d);
+ kmovw(ptr [r22], k3);
+ kmovw(r23d, k4);
+
+ kmovd(k1, ptr [r20]);
+ kmovd(k2, r21d);
+ kmovd(ptr [r22], k3);
+ kmovd(r23d, k4);
+
+ kmovq(k1, ptr [r20]);
+ kmovq(k2, r21);
+ kmovq(ptr [r22], k3);
+ kmovq(r23, k4);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x62, 0xf9, 0x7d, 0x08, 0x90, 0x0c, 0x24, 0x62, 0xf9, 0x7d, 0x08, 0x92, 0xd5, 0x62, 0xf9, 0x7d,
+ 0x08, 0x91, 0x1e, 0x62, 0xe1, 0x7d, 0x08, 0x93, 0xfc, 0x62, 0xf9, 0x7c, 0x08, 0x90, 0x0c, 0x24,
+ 0x62, 0xf9, 0x7c, 0x08, 0x92, 0xd5, 0x62, 0xf9, 0x7c, 0x08, 0x91, 0x1e, 0x62, 0xe1, 0x7c, 0x08,
+ 0x93, 0xfc, 0x62, 0xf9, 0xfd, 0x08, 0x90, 0x0c, 0x24, 0x62, 0xf9, 0x7f, 0x08, 0x92, 0xd5, 0x62,
+ 0xf9, 0xfd, 0x08, 0x91, 0x1e, 0x62, 0xe1, 0x7f, 0x08, 0x93, 0xfc, 0x62, 0xf9, 0xfc, 0x08, 0x90,
+ 0x0c, 0x24, 0x62, 0xf9, 0xff, 0x08, 0x92, 0xd5, 0x62, 0xf9, 0xfc, 0x08, 0x91, 0x1e, 0x62, 0xe1,
+ 0xff, 0x08, 0x93, 0xfc,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(amx)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ ldtilecfg(ptr [r30+r29*4+0x12]);
+ sttilecfg(ptr [r30+r29*4+0x12]);
+ tileloadd(tmm1, ptr [r30+r29*4+0x12]);
+ tileloaddt1(tmm3, ptr [r30+r29*4+0x12]);
+ tilestored(ptr [r30+r29*4+0x12], tmm5);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x62, 0x9a, 0x78, 0x08, 0x49, 0x44, 0xae, 0x12, 0x62, 0x9a, 0x79, 0x08, 0x49, 0x44, 0xae, 0x12,
+ 0x62, 0x9a, 0x7b, 0x08, 0x4b, 0x4c, 0xae, 0x12, 0x62, 0x9a, 0x79, 0x08, 0x4b, 0x5c, 0xae, 0x12,
+ 0x62, 0x9a, 0x7a, 0x08, 0x4b, 0x6c, 0xae, 0x12,
+ };
+ const size_t n = sizeof(tbl);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
diff --git a/test/cvt_test.cpp b/test/cvt_test.cpp
index ba1917e..e5e9748 100644
--- a/test/cvt_test.cpp
+++ b/test/cvt_test.cpp
@@ -109,8 +109,8 @@ CYBOZU_TEST_AUTO(changeBit)
{ &dil, &di, &edi, &rdi, &xmm7, &ymm7, &zmm7 },
{ &r8b, &r8w, &r8d, &r8, &xmm8, &ymm8, &zmm8 },
{ &r15b, &r15w, &r15d, &r15, &xmm15, &ymm15, &zmm15 },
- { 0, 0, 0, 0, &xmm16, &ymm16, &zmm16 },
- { 0, 0, 0, 0, &xmm31, &ymm31, &zmm31 },
+ { &r16b, &r16w, &r16d, &r16, &xmm16, &ymm16, &zmm16 },
+ { &r31b, &r31w, &r31d, &r31, &xmm31, &ymm31, &zmm31 },
};
const int bitTbl[N] = { 8, 16, 32, 64, 128, 256, 512 };
#else
diff --git a/test/test_all.bat b/test/test_all.bat
index d6c244d..0bcb787 100644
--- a/test/test_all.bat
+++ b/test/test_all.bat
@@ -5,4 +5,10 @@ call test_address
call test_address 64
echo *** test jmp address ***
call test_jmp
+echo *** test misc ***
+set FILE=misc
+call test_misc
+echo *** test APX ***
+set FILE=apx
+call test_misc
echo *** all test end ***
diff --git a/test/test_misc.bat b/test/test_misc.bat
index cdb81d3..38cc97b 100644
--- a/test/test_misc.bat
+++ b/test/test_misc.bat
@@ -1,4 +1,4 @@
call set_opt
bmake -f Makefile.win all
-cl -I../ -I./ -DXBYAK_TEST misc.cpp %OPT% /Od /Zi
-misc
+cl -I../ -I./ -DXBYAK_TEST %FILE%.cpp %OPT% /Od /Zi
+%FILE%
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 01fddf7..181d6ed 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -227,6 +227,10 @@ enum {
ERR_X2APIC_IS_NOT_SUPPORTED,
ERR_NOT_SUPPORTED,
ERR_SAME_REGS_ARE_INVALID,
+ ERR_INVALID_NF,
+ ERR_INVALID_ZU,
+ ERR_CANT_USE_REX2,
+ ERR_INVALID_DFV,
ERR_INTERNAL // Put it at last.
};
@@ -280,6 +284,10 @@ inline const char *ConvertErrorToString(int err)
"x2APIC is not supported",
"not supported",
"same regs are invalid",
+ "invalid NF",
+ "invalid ZU",
+ "can't use rex2",
+ "invalid dfv",
"internal error"
};
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
@@ -386,13 +394,15 @@ inline size_t getPageSize()
#ifdef _WIN32
static const SystemInfo si;
return si.info.dwPageSize;
-#elif defined(__GNUC__)
+#else
+#ifdef __GNUC__
static const long pageSize = sysconf(_SC_PAGESIZE);
if (pageSize > 0) {
return (size_t)pageSize;
}
#endif
return 4096;
+#endif
}
inline bool IsInDisp8(uint32_t x) { return 0xFFFFFF80 <= x || x <= 0x7F; }
@@ -523,6 +533,15 @@ typedef Allocator MmapAllocator;
class Address;
class Reg;
+struct ApxFlagNF {};
+struct ApxFlagZU {};
+
+// dfv (default flags value) is or operation of these flags
+static const int T_of = 8;
+static const int T_sf = 4;
+static const int T_zf = 2;
+static const int T_cf = 1;
+
class Operand {
static const uint8_t EXT8BIT = 0x20;
unsigned int idx_:6; // 0..31 + EXT8BIT = 1 if spl/bpl/sil/dil
@@ -532,6 +551,8 @@ protected:
unsigned int zero_:1;
unsigned int mask_:3;
unsigned int rounding_:3;
+ unsigned int NF_:1;
+ unsigned int ZU_:1; // ND=ZU
void setIdx(int idx) { idx_ = idx; }
public:
enum Kind {
@@ -550,31 +571,37 @@ public:
enum Code {
#ifdef XBYAK64
RAX = 0, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
+ R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
R8D = 8, R9D, R10D, R11D, R12D, R13D, R14D, R15D,
+ R16D, R17D, R18D, R19D, R20D, R21D, R22D, R23D, R24D, R25D, R26D, R27D, R28D, R29D, R30D, R31D,
R8W = 8, R9W, R10W, R11W, R12W, R13W, R14W, R15W,
+ R16W, R17W, R18W, R19W, R20W, R21W, R22W, R23W, R24W, R25W, R26W, R27W, R28W, R29W, R30W, R31W,
R8B = 8, R9B, R10B, R11B, R12B, R13B, R14B, R15B,
+ R16B, R17B, R18B, R19B, R20B, R21B, R22B, R23B, R24B, R25B, R26B, R27B, R28B, R29B, R30B, R31B,
SPL = 4, BPL, SIL, DIL,
#endif
EAX = 0, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
AX = 0, CX, DX, BX, SP, BP, SI, DI,
AL = 0, CL, DL, BL, AH, CH, DH, BH
};
- XBYAK_CONSTEXPR Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0) { }
+ XBYAK_CONSTEXPR Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0), NF_(0), ZU_(0) { }
XBYAK_CONSTEXPR Operand(int idx, Kind kind, int bit, bool ext8bit = 0)
: idx_(static_cast<uint8_t>(idx | (ext8bit ? EXT8BIT : 0)))
, kind_(kind)
, bit_(bit)
- , zero_(0), mask_(0), rounding_(0)
+ , zero_(0), mask_(0), rounding_(0), NF_(0), ZU_(0)
{
assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two
}
XBYAK_CONSTEXPR Kind getKind() const { return static_cast<Kind>(kind_); }
XBYAK_CONSTEXPR int getIdx() const { return idx_ & (EXT8BIT - 1); }
+ XBYAK_CONSTEXPR bool hasIdxBit(int bit) const { return idx_ & (1<<bit); }
XBYAK_CONSTEXPR bool isNone() const { return kind_ == 0; }
XBYAK_CONSTEXPR bool isMMX() const { return is(MMX); }
XBYAK_CONSTEXPR bool isXMM() const { return is(XMM); }
XBYAK_CONSTEXPR bool isYMM() const { return is(YMM); }
XBYAK_CONSTEXPR bool isZMM() const { return is(ZMM); }
+ XBYAK_CONSTEXPR bool isSIMD() const { return is(XMM|YMM|ZMM); }
XBYAK_CONSTEXPR bool isTMM() const { return is(TMM); }
XBYAK_CONSTEXPR bool isXMEM() const { return is(XMM | MEM); }
XBYAK_CONSTEXPR bool isYMEM() const { return is(YMM | MEM); }
@@ -589,6 +616,9 @@ public:
XBYAK_CONSTEXPR bool isExtIdx2() const { return (getIdx() & 16) != 0; }
XBYAK_CONSTEXPR bool hasEvex() const { return isZMM() || isExtIdx2() || getOpmaskIdx() || getRounding(); }
XBYAK_CONSTEXPR bool hasRex() const { return isExt8bit() || isREG(64) || isExtIdx(); }
+ XBYAK_CONSTEXPR bool hasRex2() const;
+ XBYAK_CONSTEXPR bool hasRex2NF() const { return hasRex2() || NF_; }
+ XBYAK_CONSTEXPR bool hasRex2NFZU() const { return hasRex2() || NF_ || ZU_; }
XBYAK_CONSTEXPR bool hasZero() const { return zero_; }
XBYAK_CONSTEXPR int getOpmaskIdx() const { return mask_; }
XBYAK_CONSTEXPR int getRounding() const { return rounding_; }
@@ -611,6 +641,10 @@ public:
rounding_ = idx;
}
void setZero() { zero_ = true; }
+ void setNF() { NF_ = true; }
+ int getNF() const { return NF_; }
+ void setZU() { ZU_ = true; }
+ int getZU() const { return ZU_; }
// ah, ch, dh, bh?
bool isHigh8bit() const
{
@@ -634,11 +668,19 @@ public:
static const char *tbl[4] = { "spl", "bpl", "sil", "dil" };
return tbl[idx - 4];
}
- static const char *tbl[4][16] = {
- { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
- { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" },
- { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" },
- { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
+ static const char *tbl[4][32] = {
+ { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b",
+ "r16b", "r17b", "r18b", "r19b", "r20b", "r21b", "r22b", "r23b", "r24b", "r25b", "r26b", "r27b", "r28b", "r29b", "r30b", "r31b",
+ },
+ { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w",
+ "r16w", "r17w", "r18w", "r19w", "r20w", "r21w", "r22w", "r23w", "r24w", "r25w", "r26w", "r27w", "r28w", "r29w", "r30w", "r31w",
+ },
+ { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
+ "r16d", "r17d", "r18d", "r19d", "r20d", "r21d", "r22d", "r23d", "r24d", "r25d", "r26d", "r27d", "r28d", "r29d", "r30d", "r31d",
+ },
+ { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+ },
};
return tbl[bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 2 : 3][idx];
} else if (isOPMASK()) {
@@ -701,17 +743,21 @@ inline void Operand::setBit(int bit)
Kind kind = REG;
switch (bit) {
case 8:
- if (idx >= 16) goto ERR;
#ifdef XBYAK32
if (idx >= 4) goto ERR;
#else
+ if (idx >= 32) goto ERR;
if (4 <= idx && idx < 8) idx |= EXT8BIT;
#endif
break;
case 16:
case 32:
case 64:
+#ifdef XBYAK32
if (idx >= 16) goto ERR;
+#else
+ if (idx >= 32) goto ERR;
+#endif
break;
case 128: kind = XMM; break;
case 256: kind = YMM; break;
@@ -744,22 +790,14 @@ public:
XBYAK_CONSTEXPR Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { }
// convert to Reg8/Reg16/Reg32/Reg64/XMM/YMM/ZMM
Reg changeBit(int bit) const { Reg r(*this); r.setBit(bit); return r; }
- uint8_t getRexW() const { return isREG(64) ? 8 : 0; }
- uint8_t getRexR() const { return isExtIdx() ? 4 : 0; }
- uint8_t getRexX() const { return isExtIdx() ? 2 : 0; }
- uint8_t getRexB() const { return isExtIdx() ? 1 : 0; }
- uint8_t getRex(const Reg& base = Reg()) const
- {
- uint8_t rex = getRexW() | getRexR() | base.getRexW() | base.getRexB();
- if (rex || isExt8bit() || base.isExt8bit()) rex |= 0x40;
- return rex;
- }
Reg8 cvt8() const;
Reg16 cvt16() const;
Reg32 cvt32() const;
#ifdef XBYAK64
Reg64 cvt64() const;
#endif
+ Reg operator|(const ApxFlagNF&) const { Reg r(*this); r.setNF(); return r; }
+ Reg operator|(const ApxFlagZU&) const { Reg r(*this); r.setZU(); return r; }
};
inline const Reg& Operand::getReg() const
@@ -835,6 +873,8 @@ struct Fpu : public Reg {
struct Reg32e : public Reg {
explicit XBYAK_CONSTEXPR Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {}
+ Reg32e operator|(const ApxFlagNF&) const { Reg32e r(*this); r.setNF(); return r; }
+ Reg32e operator|(const ApxFlagZU&) const { Reg32e r(*this); r.setZU(); return r; }
};
struct Reg32 : public Reg32e {
explicit XBYAK_CONSTEXPR Reg32(int idx = 0) : Reg32e(idx, 32) {}
@@ -963,11 +1003,6 @@ public:
}
friend RegExp operator+(const RegExp& a, const RegExp& b);
friend RegExp operator-(const RegExp& e, size_t disp);
- uint8_t getRex() const
- {
- uint8_t rex = index_.getRexX() | base_.getRexB();
- return rex ? uint8_t(rex | 0x40) : 0;
- }
private:
/*
[base_ + index_ * scale_ + disp_]
@@ -1261,31 +1296,28 @@ public:
M_ripAddr
};
XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e)
- : Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast)
+ : Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast), optimize_(true)
{
e_.verify();
}
#ifdef XBYAK64
explicit XBYAK_CONSTEXPR Address(size_t disp)
- : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ }
+ : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false), optimize_(true) { }
XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegRip& addr)
- : Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { }
+ : Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast), optimize_(true) { }
#endif
- RegExp getRegExp(bool optimize = true) const
+ RegExp getRegExp() const
{
- return optimize ? e_.optimize() : e_;
+ return optimize_ ? e_.optimize() : e_;
}
+ Address cloneNoOptimize() const { Address addr = *this; addr.optimize_ = false; return addr; }
Mode getMode() const { return mode_; }
bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
size_t getDisp() const { return e_.getDisp(); }
- uint8_t getRex() const
- {
- if (mode_ != M_ModRM) return 0;
- return getRegExp().getRex();
- }
bool is64bitDisp() const { return mode_ == M_64bitDisp; } // for moffset
bool isBroadcast() const { return broadcast_; }
+ bool hasRex2() const { return e_.getBase().hasRex2() || e_.getIndex().hasRex2(); }
const Label* getLabel() const { return label_; }
bool operator==(const Address& rhs) const
{
@@ -1298,6 +1330,7 @@ private:
const Label* label_;
Mode mode_;
bool broadcast_;
+ bool optimize_;
};
inline const Address& Operand::getAddress() const
@@ -1312,6 +1345,11 @@ inline bool Operand::operator==(const Operand& rhs) const
return isEqualIfNotInherited(rhs);
}
+inline XBYAK_CONSTEXPR bool Operand::hasRex2() const
+{
+ return (isREG() && isExtIdx2()) || (isMEM() && static_cast<const Address&>(*this).hasRex2());
+}
+
class AddressFrame {
void operator=(const AddressFrame&);
AddressFrame(const AddressFrame&);
@@ -1683,72 +1721,120 @@ private:
// SSE instructions do not support XMM16 - XMM31
return !(op1.isXMM() && op1.getIdx() >= 16);
}
- void rex(const Operand& op1, const Operand& op2 = Operand())
+ static inline uint8_t rexRXB(int bit, int bit3, const Reg& r, const Reg& b, const Reg& x = Reg())
{
+ int v = bit3 ? 8 : 0;
+ if (r.hasIdxBit(bit)) v |= 4;
+ if (x.hasIdxBit(bit)) v |= 2;
+ if (b.hasIdxBit(bit)) v |= 1;
+ return uint8_t(v);
+ }
+ void rex2(int bit3, int rex4bit, const Reg& r, const Reg& b, const Reg& x = Reg())
+ {
+ db(0xD5);
+ db((rexRXB(4, bit3, r, b, x) << 4) | rex4bit);
+ }
+ void rex(const Operand& op1, const Operand& op2 = Operand(), uint64_t type = 0)
+ {
+ if (op1.getNF() | op2.getNF()) XBYAK_THROW(ERR_INVALID_NF)
+ if (op1.getZU() | op2.getZU()) XBYAK_THROW(ERR_INVALID_ZU)
uint8_t rex = 0;
const Operand *p1 = &op1, *p2 = &op2;
if (p1->isMEM()) std::swap(p1, p2);
if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
+ // except movsx(16bit, 32/64bit)
+ bool p66 = (op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e));
+ if ((type & T_66) || p66) db(0x66);
+ if (type & T_F2) {
+ db(0xF2);
+ }
+ if (type & T_F3) {
+ db(0xF3);
+ }
if (p2->isMEM()) {
+ const Reg& r = *static_cast<const Reg*>(p1);
const Address& addr = p2->getAddress();
+ const RegExp e = addr.getRegExp();
+ const Reg& base = e.getBase();
+ const Reg& idx = e.getIndex();
if (BIT == 64 && addr.is32bit()) db(0x67);
- rex = addr.getRex() | p1->getReg().getRex();
+ rex = rexRXB(3, r.isREG(64), r, base, idx);
+ if (r.hasRex2() || addr.hasRex2()) {
+ if (type & (T_0F|T_0F38|T_0F3A)) XBYAK_THROW(ERR_CANT_USE_REX2)
+ rex2(0, rex, r, base, idx);
+ return;
+ }
+ if (rex || r.isExt8bit()) rex |= 0x40;
} else {
+ const Reg& r1 = static_cast<const Reg&>(op1);
+ const Reg& r2 = static_cast<const Reg&>(op2);
// ModRM(reg, base);
- rex = op2.getReg().getRex(op1.getReg());
+ rex = rexRXB(3, r1.isREG(64) || r2.isREG(64), r2, r1);
+ if (r1.hasRex2() || r2.hasRex2()) {
+ if (type & (T_0F|T_0F38|T_0F3A)) XBYAK_THROW(ERR_CANT_USE_REX2)
+ rex2(0, rex, r2, r1);
+ return;
+ }
+ if (rex || r1.isExt8bit() || r2.isExt8bit()) rex |= 0x40;
}
- // except movsx(16bit, 32/64bit)
- if ((op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e))) db(0x66);
if (rex) db(rex);
}
- enum AVXtype {
+ // @@@begin of avx_type_def.h
+ static const uint64_t T_NONE = 0ull;
// low 3 bit
- T_N1 = 1,
- T_N2 = 2,
- T_N4 = 3,
- T_N8 = 4,
- T_N16 = 5,
- T_N32 = 6,
- T_NX_MASK = 7,
- //
- T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
- T_DUP = 1 << 4, // N = (8, 32, 64)
- T_66 = 1 << 5, // pp = 1
- T_F3 = 1 << 6, // pp = 2
- T_F2 = T_66 | T_F3, // pp = 3
- T_ER_R = 1 << 7, // reg{er}
- T_0F = 1 << 8,
- T_0F38 = 1 << 9,
- T_0F3A = 1 << 10,
- T_L0 = 1 << 11,
- T_L1 = 1 << 12,
- T_W0 = 1 << 13,
- T_W1 = 1 << 14,
- T_EW0 = 1 << 15,
- T_EW1 = 1 << 16,
- T_YMM = 1 << 17, // support YMM, ZMM
- T_EVEX = 1 << 18,
- T_ER_X = 1 << 19, // xmm{er}
- T_ER_Y = 1 << 20, // ymm{er}
- T_ER_Z = 1 << 21, // zmm{er}
- T_SAE_X = 1 << 22, // xmm{sae}
- T_SAE_Y = 1 << 23, // ymm{sae}
- T_SAE_Z = 1 << 24, // zmm{sae}
- T_MUST_EVEX = 1 << 25, // contains T_EVEX
- T_B32 = 1 << 26, // m32bcst
- T_B64 = 1 << 27, // m64bcst
- T_B16 = T_B32 | T_B64, // m16bcst (Be careful)
- T_M_K = 1 << 28, // mem{k}
- T_VSIB = 1 << 29,
- T_MEM_EVEX = 1 << 30, // use evex if mem
- T_FP16 = 1 << 31, // avx512-fp16
- T_MAP5 = T_FP16 | T_0F,
- T_MAP6 = T_FP16 | T_0F38,
- T_XXX
- };
+ static const uint64_t T_N1 = 1ull;
+ static const uint64_t T_N2 = 2ull;
+ static const uint64_t T_N4 = 3ull;
+ static const uint64_t T_N8 = 4ull;
+ static const uint64_t T_N16 = 5ull;
+ static const uint64_t T_N32 = 6ull;
+ static const uint64_t T_NX_MASK = 7ull;
+ static const uint64_t T_DUP = T_NX_MASK;//1 << 4, // N = (8, 32, 64)
+ static const uint64_t T_N_VL = 1ull << 3; // N * (1, 2, 4) for VL
+ static const uint64_t T_APX = 1ull << 4;
+ static const uint64_t T_66 = 1ull << 5; // pp = 1
+ static const uint64_t T_F3 = 1ull << 6; // pp = 2
+ static const uint64_t T_ER_R = 1ull << 7; // reg{er}
+ static const uint64_t T_0F = 1ull << 8;
+ static const uint64_t T_0F38 = 1ull << 9;
+ static const uint64_t T_0F3A = 1ull << 10;
+ static const uint64_t T_L0 = 1ull << 11;
+ static const uint64_t T_L1 = 1ull << 12;
+ static const uint64_t T_W0 = 1ull << 13;
+ static const uint64_t T_W1 = 1ull << 14;
+ static const uint64_t T_EW0 = 1ull << 15;
+ static const uint64_t T_EW1 = 1ull << 16;
+ static const uint64_t T_YMM = 1ull << 17; // support YMM, ZMM
+ static const uint64_t T_EVEX = 1ull << 18;
+ static const uint64_t T_ER_X = 1ull << 19; // xmm{er}
+ static const uint64_t T_ER_Y = 1ull << 20; // ymm{er}
+ static const uint64_t T_ER_Z = 1ull << 21; // zmm{er}
+ static const uint64_t T_SAE_X = 1ull << 22; // xmm{sae}
+ static const uint64_t T_SAE_Y = 1ull << 23; // ymm{sae}
+ static const uint64_t T_SAE_Z = 1ull << 24; // zmm{sae}
+ static const uint64_t T_MUST_EVEX = 1ull << 25; // contains T_EVEX
+ static const uint64_t T_B32 = 1ull << 26; // m32bcst
+ static const uint64_t T_B64 = 1ull << 27; // m64bcst
+ static const uint64_t T_B16 = T_B32 | T_B64; // m16bcst (Be careful)
+ static const uint64_t T_M_K = 1ull << 28; // mem{k}
+ static const uint64_t T_VSIB = 1ull << 29;
+ static const uint64_t T_MEM_EVEX = 1ull << 30; // use evex if mem
+ static const uint64_t T_FP16 = 1ull << 31; // avx512-fp16
+ static const uint64_t T_MAP5 = T_FP16 | T_0F;
+ static const uint64_t T_MAP6 = T_FP16 | T_0F38;
+ static const uint64_t T_NF = 1ull << 32; // T_nf
+ static const uint64_t T_CODE1_IF1 = 1ull << 33; // code|=1 if !r.isBit(8)
+ static const uint64_t T_MAP3 = 1ull << 34; // rorx only
+ static const uint64_t T_ND1 = 1ull << 35; // ND=1
+ static const uint64_t T_ZU = 1ull << 36; // ND=ZU
+ static const uint64_t T_F2 = 1ull << 37; // pp = 3
+ static const uint64_t T_MAP1 = 1ull << 38; // kmov
// T_66 = 1, T_F3 = 2, T_F2 = 3
- uint32_t getPP(int type) const { return (type >> 5) & 3; }
- void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
+ static inline uint32_t getPP(uint64_t type) { return (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; }
+ static inline uint32_t getMMM(uint64_t type) { return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; }
+
+ // @@@end of avx_type_def.h
+ void vex(const Reg& reg, const Reg& base, const Operand *v, uint64_t type, int code, bool x = false)
{
int w = (type & T_W1) ? 1 : 0;
bool is256 = (type & T_L1) ? true : (type & T_L0) ? false : reg.isYMM();
@@ -1761,17 +1847,17 @@ private:
if (!b && !x && !w && (type & T_0F)) {
db(0xC5); db((r ? 0 : 0x80) | vvvv);
} else {
- uint32_t mmmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
+ uint32_t mmmm = getMMM(type);
db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv);
}
db(code);
}
- void verifySAE(const Reg& r, int type) const
+ void verifySAE(const Reg& r, uint64_t type) const
{
if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return;
XBYAK_THROW(ERR_SAE_IS_INVALID)
}
- void verifyER(const Reg& r, int type) const
+ void verifyER(const Reg& r, uint64_t type) const
{
if ((type & T_ER_R) && r.isREG(32|64)) return;
if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
@@ -1784,20 +1870,22 @@ private:
if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) XBYAK_THROW_RET(err, 0)
return v;
}
- int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32_t VL = 0, bool Hi16Vidx = false)
+ int evex(const Reg& reg, const Reg& base, const Operand *v, uint64_t type, int code, const Reg *x = 0, bool b = false, int aaa = 0, uint32_t VL = 0, bool Hi16Vidx = false)
{
if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0)
int w = (type & T_EW1) ? 1 : 0;
- uint32_t mmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
+ uint32_t mmm = getMMM(type);
if (type & T_FP16) mmm |= 4;
uint32_t pp = getPP(type);
int idx = v ? v->getIdx() : 0;
uint32_t vvvv = ~idx;
- bool R = !reg.isExtIdx();
- bool X = x ? false : !base.isExtIdx2();
- bool B = !base.isExtIdx();
- bool Rp = !reg.isExtIdx2();
+ bool R = reg.isExtIdx();
+ bool X3 = (x && x->isExtIdx()) || (base.isSIMD() && base.isExtIdx2());
+ bool B4 = base.isREG() && base.isExtIdx2();
+ bool X4 = x && (x->isREG() && x->isExtIdx2());
+ bool B = base.isExtIdx();
+ bool Rp = reg.isExtIdx2();
int LL;
int rounding = verifyDuplicate(reg.getRounding(), base.getRounding(), v ? v->getRounding() : 0, ERR_ROUNDING_IS_ALREADY_SET);
int disp8N = 1;
@@ -1814,7 +1902,7 @@ private:
LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
if (b) {
disp8N = ((type & T_B16) == T_B16) ? 2 : (type & T_B32) ? 4 : 8;
- } else if (type & T_DUP) {
+ } else if ((type & T_NX_MASK) == T_DUP) {
disp8N = VL == 128 ? 8 : VL == 256 ? 32 : 64;
} else {
if ((type & (T_NX_MASK | T_N_VL)) == 0) {
@@ -1827,17 +1915,52 @@ private:
}
}
}
- bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
+ bool V4 = ((v ? v->isExtIdx2() : 0) || Hi16Vidx);
bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
if (aaa == 0) z = 0; // clear T_z if mask is not set
db(0x62);
- db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | mmm);
- db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));
- db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (Vp ? 8 : 0) | (aaa & 7));
+ db((R ? 0 : 0x80) | (X3 ? 0 : 0x40) | (B ? 0 : 0x20) | (Rp ? 0 : 0x10) | (B4 ? 8 : 0) | mmm);
+ db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | (X4 ? 0 : 4) | (pp & 3));
+ db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (V4 ? 0 : 8) | (aaa & 7));
db(code);
return disp8N;
}
+ static inline int getMap(uint64_t type)
+ {
+ if (type & T_MAP1) return 1;
+ if (type & T_MAP3) return 3;
+ if (type & (T_0F38|T_0F3A)) return 2;
+ return 4; // legacy
+ }
+ // evex of Legacy
+ void evexLeg(const Reg& r, const Reg& b, const Reg& x, const Reg& v, uint64_t type, int sc = NONE)
+ {
+ int M = getMap(type);
+ int R3 = !r.isExtIdx();
+ int X3 = !x.isExtIdx();
+ int B3 = b.isExtIdx() ? 0 : 0x20;
+ int R4 = r.isExtIdx2() ? 0 : 0x10;
+ int B4 = b.isExtIdx2() ? 0x08 : 0;
+ int w = (type & T_W0) ? 0 : (r.isBit(64) || v.isBit(64) || (type & T_W1));
+ int V = (~v.getIdx() & 15) << 3;
+ int X4 = x.isExtIdx2() ? 0 : 0x04;
+ int pp = (type & (T_F2|T_F3|T_66)) ? getPP(type) : (r.isBit(16) || v.isBit(16));
+ int V4 = !v.isExtIdx2();
+ int ND = (type & T_ZU) ? (r.getZU() || b.getZU()) : (type & T_ND1) ? 1 : (type & T_APX) ? 0 : v.isREG();
+ int NF = r.getNF() | b.getNF() | x.getNF() | v.getNF();
+ int L = 0;
+ if ((type & T_NF) == 0 && NF) XBYAK_THROW(ERR_INVALID_NF)
+ if ((type & T_ZU) == 0 && r.getZU()) XBYAK_THROW(ERR_INVALID_ZU)
+ db(0x62);
+ db((R3<<7) | (X3<<6) | B3 | R4 | B4 | M);
+ db((w<<7) | V | X4 | pp);
+ if (sc != NONE) {
+ db((L<<5) | (ND<<4) | sc);
+ } else {
+ db((L<<5) | (ND<<4) | (V4<<3) | (NF<<2));
+ }
+ }
void setModRM(int mod, int r1, int r2)
{
db(static_cast<uint8_t>((mod << 6) | ((r1 & 7) << 3) | (r2 & 7)));
@@ -1906,37 +2029,47 @@ private:
}
LabelManager labelMgr_;
bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; }
- void opModR(const Reg& reg1, const Reg& reg2, int code0, int code1 = NONE, int code2 = NONE)
+ void writeCode(uint64_t type, const Reg& r, int code)
+ {
+ if (!(type & T_APX)) {
+ if (type & T_0F) {
+ db(0x0F);
+ } else if (type & T_0F38) {
+ db(0x0F); db(0x38);
+ } else if (type & T_0F3A) {
+ db(0x0F); db(0x3A);
+ }
+ }
+ db(code | ((type == 0 || (type & T_CODE1_IF1)) && !r.isBit(8)));
+ }
+ void opRR(const Reg& reg1, const Reg& reg2, uint64_t type, int code)
{
- rex(reg2, reg1);
- db(code0 | (reg1.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
+ rex(reg2, reg1, type);
+ writeCode(type, reg1, code);
setModRM(3, reg1.getIdx(), reg2.getIdx());
}
- void opModM(const Address& addr, const Reg& reg, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
+ void opMR(const Address& addr, const Reg& r, uint64_t type, int code, int immSize = 0)
{
if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
- rex(addr, reg);
- db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
- opAddr(addr, reg.getIdx(), immSize);
+ rex(addr, r, type);
+ writeCode(type, r, code);
+ opAddr(addr, r.getIdx(), immSize);
}
- void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE)
+ void opLoadSeg(const Address& addr, const Reg& reg, uint64_t type, int code)
{
- if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
- rex(addr, reg);
- db(code0); if (code1 != NONE) db(code1);
+ if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
+ // can't use opMR
+ rex(addr, reg, type);
+ if (type & T_0F) db(0x0F);
+ db(code);
opAddr(addr, reg.getIdx());
}
- void opMIB(const Address& addr, const Reg& reg, int code0, int code1)
+ // for only MPX(bnd*)
+ void opMIB(const Address& addr, const Reg& reg, uint64_t type, int code)
{
- if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
if (addr.getMode() != Address::M_ModRM) XBYAK_THROW(ERR_INVALID_MIB_ADDRESS)
- if (BIT == 64 && addr.is32bit()) db(0x67);
- const RegExp& regExp = addr.getRegExp(false);
- uint8_t rex = regExp.getRex();
- if (rex) db(rex);
- db(code0); db(code1);
- setSIB(regExp, reg.getIdx());
+ opMR(addr.cloneNoOptimize(), reg, type, code);
}
void makeJmp(uint32_t disp, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
{
@@ -1994,9 +2127,9 @@ private:
const int bit = 16|i32e;
if (type == T_FAR) {
if (!op.isMEM(bit)) XBYAK_THROW(ERR_NOT_SUPPORTED)
- opR_ModM(op, bit, ext + 1, 0xFF, NONE, NONE, false);
+ opRext(op, bit, ext + 1, 0, 0xFF, false);
} else {
- opR_ModM(op, bit, ext, 0xFF, NONE, NONE, true);
+ opRext(op, bit, ext, 0, 0xFF, true);
}
}
// reg is reg field of ModRM
@@ -2021,135 +2154,192 @@ private:
}
}
}
- /* preCode is for SSSE3/SSE4 */
- void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE)
+ void opSSE(const Reg& r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&), int imm8 = NONE)
{
- if (isValid && !isValid(reg, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
- if (!isValidSSE(reg) || !isValidSSE(op)) XBYAK_THROW(ERR_NOT_SUPPORTED)
- if (pref != NONE) db(pref);
- if (op.isMEM()) {
- opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, (imm8 != NONE) ? 1 : 0);
- } else {
- opModR(reg.getReg(), op.getReg(), 0x0F, preCode, code);
- }
+ if (isValid && !isValid(r, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
+ if (!isValidSSE(r) || !isValidSSE(op)) XBYAK_THROW(ERR_NOT_SUPPORTED)
+ opRO(r, op, type, code, true, (imm8 != NONE) ? 1 : 0);
if (imm8 != NONE) db(imm8);
}
void opMMX_IMM(const Mmx& mmx, int imm8, int code, int ext)
{
if (!isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED)
- if (mmx.isXMM()) db(0x66);
- opModR(Reg32(ext), mmx, 0x0F, code);
+ uint64_t type = T_0F;
+ if (mmx.isXMM()) type |= T_66;
+ opRR(Reg32(ext), mmx, type, code);
db(imm8);
}
- void opMMX(const Mmx& mmx, const Operand& op, int code, int pref = 0x66, int imm8 = NONE, int preCode = NONE)
+ void opMMX(const Mmx& mmx, const Operand& op, int code, uint64_t type = T_0F, uint64_t pref = T_66, int imm8 = NONE)
{
- opGen(mmx, op, code, mmx.isXMM() ? pref : NONE, isXMMorMMX_MEM, imm8, preCode);
+ if (mmx.isXMM()) type |= pref;
+ opSSE(mmx, op, type, code, isXMMorMMX_MEM, imm8);
}
- void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref)
+ void opMovXMM(const Operand& op1, const Operand& op2, uint64_t type, int code)
{
if (!isValidSSE(op1) || !isValidSSE(op2)) XBYAK_THROW(ERR_NOT_SUPPORTED)
- if (pref != NONE) db(pref);
if (op1.isXMM() && op2.isMEM()) {
- opModM(op2.getAddress(), op1.getReg(), 0x0F, code);
+ opMR(op2.getAddress(), op1.getReg(), type, code);
} else if (op1.isMEM() && op2.isXMM()) {
- opModM(op1.getAddress(), op2.getReg(), 0x0F, code | 1);
+ opMR(op1.getAddress(), op2.getReg(), type, code | 1);
} else {
XBYAK_THROW(ERR_BAD_COMBINATION)
}
}
+ // pextr{w,b,d}, extractps
void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false)
{
if (!isValidSSE(op) || !isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED)
if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */
if (mmx.isXMM()) db(0x66);
- opModR(op.getReg(), mmx, 0x0F, 0xC5); db(imm);
+ opRR(op.getReg(), mmx, T_0F, 0xC5); db(imm);
+ } else {
+ opSSE(mmx, op, T_66 | T_0F3A, code, isXMM_REG32orMEM, imm);
+ }
+ }
+ // (r, r, m) or (r, m, r)
+ bool opROO(const Reg& d, const Operand& op1, const Operand& op2, uint64_t type, int code, int immSize = 0, int sc = NONE)
+ {
+ if (!(type & T_MUST_EVEX) && !d.isREG() && !(d.hasRex2NFZU() || op1.hasRex2NFZU() || op2.hasRex2NFZU())) return false;
+ const Operand *p1 = &op1, *p2 = &op2;
+ if (p1->isMEM()) { std::swap(p1, p2); } else { if (p2->isMEM()) code |= 2; }
+ if (p1->isMEM()) XBYAK_THROW_RET(ERR_BAD_COMBINATION, false)
+ if (p2->isMEM()) {
+ const Reg& r = *static_cast<const Reg*>(p1);
+ const Address& addr = p2->getAddress();
+ const RegExp e = addr.getRegExp();
+ evexLeg(r, e.getBase(), e.getIndex(), d, type, sc);
+ writeCode(type, d, code);
+ opAddr(addr, r.getIdx(), immSize);
} else {
- opGen(mmx, op, code, 0x66, isXMM_REG32orMEM, imm, 0x3A);
+ evexLeg(static_cast<const Reg&>(op2), static_cast<const Reg&>(op1), Reg(), d, type, sc);
+ writeCode(type, d, code);
+ setModRM(3, op2.getIdx(), op1.getIdx());
}
+ return true;
}
- void opR_ModM(const Operand& op, int bit, int ext, int code0, int code1 = NONE, int code2 = NONE, bool disableRex = false, int immSize = 0)
+ void opRext(const Operand& op, int bit, int ext, uint64_t type, int code, bool disableRex = false, int immSize = 0, const Reg *d = 0)
{
int opBit = op.getBit();
if (disableRex && opBit == 64) opBit = 32;
- if (op.isREG(bit)) {
- opModR(Reg(ext, Operand::REG, opBit), op.getReg().changeBit(opBit), code0, code1, code2);
- } else if (op.isMEM()) {
- opModM(op.getAddress(), Reg(ext, Operand::REG, opBit), code0, code1, code2, immSize);
+ const Reg r(ext, Operand::REG, opBit);
+ if ((type & T_APX) && op.hasRex2NFZU() && opROO(d ? *d : Reg(0, Operand::REG, opBit), op, r, type, code)) return;
+ if (op.isMEM()) {
+ opMR(op.getAddress(), r, type, code, immSize);
+ } else if (op.isREG(bit)) {
+ opRR(r, op.getReg().changeBit(opBit), type, code);
} else {
XBYAK_THROW(ERR_BAD_COMBINATION)
}
}
- void opShift(const Operand& op, int imm, int ext)
+ void opShift(const Operand& op, int imm, int ext, const Reg *d = 0)
{
- verifyMemHasSize(op);
- opR_ModM(op, 0, ext, (0xC0 | ((imm == 1 ? 1 : 0) << 4)), NONE, NONE, false, (imm != 1) ? 1 : 0);
+ if (d == 0) verifyMemHasSize(op);
+ if (d && op.getBit() != 0 && d->getBit() != op.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
+ uint64_t type = T_APX|T_CODE1_IF1; if (ext & 8) type |= T_NF; if (d) type |= T_ND1;
+ opRext(op, 0, ext&7, type, (0xC0 | ((imm == 1 ? 1 : 0) << 4)), false, (imm != 1) ? 1 : 0, d);
if (imm != 1) db(imm);
}
- void opShift(const Operand& op, const Reg8& _cl, int ext)
+ void opShift(const Operand& op, const Reg8& _cl, int ext, const Reg *d = 0)
{
if (_cl.getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION)
- opR_ModM(op, 0, ext, 0xD2);
+ if (d && op.getBit() != 0 && d->getBit() != op.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
+ uint64_t type = T_APX|T_CODE1_IF1; if (ext & 8) type |= T_NF; if (d) type |= T_ND1;
+ opRext(op, 0, ext&7, type, 0xD2, false, 0, d);
}
- void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
+ // condR assumes that op.isREG() is true
+ void opRO(const Reg& r, const Operand& op, uint64_t type, int code, bool condR = true, int immSize = 0)
{
- if (condR) {
- opModR(op1.getReg(), op2.getReg(), code0, code1, code2);
- } else if (condM) {
- opModM(op2.getAddress(), op1.getReg(), code0, code1, code2, immSize);
+ if (op.isMEM()) {
+ opMR(op.getAddress(), r, type, code, immSize);
+ } else if (condR) {
+ opRR(r, op.getReg(), type, code);
} else {
XBYAK_THROW(ERR_BAD_COMBINATION)
}
}
- void opShxd(const Operand& op, const Reg& reg, uint8_t imm, int code, const Reg8 *_cl = 0)
+ void opShxd(const Reg& d, const Operand& op, const Reg& reg, uint8_t imm, int code, int code2, const Reg8 *_cl = 0)
{
if (_cl && _cl->getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION)
- opModRM(reg, op, (op.isREG(16 | i32e) && op.getBit() == reg.getBit()), op.isMEM() && (reg.isREG(16 | i32e)), 0x0F, code | (_cl ? 1 : 0), NONE, _cl ? 0 : 1);
+ if (!reg.isREG(16|i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
+ int immSize = _cl ? 0 : 1;
+ if (_cl) code |= 1;
+ uint64_t type = T_APX | T_NF;
+ if (d.isREG()) type |= T_ND1;
+ if (!opROO(d, op, reg, type, _cl ? code : code2, immSize)) {
+ opRO(reg, op, T_0F, code, true, immSize);
+ }
if (!_cl) db(imm);
}
// (REG, REG|MEM), (MEM, REG)
- void opRM_RM(const Operand& op1, const Operand& op2, int code)
+ void opRO_MR(const Operand& op1, const Operand& op2, int code)
{
- if (op1.isREG() && op2.isMEM()) {
- opModM(op2.getAddress(), op1.getReg(), code | 2);
+ if (op2.isMEM()) {
+ if (!op1.isREG()) XBYAK_THROW(ERR_BAD_COMBINATION)
+ opMR(op2.getAddress(), op1.getReg(), 0, code | 2);
} else {
- opModRM(op2, op1, op1.isREG() && op1.getKind() == op2.getKind(), op1.isMEM() && op2.isREG(), code);
+ opRO(static_cast<const Reg&>(op2), op1, 0, code, op1.getKind() == op2.getKind());
}
}
- // (REG|MEM, IMM)
- void opRM_I(const Operand& op, uint32_t imm, int code, int ext)
+ uint32_t getImmBit(const Operand& op, uint32_t imm)
{
verifyMemHasSize(op);
uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
if (op.isBit(8)) immBit = 8;
- if (op.getBit() < immBit) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
+ if (op.getBit() < immBit) XBYAK_THROW_RET(ERR_IMM_IS_TOO_BIG, 0)
if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */
+ return immBit;
+ }
+ // (REG|MEM, IMM)
+ void opOI(const Operand& op, uint32_t imm, int code, int ext)
+ {
+ uint32_t immBit = getImmBit(op, imm);
if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al
rex(op);
db(code | 4 | (immBit == 8 ? 0 : 1));
} else {
int tmp = immBit < (std::min)(op.getBit(), 32U) ? 2 : 0;
- opR_ModM(op, 0, ext, 0x80 | tmp, NONE, NONE, false, immBit / 8);
+ opRext(op, 0, ext, 0, 0x80 | tmp, false, immBit / 8);
}
db(imm, immBit / 8);
}
- void opIncDec(const Operand& op, int code, int ext)
+ // (r, r/m, imm)
+ void opROI(const Reg& d, const Operand& op, uint32_t imm, uint64_t type, int ext)
+ {
+ uint32_t immBit = getImmBit(d, imm);
+ int code = immBit < (std::min)(d.getBit(), 32U) ? 2 : 0;
+ opROO(d, op, Reg(ext, Operand::REG, d.getBit()), type, 0x80 | code, immBit / 8);
+ db(imm, immBit / 8);
+ }
+ void opIncDec(const Reg& d, const Operand& op, int ext)
{
+#ifdef XBYAK64
+ if (d.isREG()) {
+ int code = d.isBit(8) ? 0xFE : 0xFF;
+ uint64_t type = T_APX|T_NF|T_ND1;
+ if (d.isBit(16)) type |= T_66;
+ opROO(d, op, Reg(ext, Operand::REG, d.getBit()), type, code);
+ return;
+ }
+#else
+ (void)d;
+#endif
verifyMemHasSize(op);
#ifndef XBYAK64
if (op.isREG() && !op.isBit(8)) {
- rex(op); db(code | op.getIdx());
+ rex(op); db((ext ? 0x48 : 0x40) | op.getIdx());
return;
}
#endif
- code = 0xFE;
- if (op.isREG()) {
- opModR(Reg(ext, Operand::REG, op.getBit()), op.getReg(), code);
- } else {
- opModM(op.getAddress(), Reg(ext, Operand::REG, op.getBit()), code);
- }
+ opRext(op, op.getBit(), ext, 0, 0xFE);
}
void opPushPop(const Operand& op, int code, int ext, int alt)
{
+ if (op.isREG() && op.hasRex2()) {
+ const Reg& r = static_cast<const Reg&>(op);
+ rex2(0, rexRXB(3, 0, Reg(), r), Reg(), r);
+ db(alt);
+ return;
+ }
int bit = op.getBit();
if (bit == 16 || bit == BIT) {
if (bit == 16) db(0x66);
@@ -2159,7 +2349,7 @@ private:
return;
}
if (op.isMEM()) {
- opModM(op.getAddress(), Reg(ext, Operand::REG, 32), code);
+ opMR(op.getAddress(), Reg(ext, Operand::REG, 32), 0, code);
return;
}
}
@@ -2216,8 +2406,8 @@ private:
{
if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION)
int w = op.isBit(16);
- bool cond = reg.isREG() && (reg.getBit() > op.getBit());
- opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w);
+ if (!(reg.isREG() && (reg.getBit() > op.getBit()))) XBYAK_THROW(ERR_BAD_COMBINATION)
+ opRO(reg, op, T_0F, code | w);
}
void opFpuMem(const Address& addr, uint8_t m16, uint8_t m32, uint8_t m64, uint8_t ext, uint8_t m64ext)
{
@@ -2225,7 +2415,6 @@ private:
uint8_t code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0;
if (!code) XBYAK_THROW(ERR_BAD_MEM_SIZE)
if (m64ext && addr.isBit(64)) ext = m64ext;
-
rex(addr, st0);
db(code);
opAddr(addr, ext);
@@ -2243,7 +2432,7 @@ private:
{
db(code1); db(code2 | reg.getIdx());
}
- void opVex(const Reg& r, const Operand *p1, const Operand& op2, int type, int code, int imm8 = NONE)
+ void opVex(const Reg& r, const Operand *p1, const Operand& op2, uint64_t type, int code, int imm8 = NONE)
{
if (op2.isMEM()) {
const Address& addr = op2.getAddress();
@@ -2252,8 +2441,7 @@ private:
const Reg& index = regExp.getIndex();
if (BIT == 64 && addr.is32bit()) db(0x67);
int disp8N = 0;
- bool x = index.isExtIdx();
- if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
+ if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx() || addr.hasRex2()) {
int aaa = addr.getOpmaskIdx();
if (aaa && !(type & T_M_K)) XBYAK_THROW(ERR_INVALID_OPMASK_WITH_MEMORY)
bool b = false;
@@ -2262,9 +2450,9 @@ private:
b = true;
}
int VL = regExp.isVsib() ? index.getBit() : 0;
- disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2());
+ disp8N = evex(r, base, p1, type, code, &index, b, aaa, VL, index.isSIMD() && index.isExtIdx2());
} else {
- vex(r, base, p1, type, code, x);
+ vex(r, base, p1, type, code, index.isExtIdx());
}
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0);
} else {
@@ -2278,19 +2466,21 @@ private:
}
if (imm8 != NONE) db(imm8);
}
- // (r, r, r/m) if isR_R_RM
- // (r, r/m, r)
- void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8_t code, bool isR_R_RM, int imm8 = NONE)
+ // (r, r, r/m)
+ // opRRO(a, b, c) == opROO(b, c, a)
+ void opRRO(const Reg& d, const Reg& r1, const Operand& op2, uint64_t type, uint8_t code, int imm8 = NONE)
{
- const Operand *p1 = &op1;
- const Operand *p2 = &op2;
- if (!isR_R_RM) std::swap(p1, p2);
- const unsigned int bit = r.getBit();
- if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) XBYAK_THROW(ERR_BAD_COMBINATION)
+ const unsigned int bit = d.getBit();
+ if (r1.getBit() != bit || (op2.isREG() && op2.getBit() != bit)) XBYAK_THROW(ERR_BAD_COMBINATION)
type |= (bit == 64) ? T_W1 : T_W0;
- opVex(r, p1, *p2, type, code, imm8);
+ if (d.hasRex2() || r1.hasRex2() || op2.hasRex2() || d.getNF()) {
+ opROO(r1, op2, d, type, code);
+ if (imm8 != NONE) db(imm8);
+ } else {
+ opVex(d, &r1, op2, type, code, imm8);
+ }
}
- void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, int type, int code0, int imm8 = NONE)
+ void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, uint64_t type, int code, int imm8 = NONE)
{
const Xmm *x2 = static_cast<const Xmm*>(&op1);
const Operand *op = &op2;
@@ -2300,12 +2490,12 @@ private:
}
// (x1, x2, op)
if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) XBYAK_THROW(ERR_BAD_COMBINATION)
- opVex(x1, x2, *op, type, code0, imm8);
+ opVex(x1, x2, *op, type, code, imm8);
}
- void opAVX_K_X_XM(const Opmask& k, const Xmm& x2, const Operand& op3, int type, int code0, int imm8 = NONE)
+ void opAVX_K_X_XM(const Opmask& k, const Xmm& x2, const Operand& op3, uint64_t type, int code, int imm8 = NONE)
{
if (!op3.isMEM() && (x2.getKind() != op3.getKind())) XBYAK_THROW(ERR_BAD_COMBINATION)
- opVex(k, &x2, op3, type, code0, imm8);
+ opVex(k, &x2, op3, type, code, imm8);
}
// (x, x/m), (y, x/m256), (z, y/m)
void checkCvt1(const Operand& x, const Operand& op) const
@@ -2317,17 +2507,17 @@ private:
{
if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
}
- void opCvt(const Xmm& x, const Operand& op, int type, int code)
+ void opCvt(const Xmm& x, const Operand& op, uint64_t type, int code)
{
Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM;
opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
}
- void opCvt2(const Xmm& x, const Operand& op, int type, int code)
+ void opCvt2(const Xmm& x, const Operand& op, uint64_t type, int code)
{
checkCvt2(x, op);
opCvt(x, op, type, code);
}
- void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8_t code)
+ void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, uint64_t type64, uint64_t type32, uint8_t code)
{
if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
Xmm x(op.getIdx());
@@ -2340,7 +2530,7 @@ private:
if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM) && op.isBit(128|256)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
}
// (x, x/y/z/xword/yword/zword)
- void opCvt5(const Xmm& x, const Operand& op, int type, int code)
+ void opCvt5(const Xmm& x, const Operand& op, uint64_t type, int code)
{
if (!(x.isXMM() && op.isBit(128|256|512))) XBYAK_THROW(ERR_BAD_COMBINATION)
Operand::Kind kind = op.isBit(128) ? Operand::XMM : op.isBit(256) ? Operand::YMM : Operand::ZMM;
@@ -2351,20 +2541,19 @@ private:
return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
}
// support (x, x/m, imm), (y, y/m, imm)
- void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, int type, int code, int imm8 = NONE)
+ void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, uint64_t type, int code, int imm8 = NONE)
{
opAVX_X_X_XM(x, cvtIdx0(x), op, type, code, imm8);
}
- // QQQ:need to refactor
- void opSp1(const Reg& reg, const Operand& op, uint8_t pref, uint8_t code0, uint8_t code1)
+ void opCnt(const Reg& reg, const Operand& op, uint8_t code)
{
if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM());
if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
if (is16bit) db(0x66);
- db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1);
+ opRO(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, T_F3 | T_0F, code);
}
- void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8_t code, int mode)
+ void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, uint64_t type, uint8_t code, int mode)
{
const RegExp& regExp = addr.getRegExp();
if (!regExp.isVsib(128 | 256)) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
@@ -2407,7 +2596,7 @@ private:
}
XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
}
- void opGather2(const Xmm& x, const Address& addr, int type, uint8_t code, int mode)
+ void opGather2(const Xmm& x, const Address& addr, uint64_t type, uint8_t code, int mode)
{
if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
const RegExp& regExp = addr.getRegExp();
@@ -2422,7 +2611,7 @@ private:
xx_xy_yz ; mode = true
xx_xy_xz ; mode = false
*/
- void opVmov(const Operand& op, const Xmm& x, int type, uint8_t code, bool mode)
+ void opVmov(const Operand& op, const Xmm& x, uint64_t type, uint8_t code, bool mode)
{
if (mode) {
if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
@@ -2431,15 +2620,15 @@ private:
}
opVex(x, 0, op, type, code);
}
- void opGatherFetch(const Address& addr, const Xmm& x, int type, uint8_t code, Operand::Kind kind)
+ void opGatherFetch(const Address& addr, const Xmm& x, uint64_t type, uint8_t code, Operand::Kind kind)
{
if (addr.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
opVex(x, 0, addr, type, code);
}
- void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int code0, PreferredEncoding encoding)
+ void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding encoding)
{
- opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding), code0);
+ opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding), code);
}
int orEvexIf(PreferredEncoding encoding) {
if (encoding == DefaultEncoding) {
@@ -2475,13 +2664,52 @@ private:
}
XBYAK_THROW(ERR_BAD_COMBINATION)
}
+ void opCcmp(const Operand& op1, const Operand& op2, int dfv, int code, int sc) // cmp = 0x38, test = 0x84
+ {
+ if (dfv < 0 || 15 < dfv) XBYAK_THROW(ERR_INVALID_DFV)
+ opROO(Reg(15 - dfv, Operand::REG, (op1.getBit() | op2.getBit())), op1, op2, T_APX|T_CODE1_IF1, code, 0, sc);
+ }
+ void opCcmpi(const Operand& op, int imm, int dfv, int sc)
+ {
+ if (dfv < 0 || 15 < dfv) XBYAK_THROW(ERR_INVALID_DFV)
+ uint32_t immBit = getImmBit(op, imm);
+ uint32_t opBit = op.getBit();
+ int tmp = immBit < (std::min)(opBit, 32U) ? 2 : 0;
+ opROO(Reg(15 - dfv, Operand::REG, opBit), op, Reg(15, Operand::REG, opBit), T_APX|T_CODE1_IF1, 0x80 | tmp, immBit / 8, sc);
+ db(imm, immBit / 8);
+ }
+ void opTesti(const Operand& op, int imm, int dfv, int sc)
+ {
+ if (dfv < 0 || 15 < dfv) XBYAK_THROW(ERR_INVALID_DFV)
+ uint32_t opBit = op.getBit();
+ if (opBit == 0) XBYAK_THROW(ERR_MEM_SIZE_IS_NOT_SPECIFIED);
+ int immBit = (std::min)(opBit, 32U);
+ opROO(Reg(15 - dfv, Operand::REG, opBit), op, Reg(0, Operand::REG, opBit), T_APX|T_CODE1_IF1, 0xF6, immBit / 8, sc);
+ db(imm, immBit / 8);
+ }
+ void opCfcmov(const Reg& d, const Operand& op1, const Operand& op2, int code)
+ {
+ const int dBit = d.getBit();
+ const int op2Bit = op2.getBit();
+ if (dBit > 0 && op2Bit > 0 && dBit != op2Bit) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
+ if (op1.isBit(8) || op2Bit == 8) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
+ if (op2.isMEM()) {
+ if (op1.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
+ uint64_t type = dBit > 0 ? (T_MUST_EVEX|T_NF) : T_MUST_EVEX;
+ opROO(d, op2, op1, type, code);
+ } else {
+ opROO(d, op1, static_cast<const Reg&>(op2)|T_nf, T_MUST_EVEX|T_NF, code);
+ }
+ }
#ifdef XBYAK64
- void opAMX(const Tmm& t1, const Address& addr, int type, int code0)
+ void opAMX(const Tmm& t1, const Address& addr, uint64_t type, int code)
{
// require both base and index
- const RegExp exp = addr.getRegExp(false);
+ Address addr2 = addr.cloneNoOptimize();
+ const RegExp exp = addr2.getRegExp();
if (exp.getBase().getBit() == 0 || exp.getIndex().getBit() == 0) XBYAK_THROW(ERR_NOT_SUPPORTED)
- opVex(t1, &tmm0, addr, type, code0);
+ if (opROO(Reg(), addr2, t1, T_APX|type, code)) return;
+ opVex(t1, &tmm0, addr2, type, code);
}
#endif
public:
@@ -2504,11 +2732,17 @@ public:
const BoundsReg bnd0, bnd1, bnd2, bnd3;
const EvexModifierRounding T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_sae; // {sae}, {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae}
const EvexModifierZero T_z; // {z}
+ const ApxFlagNF T_nf;
+ const ApxFlagZU T_zu;
#ifdef XBYAK64
const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
+ const Reg64 r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31;
const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d;
+ const Reg32 r16d, r17d, r18d, r19d, r20d, r21d, r22d, r23d, r24d, r25d, r26d, r27d, r28d, r29d, r30d, r31d;
const Reg16 r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w;
+ const Reg16 r16w, r17w, r18w, r19w, r20w, r21w, r22w, r23w, r24w, r25w, r26w, r27w, r28w, r29w, r30w, r31w;
const Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b;
+ const Reg8 r16b, r17b, r18b, r19b, r20b, r21b, r22b, r23b, r24b, r25b, r26b, r27b, r28b, r29b, r30b, r31b;
const Reg8 spl, bpl, sil, dil;
const Xmm xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
const Xmm xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23;
@@ -2579,29 +2813,28 @@ public:
void test(const Operand& op, const Reg& reg)
{
- opModRM(reg, op, op.isREG() && (op.getKind() == reg.getKind()), op.isMEM(), 0x84);
+ opRO(reg, op, 0, 0x84, op.getKind() == reg.getKind());
}
void test(const Operand& op, uint32_t imm)
{
verifyMemHasSize(op);
- int immSize = (std::min)(op.getBit() / 8, 4U);
+ int immSize = (std::min)(op.getBit() / 8, 4U);
if (op.isREG() && op.getIdx() == 0) { // al, ax, eax
rex(op);
db(0xA8 | (op.isBit(8) ? 0 : 1));
} else {
- opR_ModM(op, 0, 0, 0xF6, NONE, NONE, false, immSize);
+ opRext(op, 0, 0, 0, 0xF6, false, immSize);
}
db(imm, immSize);
}
- void imul(const Reg& reg, const Operand& op)
- {
- opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x0F, 0xAF);
- }
void imul(const Reg& reg, const Operand& op, int imm)
{
int s = inner::IsInDisp8(imm) ? 1 : 0;
- int immSize = s ? 1 : reg.isREG(16) ? 2 : 4;
- opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x69 | (s << 1), NONE, NONE, immSize);
+ int immSize = s ? 1 : reg.isREG(16) ? 2 : 4;
+ uint8_t code = uint8_t(0x69 | (s << 1));
+ if (!opROO(Reg(), op, reg, T_APX|T_NF|T_ZU, code, immSize)) {
+ opRO(reg, op, 0, code, reg.getKind() == op.getKind(), immSize);
+ }
db(imm, immSize);
}
void push(const Operand& op) { opPushPop(op, 0xFF, 6, 0x50); }
@@ -2625,26 +2858,26 @@ public:
push(dword, imm);
}
}
- void mov(const Operand& reg1, const Operand& reg2)
+ void mov(const Operand& op1, const Operand& op2)
{
const Reg *reg = 0;
const Address *addr = 0;
uint8_t code = 0;
- if (reg1.isREG() && reg1.getIdx() == 0 && reg2.isMEM()) { // mov eax|ax|al, [disp]
- reg = &reg1.getReg();
- addr= &reg2.getAddress();
+ if (op1.isREG() && op1.getIdx() == 0 && op2.isMEM()) { // mov eax|ax|al, [disp]
+ reg = &op1.getReg();
+ addr= &op2.getAddress();
code = 0xA0;
} else
- if (reg1.isMEM() && reg2.isREG() && reg2.getIdx() == 0) { // mov [disp], eax|ax|al
- reg = &reg2.getReg();
- addr= &reg1.getAddress();
+ if (op1.isMEM() && op2.isREG() && op2.getIdx() == 0) { // mov [disp], eax|ax|al
+ reg = &op2.getReg();
+ addr= &op1.getAddress();
code = 0xA2;
}
#ifdef XBYAK64
if (addr && addr->is64bitDisp()) {
if (code) {
rex(*reg);
- db(reg1.isREG(8) ? 0xA0 : reg1.isREG() ? 0xA1 : reg2.isREG(8) ? 0xA2 : 0xA3);
+ db(op1.isREG(8) ? 0xA0 : op1.isREG() ? 0xA1 : op2.isREG(8) ? 0xA2 : 0xA3);
db(addr->getDisp(), 8);
} else {
XBYAK_THROW(ERR_BAD_COMBINATION)
@@ -2658,7 +2891,7 @@ public:
} else
#endif
{
- opRM_RM(reg1, reg2, 0x88);
+ opRO_MR(op1, op2, 0x88);
}
}
void mov(const Operand& op, uint64_t imm)
@@ -2676,7 +2909,7 @@ public:
if (!inner::IsInInt32(imm)) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
immSize = 4;
}
- opModM(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0xC6, NONE, NONE, immSize);
+ opMR(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0, 0xC6, immSize);
db(static_cast<uint32_t>(imm), immSize);
} else {
XBYAK_THROW(ERR_BAD_COMBINATION)
@@ -2708,7 +2941,7 @@ public:
rex(*p2, *p1); db(0x90 | (p2->getIdx() & 7));
return;
}
- opModRM(*p1, *p2, (p1->isREG() && p2->isREG() && (p1->getBit() == p2->getBit())), p2->isMEM(), 0x86 | (p1->isBit(8) ? 0 : 1));
+ opRO(static_cast<const Reg&>(*p1), *p2, 0, 0x86 | (p1->isBit(8) ? 0 : 1), (p1->isREG() && (p1->getBit() == p2->getBit())));
}
#ifndef XBYAK_DISABLE_SEGMENT
@@ -2753,11 +2986,11 @@ public:
}
void mov(const Operand& op, const Segment& seg)
{
- opModRM(Reg8(seg.getIdx()), op, op.isREG(16|i32e), op.isMEM(), 0x8C);
+ opRO(Reg8(seg.getIdx()), op, 0, 0x8C, op.isREG(16|i32e));
}
void mov(const Segment& seg, const Operand& op)
{
- opModRM(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, op.isREG(16|i32e), op.isMEM(), 0x8E);
+ opRO(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, 0, 0x8E, op.isREG(16|i32e));
}
#endif
@@ -2784,11 +3017,17 @@ public:
, bnd0(0), bnd1(1), bnd2(2), bnd3(3)
, T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE)
, T_z()
+ , T_nf()
+ , T_zu()
#ifdef XBYAK64
, rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15)
+ , r16(Operand::R16), r17(Operand::R17), r18(Operand::R18), r19(Operand::R19), r20(Operand::R20), r21(Operand::R21), r22(Operand::R22), r23(Operand::R23), r24(Operand::R24), r25(Operand::R25), r26(Operand::R26), r27(Operand::R27), r28(Operand::R28), r29(Operand::R29), r30(Operand::R30), r31(Operand::R31)
, r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15)
+ , r16d(Operand::R16D), r17d(Operand::R17D), r18d(Operand::R18D), r19d(Operand::R19D), r20d(Operand::R20D), r21d(Operand::R21D), r22d(Operand::R22D), r23d(Operand::R23D), r24d(Operand::R24D), r25d(Operand::R25D), r26d(Operand::R26D), r27d(Operand::R27D), r28d(Operand::R28D), r29d(Operand::R29D), r30d(Operand::R30D), r31d(Operand::R31D)
, r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15)
+ , r16w(Operand::R16W), r17w(Operand::R17W), r18w(Operand::R18W), r19w(Operand::R19W), r20w(Operand::R20W), r21w(Operand::R21W), r22w(Operand::R22W), r23w(Operand::R23W), r24w(Operand::R24W), r25w(Operand::R25W), r26w(Operand::R26W), r27w(Operand::R27W), r28w(Operand::R28W), r29w(Operand::R29W), r30w(Operand::R30W), r31w(Operand::R31W)
, r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15)
+ , r16b(Operand::R16B), r17b(Operand::R17B), r18b(Operand::R18B), r19b(Operand::R19B), r20b(Operand::R20B), r21b(Operand::R21B), r22b(Operand::R22B), r23b(Operand::R23B), r24b(Operand::R24B), r25b(Operand::R25B), r26b(Operand::R26B), r27b(Operand::R27B), r28b(Operand::R28B), r29b(Operand::R29B), r30b(Operand::R30B), r31b(Operand::R31B)
, spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true)
, xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15)
, xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23)
@@ -2857,6 +3096,30 @@ public:
// set default encoding to select Vex or Evex
void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
+ // (reg32e/mem, k) if rev else (k, k/mem/reg32e)
+ // size = 8, 16, 32, 64
+ void opKmov(const Opmask& k, const Operand& op, bool rev, int size)
+ {
+ int code = 0;
+ bool isReg = op.isREG(size < 64 ? 32 : 64);
+ if (rev) {
+ code = isReg ? 0x93 : op.isMEM() ? 0x91 : 0;
+ } else {
+ code = op.isOPMASK() || op.isMEM() ? 0x90 : isReg ? 0x92 : 0;
+ }
+ if (code == 0) XBYAK_THROW(ERR_BAD_COMBINATION)
+ uint64_t type = 0;
+ switch (size) {
+ case 8: type = T_W0|T_66; break;
+ case 16: type = T_W0; break;
+ case 32: type = isReg ? T_W0|T_F2 : T_W1|T_66; break;
+ case 64: type = isReg ? T_W1|T_F2 : T_W1; break;
+ }
+ const Operand *p1 = &k, *p2 = &op;
+ if (code == 0x93) { std::swap(p1, p2); }
+ if (opROO(Reg(), *p2, *p1, T_MAP1|type, code)) return;
+ opVex(static_cast<const Reg&>(*p1), 0, *p2, T_L0|T_0F|type, code);
+ }
/*
use single byte nop if useMultiByteNop = false
*/
@@ -2893,7 +3156,6 @@ public:
size -= len;
}
}
-
#ifndef XBYAK_DONT_READ_LIST
#include "xbyak_mnemonic.h"
/*
@@ -2937,9 +3199,13 @@ static const XBYAK_CONSTEXPR EvexModifierRounding T_sae(EvexModifierRounding::T_
static const XBYAK_CONSTEXPR EvexModifierZero T_z;
#ifdef XBYAK64
static const XBYAK_CONSTEXPR Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15);
+static const XBYAK_CONSTEXPR Reg64 r16(16), r17(17), r18(18), r19(19), r20(20), r21(21), r22(22), r23(23), r24(24), r25(25), r26(26), r27(27), r28(28), r29(29), r30(30), r31(31);
static const XBYAK_CONSTEXPR Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15);
+static const XBYAK_CONSTEXPR Reg32 r16d(16), r17d(17), r18d(18), r19d(19), r20d(20), r21d(21), r22d(22), r23d(23), r24d(24), r25d(25), r26d(26), r27d(27), r28d(28), r29d(29), r30d(30), r31d(31);
static const XBYAK_CONSTEXPR Reg16 r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15);
+static const XBYAK_CONSTEXPR Reg16 r16w(16), r17w(17), r18w(18), r19w(19), r20w(20), r21w(21), r22w(22), r23w(23), r24w(24), r25w(25), r26w(26), r27w(27), r28w(28), r29w(29), r30w(30), r31w(31);
static const XBYAK_CONSTEXPR Reg8 r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15), spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true);
+static const XBYAK_CONSTEXPR Reg8 r16b(16), r17b(17), r18b(18), r19b(19), r20b(20), r21b(21), r22b(22), r23b(23), r24b(24), r25b(25), r26b(26), r27b(27), r28b(28), r29b(29), r30b(30), r31b(31);
static const XBYAK_CONSTEXPR Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15);
static const XBYAK_CONSTEXPR Xmm xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23);
static const XBYAK_CONSTEXPR Xmm xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31);
@@ -2951,6 +3217,8 @@ static const XBYAK_CONSTEXPR Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm
static const XBYAK_CONSTEXPR Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31);
static const XBYAK_CONSTEXPR Zmm tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7);
static const XBYAK_CONSTEXPR RegRip rip;
+static const XBYAK_CONSTEXPR ApxFlagNF T_nf;
+static const XBYAK_CONSTEXPR ApxFlagZU T_zu;
#endif
#ifndef XBYAK_DISABLE_SEGMENT
static const XBYAK_CONSTEXPR Segment es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs);
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index edc76c5..882ec02 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,105 +1,231 @@
const char *getVersionString() const { return "6.73"; }
-void aadd(const Address& addr, const Reg32e &reg) { opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
-void aand(const Address& addr, const Reg32e &reg) { db(0x66); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
-void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
-void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
-void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
-void add(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x00, 0); }
-void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); }
-void addpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x66, isXMM_XMMorMEM); }
-void addps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x100, isXMM_XMMorMEM); }
-void addsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0xF2, isXMM_XMMorMEM); }
-void addss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0xF3, isXMM_XMMorMEM); }
-void addsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xD0, 0x66, isXMM_XMMorMEM); }
-void addsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xD0, 0xF2, isXMM_XMMorMEM); }
-void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); }
-void aesdec(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDE, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void aesdeclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void aesenc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDC, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void aesenclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDD, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void aesimc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDB, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void aeskeygenassist(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
-void and_(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x20, 4); }
-void and_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); }
-void andn(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_0F38, 0xf2, true); }
-void andnpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x66, isXMM_XMMorMEM); }
-void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isXMM_XMMorMEM); }
-void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); }
-void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); }
-void aor(const Address& addr, const Reg32e &reg) { db(0xF2); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
-void axor(const Address& addr, const Reg32e &reg) { db(0xF3); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
-void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf7, false); }
-void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
-void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
-void blendvpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void blendvps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void blsi(const Reg32e& r, const Operand& op) { opGpr(Reg32e(3, r.getBit()), op, r, T_0F38, 0xf3, false); }
-void blsmsk(const Reg32e& r, const Operand& op) { opGpr(Reg32e(2, r.getBit()), op, r, T_0F38, 0xf3, false); }
-void blsr(const Reg32e& r, const Operand& op) { opGpr(Reg32e(1, r.getBit()), op, r, T_0F38, 0xf3, false); }
+void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC); }
+void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38 | T_66, 0x0FC); }
+void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
+void adc(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x10); }
+void adc(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NONE, 2); }
+void adc(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NONE, 0x10); }
+void adcx(const Reg32e& d, const Reg32e& reg, const Operand& op) { opROO(d, op, reg, T_66, 0x66); }
+void adcx(const Reg32e& reg, const Operand& op) { if (!reg.isREG(16|i32e) && reg.getBit() == op.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) if (opROO(Reg(), op, reg, T_66, 0x66)) return; opRO(reg, op, T_66 | T_0F38, 0xF6); }
+void add(const Operand& op, uint32_t imm) { opOI(op, imm, 0x00, 0); }
+void add(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x00); }
+void add(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NF|T_CODE1_IF1, 0); }
+void add(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NF|T_CODE1_IF1, 0x00); }
+void addpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x58, isXMM_XMMorMEM); }
+void addps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x58, isXMM_XMMorMEM); }
+void addsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x58, isXMM_XMMorMEM); }
+void addss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x58, isXMM_XMMorMEM); }
+void addsubpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F|T_YMM, 0xD0, isXMM_XMMorMEM); }
+void addsubps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F2|T_0F|T_YMM, 0xD0, isXMM_XMMorMEM); }
+void adox(const Reg32e& d, const Reg32e& reg, const Operand& op) { opROO(d, op, reg, T_F3, 0x66); }
+void adox(const Reg32e& reg, const Operand& op) { if (!reg.isREG(16|i32e) && reg.getBit() == op.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) if (opROO(Reg(), op, reg, T_F3, 0x66)) return; opRO(reg, op, T_F3 | T_0F38, 0xF6); }
+void aesdec(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38|T_YMM|T_EVEX, 0xDE, isXMM_XMMorMEM); }
+void aesdeclast(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38|T_YMM|T_EVEX, 0xDF, isXMM_XMMorMEM); }
+void aesenc(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38|T_YMM|T_EVEX, 0xDC, isXMM_XMMorMEM); }
+void aesenclast(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38|T_YMM|T_EVEX, 0xDD, isXMM_XMMorMEM); }
+void aesimc(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38|T_W0, 0xDB, isXMM_XMMorMEM, NONE); }
+void aeskeygenassist(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A, 0xDF, isXMM_XMMorMEM, imm); }
+void and_(const Operand& op, uint32_t imm) { opOI(op, imm, 0x20, 4); }
+void and_(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x20); }
+void and_(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NF|T_CODE1_IF1, 4); }
+void and_(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NF|T_CODE1_IF1, 0x20); }
+void andn(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opRRO(r1, r2, op, T_APX|T_0F38|T_NF, 0xf2); }
+void andnpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x55, isXMM_XMMorMEM); }
+void andnps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x55, isXMM_XMMorMEM); }
+void andpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x54, isXMM_XMMorMEM); }
+void andps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x54, isXMM_XMMorMEM); }
+void aor(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38 | T_F2, 0x0FC); }
+void axor(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38 | T_F3, 0x0FC); }
+void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_APX|T_0F38|T_NF, 0xf7); }
+void blendpd(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0D, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
+void blendps(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0C, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
+void blendvpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38, 0x15, isXMM_XMMorMEM, NONE); }
+void blendvps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38, 0x14, isXMM_XMMorMEM, NONE); }
+void blsi(const Reg32e& r, const Operand& op) { opRRO(Reg32e(3, r.getBit()), r, op, T_APX|T_0F38|T_NF, 0xf3); }
+void blsmsk(const Reg32e& r, const Operand& op) { opRRO(Reg32e(2, r.getBit()), r, op, T_APX|T_0F38|T_NF, 0xf3); }
+void blsr(const Reg32e& r, const Operand& op) { opRRO(Reg32e(1, r.getBit()), r, op, T_APX|T_0F38|T_NF, 0xf3); }
void bnd() { db(0xF2); }
-void bndcl(const BoundsReg& bnd, const Operand& op) { db(0xF3); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }
-void bndcn(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1B, NONE, !op.isMEM()); }
-void bndcu(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }
-void bndldx(const BoundsReg& bnd, const Address& addr) { opMIB(addr, bnd, 0x0F, 0x1A); }
-void bndmk(const BoundsReg& bnd, const Address& addr) { db(0xF3); opModM(addr, bnd, 0x0F, 0x1B); }
-void bndmov(const Address& addr, const BoundsReg& bnd) { db(0x66); opModM(addr, bnd, 0x0F, 0x1B); }
-void bndmov(const BoundsReg& bnd, const Operand& op) { db(0x66); opModRM(bnd, op, op.isBNDREG(), op.isMEM(), 0x0F, 0x1A); }
-void bndstx(const Address& addr, const BoundsReg& bnd) { opMIB(addr, bnd, 0x0F, 0x1B); }
-void bsf(const Reg&reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBC); }
-void bsr(const Reg&reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); }
-void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }
-void bt(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xA3); }
-void bt(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 4, 0x0f, 0xba, NONE, false, 1); db(imm); }
-void btc(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xBB); }
-void btc(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 7, 0x0f, 0xba, NONE, false, 1); db(imm); }
-void btr(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xB3); }
-void btr(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 6, 0x0f, 0xba, NONE, false, 1); db(imm); }
-void bts(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xAB); }
-void bts(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 5, 0x0f, 0xba, NONE, false, 1); db(imm); }
-void bzhi(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf5, false); }
+void bndcl(const BoundsReg& bnd, const Operand& op) { opRext(op, i32e, bnd.getIdx(), T_F3 | T_0F, 0x1A, !op.isMEM()); }
+void bndcn(const BoundsReg& bnd, const Operand& op) { opRext(op, i32e, bnd.getIdx(), T_F2 | T_0F, 0x1B, !op.isMEM()); }
+void bndcu(const BoundsReg& bnd, const Operand& op) { opRext(op, i32e, bnd.getIdx(), T_F2 | T_0F, 0x1A, !op.isMEM()); }
+void bndldx(const BoundsReg& bnd, const Address& addr) { opMIB(addr, bnd, T_0F, 0x1A); }
+void bndmk(const BoundsReg& bnd, const Address& addr) { opMR(addr, bnd, T_F3 | T_0F, 0x1B); }
+void bndmov(const Address& addr, const BoundsReg& bnd) { opMR(addr, bnd, T_66 | T_0F, 0x1B); }
+void bndmov(const BoundsReg& bnd, const Operand& op) { opRO(bnd, op, T_66 | T_0F, 0x1A, op.isBNDREG()); }
+void bndstx(const Address& addr, const BoundsReg& bnd) { opMIB(addr, bnd, T_0F, 0x1B); }
+void bsf(const Reg&reg, const Operand& op) { opRO(reg, op, T_0F, 0xBC, op.isREG(16|i32e)); }
+void bsr(const Reg&reg, const Operand& op) { opRO(reg, op, T_0F, 0xBD, op.isREG(16|i32e)); }
+void bswap(const Reg32e& reg) { opRR(Reg32(1), reg, 0, 0x0F); }
+void bt(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xA3, op.isREG(16|i32e) && op.getBit() == reg.getBit()); }
+void bt(const Operand& op, uint8_t imm) { opRext(op, 16|i32e, 4, T_0F, 0xba, false, 1); db(imm); }
+void btc(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xBB, op.isREG(16|i32e) && op.getBit() == reg.getBit()); }
+void btc(const Operand& op, uint8_t imm) { opRext(op, 16|i32e, 7, T_0F, 0xba, false, 1); db(imm); }
+void btr(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xB3, op.isREG(16|i32e) && op.getBit() == reg.getBit()); }
+void btr(const Operand& op, uint8_t imm) { opRext(op, 16|i32e, 6, T_0F, 0xba, false, 1); db(imm); }
+void bts(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xAB, op.isREG(16|i32e) && op.getBit() == reg.getBit()); }
+void bts(const Operand& op, uint8_t imm) { opRext(op, 16|i32e, 5, T_0F, 0xba, false, 1); db(imm); }
+void bzhi(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_APX|T_0F38|T_NF, 0xf5); }
void cbw() { db(0x66); db(0x98); }
+void ccmpa(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 7); }
+void ccmpa(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 7); }
+void ccmpae(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 3); }
+void ccmpae(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 3); }
+void ccmpb(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 2); }
+void ccmpb(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 2); }
+void ccmpbe(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 6); }
+void ccmpbe(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 6); }
+void ccmpc(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 2); }
+void ccmpc(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 2); }
+void ccmpe(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 4); }
+void ccmpe(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 4); }
+void ccmpf(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 11); }
+void ccmpf(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 11); }
+void ccmpg(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 15); }
+void ccmpg(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 15); }
+void ccmpge(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 13); }
+void ccmpge(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 13); }
+void ccmpl(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 12); }
+void ccmpl(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 12); }
+void ccmple(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 14); }
+void ccmple(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 14); }
+void ccmpna(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 6); }
+void ccmpna(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 6); }
+void ccmpnae(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 2); }
+void ccmpnae(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 2); }
+void ccmpnb(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 3); }
+void ccmpnb(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 3); }
+void ccmpnbe(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 7); }
+void ccmpnbe(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 7); }
+void ccmpnc(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 3); }
+void ccmpnc(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 3); }
+void ccmpne(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 5); }
+void ccmpne(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 5); }
+void ccmpng(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 14); }
+void ccmpng(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 14); }
+void ccmpnge(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 12); }
+void ccmpnge(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 12); }
+void ccmpnl(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 13); }
+void ccmpnl(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 13); }
+void ccmpnle(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 15); }
+void ccmpnle(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 15); }
+void ccmpno(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 1); }
+void ccmpno(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 1); }
+void ccmpns(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 9); }
+void ccmpns(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 9); }
+void ccmpnz(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 5); }
+void ccmpnz(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 5); }
+void ccmpo(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 0); }
+void ccmpo(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 0); }
+void ccmps(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 8); }
+void ccmps(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 8); }
+void ccmpt(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 10); }
+void ccmpt(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 10); }
+void ccmpz(const Operand& op, int imm, int dfv) { opCcmpi(op, imm, dfv, 4); }
+void ccmpz(const Operand& op1, const Operand& op2, int dfv) { opCcmp(op1, op2, dfv, 0x38, 4); }
void cdq() { db(0x99); }
+void cfcmovb(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x42); }
+void cfcmovb(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x42); }
+void cfcmovbe(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x46); }
+void cfcmovbe(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x46); }
+void cfcmovl(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x4C); }
+void cfcmovl(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x4C); }
+void cfcmovle(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x4E); }
+void cfcmovle(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x4E); }
+void cfcmovnb(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x43); }
+void cfcmovnb(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x43); }
+void cfcmovnbe(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x47); }
+void cfcmovnbe(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x47); }
+void cfcmovnl(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x4D); }
+void cfcmovnl(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x4D); }
+void cfcmovnle(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x4F); }
+void cfcmovnle(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x4F); }
+void cfcmovno(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x41); }
+void cfcmovno(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x41); }
+void cfcmovnp(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x4B); }
+void cfcmovnp(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x4B); }
+void cfcmovns(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x49); }
+void cfcmovns(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x49); }
+void cfcmovnz(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x45); }
+void cfcmovnz(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x45); }
+void cfcmovo(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x40); }
+void cfcmovo(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x40); }
+void cfcmovp(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x4A); }
+void cfcmovp(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x4A); }
+void cfcmovs(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x48); }
+void cfcmovs(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x48); }
+void cfcmovz(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x44); }
+void cfcmovz(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x44); }
void clc() { db(0xF8); }
void cld() { db(0xFC); }
-void cldemote(const Address& addr) { opMIB(addr, eax, 0x0F, 0x1C); }
-void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
-void clflushopt(const Address& addr) { db(0x66); opModM(addr, Reg32(7), 0x0F, 0xAE); }
+void cldemote(const Address& addr) { opMR(addr, eax, T_0F, 0x1C); }
+void clflush(const Address& addr) { opMR(addr, Reg32(7), T_0F, 0xAE); }
+void clflushopt(const Address& addr) { opMR(addr, Reg32(7), T_66 | T_0F, 0xAE); }
void cli() { db(0xFA); }
-void clwb(const Address& addr) { db(0x66); opMIB(addr, esi, 0x0F, 0xAE); }
+void clwb(const Address& addr) { opMR(addr, esi, T_66 | T_0F, 0xAE); }
void clzero() { db(0x0F); db(0x01); db(0xFC); }
void cmc() { db(0xF5); }
-void cmova(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }//-V524
-void cmovae(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 3); }//-V524
-void cmovb(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 2); }//-V524
-void cmovbe(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 6); }//-V524
-void cmovc(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 2); }//-V524
-void cmove(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 4); }//-V524
-void cmovg(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 15); }//-V524
-void cmovge(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 13); }//-V524
-void cmovl(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 12); }//-V524
-void cmovle(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 14); }//-V524
-void cmovna(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 6); }//-V524
-void cmovnae(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 2); }//-V524
-void cmovnb(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 3); }//-V524
-void cmovnbe(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }//-V524
-void cmovnc(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 3); }//-V524
-void cmovne(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 5); }//-V524
-void cmovng(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 14); }//-V524
-void cmovnge(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 12); }//-V524
-void cmovnl(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 13); }//-V524
-void cmovnle(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 15); }//-V524
-void cmovno(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 1); }//-V524
-void cmovnp(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 11); }//-V524
-void cmovns(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 9); }//-V524
-void cmovnz(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 5); }//-V524
-void cmovo(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 0); }//-V524
-void cmovp(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 10); }//-V524
-void cmovpe(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 10); }//-V524
-void cmovpo(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 11); }//-V524
-void cmovs(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 8); }//-V524
-void cmovz(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 4); }//-V524
-void cmp(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x38, 7); }
-void cmp(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x38); }
+void cmova(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 7); }//-V524
+void cmova(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 7, op.isREG(16|i32e)); }//-V524
+void cmovae(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 3); }//-V524
+void cmovae(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 3, op.isREG(16|i32e)); }//-V524
+void cmovb(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 2); }//-V524
+void cmovb(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 2, op.isREG(16|i32e)); }//-V524
+void cmovbe(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 6); }//-V524
+void cmovbe(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 6, op.isREG(16|i32e)); }//-V524
+void cmovc(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 2); }//-V524
+void cmovc(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 2, op.isREG(16|i32e)); }//-V524
+void cmove(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 4); }//-V524
+void cmove(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 4, op.isREG(16|i32e)); }//-V524
+void cmovg(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 15); }//-V524
+void cmovg(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 15, op.isREG(16|i32e)); }//-V524
+void cmovge(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 13); }//-V524
+void cmovge(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 13, op.isREG(16|i32e)); }//-V524
+void cmovl(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 12); }//-V524
+void cmovl(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 12, op.isREG(16|i32e)); }//-V524
+void cmovle(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 14); }//-V524
+void cmovle(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 14, op.isREG(16|i32e)); }//-V524
+void cmovna(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 6); }//-V524
+void cmovna(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 6, op.isREG(16|i32e)); }//-V524
+void cmovnae(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 2); }//-V524
+void cmovnae(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 2, op.isREG(16|i32e)); }//-V524
+void cmovnb(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 3); }//-V524
+void cmovnb(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 3, op.isREG(16|i32e)); }//-V524
+void cmovnbe(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 7); }//-V524
+void cmovnbe(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 7, op.isREG(16|i32e)); }//-V524
+void cmovnc(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 3); }//-V524
+void cmovnc(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 3, op.isREG(16|i32e)); }//-V524
+void cmovne(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 5); }//-V524
+void cmovne(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 5, op.isREG(16|i32e)); }//-V524
+void cmovng(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 14); }//-V524
+void cmovng(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 14, op.isREG(16|i32e)); }//-V524
+void cmovnge(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 12); }//-V524
+void cmovnge(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 12, op.isREG(16|i32e)); }//-V524
+void cmovnl(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 13); }//-V524
+void cmovnl(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 13, op.isREG(16|i32e)); }//-V524
+void cmovnle(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 15); }//-V524
+void cmovnle(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 15, op.isREG(16|i32e)); }//-V524
+void cmovno(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 1); }//-V524
+void cmovno(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 1, op.isREG(16|i32e)); }//-V524
+void cmovnp(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 11); }//-V524
+void cmovnp(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 11, op.isREG(16|i32e)); }//-V524
+void cmovns(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 9); }//-V524
+void cmovns(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 9, op.isREG(16|i32e)); }//-V524
+void cmovnz(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 5); }//-V524
+void cmovnz(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 5, op.isREG(16|i32e)); }//-V524
+void cmovo(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 0); }//-V524
+void cmovo(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 0, op.isREG(16|i32e)); }//-V524
+void cmovp(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 10); }//-V524
+void cmovp(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 10, op.isREG(16|i32e)); }//-V524
+void cmovpe(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 10); }//-V524
+void cmovpe(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 10, op.isREG(16|i32e)); }//-V524
+void cmovpo(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 11); }//-V524
+void cmovpo(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 11, op.isREG(16|i32e)); }//-V524
+void cmovs(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 8); }//-V524
+void cmovs(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 8, op.isREG(16|i32e)); }//-V524
+void cmovz(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 4); }//-V524
+void cmovz(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 4, op.isREG(16|i32e)); }//-V524
+void cmp(const Operand& op, uint32_t imm) { opOI(op, imm, 0x38, 7); }
+void cmp(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x38); }
void cmpeqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 0); }
void cmpeqps(const Xmm& x, const Operand& op) { cmpps(x, op, 0); }
void cmpeqsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 0); }
@@ -128,55 +254,112 @@ void cmpordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 7); }
void cmpordps(const Xmm& x, const Operand& op) { cmpps(x, op, 7); }
void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); }
void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); }
-void cmppd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); }
-void cmpps(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); }
+void cmppd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, T_0F | T_66, 0xC2, isXMM_XMMorMEM, imm8); }
+void cmpps(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, T_0F, 0xC2, isXMM_XMMorMEM, imm8); }
void cmpsb() { db(0xA6); }
void cmpsd() { db(0xA7); }
-void cmpsd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); }
-void cmpss(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); }
+void cmpsd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, T_0F | T_F2, 0xC2, isXMM_XMMorMEM, imm8); }
+void cmpss(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, T_0F | T_F3, 0xC2, isXMM_XMMorMEM, imm8); }
void cmpsw() { db(0x66); db(0xA7); }
void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); }
void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); }
void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); }
void cmpunordss(const Xmm& x, const Operand& op) { cmpss(x, op, 3); }
-void cmpxchg(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xB0 | (reg.isBit(8) ? 0 : 1)); }
-void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xC7); }
-void comisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2F, 0x66, isXMM_XMMorMEM); }
-void comiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2F, 0x100, isXMM_XMMorMEM); }
+void cmpxchg(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xB0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); }
+void cmpxchg8b(const Address& addr) { opMR(addr, Reg32(1), T_0F, 0xC7); }
+void comisd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0x2F, isXMM_XMMorMEM); }
+void comiss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x2F, isXMM_XMMorMEM); }
void cpuid() { db(0x0F); db(0xA2); }
-void crc32(const Reg32e& reg, const Operand& op) { if (reg.isBit(32) && op.isBit(16)) db(0x66); db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1)); }
-void cvtdq2pd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0xF3, isXMM_XMMorMEM); }
-void cvtdq2ps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0x100, isXMM_XMMorMEM); }
-void cvtpd2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0xF2, isXMM_XMMorMEM); }
-void cvtpd2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0x66, isMMX_XMMorMEM); }
-void cvtpd2ps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0x66, isXMM_XMMorMEM); }
-void cvtpi2pd(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0x66, isXMM_MMXorMEM); }
-void cvtpi2ps(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0x100, isXMM_MMXorMEM); }
-void cvtps2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0x66, isXMM_XMMorMEM); }
-void cvtps2pd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0x100, isXMM_XMMorMEM); }
-void cvtps2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0x100, isMMX_XMMorMEM); }
-void cvtsd2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0xF2, isREG32_XMMorMEM); }
-void cvtsd2ss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0xF2, isXMM_XMMorMEM); }
-void cvtsi2sd(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0xF2, isXMM_REG32orMEM); }
-void cvtsi2ss(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0xF3, isXMM_REG32orMEM); }
-void cvtss2sd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0xF3, isXMM_XMMorMEM); }
-void cvtss2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0xF3, isREG32_XMMorMEM); }
-void cvttpd2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0x66, isXMM_XMMorMEM); }
-void cvttpd2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0x66, isMMX_XMMorMEM); }
-void cvttps2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0xF3, isXMM_XMMorMEM); }
-void cvttps2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0x100, isMMX_XMMorMEM); }
-void cvttsd2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0xF2, isREG32_XMMorMEM); }
-void cvttss2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0xF3, isREG32_XMMorMEM); }
+void crc32(const Reg32e& r, const Operand& op) { if (!((r.isBit(32) && op.isBit(8|16|32)) || (r.isBit(64) && op.isBit(8|64)))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) int code = 0xF0 | (op.isBit(8) ? 0 : 1); uint64_t type = op.isBit(16) ? T_66:0; if (opROO(Reg(), op, static_cast<const Reg&>(r), T_APX|type, code)) return; opRO(r, op, T_F2|T_0F38|type, code); }
+void ctesta(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 7); }
+void ctesta(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 7); }
+void ctestae(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 3); }
+void ctestae(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 3); }
+void ctestb(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 2); }
+void ctestb(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 2); }
+void ctestbe(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 6); }
+void ctestbe(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 6); }
+void ctestc(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 2); }
+void ctestc(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 2); }
+void cteste(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 4); }
+void cteste(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 4); }
+void ctestf(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 11); }
+void ctestf(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 11); }
+void ctestg(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 15); }
+void ctestg(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 15); }
+void ctestge(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 13); }
+void ctestge(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 13); }
+void ctestl(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 12); }
+void ctestl(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 12); }
+void ctestle(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 14); }
+void ctestle(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 14); }
+void ctestna(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 6); }
+void ctestna(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 6); }
+void ctestnae(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 2); }
+void ctestnae(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 2); }
+void ctestnb(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 3); }
+void ctestnb(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 3); }
+void ctestnbe(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 7); }
+void ctestnbe(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 7); }
+void ctestnc(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 3); }
+void ctestnc(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 3); }
+void ctestne(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 5); }
+void ctestne(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 5); }
+void ctestng(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 14); }
+void ctestng(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 14); }
+void ctestnge(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 12); }
+void ctestnge(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 12); }
+void ctestnl(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 13); }
+void ctestnl(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 13); }
+void ctestnle(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 15); }
+void ctestnle(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 15); }
+void ctestno(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 1); }
+void ctestno(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 1); }
+void ctestns(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 9); }
+void ctestns(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 9); }
+void ctestnz(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 5); }
+void ctestnz(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 5); }
+void ctesto(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 0); }
+void ctesto(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 0); }
+void ctests(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 8); }
+void ctests(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 8); }
+void ctestt(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 10); }
+void ctestt(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 10); }
+void ctestz(const Operand& op, const Reg& r, int dfv) { opCcmp(op, r, dfv, 0x84, 4); }
+void ctestz(const Operand& op, int imm, int dfv) { opTesti(op, imm, dfv, 4); }
+void cvtdq2pd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F, 0xE6, isXMM_XMMorMEM); }
+void cvtdq2ps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5B, isXMM_XMMorMEM); }
+void cvtpd2dq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F2|T_0F, 0xE6, isXMM_XMMorMEM); }
+void cvtpd2pi(const Reg& reg, const Operand& op) { opSSE(reg, op, T_66|T_0F, 0x2D, isMMX_XMMorMEM); }
+void cvtpd2ps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0x5A, isXMM_XMMorMEM); }
+void cvtpi2pd(const Reg& reg, const Operand& op) { opSSE(reg, op, T_66|T_0F, 0x2A, isXMM_MMXorMEM); }
+void cvtpi2ps(const Reg& reg, const Operand& op) { opSSE(reg, op, T_0F, 0x2A, isXMM_MMXorMEM); }
+void cvtps2dq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0x5B, isXMM_XMMorMEM); }
+void cvtps2pd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5A, isXMM_XMMorMEM); }
+void cvtps2pi(const Reg& reg, const Operand& op) { opSSE(reg, op, T_0F, 0x2D, isMMX_XMMorMEM); }
+void cvtsd2si(const Reg& reg, const Operand& op) { opSSE(reg, op, T_F2|T_0F, 0x2D, isREG32_XMMorMEM); }
+void cvtsd2ss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F2|T_0F, 0x5A, isXMM_XMMorMEM); }
+void cvtsi2sd(const Reg& reg, const Operand& op) { opSSE(reg, op, T_F2|T_0F, 0x2A, isXMM_REG32orMEM); }
+void cvtsi2ss(const Reg& reg, const Operand& op) { opSSE(reg, op, T_F3|T_0F, 0x2A, isXMM_REG32orMEM); }
+void cvtss2sd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F, 0x5A, isXMM_XMMorMEM); }
+void cvtss2si(const Reg& reg, const Operand& op) { opSSE(reg, op, T_F3|T_0F, 0x2D, isREG32_XMMorMEM); }
+void cvttpd2dq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0xE6, isXMM_XMMorMEM); }
+void cvttpd2pi(const Reg& reg, const Operand& op) { opSSE(reg, op, T_66|T_0F, 0x2C, isMMX_XMMorMEM); }
+void cvttps2dq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F, 0x5B, isXMM_XMMorMEM); }
+void cvttps2pi(const Reg& reg, const Operand& op) { opSSE(reg, op, T_0F, 0x2C, isMMX_XMMorMEM); }
+void cvttsd2si(const Reg& reg, const Operand& op) { opSSE(reg, op, T_F2|T_0F, 0x2C, isREG32_XMMorMEM); }
+void cvttss2si(const Reg& reg, const Operand& op) { opSSE(reg, op, T_F3|T_0F, 0x2C, isREG32_XMMorMEM); }
void cwd() { db(0x66); db(0x99); }
void cwde() { db(0x98); }
-void dec(const Operand& op) { opIncDec(op, 0x48, 1); }
-void div(const Operand& op) { opR_ModM(op, 0, 6, 0xF6); }
-void divpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x66, isXMM_XMMorMEM); }
-void divps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x100, isXMM_XMMorMEM); }
-void divsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF2, isXMM_XMMorMEM); }
-void divss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF3, isXMM_XMMorMEM); }
-void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
-void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
+void dec(const Operand& op) { opIncDec(Reg(), op, 1); }
+void dec(const Reg& d, const Operand& op) { opIncDec(d, op, 1); }
+void div(const Operand& op) { opRext(op, 0, 6, T_APX|T_NF|T_CODE1_IF1, 0xF6); }
+void divpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x5E, isXMM_XMMorMEM); }
+void divps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5E, isXMM_XMMorMEM); }
+void divsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x5E, isXMM_XMMorMEM); }
+void divss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x5E, isXMM_XMMorMEM); }
+void dppd(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x41, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
+void dpps(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x40, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
void emms() { db(0x0F); db(0x77); }
void endbr32() { db(0xF3); db(0x0F); db(0x1E); db(0xFB); }
void endbr64() { db(0xF3); db(0x0F); db(0x1E); db(0xFA); }
@@ -190,8 +373,8 @@ void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC
void faddp() { db(0xDE); db(0xC1); }
void faddp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); }
void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); }
-void fbld(const Address& addr) { opModM(addr, Reg32(4), 0xDF, 0x100); }
-void fbstp(const Address& addr) { opModM(addr, Reg32(6), 0xDF, 0x100); }
+void fbld(const Address& addr) { opMR(addr, Reg32(4), 0, 0xDF); }
+void fbstp(const Address& addr) { opMR(addr, Reg32(6), 0, 0xDF); }
void fchs() { db(0xD9); db(0xE0); }
void fclex() { db(0x9B); db(0xDB); db(0xE2); }
void fcmovb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); }
@@ -253,8 +436,8 @@ void fisubr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 5, 0); }
void fld(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 0, 0); }
void fld(const Fpu& reg) { opFpu(reg, 0xD9, 0xC0); }
void fld1() { db(0xD9); db(0xE8); }
-void fldcw(const Address& addr) { opModM(addr, Reg32(5), 0xD9, 0x100); }
-void fldenv(const Address& addr) { opModM(addr, Reg32(4), 0xD9, 0x100); }
+void fldcw(const Address& addr) { opMR(addr, Reg32(5), 0, 0xD9); }
+void fldenv(const Address& addr) { opMR(addr, Reg32(4), 0, 0xD9); }
void fldl2e() { db(0xD9); db(0xEA); }
void fldl2t() { db(0xD9); db(0xE9); }
void fldlg2() { db(0xD9); db(0xEC); }
@@ -270,29 +453,29 @@ void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDE
void fnclex() { db(0xDB); db(0xE2); }
void fninit() { db(0xDB); db(0xE3); }
void fnop() { db(0xD9); db(0xD0); }
-void fnsave(const Address& addr) { opModM(addr, Reg32(6), 0xDD, 0x100); }
-void fnstcw(const Address& addr) { opModM(addr, Reg32(7), 0xD9, 0x100); }
-void fnstenv(const Address& addr) { opModM(addr, Reg32(6), 0xD9, 0x100); }
-void fnstsw(const Address& addr) { opModM(addr, Reg32(7), 0xDD, 0x100); }
+void fnsave(const Address& addr) { opMR(addr, Reg32(6), 0, 0xDD); }
+void fnstcw(const Address& addr) { opMR(addr, Reg32(7), 0, 0xD9); }
+void fnstenv(const Address& addr) { opMR(addr, Reg32(6), 0, 0xD9); }
+void fnstsw(const Address& addr) { opMR(addr, Reg32(7), 0, 0xDD); }
void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xDF); db(0xE0); }
void fpatan() { db(0xD9); db(0xF3); }
void fprem() { db(0xD9); db(0xF8); }
void fprem1() { db(0xD9); db(0xF5); }
void fptan() { db(0xD9); db(0xF2); }
void frndint() { db(0xD9); db(0xFC); }
-void frstor(const Address& addr) { opModM(addr, Reg32(4), 0xDD, 0x100); }
-void fsave(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xDD, 0x100); }
+void frstor(const Address& addr) { opMR(addr, Reg32(4), 0, 0xDD); }
+void fsave(const Address& addr) { db(0x9B); opMR(addr, Reg32(6), 0, 0xDD); }
void fscale() { db(0xD9); db(0xFD); }
void fsin() { db(0xD9); db(0xFE); }
void fsincos() { db(0xD9); db(0xFB); }
void fsqrt() { db(0xD9); db(0xFA); }
void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); }
void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); }
-void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, 0x100); }
-void fstenv(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xD9, 0x100); }
+void fstcw(const Address& addr) { db(0x9B); opMR(addr, Reg32(7), 0, 0xD9); }
+void fstenv(const Address& addr) { db(0x9B); opMR(addr, Reg32(6), 0, 0xD9); }
void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); }
void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); }
-void fstsw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xDD, 0x100); }
+void fstsw(const Address& addr) { db(0x9B); opMR(addr, Reg32(7), 0, 0xDD); }
void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x9B); db(0xDF); db(0xE0); }
void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); }
void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); }
@@ -320,24 +503,26 @@ void fwait() { db(0x9B); }
void fxam() { db(0xD9); db(0xE5); }
void fxch() { db(0xD9); db(0xC9); }
void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); }
-void fxrstor(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xAE); }
+void fxrstor(const Address& addr) { opMR(addr, Reg32(1), T_0F, 0xAE); }
void fxtract() { db(0xD9); db(0xF4); }
void fyl2x() { db(0xD9); db(0xF1); }
void fyl2xp1() { db(0xD9); db(0xF9); }
-void gf2p8affineinvqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
-void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCE, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
-void gf2p8mulb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); }
-void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); }
+void gf2p8affineinvqb(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0xCF, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
+void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0xCE, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
+void gf2p8mulb(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0xCF, isXMM_XMMorMEM); }
+void haddpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F|T_YMM, 0x7C, isXMM_XMMorMEM); }
+void haddps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F2|T_0F|T_YMM, 0x7C, isXMM_XMMorMEM); }
void hlt() { db(0xF4); }
-void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXMM_XMMorMEM); }
-void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); }
-void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); }
-void imul(const Operand& op) { opR_ModM(op, 0, 5, 0xF6); }
+void hsubpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F|T_YMM, 0x7D, isXMM_XMMorMEM); }
+void hsubps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F2|T_0F|T_YMM, 0x7D, isXMM_XMMorMEM); }
+void idiv(const Operand& op) { opRext(op, 0, 7, T_APX|T_NF|T_CODE1_IF1, 0xF6); }
+void imul(const Operand& op) { opRext(op, 0, 5, T_APX|T_NF|T_CODE1_IF1, 0xF6); }
+void imul(const Reg& reg, const Operand& op) { if (opROO(Reg(), op, reg, T_APX|T_NF, 0xAF)) return; opRO(reg, op, T_0F, 0xAF, reg.getKind() == op.getKind()); }
void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); }
void in_(const Reg& a, uint8_t v) { opInOut(a, 0xE4, v); }
-void inc(const Operand& op) { opIncDec(op, 0x40, 0); }
-void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
+void inc(const Operand& op) { opIncDec(Reg(), op, 0); }
+void inc(const Reg& d, const Operand& op) { opIncDec(d, op, 0); }
+void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x21, isXMM_XMMorMEM, imm); }
void int3() { db(0xCC); }
void int_(uint8_t x) { db(0xCD); db(x); }
void ja(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }//-V524
@@ -461,13 +646,13 @@ void jz(const char *label, LabelType type = T_AUTO) { jz(std::string(label), typ
void jz(const void *addr) { opJmpAbs(addr, T_NEAR, 0x74, 0x84, 0x0F); }//-V524
void jz(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }//-V524
void lahf() { db(0x9F); }
-void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); }
-void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); }
-void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); }
+void lddqu(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_F2 | T_0F, 0xF0); }
+void ldmxcsr(const Address& addr) { opMR(addr, Reg32(2), T_0F, 0xAE); }
+void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opMR(addr, reg, 0, 0x8D); }
void leave() { db(0xC9); }
void lfence() { db(0x0F); db(0xAE); db(0xE8); }
-void lfs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB4); }
-void lgs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB5); }
+void lfs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, T_0F, 0xB4); }
+void lgs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, T_0F, 0xB5); }
void lock() { db(0xF0); }
void lodsb() { db(0xAC); }
void lodsd() { db(0xAD); }
@@ -481,97 +666,101 @@ void loope(std::string label) { opJmp(label, T_SHORT, 0xE1, 0, 0); }
void loopne(const Label& label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
void loopne(const char *label) { loopne(std::string(label)); }
void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
-void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB2); }
-void lzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); }
-void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66); opModR(reg1, reg2, 0x0F, 0xF7); }
-void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModR(reg1, reg2, 0x0F, 0xF7); }
-void maxpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x66, isXMM_XMMorMEM); }
-void maxps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x100, isXMM_XMMorMEM); }
-void maxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0xF2, isXMM_XMMorMEM); }
-void maxss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0xF3, isXMM_XMMorMEM); }
+void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, T_0F, 0xB2); }
+void lzcnt(const Reg&reg, const Operand& op) { if (opROO(Reg(), op, reg, T_APX|T_NF, 0xF5)) return; opCnt(reg, op, 0xBD); }
+void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, T_66|T_0F, 0xF7); }
+void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opRR(reg1, reg2, T_0F, 0xF7); }
+void maxpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x5F, isXMM_XMMorMEM); }
+void maxps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5F, isXMM_XMMorMEM); }
+void maxsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x5F, isXMM_XMMorMEM); }
+void maxss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x5F, isXMM_XMMorMEM); }
void mfence() { db(0x0F); db(0xAE); db(0xF0); }
-void minpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x66, isXMM_XMMorMEM); }
-void minps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x100, isXMM_XMMorMEM); }
-void minsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF2, isXMM_XMMorMEM); }
-void minss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF3, isXMM_XMMorMEM); }
+void minpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x5D, isXMM_XMMorMEM); }
+void minps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5D, isXMM_XMMorMEM); }
+void minsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x5D, isXMM_XMMorMEM); }
+void minss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x5D, isXMM_XMMorMEM); }
void monitor() { db(0x0F); db(0x01); db(0xC8); }
void monitorx() { db(0x0F); db(0x01); db(0xFA); }
-void movapd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x29); }
-void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x66); }
-void movaps(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x29); }
-void movaps(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x100); }
-void movbe(const Address& addr, const Reg& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF1); }
-void movbe(const Reg& reg, const Address& addr) { opModM(addr, reg, 0x0F, 0x38, 0xF0); }
-void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x7E); }
-void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x6E); }
-void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
-void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
-void movddup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF2, isXMM_XMMorMEM, NONE, NONE); }
-void movdir64b(const Reg& reg, const Address& addr) { db(0x66); opModM(addr, reg.cvt32(), 0x0F, 0x38, 0xF8); }
-void movdiri(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF9); }
-void movdq2q(const Mmx& mmx, const Xmm& xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); }
-void movdqa(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x7F); }
-void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0x66); }
-void movdqu(const Address& addr, const Xmm& xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x7F); }
-void movdqu(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0xF3); }
-void movhlps(const Xmm& reg1, const Xmm& reg2) { opModR(reg1, reg2, 0x0F, 0x12); }
-void movhpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0x66); }
-void movhps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0x100); }
-void movlhps(const Xmm& reg1, const Xmm& reg2) { opModR(reg1, reg2, 0x0F, 0x16); }
-void movlpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x66); }
-void movlps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x100); }
+void movapd(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_66, 0x29); }
+void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, T_0F, T_66); }
+void movaps(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_NONE, 0x29); }
+void movaps(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, T_0F, T_NONE); }
+void movbe(const Address& addr, const Reg& reg) { if (opROO(Reg(), addr, reg, T_APX, 0x61)) return; opMR(addr, reg, T_0F38, 0xF1); }
+void movbe(const Reg& reg, const Address& addr) { if (opROO(Reg(), addr, reg, T_APX, 0x60)) return; opMR(addr, reg, T_0F38, 0xF0); }
+void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x7E); }
+void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x6E); }
+void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }
+void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }
+void movddup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_DUP|T_F2|T_0F|T_EW1|T_YMM|T_EVEX|T_ER_X|T_ER_Y|T_ER_Z, 0x12, isXMM_XMMorMEM, NONE); }
+void movdir64b(const Reg& reg, const Address& addr) { if (opROO(Reg(), addr, reg.cvt32(), T_APX|T_66, 0xF8)) return; opMR(addr, reg.cvt32(), T_66 | T_0F38, 0xF8); }
+void movdiri(const Address& addr, const Reg32e& reg) { if (opROO(Reg(), addr, reg, T_APX, 0xF9)) return; opMR(addr, reg, T_0F38, 0xF9); }
+void movdq2q(const Mmx& mmx, const Xmm& xmm) { opRR(mmx, xmm, T_F2 | T_0F, 0xD6); }
+void movdqa(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_66, 0x7F); }
+void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, T_0F, T_66); }
+void movdqu(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_F3, 0x7F); }
+void movdqu(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, T_0F, T_F3); }
+void movhlps(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, T_0F, 0x12); }
+void movhpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_66|T_0F, 0x16); }
+void movhps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_0F, 0x16); }
+void movlhps(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, T_0F, 0x16); }
+void movlpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_66|T_0F, 0x12); }
+void movlps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_0F, 0x12); }
void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }
-void movmskps(const Reg32e& reg, const Xmm& xmm) { opModR(reg, xmm, 0x0F, 0x50); }
-void movntdq(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0xE7); }
-void movntdqa(const Xmm& xmm, const Address& addr) { db(0x66); opModM(addr, xmm, 0x0F, 0x38, 0x2A); }
-void movnti(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0xC3); }
-void movntpd(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x2B); }
-void movntps(const Address& addr, const Xmm& xmm) { opModM(addr, Mmx(xmm.getIdx()), 0x0F, 0x2B); }
-void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModM(addr, mmx, 0x0F, 0xE7); }
-void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, mmx.isXMM() ? 0xD6 : 0x7F); }
-void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opModRM(mmx, op, (mmx.getKind() == op.getKind()), op.isMEM(), 0x0F, mmx.isXMM() ? 0x7E : 0x6F); }
-void movq2dq(const Xmm& xmm, const Mmx& mmx) { db(0xF3); opModR(xmm, mmx, 0x0F, 0xD6); }
+void movmskps(const Reg32e& reg, const Xmm& xmm) { opRR(reg, xmm, T_0F, 0x50); }
+void movntdq(const Address& addr, const Xmm& reg) { opMR(addr, Reg16(reg.getIdx()), T_0F, 0xE7); }
+void movntdqa(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_66 | T_0F38, 0x2A); }
+void movnti(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F, 0xC3); }
+void movntpd(const Address& addr, const Xmm& reg) { opMR(addr, Reg16(reg.getIdx()), T_0F, 0x2B); }
+void movntps(const Address& addr, const Xmm& xmm) { opMR(addr, Mmx(xmm.getIdx()), T_0F, 0x2B); }
+void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opMR(addr, mmx, T_0F, 0xE7); }
+void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, mmx.isXMM() ? 0xD6 : 0x7F); }
+void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opRO(mmx, op, T_0F, mmx.isXMM() ? 0x7E : 0x6F, mmx.getKind() == op.getKind()); }
+void movq2dq(const Xmm& xmm, const Mmx& mmx) { opRR(xmm, mmx, T_F3 | T_0F, 0xD6); }
void movsb() { db(0xA4); }
void movsd() { db(0xA5); }
-void movsd(const Address& addr, const Xmm& xmm) { db(0xF2); opModM(addr, xmm, 0x0F, 0x11); }
-void movsd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0xF2); }
-void movshdup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x16, 0xF3, isXMM_XMMorMEM, NONE, NONE); }
-void movsldup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF3, isXMM_XMMorMEM, NONE, NONE); }
-void movss(const Address& addr, const Xmm& xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x11); }
-void movss(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0xF3); }
+void movsd(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_F2, 0x11); }
+void movsd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_F2); }
+void movshdup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F|T_EW0|T_YMM|T_EVEX, 0x16, isXMM_XMMorMEM, NONE); }
+void movsldup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F|T_EW0|T_YMM|T_EVEX, 0x12, isXMM_XMMorMEM, NONE); }
+void movss(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_F3, 0x11); }
+void movss(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_F3); }
void movsw() { db(0x66); db(0xA5); }
void movsx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xBE); }
-void movupd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x11); }
-void movupd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x66); }
-void movups(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x11); }
-void movups(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x100); }
+void movupd(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_66, 0x11); }
+void movupd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_66); }
+void movups(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_NONE, 0x11); }
+void movups(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_NONE); }
void movzx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xB6); }
-void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x42, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
-void mul(const Operand& op) { opR_ModM(op, 0, 4, 0xF6); }
-void mulpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x66, isXMM_XMMorMEM); }
-void mulps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x100, isXMM_XMMorMEM); }
-void mulsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF2, isXMM_XMMorMEM); }
-void mulss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF3, isXMM_XMMorMEM); }
-void mulx(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F2 | T_0F38, 0xf6, true); }
+void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x42, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
+void mul(const Operand& op) { opRext(op, 0, 4, T_APX|T_NF|T_CODE1_IF1, 0xF6); }
+void mulpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x59, isXMM_XMMorMEM); }
+void mulps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x59, isXMM_XMMorMEM); }
+void mulsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x59, isXMM_XMMorMEM); }
+void mulss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x59, isXMM_XMMorMEM); }
+void mulx(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opRRO(r1, r2, op, T_APX|T_F2|T_0F38, 0xf6); }
void mwait() { db(0x0F); db(0x01); db(0xC9); }
void mwaitx() { db(0x0F); db(0x01); db(0xFB); }
-void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); }
-void not_(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); }
-void or_(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x08, 1); }
-void or_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); }
-void orpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x66, isXMM_XMMorMEM); }
-void orps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x100, isXMM_XMMorMEM); }
+void neg(const Operand& op) { opRext(op, 0, 3, T_APX|T_NF|T_CODE1_IF1, 0xF6); }
+void neg(const Reg& d, const Operand& op) { opROO(d, op, Reg(3, Operand::REG, d.getBit()), T_APX|T_NF|T_CODE1_IF1|T_ND1, 0xF6); }
+void not_(const Operand& op) { opRext(op, 0, 2, T_APX|T_CODE1_IF1, 0xF6); }
+void not_(const Reg& d, const Operand& op) { opROO(d, op, Reg(2, Operand::REG, d.getBit()), T_APX|T_CODE1_IF1|T_ND1, 0xF6); }
+void or_(const Operand& op, uint32_t imm) { opOI(op, imm, 0x08, 1); }
+void or_(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x08); }
+void or_(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NF|T_CODE1_IF1, 1); }
+void or_(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NF|T_CODE1_IF1, 0x08); }
+void orpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x56, isXMM_XMMorMEM); }
+void orps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x56, isXMM_XMMorMEM); }
void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); }
void out_(uint8_t v, const Reg& a) { opInOut(a, 0xE6, v); }
void outsb() { db(0x6E); }
void outsd() { db(0x6F); }
void outsw() { db(0x66); db(0x6F); }
-void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, 0x66, NONE, 0x38); }
-void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, 0x66, NONE, 0x38); }
-void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, 0x66, NONE, 0x38); }
+void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, T_0F38, T_66); }
+void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, T_0F38, T_66); }
+void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, T_0F38, T_66); }
void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); }
void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); }
-void packusdw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2B, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
+void packusdw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x2B, isXMM_XMMorMEM); }
void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); }
void paddb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFC); }
void paddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFE); }
@@ -581,100 +770,100 @@ void paddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xED); }
void paddusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDC); }
void paddusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDD); }
void paddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFD); }
-void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast<uint8_t>(imm), 0x3a); }
+void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0F, T_0F3A, T_66, static_cast<uint8_t>(imm)); }
void pand(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDB); }
void pandn(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDF); }
void pause() { db(0xF3); db(0x90); }
void pavgb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE0); }
void pavgw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE3); }
-void pblendvb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x10, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pblendw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0E, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
+void pblendvb(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38, 0x10, isXMM_XMMorMEM, NONE); }
+void pblendw(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0E, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
void pclmulhqhqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x11); }
void pclmulhqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x01); }
void pclmullqhqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x10); }
void pclmullqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x00); }
-void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x44, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
+void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x44, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
void pcmpeqb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x74); }
void pcmpeqd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x76); }
-void pcmpeqq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x29, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
+void pcmpeqq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x29, isXMM_XMMorMEM); }
void pcmpeqw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x75); }
-void pcmpestri(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
-void pcmpestrm(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x60, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
+void pcmpestri(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A, 0x61, isXMM_XMMorMEM, imm); }
+void pcmpestrm(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A, 0x60, isXMM_XMMorMEM, imm); }
void pcmpgtb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x64); }
void pcmpgtd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x66); }
-void pcmpgtq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x37, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
+void pcmpgtq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x37, isXMM_XMMorMEM); }
void pcmpgtw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x65); }
-void pcmpistri(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
-void pcmpistrm(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
-void pdep(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F2 | T_0F38, 0xf5, true); }
-void pext(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F3 | T_0F38, 0xf5, true); }
+void pcmpistri(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A, 0x63, isXMM_XMMorMEM, imm); }
+void pcmpistrm(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A, 0x62, isXMM_XMMorMEM, imm); }
+void pdep(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opRRO(r1, r2, op, T_APX|T_F2|T_0F38, 0xf5); }
+void pext(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opRRO(r1, r2, op, T_APX|T_F3|T_0F38, 0xf5); }
void pextrb(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x14, imm); }
void pextrd(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x16, imm); }
void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); }
-void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, 0x66, NONE, 0x38); }
-void phaddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x03, 0x66, NONE, 0x38); }
-void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, 0x66, NONE, 0x38); }
-void phminposuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void phsubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x06, 0x66, NONE, 0x38); }
-void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, 0x66, NONE, 0x38); }
-void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, 0x66, NONE, 0x38); }
-void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); }
-void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); }
-void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(mmx, op, 0xC4, mmx.isXMM() ? 0x66 : NONE, 0, imm); }
-void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, 0x66, NONE, 0x38); }
+void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, T_0F38, T_66); }
+void phaddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x03, T_0F38, T_66); }
+void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, T_0F38, T_66); }
+void phminposuw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38, 0x41, isXMM_XMMorMEM, NONE); }
+void phsubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x06, T_0F38, T_66); }
+void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, T_0F38, T_66); }
+void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, T_0F38, T_66); }
+void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x20, isXMM_REG32orMEM, imm); }
+void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x22, isXMM_REG32orMEM, imm); }
+void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(mmx, op, T_0F | (mmx.isXMM() ? T_66 : T_NONE), 0xC4, 0, imm); }
+void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, T_0F38, T_66); }
void pmaddwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF5); }
-void pmaxsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3C, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmaxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3D, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
+void pmaxsb(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3C, isXMM_XMMorMEM); }
+void pmaxsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3D, isXMM_XMMorMEM); }
void pmaxsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEE); }
void pmaxub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDE); }
-void pmaxud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3F, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmaxuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3E, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pminsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x38, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pminsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x39, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
+void pmaxud(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3F, isXMM_XMMorMEM); }
+void pmaxuw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3E, isXMM_XMMorMEM); }
+void pminsb(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x38, isXMM_XMMorMEM); }
+void pminsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x39, isXMM_XMMorMEM); }
void pminsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEA); }
void pminub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDA); }
-void pminud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3B, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pminuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3A, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(reg, mmx, 0x0F, 0xD7); }
-void pmovsxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmovsxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x22, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmovsxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x20, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmovsxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x25, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmovsxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x23, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmovsxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x24, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmovzxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x31, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmovzxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x32, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmovzxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x30, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmovzxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x35, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmovzxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x33, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmovzxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x34, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmuldq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x28, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
-void pmulhrsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0B, 0x66, NONE, 0x38); }
+void pminud(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3B, isXMM_XMMorMEM); }
+void pminuw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3A, isXMM_XMMorMEM); }
+void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(reg, mmx, T_0F, 0xD7); }
+void pmovsxbd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x21, isXMM_XMMorMEM, NONE); }
+void pmovsxbq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N2|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x22, isXMM_XMMorMEM, NONE); }
+void pmovsxbw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x20, isXMM_XMMorMEM, NONE); }
+void pmovsxdq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_EW0|T_YMM|T_EVEX, 0x25, isXMM_XMMorMEM, NONE); }
+void pmovsxwd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x23, isXMM_XMMorMEM, NONE); }
+void pmovsxwq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x24, isXMM_XMMorMEM, NONE); }
+void pmovzxbd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x31, isXMM_XMMorMEM, NONE); }
+void pmovzxbq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N2|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x32, isXMM_XMMorMEM, NONE); }
+void pmovzxbw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x30, isXMM_XMMorMEM, NONE); }
+void pmovzxdq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_EW0|T_YMM|T_EVEX, 0x35, isXMM_XMMorMEM, NONE); }
+void pmovzxwd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x33, isXMM_XMMorMEM, NONE); }
+void pmovzxwq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x34, isXMM_XMMorMEM, NONE); }
+void pmuldq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x28, isXMM_XMMorMEM); }
+void pmulhrsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0B, T_0F38, T_66); }
void pmulhuw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE4); }
void pmulhw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE5); }
-void pmulld(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
+void pmulld(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x40, isXMM_XMMorMEM); }
void pmullw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD5); }
void pmuludq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF4); }
-void popcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xB8); }
+void popcnt(const Reg&reg, const Operand& op) { opCnt(reg, op, 0xB8); }
void popf() { db(0x9D); }
void por(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEB); }
-void prefetchit0(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0x18); }
-void prefetchit1(const Address& addr) { opModM(addr, Reg32(6), 0x0F, 0x18); }
-void prefetchnta(const Address& addr) { opModM(addr, Reg32(0), 0x0F, 0x18); }
-void prefetcht0(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0x18); }
-void prefetcht1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0x18); }
-void prefetcht2(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0x18); }
-void prefetchw(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0x0D); }
-void prefetchwt1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0x0D); }
+void prefetchit0(const Address& addr) { opMR(addr, Reg32(7), T_0F, 0x18); }
+void prefetchit1(const Address& addr) { opMR(addr, Reg32(6), T_0F, 0x18); }
+void prefetchnta(const Address& addr) { opMR(addr, Reg32(0), T_0F, 0x18); }
+void prefetcht0(const Address& addr) { opMR(addr, Reg32(1), T_0F, 0x18); }
+void prefetcht1(const Address& addr) { opMR(addr, Reg32(2), T_0F, 0x18); }
+void prefetcht2(const Address& addr) { opMR(addr, Reg32(3), T_0F, 0x18); }
+void prefetchw(const Address& addr) { opMR(addr, Reg32(1), T_0F, 0x0D); }
+void prefetchwt1(const Address& addr) { opMR(addr, Reg32(2), T_0F, 0x0D); }
void psadbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF6); }
-void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, 0x66, NONE, 0x38); }
-void pshufd(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0x66, imm8); }
-void pshufhw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0xF3, imm8); }
-void pshuflw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0xF2, imm8); }
-void pshufw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0x00, imm8); }
-void psignb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x08, 0x66, NONE, 0x38); }
-void psignd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0A, 0x66, NONE, 0x38); }
-void psignw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x09, 0x66, NONE, 0x38); }
+void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, T_0F38, T_66); }
+void pshufd(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, T_0F, T_66, imm8); }
+void pshufhw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, T_0F, T_F3, imm8); }
+void pshuflw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, T_0F, T_F2, imm8); }
+void pshufw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, T_0F, T_NONE, imm8); }
+void psignb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x08, T_0F38, T_66); }
+void psignd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0A, T_0F38, T_66); }
+void psignw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x09, T_0F38, T_66); }
void pslld(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF2); }
void pslld(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 6); }
void pslldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x73, 7); }
@@ -701,27 +890,31 @@ void psubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE9); }
void psubusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD8); }
void psubusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD9); }
void psubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF9); }
-void ptest(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x17, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
+void ptest(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38|T_YMM, 0x17, isXMM_XMMorMEM, NONE); }
void punpckhbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x68); }
void punpckhdq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6A); }
-void punpckhqdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x6D, 0x66, isXMM_XMMorMEM); }
+void punpckhqdq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0x6D, isXMM_XMMorMEM); }
void punpckhwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x69); }
void punpcklbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x60); }
void punpckldq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x62); }
-void punpcklqdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x6C, 0x66, isXMM_XMMorMEM); }
+void punpcklqdq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0x6C, isXMM_XMMorMEM); }
void punpcklwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x61); }
void pushf() { db(0x9C); }
void pxor(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEF); }
void rcl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 2); }
void rcl(const Operand& op, int imm) { opShift(op, imm, 2); }
-void rcpps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x53, 0x100, isXMM_XMMorMEM); }
-void rcpss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x53, 0xF3, isXMM_XMMorMEM); }
+void rcl(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 2, &d); }
+void rcl(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 2, &d); }
+void rcpps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x53, isXMM_XMMorMEM); }
+void rcpss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x53, isXMM_XMMorMEM); }
void rcr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 3); }
void rcr(const Operand& op, int imm) { opShift(op, imm, 3); }
+void rcr(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 3, &d); }
+void rcr(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 3, &d); }
void rdmsr() { db(0x0F); db(0x32); }
void rdpmc() { db(0x0F); db(0x33); }
-void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }
-void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }
+void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(6, Operand::REG, r.getBit()), r, T_0F, 0xC7); }
+void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(7, Operand::REG, r.getBit()), r, T_0F, 0xC7); }
void rdtsc() { db(0x0F); db(0x31); }
void rdtscp() { db(0x0F); db(0x01); db(0xF9); }
void rep() { db(0xF3); }
@@ -731,136 +924,156 @@ void repnz() { db(0xF2); }
void repz() { db(0xF3); }
void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }
void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } }
-void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); }
-void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
-void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 1); }
-void ror(const Operand& op, int imm) { opShift(op, imm, 1); }
-void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); }
-void roundpd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x09, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
-void roundps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x08, 0x66, isXMM_XMMorMEM, imm, 0x3A); }
-void roundsd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0B, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
-void roundss(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0A, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
-void rsqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0x100, isXMM_XMMorMEM); }
-void rsqrtss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0xF3, isXMM_XMMorMEM); }
+void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 8); }
+void rol(const Operand& op, int imm) { opShift(op, imm, 8); }
+void rol(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 8, &d); }
+void rol(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 8, &d); }
+void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 9); }
+void ror(const Operand& op, int imm) { opShift(op, imm, 9); }
+void ror(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 9, &d); }
+void ror(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 9, &d); }
+void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opRRO(r, Reg32e(0, r.getBit()), op, T_0F3A|T_F2|T_APX|T_MAP3, 0xF0, imm); }
+void roundpd(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A|T_YMM, 0x09, isXMM_XMMorMEM, imm); }
+void roundps(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A|T_YMM, 0x08, isXMM_XMMorMEM, imm); }
+void roundsd(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0B, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
+void roundss(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0A, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
+void rsqrtps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x52, isXMM_XMMorMEM); }
+void rsqrtss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x52, isXMM_XMMorMEM); }
void sahf() { db(0x9E); }
-void sal(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 4); }
-void sal(const Operand& op, int imm) { opShift(op, imm, 4); }
-void sar(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 7); }
-void sar(const Operand& op, int imm) { opShift(op, imm, 7); }
-void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F3 | T_0F38, 0xf7, false); }
-void sbb(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x18, 3); }
-void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
+void sal(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12); }
+void sal(const Operand& op, int imm) { opShift(op, imm, 12); }
+void sal(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12, &d); }
+void sal(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 12, &d); }
+void sar(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 15); }
+void sar(const Operand& op, int imm) { opShift(op, imm, 15); }
+void sar(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 15, &d); }
+void sar(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 15, &d); }
+void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_APX|T_F3|T_0F38, 0xf7); }
+void sbb(const Operand& op, uint32_t imm) { opOI(op, imm, 0x18, 3); }
+void sbb(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x18); }
+void sbb(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NONE, 3); }
+void sbb(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NONE, 0x18); }
void scasb() { db(0xAE); }
void scasd() { db(0xAF); }
void scasw() { db(0x66); db(0xAF); }
void serialize() { db(0x0F); db(0x01); db(0xE8); }
-void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524
-void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
-void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
-void setbe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 6); }//-V524
-void setc(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
-void sete(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 4); }//-V524
-void setg(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 15); }//-V524
-void setge(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 13); }//-V524
-void setl(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 12); }//-V524
-void setle(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 14); }//-V524
-void setna(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 6); }//-V524
-void setnae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
-void setnb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
-void setnbe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524
-void setnc(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
-void setne(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 5); }//-V524
-void setng(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 14); }//-V524
-void setnge(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 12); }//-V524
-void setnl(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 13); }//-V524
-void setnle(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 15); }//-V524
-void setno(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 1); }//-V524
-void setnp(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 11); }//-V524
-void setns(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 9); }//-V524
-void setnz(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 5); }//-V524
-void seto(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 0); }//-V524
-void setp(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 10); }//-V524
-void setpe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 10); }//-V524
-void setpo(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 11); }//-V524
-void sets(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 8); }//-V524
-void setz(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 4); }//-V524
+void seta(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 7)) return; opRext(op, 8, 0, T_0F, 0x90 | 7); }//-V524
+void setae(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 3)) return; opRext(op, 8, 0, T_0F, 0x90 | 3); }//-V524
+void setb(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 2)) return; opRext(op, 8, 0, T_0F, 0x90 | 2); }//-V524
+void setbe(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 6)) return; opRext(op, 8, 0, T_0F, 0x90 | 6); }//-V524
+void setc(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 2)) return; opRext(op, 8, 0, T_0F, 0x90 | 2); }//-V524
+void sete(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 4)) return; opRext(op, 8, 0, T_0F, 0x90 | 4); }//-V524
+void setg(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 15)) return; opRext(op, 8, 0, T_0F, 0x90 | 15); }//-V524
+void setge(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 13)) return; opRext(op, 8, 0, T_0F, 0x90 | 13); }//-V524
+void setl(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 12)) return; opRext(op, 8, 0, T_0F, 0x90 | 12); }//-V524
+void setle(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 14)) return; opRext(op, 8, 0, T_0F, 0x90 | 14); }//-V524
+void setna(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 6)) return; opRext(op, 8, 0, T_0F, 0x90 | 6); }//-V524
+void setnae(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 2)) return; opRext(op, 8, 0, T_0F, 0x90 | 2); }//-V524
+void setnb(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 3)) return; opRext(op, 8, 0, T_0F, 0x90 | 3); }//-V524
+void setnbe(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 7)) return; opRext(op, 8, 0, T_0F, 0x90 | 7); }//-V524
+void setnc(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 3)) return; opRext(op, 8, 0, T_0F, 0x90 | 3); }//-V524
+void setne(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 5)) return; opRext(op, 8, 0, T_0F, 0x90 | 5); }//-V524
+void setng(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 14)) return; opRext(op, 8, 0, T_0F, 0x90 | 14); }//-V524
+void setnge(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 12)) return; opRext(op, 8, 0, T_0F, 0x90 | 12); }//-V524
+void setnl(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 13)) return; opRext(op, 8, 0, T_0F, 0x90 | 13); }//-V524
+void setnle(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 15)) return; opRext(op, 8, 0, T_0F, 0x90 | 15); }//-V524
+void setno(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 1)) return; opRext(op, 8, 0, T_0F, 0x90 | 1); }//-V524
+void setnp(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 11)) return; opRext(op, 8, 0, T_0F, 0x90 | 11); }//-V524
+void setns(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 9)) return; opRext(op, 8, 0, T_0F, 0x90 | 9); }//-V524
+void setnz(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 5)) return; opRext(op, 8, 0, T_0F, 0x90 | 5); }//-V524
+void seto(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 0)) return; opRext(op, 8, 0, T_0F, 0x90 | 0); }//-V524
+void setp(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 10)) return; opRext(op, 8, 0, T_0F, 0x90 | 10); }//-V524
+void setpe(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 10)) return; opRext(op, 8, 0, T_0F, 0x90 | 10); }//-V524
+void setpo(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 11)) return; opRext(op, 8, 0, T_0F, 0x90 | 11); }//-V524
+void sets(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 8)) return; opRext(op, 8, 0, T_0F, 0x90 | 8); }//-V524
+void setz(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 4)) return; opRext(op, 8, 0, T_0F, 0x90 | 4); }//-V524
void sfence() { db(0x0F); db(0xAE); db(0xF8); }
-void sha1msg1(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xC9, NONE, isXMM_XMMorMEM, NONE, 0x38); }
-void sha1msg2(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCA, NONE, isXMM_XMMorMEM, NONE, 0x38); }
-void sha1nexte(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xC8, NONE, isXMM_XMMorMEM, NONE, 0x38); }
-void sha1rnds4(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0xCC, NONE, isXMM_XMMorMEM, imm, 0x3A); }
-void sha256msg1(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCC, NONE, isXMM_XMMorMEM, NONE, 0x38); }
-void sha256msg2(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCD, NONE, isXMM_XMMorMEM, NONE, 0x38); }
-void sha256rnds2(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCB, NONE, isXMM_XMMorMEM, NONE, 0x38); }
-void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 4); }
-void shl(const Operand& op, int imm) { opShift(op, imm, 4); }
-void shld(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0xA4, &_cl); }
-void shld(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0xA4); }
-void shlx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_66 | T_0F38, 0xf7, false); }
-void shr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 5); }
-void shr(const Operand& op, int imm) { opShift(op, imm, 5); }
-void shrd(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0xAC, &_cl); }
-void shrd(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0xAC); }
-void shrx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F2 | T_0F38, 0xf7, false); }
-void shufpd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC6, 0x66, isXMM_XMMorMEM, imm8); }
-void shufps(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC6, 0x100, isXMM_XMMorMEM, imm8); }
-void sqrtpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x66, isXMM_XMMorMEM); }
-void sqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x100, isXMM_XMMorMEM); }
-void sqrtsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0xF2, isXMM_XMMorMEM); }
-void sqrtss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0xF3, isXMM_XMMorMEM); }
+void sha1msg1(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xC9, isXMM_XMMorMEM, NONE); }
+void sha1msg2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCA, isXMM_XMMorMEM, NONE); }
+void sha1nexte(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xC8, isXMM_XMMorMEM, NONE); }
+void sha1rnds4(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_0F3A, 0xCC, isXMM_XMMorMEM, imm); }
+void sha256msg1(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCC, isXMM_XMMorMEM, NONE); }
+void sha256msg2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCD, isXMM_XMMorMEM, NONE); }
+void sha256rnds2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCB, isXMM_XMMorMEM, NONE); }
+void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12); }
+void shl(const Operand& op, int imm) { opShift(op, imm, 12); }
+void shl(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12, &d); }
+void shl(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 12, &d); }
+void shld(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(Reg(), op, reg, 0, 0xA4, 0x24, &_cl); }
+void shld(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(Reg(), op, reg, imm, 0xA4, 0x24); }
+void shld(const Reg& d, const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(d, op, reg, 0, 0xA4, 0x24, &_cl); }
+void shld(const Reg& d, const Operand& op, const Reg& reg, uint8_t imm) { opShxd(d, op, reg, imm, 0xA4, 0x24); }
+void shlx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_APX|T_66|T_0F38, 0xf7); }
+void shr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 13); }
+void shr(const Operand& op, int imm) { opShift(op, imm, 13); }
+void shr(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 13, &d); }
+void shr(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 13, &d); }
+void shrd(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(Reg(), op, reg, 0, 0xAC, 0x2C, &_cl); }
+void shrd(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(Reg(), op, reg, imm, 0xAC, 0x2C); }
+void shrd(const Reg& d, const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(d, op, reg, 0, 0xAC, 0x2C, &_cl); }
+void shrd(const Reg& d, const Operand& op, const Reg& reg, uint8_t imm) { opShxd(d, op, reg, imm, 0xAC, 0x2C); }
+void shrx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_APX|T_F2|T_0F38, 0xf7); }
+void shufpd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, T_0F | T_66, 0xC6, isXMM_XMMorMEM, imm8); }
+void shufps(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, T_0F, 0xC6, isXMM_XMMorMEM, imm8); }
+void sqrtpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x51, isXMM_XMMorMEM); }
+void sqrtps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x51, isXMM_XMMorMEM); }
+void sqrtsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x51, isXMM_XMMorMEM); }
+void sqrtss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x51, isXMM_XMMorMEM); }
void stac() { db(0x0F); db(0x01); db(0xCB); }
void stc() { db(0xF9); }
void std() { db(0xFD); }
void sti() { db(0xFB); }
-void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
+void stmxcsr(const Address& addr) { opMR(addr, Reg32(3), T_0F, 0xAE); }
void stosb() { db(0xAA); }
void stosd() { db(0xAB); }
void stosw() { db(0x66); db(0xAB); }
-void sub(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x28, 5); }
-void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); }
-void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); }
-void subps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x100, isXMM_XMMorMEM); }
-void subsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF2, isXMM_XMMorMEM); }
-void subss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM); }
+void sub(const Operand& op, uint32_t imm) { opOI(op, imm, 0x28, 5); }
+void sub(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x28); }
+void sub(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NF|T_CODE1_IF1, 5); }
+void sub(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NF|T_CODE1_IF1, 0x28); }
+void subpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x5C, isXMM_XMMorMEM); }
+void subps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5C, isXMM_XMMorMEM); }
+void subsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x5C, isXMM_XMMorMEM); }
+void subss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x5C, isXMM_XMMorMEM); }
void sysenter() { db(0x0F); db(0x34); }
void sysexit() { db(0x0F); db(0x35); }
void tpause(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x66); db(0x0F); db(0xAE); setModRM(3, 6, idx); }
-void tzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); }
-void ucomisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM); }
-void ucomiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM); }
+void tzcnt(const Reg&reg, const Operand& op) { if (opROO(Reg(), op, reg, T_APX|T_NF, 0xF4)) return; opCnt(reg, op, 0xBC); }
+void ucomisd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0x2E, isXMM_XMMorMEM); }
+void ucomiss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x2E, isXMM_XMMorMEM); }
void ud2() { db(0x0F); db(0x0B); }
void umonitor(const Reg& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) int bit = r.getBit(); if (BIT != bit) { if ((BIT == 32 && bit == 16) || (BIT == 64 && bit == 32)) { db(0x67); } else { XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) } } db(0xF3); db(0x0F); db(0xAE); setModRM(3, 6, idx); }
void umwait(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xF2); db(0x0F); db(0xAE); setModRM(3, 6, idx); }
-void unpckhpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM); }
-void unpckhps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x100, isXMM_XMMorMEM); }
-void unpcklpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM); }
-void unpcklps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x100, isXMM_XMMorMEM); }
+void unpckhpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x15, isXMM_XMMorMEM); }
+void unpckhps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x15, isXMM_XMMorMEM); }
+void unpcklpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x14, isXMM_XMMorMEM); }
+void unpcklps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x14, isXMM_XMMorMEM); }
void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x58); }
void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x58); }
void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x58); }
void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x58); }
-void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0xD0); }
-void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0xD0); }
-void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDE); }
-void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDF); }
-void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDC); }
-void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDD); }
-void vaesimc(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_W0, 0xDB); }
-void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0xDF, imm); }
+void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F|T_YMM, 0xD0); }
+void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2|T_0F|T_YMM, 0xD0); }
+void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F38|T_YMM|T_EVEX, 0xDE); }
+void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F38|T_YMM|T_EVEX, 0xDF); }
+void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F38|T_YMM|T_EVEX, 0xDC); }
+void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F38|T_YMM|T_EVEX, 0xDD); }
+void vaesimc(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_W0, 0xDB); }
+void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0xDF, imm); }
void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x55); }
void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55); }
void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54); }
void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54); }
-void vbcstnebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3 | T_0F38 | T_W0 | T_YMM | T_B16, 0xB1); }
-void vbcstnesh2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66 | T_0F38 | T_W0 | T_YMM | T_B16, 0xB1); }
-void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); }
-void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); }
+void vbcstnebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3|T_0F38|T_W0|T_YMM|T_B16, 0xB1); }
+void vbcstnesh2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66|T_0F38|T_W0|T_YMM|T_B16, 0xB1); }
+void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x0D, imm); }
+void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x0C, imm); }
void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4B, x4.getIdx() << 4); }
void vblendvps(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4A, x4.getIdx() << 4); }
void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A); }
void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x5A); }
void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); }
-void vbroadcastss(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x18); }
+void vbroadcastss(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_W0|T_YMM|T_EVEX, 0x18); }
void vcmpeq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 16); }
void vcmpeq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 16); }
void vcmpeq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 16); }
@@ -973,10 +1186,10 @@ void vcmpordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2,
void vcmpordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 7); }
void vcmpordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 7); }
void vcmpordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 7); }
-void vcmppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xC2, imm); }
-void vcmpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_YMM, 0xC2, imm); }
-void vcmpsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F, 0xC2, imm); }
-void vcmpss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0xC2, imm); }
+void vcmppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0xC2, imm); }
+void vcmpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F|T_YMM, 0xC2, imm); }
+void vcmpsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F, 0xC2, imm); }
+void vcmpss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F, 0xC2, imm); }
void vcmptrue_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 31); }
void vcmptrue_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 31); }
void vcmptrue_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 31); }
@@ -993,114 +1206,114 @@ void vcmpunordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x
void vcmpunordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 3); }
void vcmpunordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 3); }
void vcmpunordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 3); }
-void vcomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_66 | T_0F | T_EW1 | T_EVEX | T_SAE_X, 0x2F); }
-void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_0F | T_EW0 | T_EVEX | T_SAE_X, 0x2F); }
+void vcomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_66|T_0F|T_EW1|T_EVEX|T_SAE_X, 0x2F); }
+void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4|T_0F|T_EW0|T_EVEX|T_SAE_X, 0x2F); }
void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }
-void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); }
-void vcvtneebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3 | T_0F38 | T_W0 | T_YMM, 0xB0); }
-void vcvtneeph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66 | T_0F38 | T_W0 | T_YMM, 0xB0); }
-void vcvtneobf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F2 | T_0F38 | T_W0 | T_YMM, 0xB0); }
-void vcvtneoph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_W0 | T_YMM, 0xB0); }
-void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32 | orEvexIf(encoding), 0x72); }
+void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x5B); }
+void vcvtneebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3|T_0F38|T_W0|T_YMM, 0xB0); }
+void vcvtneeph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66|T_0F38|T_W0|T_YMM, 0xB0); }
+void vcvtneobf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F2|T_0F38|T_W0|T_YMM, 0xB0); }
+void vcvtneoph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38|T_W0|T_YMM, 0xB0); }
+void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32|orEvexIf(encoding), 0x72); }
void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }
void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }
void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
-void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); }
+void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x5B); }
void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); }
void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }
void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); }
-void vcvtsd2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x5A); }
+void vcvtsd2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_F2|T_0F|T_EW1|T_EVEX|T_ER_X, 0x5A); }
void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }
void vcvtsi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F3 | T_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }
-void vcvtss2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX | T_SAE_X, 0x5A); }
+void vcvtss2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_F3|T_0F|T_EW0|T_EVEX|T_SAE_X, 0x5A); }
void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_ER_X | T_N8, 0x2D); }
void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }
-void vcvttps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX | T_SAE_Z | T_B32, 0x5B); }
+void vcvttps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3|T_0F|T_EW0|T_YMM|T_EVEX|T_SAE_Z|T_B32, 0x5B); }
void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, 0x2C); }
void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_SAE_X | T_N8, 0x2C); }
void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5E); }
void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5E); }
void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5E); }
void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x5E); }
-void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x41, imm); }
-void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x40, imm); }
+void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0, 0x41, imm); }
+void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x40, imm); }
void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }
void vextracti128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }
void vextractps(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }
-void vfmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x98); }
-void vfmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x98); }
-void vfmadd132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0x99); }
-void vfmadd132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0x99); }
-void vfmadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xA8); }
-void vfmadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xA8); }
-void vfmadd213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xA9); }
-void vfmadd213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xA9); }
-void vfmadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xB8); }
-void vfmadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xB8); }
-void vfmadd231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xB9); }
-void vfmadd231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xB9); }
-void vfmaddsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x96); }
-void vfmaddsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x96); }
-void vfmaddsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xA6); }
-void vfmaddsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xA6); }
-void vfmaddsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xB6); }
-void vfmaddsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xB6); }
-void vfmsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x9A); }
-void vfmsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x9A); }
-void vfmsub132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0x9B); }
-void vfmsub132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0x9B); }
-void vfmsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xAA); }
-void vfmsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xAA); }
-void vfmsub213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xAB); }
-void vfmsub213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xAB); }
-void vfmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xBA); }
-void vfmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xBA); }
-void vfmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xBB); }
-void vfmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xBB); }
-void vfmsubadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x97); }
-void vfmsubadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x97); }
-void vfmsubadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xA7); }
-void vfmsubadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xA7); }
-void vfmsubadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xB7); }
-void vfmsubadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xB7); }
-void vfnmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x9C); }
-void vfnmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x9C); }
-void vfnmadd132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0x9D); }
-void vfnmadd132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0x9D); }
-void vfnmadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xAC); }
-void vfnmadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xAC); }
-void vfnmadd213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xAD); }
-void vfnmadd213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xAD); }
-void vfnmadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xBC); }
-void vfnmadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xBC); }
-void vfnmadd231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xBD); }
-void vfnmadd231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xBD); }
-void vfnmsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x9E); }
-void vfnmsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x9E); }
-void vfnmsub132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0x9F); }
-void vfnmsub132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0x9F); }
-void vfnmsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xAE); }
-void vfnmsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xAE); }
-void vfnmsub213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xAF); }
-void vfnmsub213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xAF); }
-void vfnmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xBE); }
-void vfnmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xBE); }
-void vfnmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xBF); }
-void vfnmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xBF); }
+void vfmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x98); }
+void vfmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x98); }
+void vfmadd132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0x99); }
+void vfmadd132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0x99); }
+void vfmadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xA8); }
+void vfmadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xA8); }
+void vfmadd213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xA9); }
+void vfmadd213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xA9); }
+void vfmadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xB8); }
+void vfmadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xB8); }
+void vfmadd231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xB9); }
+void vfmadd231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xB9); }
+void vfmaddsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x96); }
+void vfmaddsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x96); }
+void vfmaddsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xA6); }
+void vfmaddsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xA6); }
+void vfmaddsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xB6); }
+void vfmaddsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xB6); }
+void vfmsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x9A); }
+void vfmsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x9A); }
+void vfmsub132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0x9B); }
+void vfmsub132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0x9B); }
+void vfmsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xAA); }
+void vfmsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xAA); }
+void vfmsub213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xAB); }
+void vfmsub213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xAB); }
+void vfmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xBA); }
+void vfmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xBA); }
+void vfmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xBB); }
+void vfmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xBB); }
+void vfmsubadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x97); }
+void vfmsubadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x97); }
+void vfmsubadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xA7); }
+void vfmsubadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xA7); }
+void vfmsubadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xB7); }
+void vfmsubadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xB7); }
+void vfnmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x9C); }
+void vfnmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x9C); }
+void vfnmadd132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0x9D); }
+void vfnmadd132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0x9D); }
+void vfnmadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xAC); }
+void vfnmadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xAC); }
+void vfnmadd213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xAD); }
+void vfnmadd213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xAD); }
+void vfnmadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xBC); }
+void vfnmadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xBC); }
+void vfnmadd231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xBD); }
+void vfnmadd231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xBD); }
+void vfnmsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x9E); }
+void vfnmsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x9E); }
+void vfnmsub132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0x9F); }
+void vfnmsub132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0x9F); }
+void vfnmsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xAE); }
+void vfnmsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xAE); }
+void vfnmsub213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xAF); }
+void vfnmsub213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xAF); }
+void vfnmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xBE); }
+void vfnmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xBE); }
+void vfnmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xBF); }
+void vfnmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xBF); }
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x92, 0); }
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x92, 1); }
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x93, 1); }
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x93, 2); }
-void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); }
-void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); }
-void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF); }
-void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7C); }
-void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7C); }
-void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7D); }
-void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D); }
+void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W1|T_EW1|T_YMM|T_EVEX|T_SAE_Z|T_B64, 0xCF, imm); }
+void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W1|T_EW1|T_YMM|T_EVEX|T_SAE_Z|T_B64, 0xCE, imm); }
+void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_SAE_Z, 0xCF); }
+void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F|T_YMM, 0x7C); }
+void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2|T_0F|T_YMM, 0x7C); }
+void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F|T_YMM, 0x7D); }
+void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2|T_0F|T_YMM, 0x7D); }
void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }
void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }
-void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
+void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_W0|T_EW0|T_EVEX, 0x21, imm); }
void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }
void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }
@@ -1116,27 +1329,27 @@ void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5D); }
void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5D); }
void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x5D); }
-void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x29); }
-void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28); }
-void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x29); }
-void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x28); }
+void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_M_K, 0x29); }
+void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0x28); }
+void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F|T_EW0|T_YMM|T_EVEX|T_M_K, 0x29); }
+void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_EW0|T_YMM|T_EVEX, 0x28); }
void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }
void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }
-void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_DUP | T_F2 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z, 0x12); }
-void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_YMM, 0x7F); }
-void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM, 0x6F); }
-void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_F3 | T_0F | T_YMM, 0x7F); }
-void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM, 0x6F); }
+void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_DUP|T_F2|T_0F|T_EW1|T_YMM|T_EVEX|T_ER_X|T_ER_Y|T_ER_Z, 0x12); }
+void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66|T_0F|T_YMM, 0x7F); }
+void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_YMM, 0x6F); }
+void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_F3|T_0F|T_YMM, 0x7F); }
+void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3|T_0F|T_YMM, 0x6F); }
void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); }
-void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x17); }
-void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x16); }
-void vmovhps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_EVEX | T_EW0 | T_N8, 0x17); }
-void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x16); }
+void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_66|T_0F|T_EW1|T_EVEX, 0x17); }
+void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_N8|T_66|T_0F|T_EW1|T_EVEX, 0x16); }
+void vmovhps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_0F|T_EW0|T_EVEX, 0x17); }
+void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_N8|T_0F|T_EW0|T_EVEX, 0x16); }
void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); }
-void vmovlpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x13); }
-void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x12); }
-void vmovlps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_EVEX | T_EW0 | T_N8, 0x13); }
-void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x12); }
+void vmovlpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_66|T_0F|T_EW1|T_EVEX, 0x13); }
+void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_N8|T_66|T_0F|T_EW1|T_EVEX, 0x12); }
+void vmovlps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_0F|T_EW0|T_EVEX, 0x13); }
+void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_N8|T_0F|T_EW0|T_EVEX, 0x12); }
void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }
void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }
void vmovntdq(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); }
@@ -1144,99 +1357,99 @@ void vmovntdqa(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T
void vmovntpd(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); }
void vmovntps(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_YMM | T_EVEX | T_EW0, 0x2B); }
void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 0xD6 : 0x7E); }
-void vmovq(const Xmm& x, const Address& addr) { int type, code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }
+void vmovq(const Xmm& x, const Address& addr) { uint64_t type; uint8_t code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }
void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }
-void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_M_K, 0x11); }
-void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX, 0x10); }
-void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX, 0x10); }
-void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x16); }
-void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x12); }
-void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX | T_M_K, 0x11); }
-void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); }
-void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); }
-void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x11); }
-void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x10); }
-void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x11); }
-void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); }
-void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); }
+void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_F2|T_0F|T_EW1|T_EVEX | T_M_K, 0x11); }
+void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_F2|T_0F|T_EW1|T_EVEX, 0x10); }
+void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_N8|T_F2|T_0F|T_EW1|T_EVEX, 0x10); }
+void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3|T_0F|T_EW0|T_YMM|T_EVEX, 0x16); }
+void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3|T_0F|T_EW0|T_YMM|T_EVEX, 0x12); }
+void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N4|T_F3|T_0F|T_EW0|T_EVEX | T_M_K, 0x11); }
+void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N4|T_F3|T_0F|T_EW0|T_EVEX, 0x10); }
+void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_N4|T_F3|T_0F|T_EW0|T_EVEX, 0x10); }
+void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_M_K, 0x11); }
+void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0x10); }
+void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F|T_EW0|T_YMM|T_EVEX|T_M_K, 0x11); }
+void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_EW0|T_YMM|T_EVEX, 0x10); }
+void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x42, imm); }
void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); }
void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x59); }
void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x59); }
void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x59); }
void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x56); }
void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x56); }
-void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1C); }
-void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x1E); }
-void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1D); }
-void vpackssdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x6B); }
-void vpacksswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x63); }
-void vpackusdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x2B); }
-void vpackuswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x67); }
-void vpaddb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xFC); }
-void vpaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFE); }
-void vpaddq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xD4); }
-void vpaddsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xEC); }
-void vpaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xED); }
-void vpaddusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDC); }
-void vpaddusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDD); }
-void vpaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xFD); }
-void vpalignr(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_YMM | T_EVEX, 0x0F, imm); }
-void vpand(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xDB); }
-void vpandn(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xDF); }
-void vpavgb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE0); }
-void vpavgw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE3); }
-void vpblendd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x02, imm); }
+void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_YMM|T_EVEX, 0x1C); }
+void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_EW0|T_YMM|T_EVEX|T_B32, 0x1E); }
+void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_YMM|T_EVEX, 0x1D); }
+void vpackssdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW0|T_YMM|T_EVEX|T_B32, 0x6B); }
+void vpacksswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0x63); }
+void vpackusdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_EVEX|T_B32, 0x2B); }
+void vpackuswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0x67); }
+void vpaddb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xFC); }
+void vpaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW0|T_YMM|T_EVEX|T_B32, 0xFE); }
+void vpaddq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0xD4); }
+void vpaddsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xEC); }
+void vpaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xED); }
+void vpaddusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xDC); }
+void vpaddusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xDD); }
+void vpaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xFD); }
+void vpalignr(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_YMM|T_EVEX, 0x0F, imm); }
+void vpand(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0xDB); }
+void vpandn(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0xDF); }
+void vpavgb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xE0); }
+void vpavgw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xE3); }
+void vpblendd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x02, imm); }
void vpblendvb(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4C, x4.getIdx() << 4); }
-void vpblendw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0E, imm); }
-void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x78); }
-void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x58); }
-void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX, 0x59); }
-void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x79); }
+void vpblendw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x0E, imm); }
+void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N1|T_66|T_0F38|T_W0|T_YMM|T_EVEX, 0x78); }
+void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_W0|T_YMM|T_EVEX, 0x58); }
+void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_W0|T_EW1|T_YMM|T_EVEX, 0x59); }
+void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_W0|T_YMM|T_EVEX, 0x79); }
void vpclmulhqhqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { vpclmulqdq(x1, x2, op, 0x11); }
void vpclmulhqlqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { vpclmulqdq(x1, x2, op, 0x01); }
void vpclmullqhqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { vpclmulqdq(x1, x2, op, 0x10); }
void vpclmullqlqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { vpclmulqdq(x1, x2, op, 0x00); }
-void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM | T_EVEX, 0x44, imm); }
-void vpcmpeqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x74); }
-void vpcmpeqd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x76); }
-void vpcmpeqq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x29); }
-void vpcmpeqw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x75); }
-void vpcmpestri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x61, imm); }
-void vpcmpestrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x60, imm); }
-void vpcmpgtb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x64); }
-void vpcmpgtd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x66); }
-void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x37); }
-void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x65); }
-void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); }
-void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); }
-void vpdpbssd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_W0 | T_YMM, 0x50); }
-void vpdpbssds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_W0 | T_YMM, 0x51); }
-void vpdpbsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0x50); }
-void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0x51); }
-void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x50, encoding); }
-void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x51, encoding); }
-void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x50); }
-void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x51); }
-void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x52, encoding); }
-void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x53, encoding); }
-void vpdpwsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0xD2); }
-void vpdpwsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0xD3); }
-void vpdpwusd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_YMM, 0xD2); }
-void vpdpwusds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_YMM, 0xD3); }
-void vpdpwuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0xD2); }
-void vpdpwuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0xD3); }
+void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM|T_EVEX, 0x44, imm); }
+void vpcmpeqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x74); }
+void vpcmpeqd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x76); }
+void vpcmpeqq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x29); }
+void vpcmpeqw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x75); }
+void vpcmpestri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x61, imm); }
+void vpcmpestrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x60, imm); }
+void vpcmpgtb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x64); }
+void vpcmpgtd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x66); }
+void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x37); }
+void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x65); }
+void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x63, imm); }
+void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x62, imm); }
+void vpdpbssd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_W0|T_YMM, 0x50); }
+void vpdpbssds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_W0|T_YMM, 0x51); }
+void vpdpbsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0x50); }
+void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0x51); }
+void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x50, encoding); }
+void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x51, encoding); }
+void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_YMM, 0x50); }
+void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_YMM, 0x51); }
+void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x52, encoding); }
+void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x53, encoding); }
+void vpdpwsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0xD2); }
+void vpdpwsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0xD3); }
+void vpdpwusd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM, 0xD2); }
+void vpdpwusds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM, 0xD3); }
+void vpdpwuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_YMM, 0xD2); }
+void vpdpwuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_YMM, 0xD3); }
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
-void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); }
-void vpermilpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x0D); }
-void vpermilpd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_EVEX | T_B64, 0x05, imm); }
-void vpermilps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x0C); }
-void vpermilps(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_EVEX | T_B32, 0x04, imm); }
-void vpermpd(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x01, imm); }
-void vpermpd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x16); }
-void vpermps(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x16); }
-void vpermq(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x00, imm); }
-void vpermq(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x36); }
+void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x36); }
+void vpermilpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW1|T_YMM|T_EVEX|T_B64, 0x0D); }
+void vpermilpd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A|T_EW1|T_YMM|T_EVEX|T_B64, 0x05, imm); }
+void vpermilps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x0C); }
+void vpermilps(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A|T_EW0|T_YMM|T_EVEX|T_B32, 0x04, imm); }
+void vpermpd(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, T_66|T_0F3A|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x01, imm); }
+void vpermpd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x16); }
+void vpermps(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x16); }
+void vpermq(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, T_66|T_0F3A|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x00, imm); }
+void vpermq(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66|T_0F38|T_W0|T_EW1|T_YMM|T_EVEX|T_B64, 0x36); }
void vpextrb(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }
void vpextrd(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }
void vpextrq(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }
@@ -1245,142 +1458,142 @@ void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1
void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x90, 0); }
void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x91, 2); }
void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x91, 1); }
-void vphaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x02); }
-void vphaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x03); }
-void vphaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x01); }
-void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38, 0x41); }
-void vphsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x06); }
-void vphsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x07); }
-void vphsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x05); }
+void vphaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x02); }
+void vphaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x03); }
+void vphaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x01); }
+void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38, 0x41); }
+void vphsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x06); }
+void vphsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x07); }
+void vphsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x05); }
void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }
void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }
void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }
void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }
-void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_B64, 0xB5, encoding); }
-void vpmadd52luq(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_B64, 0xB4, encoding); }
-void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x04); }
-void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF5); }
+void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_B64, 0xB5, encoding); }
+void vpmadd52luq(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_B64, 0xB4, encoding); }
+void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x04); }
+void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xF5); }
void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); }
void vpmaskmovd(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8C); }
void vpmaskmovq(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W1 | T_YMM, 0x8E); }
void vpmaskmovq(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W1 | T_YMM, 0x8C); }
-void vpmaxsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x3C); }
-void vpmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3D); }
-void vpmaxsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xEE); }
-void vpmaxub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDE); }
-void vpmaxud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3F); }
-void vpmaxuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x3E); }
-void vpminsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x38); }
-void vpminsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x39); }
-void vpminsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xEA); }
-void vpminub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDA); }
-void vpminud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3B); }
-void vpminuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x3A); }
+void vpmaxsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x3C); }
+void vpmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_EVEX|T_B32, 0x3D); }
+void vpmaxsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xEE); }
+void vpmaxub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xDE); }
+void vpmaxud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_EVEX|T_B32, 0x3F); }
+void vpmaxuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x3E); }
+void vpminsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x38); }
+void vpminsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_EVEX|T_B32, 0x39); }
+void vpminsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xEA); }
+void vpminub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xDA); }
+void vpminud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_EVEX|T_B32, 0x3B); }
+void vpminuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x3A); }
void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); }
-void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x21); }
-void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N2 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x22); }
-void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x20); }
-void vpmovsxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX, 0x25); }
-void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x23); }
-void vpmovsxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x24); }
-void vpmovzxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x31); }
-void vpmovzxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N2 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x32); }
-void vpmovzxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x30); }
-void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX, 0x35); }
-void vpmovzxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x33); }
-void vpmovzxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x34); }
-void vpmuldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x28); }
-void vpmulhrsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x0B); }
-void vpmulhuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE4); }
-void vpmulhw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE5); }
-void vpmulld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x40); }
-void vpmullw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xD5); }
-void vpmuludq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xF4); }
-void vpor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xEB); }
-void vpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF6); }
-void vpshufb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x00); }
-void vpshufd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x70, imm); }
-void vpshufhw(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_EVEX, 0x70, imm); }
-void vpshuflw(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_YMM | T_EVEX, 0x70, imm); }
-void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); }
-void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); }
-void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); }
-void vpslld(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
-void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); }
-void vpslldq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
-void vpsllq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
-void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); }
-void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); }
-void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); }
-void vpsllw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
-void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); }
-void vpsrad(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
-void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); }
-void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); }
-void vpsraw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
-void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); }
-void vpsrld(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
-void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); }
-void vpsrldq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
-void vpsrlq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
-void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); }
-void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); }
-void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); }
-void vpsrlw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
-void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); }
-void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); }
-void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); }
-void vpsubq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xFB); }
-void vpsubsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE8); }
-void vpsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE9); }
-void vpsubusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xD8); }
-void vpsubusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xD9); }
-void vpsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF9); }
-void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM, 0x17); }
-void vpunpckhbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x68); }
-void vpunpckhdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x6A); }
-void vpunpckhqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x6D); }
-void vpunpckhwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x69); }
-void vpunpcklbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x60); }
-void vpunpckldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x62); }
-void vpunpcklqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x6C); }
-void vpunpcklwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x61); }
-void vpxor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xEF); }
-void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x53); }
-void vrcpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0x53); }
-void vroundpd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x09, imm); }
-void vroundps(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x08, imm); }
-void vroundsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0B, imm); }
-void vroundss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0A, imm); }
-void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x52); }
-void vrsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0x52); }
+void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x21); }
+void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N2|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x22); }
+void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x20); }
+void vpmovsxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_N_VL|T_66|T_0F38|T_EW0|T_YMM|T_EVEX, 0x25); }
+void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x23); }
+void vpmovsxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x24); }
+void vpmovzxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x31); }
+void vpmovzxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N2|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x32); }
+void vpmovzxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x30); }
+void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_N_VL|T_66|T_0F38|T_EW0|T_YMM|T_EVEX, 0x35); }
+void vpmovzxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x33); }
+void vpmovzxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x34); }
+void vpmuldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_EVEX|T_B64, 0x28); }
+void vpmulhrsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x0B); }
+void vpmulhuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xE4); }
+void vpmulhw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xE5); }
+void vpmulld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_EVEX|T_B32, 0x40); }
+void vpmullw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xD5); }
+void vpmuludq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0xF4); }
+void vpor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0xEB); }
+void vpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xF6); }
+void vpshufb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x00); }
+void vpshufd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW0|T_YMM|T_EVEX|T_B32, 0x70, imm); }
+void vpshufhw(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_F3|T_0F|T_YMM|T_EVEX, 0x70, imm); }
+void vpshuflw(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_F2|T_0F|T_YMM|T_EVEX, 0x70, imm); }
+void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x08); }
+void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x0A); }
+void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x09); }
+void vpslld(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66|T_0F|T_EW0|T_YMM|T_EVEX|T_B32|T_MEM_EVEX, 0x72, imm); }
+void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_EW0|T_YMM|T_EVEX, 0xF2); }
+void vpslldq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66|T_0F|T_YMM|T_EVEX|T_MEM_EVEX, 0x73, imm); }
+void vpsllq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64|T_MEM_EVEX, 0x73, imm); }
+void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0xF3); }
+void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x47); }
+void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x47); }
+void vpsllw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66|T_0F|T_YMM|T_EVEX|T_MEM_EVEX, 0x71, imm); }
+void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_YMM|T_EVEX, 0xF1); }
+void vpsrad(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66|T_0F|T_EW0|T_YMM|T_EVEX|T_B32|T_MEM_EVEX, 0x72, imm); }
+void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_EW0|T_YMM|T_EVEX, 0xE2); }
+void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x46); }
+void vpsraw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66|T_0F|T_YMM|T_EVEX|T_MEM_EVEX, 0x71, imm); }
+void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_YMM|T_EVEX, 0xE1); }
+void vpsrld(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66|T_0F|T_EW0|T_YMM|T_EVEX|T_B32|T_MEM_EVEX, 0x72, imm); }
+void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_EW0|T_YMM|T_EVEX, 0xD2); }
+void vpsrldq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66|T_0F|T_YMM|T_EVEX|T_MEM_EVEX, 0x73, imm); }
+void vpsrlq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64|T_MEM_EVEX, 0x73, imm); }
+void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0xD3); }
+void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x45); }
+void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x45); }
+void vpsrlw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66|T_0F|T_YMM|T_EVEX|T_MEM_EVEX, 0x71, imm); }
+void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_YMM|T_EVEX, 0xD1); }
+void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xF8); }
+void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW0|T_YMM|T_EVEX|T_B32, 0xFA); }
+void vpsubq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0xFB); }
+void vpsubsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xE8); }
+void vpsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xE9); }
+void vpsubusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xD8); }
+void vpsubusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xD9); }
+void vpsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xF9); }
+void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_YMM, 0x17); }
+void vpunpckhbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0x68); }
+void vpunpckhdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW0|T_YMM|T_EVEX|T_B32, 0x6A); }
+void vpunpckhqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0x6D); }
+void vpunpckhwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0x69); }
+void vpunpcklbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0x60); }
+void vpunpckldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW0|T_YMM|T_EVEX|T_B32, 0x62); }
+void vpunpcklqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0x6C); }
+void vpunpcklwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0x61); }
+void vpxor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0xEF); }
+void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_YMM, 0x53); }
+void vrcpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F, 0x53); }
+void vroundpd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A|T_YMM, 0x09, imm); }
+void vroundps(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A|T_YMM, 0x08, imm); }
+void vroundsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0, 0x0B, imm); }
+void vroundss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0, 0x0A, imm); }
+void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_YMM, 0x52); }
+void vrsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F, 0x52); }
void vsha512msg1(const Ymm& y, const Xmm& x) { if (!(y.isYMM() && x.isXMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y, 0, x, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCC); }
void vsha512msg2(const Ymm& y1, const Ymm& y2) { if (!(y1.isYMM() && y2.isYMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y1, 0, y2, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCD); }
void vsha512rnds2(const Ymm& y1, const Ymm& y2, const Xmm& x) { if (!(y1.isYMM() && y2.isYMM() && x.isXMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y1, &y2, x, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCB); }
-void vshufpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xC6, imm); }
-void vshufps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xC6, imm); }
-void vsm3msg1(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_EW0 | T_EVEX, 0xDA); }
-void vsm3msg2(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX, 0xDA); }
-void vsm3rnds2(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0xDE, imm); }
-void vsm4key4(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_EW0 | T_EVEX, 0xDA); }
-void vsm4rnds4(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_W0 | T_EW0 | T_EVEX, 0xDA); }
-void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x51); }
-void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x51); }
-void vsqrtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x51); }
-void vsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX | T_ER_X, 0x51); }
+void vshufpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0xC6, imm); }
+void vshufps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F|T_EW0|T_YMM|T_EVEX|T_B32, 0xC6, imm); }
+void vsm3msg1(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_EW0|T_EVEX, 0xDA); }
+void vsm3msg2(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_EVEX, 0xDA); }
+void vsm3rnds2(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_EW0|T_EVEX, 0xDE, imm); }
+void vsm4key4(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_EW0|T_EVEX, 0xDA); }
+void vsm4rnds4(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_W0|T_EW0|T_EVEX, 0xDA); }
+void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x51); }
+void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x51); }
+void vsqrtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_F2|T_0F|T_EW1|T_EVEX|T_ER_X, 0x51); }
+void vsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_F3|T_0F|T_EW0|T_EVEX|T_ER_X, 0x51); }
void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, T_0F, 0xAE); }
void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5C); }
void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5C); }
void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5C); }
void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x5C); }
-void vtestpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM, 0x0F); }
-void vtestps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM, 0x0E); }
-void vucomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_66 | T_0F | T_EW1 | T_EVEX | T_SAE_X, 0x2E); }
-void vucomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_0F | T_EW0 | T_EVEX | T_SAE_X, 0x2E); }
-void vunpckhpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x15); }
-void vunpckhps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x15); }
-void vunpcklpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x14); }
-void vunpcklps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x14); }
+void vtestpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_YMM, 0x0F); }
+void vtestps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_YMM, 0x0E); }
+void vucomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_66|T_0F|T_EW1|T_EVEX|T_SAE_X, 0x2E); }
+void vucomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4|T_0F|T_EW0|T_EVEX|T_SAE_X, 0x2E); }
+void vunpckhpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0x15); }
+void vunpckhps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F|T_EW0|T_YMM|T_EVEX|T_B32, 0x15); }
+void vunpcklpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0x14); }
+void vunpcklps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F|T_EW0|T_YMM|T_EVEX|T_B32, 0x14); }
void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x57); }
void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x57); }
void vzeroall() { db(0xC5); db(0xFC); db(0x77); }
@@ -1389,15 +1602,17 @@ void wait() { db(0x9B); }
void wbinvd() { db(0x0F); db(0x09); }
void wrmsr() { db(0x0F); db(0x30); }
void xabort(uint8_t imm) { db(0xC6); db(0xF8); db(imm); }
-void xadd(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); }
+void xadd(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xC0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); }
void xbegin(uint32_t rel) { db(0xC7); db(0xF8); dd(rel); }
void xend() { db(0x0F); db(0x01); db(0xD5); }
void xgetbv() { db(0x0F); db(0x01); db(0xD0); }
void xlatb() { db(0xD7); }
-void xor_(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x30, 6); }
-void xor_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); }
-void xorpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x66, isXMM_XMMorMEM); }
-void xorps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x100, isXMM_XMMorMEM); }
+void xor_(const Operand& op, uint32_t imm) { opOI(op, imm, 0x30, 6); }
+void xor_(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x30); }
+void xor_(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NF|T_CODE1_IF1, 6); }
+void xor_(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NF|T_CODE1_IF1, 0x30); }
+void xorpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x57, isXMM_XMMorMEM); }
+void xorps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x57, isXMM_XMMorMEM); }
#ifdef XBYAK_ENABLE_OMITTED_OPERAND
void vblendpd(const Xmm& x, const Operand& op, uint8_t imm) { vblendpd(x, x, op, imm); }
void vblendps(const Xmm& x, const Operand& op, uint8_t imm) { vblendps(x, x, op, imm); }
@@ -1676,42 +1891,47 @@ void clui() { db(0xF3); db(0x0F); db(0x01); db(0xEE); }
void stui() { db(0xF3); db(0x0F); db(0x01); db(0xEF); }
void testui() { db(0xF3); db(0x0F); db(0x01); db(0xED); }
void uiret() { db(0xF3); db(0x0F); db(0x01); db(0xEC); }
-void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }
-void fxrstor64(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xAE); }
-void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
-void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
-void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }
-void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }
-void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }
-void senduipi(const Reg64& r) { db(0xF3); opModR(Reg32(6), r.cvt32(), 0x0F, 0xC7); }
+void cmpxchg16b(const Address& addr) { opMR(addr, Reg64(1), T_0F, 0xC7); }
+void fxrstor64(const Address& addr) { opMR(addr, Reg64(1), T_0F, 0xAE); }
+void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }
+void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }
+void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opRO(reg, op, 0, 0x63); }
+void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x16, 0, imm); }
+void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x22, 0, imm); }
+void senduipi(const Reg64& r) { opRR(Reg32(6), r.cvt32(), T_F3 | T_0F, 0xC7); }
void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); }
void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); }
void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_ER_X, 0x2D); }
void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); }
void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }
void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }
-void cmpbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE6, false); }
-void cmpbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE2, false); }
-void cmplexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEE, false); }
-void cmplxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEC, false); }
-void cmpnbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE7, false); }
-void cmpnbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE3, false); }
-void cmpnlexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEF, false); }
-void cmpnlxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xED, false); }
-void cmpnoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE1, false); }
-void cmpnpxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEB, false); }
-void cmpnsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE9, false); }
-void cmpnzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE5, false); }
-void cmpoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE0, false); }
-void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEA, false); }
-void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE8, false); }
-void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE4, false); }
-void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }
-void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }
-void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }
-void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); }
+void jmpabs(uint64_t addr) { db(0xD5); db(0x00); db(0xA1); dq(addr); }
+void push2(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(6), T_APX|T_ND1|T_W0, 0xFF); }
+void push2p(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(6), T_APX|T_ND1|T_W1, 0xFF); }
+void pop2(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(0), T_APX|T_ND1|T_W0, 0x8F); }
+void pop2p(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(0), T_APX|T_ND1|T_W1, 0x8F); }
+void cmpbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE6); }
+void cmpbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE2); }
+void cmplexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEE); }
+void cmplxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEC); }
+void cmpnbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE7); }
+void cmpnbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE3); }
+void cmpnlexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEF); }
+void cmpnlxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xED); }
+void cmpnoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE1); }
+void cmpnpxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEB); }
+void cmpnsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE9); }
+void cmpnzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE5); }
+void cmpoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE0); }
+void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEA); }
+void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE8); }
+void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE4); }
+void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }
+void sttilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_66|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_66|T_0F38 | T_W0, 0x49); }
+void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4B); }
+void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66|T_0F38|T_W0, 0x4B); }
void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }
-void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }
+void tilestored(const Address& addr, const Tmm& tm) { if (opROO(Reg(), addr, tm, T_APX|T_F3|T_0F38|T_W0, 0x4B)) return; opVex(tm, &tmm0, addr, T_F3|T_0F38|T_W0, 0x4B); }
void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }
void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }
void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }
@@ -1737,8 +1957,8 @@ void pusha() { db(0x60); }
void pushad() { db(0x60); }
void pushfd() { db(0x9C); }
void popa() { db(0x61); }
-void lds(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC5, 0x100); }
-void les(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC4, 0x100); }
+void lds(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, T_NONE, 0xC5); }
+void les(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, T_NONE, 0xC4); }
#endif
#ifndef XBYAK_NO_OP_NAMES
void and(const Operand& op1, const Operand& op2) { and_(op1, op2); }
@@ -1762,20 +1982,17 @@ void kandnq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r
void kandnw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x42); }
void kandq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x41); }
void kandw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x41); }
-void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }
-void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }
-void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }
-void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }
-void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }
-void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }
-void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }
-void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }
-void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }
-void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }
-void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }
-void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }
-void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }
-void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }
+void kmovb(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 8); }
+void kmovb(const Opmask& k, const Operand& op) { opKmov(k, op, false, 8); }
+void kmovb(const Reg32& r, const Opmask& k) { opKmov(k, r, true, 8); }
+void kmovd(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 32); }
+void kmovd(const Opmask& k, const Operand& op) { opKmov(k, op, false, 32); }
+void kmovd(const Reg32& r, const Opmask& k) { opKmov(k, r, true, 32); }
+void kmovq(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 64); }
+void kmovq(const Opmask& k, const Operand& op) { opKmov(k, op, false, 64); }
+void kmovw(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 16); }
+void kmovw(const Opmask& k, const Operand& op) { opKmov(k, op, false, 16); }
+void kmovw(const Reg32& r, const Opmask& k) { opKmov(k, r, true, 16); }
void knotb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x44); }
void knotd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x44); }
void knotq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x44); }
@@ -1817,10 +2034,10 @@ void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_X
void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }
void vaddph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x58); }
void vaddsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x58); }
-void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x03, imm); }
-void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x03, imm); }
-void vblendmpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x65); }
-void vblendmps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x65); }
+void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x03, imm); }
+void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x03, imm); }
+void vblendmpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x65); }
+void vblendmps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x65); }
void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x19); }
void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x1A); }
void vbroadcastf32x8(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x1B); }
@@ -1943,12 +2160,12 @@ void vcmpordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x,
void vcmpordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 7); }
void vcmpordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 7); }
void vcmpordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 7); }
-void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0xC2, imm); }
-void vcmpph(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0xC2, imm); }
-void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0xC2, imm); }
-void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
-void vcmpsh(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N2 | T_F3 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xC2, imm); }
-void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
+void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0xC2, imm); }
+void vcmpph(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0xC2, imm); }
+void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0xC2, imm); }
+void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N8|T_F2|T_0F|T_EW1|T_SAE_Z|T_MUST_EVEX, 0xC2, imm); }
+void vcmpsh(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N2|T_F3|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0xC2, imm); }
+void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N4|T_F3|T_0F|T_EW0|T_SAE_Z|T_MUST_EVEX, 0xC2, imm); }
void vcmptrue_uspd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 31); }
void vcmptrue_usps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 31); }
void vcmptrue_ussd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 31); }
@@ -1966,181 +2183,181 @@ void vcmpunordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x
void vcmpunordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 3); }
void vcmpunordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 3); }
void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }
-void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x63); }
-void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); }
-void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); }
-void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
-void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x5B); }
-void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
-void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16 | T_N_VL | T_66 | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64, 0x5A); }
-void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
-void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
-void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
-void vcvtph2dq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_ER_Y | T_MUST_EVEX | T_B16, 0x5B); }
-void vcvtph2pd(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_SAE_X | T_MUST_EVEX | T_B16, 0x5A); }
-void vcvtph2psx(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_MAP6 | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B16, 0x13); }
-void vcvtph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_ER_X | T_MUST_EVEX | T_B16, 0x7B); }
-void vcvtph2udq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Y | T_MUST_EVEX | T_B16, 0x79); }
-void vcvtph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_ER_X | T_MUST_EVEX | T_B16, 0x79); }
-void vcvtph2uw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x7D); }
-void vcvtph2w(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x7D); }
-void vcvtps2phx(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_ER_Z | T_MUST_EVEX | T_B32, 0x1D); }
-void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_ER_Y | T_MUST_EVEX | T_B32, 0x7B); }
-void vcvtps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x79); }
-void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_ER_Y | T_MUST_EVEX | T_B32, 0x79); }
-void vcvtqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0xE6); }
-void vcvtqq2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16 | T_N_VL | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64, 0x5B); }
-void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x5B); }
-void vcvtsd2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_MAP5 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x5A); }
-void vcvtsd2usi(const Reg32e& r, const Operand& op) { int type = (T_N8 | T_F2 | T_0F | T_ER_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x79); }
-void vcvtsh2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x5A); }
-void vcvtsh2si(const Reg32e& r, const Operand& op) { int type = (T_N2 | T_F3 | T_MAP5 | T_ER_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x2D); }
-void vcvtsh2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_MAP6 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x13); }
-void vcvtsh2usi(const Reg32e& r, const Operand& op) { int type = (T_N2 | T_F3 | T_MAP5 | T_ER_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x79); }
-void vcvtsi2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) int type = (T_F3 | T_MAP5 | T_ER_R | T_MUST_EVEX | T_M_K) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x2A); }
-void vcvtss2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_MAP5 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x1D); }
-void vcvtss2usi(const Reg32e& r, const Operand& op) { int type = (T_N4 | T_F3 | T_0F | T_ER_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x79); }
-void vcvttpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x7A); }
-void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x78); }
-void vcvttpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x78); }
-void vcvttph2dq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_F3 | T_MAP5 | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B16, 0x5B); }
-void vcvttph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_SAE_X | T_MUST_EVEX | T_B16, 0x7A); }
-void vcvttph2udq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B16, 0x78); }
-void vcvttph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_SAE_X | T_MUST_EVEX | T_B16, 0x78); }
-void vcvttph2uw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x7C); }
-void vcvttph2w(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP5 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x7C); }
-void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B32, 0x7A); }
-void vcvttps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x78); }
-void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B32, 0x78); }
-void vcvttsd2usi(const Reg32e& r, const Operand& op) { int type = (T_N8 | T_F2 | T_0F | T_SAE_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
-void vcvttsh2si(const Reg32e& r, const Operand& op) { int type = (T_N2 | T_F3 | T_MAP5 | T_EW0 | T_SAE_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x2C); }
-void vcvttsh2usi(const Reg32e& r, const Operand& op) { int type = (T_N2 | T_F3 | T_MAP5 | T_EW0 | T_SAE_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
-void vcvttss2usi(const Reg32e& r, const Operand& op) { int type = (T_N4 | T_F3 | T_0F | T_SAE_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
-void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_F3 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x7A); }
-void vcvtudq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_F2 | T_MAP5 | T_EW0 | T_ER_Z | T_MUST_EVEX | T_B32, 0x7A); }
-void vcvtudq2ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x7A); }
-void vcvtuqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7A); }
-void vcvtuqq2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16 | T_N_VL | T_F2 | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64, 0x7A); }
-void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7A); }
+void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x63); }
+void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8A); }
+void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8A); }
+void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x63); }
+void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x5B); }
+void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x72); }
+void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16|T_N_VL|T_66|T_MAP5|T_EW1|T_ER_Z|T_MUST_EVEX|T_B64, 0x5A); }
+void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x7B); }
+void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x79); }
+void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x79); }
+void vcvtph2dq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_ER_Y|T_MUST_EVEX|T_B16, 0x5B); }
+void vcvtph2pd(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_MAP5|T_EW0|T_YMM|T_SAE_X|T_MUST_EVEX|T_B16, 0x5A); }
+void vcvtph2psx(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_MAP6|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B16, 0x13); }
+void vcvtph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_ER_X|T_MUST_EVEX|T_B16, 0x7B); }
+void vcvtph2udq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_MAP5|T_EW0|T_YMM|T_ER_Y|T_MUST_EVEX|T_B16, 0x79); }
+void vcvtph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_ER_X|T_MUST_EVEX|T_B16, 0x79); }
+void vcvtph2uw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); }
+void vcvtph2w(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); }
+void vcvtps2phx(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_66|T_MAP5|T_EW0|T_ER_Z|T_MUST_EVEX|T_B32, 0x1D); }
+void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_0F|T_EW0|T_YMM|T_ER_Y|T_MUST_EVEX|T_B32, 0x7B); }
+void vcvtps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x79); }
+void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_0F|T_EW0|T_YMM|T_ER_Y|T_MUST_EVEX|T_B32, 0x79); }
+void vcvtqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0xE6); }
+void vcvtqq2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16|T_N_VL|T_MAP5|T_EW1|T_ER_Z|T_MUST_EVEX|T_B64, 0x5B); }
+void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x5B); }
+void vcvtsd2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_F2|T_MAP5|T_EW1|T_ER_X|T_MUST_EVEX, 0x5A); }
+void vcvtsd2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N8|T_F2|T_0F|T_ER_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x79); }
+void vcvtsh2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_F3|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x5A); }
+void vcvtsh2si(const Reg32e& r, const Operand& op) { uint64_t type = (T_N2|T_F3|T_MAP5|T_ER_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x2D); }
+void vcvtsh2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_MAP6|T_EW0|T_SAE_X|T_MUST_EVEX, 0x13); }
+void vcvtsh2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N2|T_F3|T_MAP5|T_ER_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x79); }
+void vcvtsi2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) uint64_t type = (T_F3|T_MAP5|T_ER_R|T_MUST_EVEX|T_M_K) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x2A); }
+void vcvtss2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_MAP5|T_EW0|T_ER_X|T_MUST_EVEX, 0x1D); }
+void vcvtss2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N4|T_F3|T_0F|T_ER_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x79); }
+void vcvttpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x7A); }
+void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x78); }
+void vcvttpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x78); }
+void vcvttph2dq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_F3|T_MAP5|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B16, 0x5B); }
+void vcvttph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_SAE_X|T_MUST_EVEX|T_B16, 0x7A); }
+void vcvttph2udq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_MAP5|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B16, 0x78); }
+void vcvttph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_SAE_X|T_MUST_EVEX|T_B16, 0x78); }
+void vcvttph2uw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x7C); }
+void vcvttph2w(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x7C); }
+void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_0F|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B32, 0x7A); }
+void vcvttps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x78); }
+void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_0F|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B32, 0x78); }
+void vcvttsd2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N8|T_F2|T_0F|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
+void vcvttsh2si(const Reg32e& r, const Operand& op) { uint64_t type = (T_N2|T_F3|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x2C); }
+void vcvttsh2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N2|T_F3|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
+void vcvttss2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N4|T_F3|T_0F|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
+void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_F3|T_0F|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x7A); }
+void vcvtudq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_F2|T_MAP5|T_EW0|T_ER_Z|T_MUST_EVEX|T_B32, 0x7A); }
+void vcvtudq2ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_0F|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x7A); }
+void vcvtuqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x7A); }
+void vcvtuqq2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16|T_N_VL|T_F2|T_MAP5|T_EW1|T_ER_Z|T_MUST_EVEX|T_B64, 0x7A); }
+void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x7A); }
void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
-void vcvtusi2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) int type = (T_F3 | T_MAP5 | T_ER_R | T_MUST_EVEX | T_M_K) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x7B); }
+void vcvtusi2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) uint64_t type = (T_F3|T_MAP5|T_ER_R|T_MUST_EVEX|T_M_K) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x7B); }
void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
-void vcvtuw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x7D); }
-void vcvtw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x7D); }
-void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); }
+void vcvtuw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); }
+void vcvtw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); }
+void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x42, imm); }
void vdivph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5E); }
void vdivsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5E); }
-void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); }
+void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x52); }
void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }
void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }
-void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x88); }
-void vexpandps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x88); }
-void vextractf32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x19, imm); }
-void vextractf32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1B, imm); }
-void vextractf64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x19, imm); }
-void vextractf64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1B, imm); }
-void vextracti32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x39, imm); }
-void vextracti32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
-void vextracti64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); }
-void vextracti64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
-void vfcmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x56); }
-void vfcmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0xD6); }
-void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); }
-void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
-void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
-void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
-void vfmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x98); }
-void vfmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x99); }
-void vfmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xA8); }
-void vfmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xA9); }
-void vfmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xB8); }
-void vfmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xB9); }
-void vfmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x56); }
-void vfmaddsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x96); }
-void vfmaddsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xA6); }
-void vfmaddsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xB6); }
-void vfmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x9A); }
-void vfmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x9B); }
-void vfmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xAA); }
-void vfmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xAB); }
-void vfmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xBA); }
-void vfmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xBB); }
-void vfmsubadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x97); }
-void vfmsubadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xA7); }
-void vfmsubadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xB7); }
-void vfmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0xD6); }
-void vfnmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x9C); }
-void vfnmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x9D); }
-void vfnmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xAC); }
-void vfnmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xAD); }
-void vfnmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xBC); }
-void vfnmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xBD); }
-void vfnmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x9E); }
-void vfnmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x9F); }
-void vfnmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xAE); }
-void vfnmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xAF); }
-void vfnmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xBE); }
-void vfnmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xBF); }
+void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x88); }
+void vexpandps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x88); }
+void vextractf32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16|T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x19, imm); }
+void vextractf32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32|T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x1B, imm); }
+void vextractf64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x19, imm); }
+void vextractf64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x1B, imm); }
+void vextracti32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16|T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x39, imm); }
+void vextracti32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32|T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x3B, imm); }
+void vextracti64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x39, imm); }
+void vextracti64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3B, imm); }
+void vfcmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x56); }
+void vfcmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0xD6); }
+void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x54, imm); }
+void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x54, imm); }
+void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_Z|T_MUST_EVEX, 0x55, imm); }
+void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_EW0|T_SAE_Z|T_MUST_EVEX, 0x55, imm); }
+void vfmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x98); }
+void vfmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0x99); }
+void vfmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xA8); }
+void vfmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xA9); }
+void vfmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xB8); }
+void vfmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xB9); }
+void vfmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x56); }
+void vfmaddsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x96); }
+void vfmaddsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xA6); }
+void vfmaddsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xB6); }
+void vfmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x9A); }
+void vfmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0x9B); }
+void vfmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xAA); }
+void vfmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xAB); }
+void vfmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xBA); }
+void vfmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xBB); }
+void vfmsubadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x97); }
+void vfmsubadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xA7); }
+void vfmsubadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xB7); }
+void vfmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0xD6); }
+void vfnmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x9C); }
+void vfnmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0x9D); }
+void vfnmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xAC); }
+void vfnmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xAD); }
+void vfnmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xBC); }
+void vfnmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xBD); }
+void vfnmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x9E); }
+void vfnmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0x9F); }
+void vfnmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xAE); }
+void vfnmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xAF); }
+void vfnmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xBE); }
+void vfnmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xBF); }
void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
void vfpclassph(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, 0x66, imm); }
void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_EW0 | T_N2, 0x67, imm); }
void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
-void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); }
-void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); }
-void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
-void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
-void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
-void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
-void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
-void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
-void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
-void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
-void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 0); }
-void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 2); }
-void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x42); }
-void vgetexpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x42); }
-void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); }
-void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); }
-void vgetexpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x43); }
-void vgetexpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x43); }
-void vgetmantpd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x26, imm); }
-void vgetmantph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x26, imm); }
-void vgetmantps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x26, imm); }
-void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
-void vgetmantsh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
-void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
-void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x18, imm); }
-void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
-void vinsertf64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x18, imm); }
-void vinsertf64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
-void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x38, imm); }
-void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
-void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); }
-void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
+void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_VSIB, 0x92, 1); }
+void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_VSIB, 0x92, 0); }
+void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::YMM); }
+void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_N4|T_66|T_0F38|T_EW0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::ZMM); }
+void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); }
+void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_N4|T_66|T_0F38|T_EW0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); }
+void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::YMM); }
+void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_N4|T_66|T_0F38|T_EW0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::ZMM); }
+void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); }
+void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4|T_66|T_0F38|T_EW0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); }
+void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_VSIB, 0x93, 0); }
+void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_VSIB, 0x93, 2); }
+void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x42); }
+void vgetexpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x42); }
+void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x42); }
+void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_SAE_X|T_MUST_EVEX, 0x43); }
+void vgetexpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_SAE_X|T_MUST_EVEX, 0x43); }
+void vgetexpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_EW0|T_SAE_X|T_MUST_EVEX, 0x43); }
+void vgetmantpd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x26, imm); }
+void vgetmantph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x26, imm); }
+void vgetmantps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x26, imm); }
+void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_X|T_MUST_EVEX, 0x27, imm); }
+void vgetmantsh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x27, imm); }
+void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x27, imm); }
+void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16|T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x18, imm); }
+void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32|T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x1A, imm); }
+void vinsertf64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x18, imm); }
+void vinsertf64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x1A, imm); }
+void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16|T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x38, imm); }
+void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32|T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x3A, imm); }
+void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x38, imm); }
+void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3A, imm); }
void vmaxph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5F); }
void vmaxsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5F); }
void vminph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5D); }
void vminsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5D); }
-void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
-void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
-void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
-void vmovdqa64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
-void vmovdqu16(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
-void vmovdqu16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
-void vmovdqu32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
-void vmovdqu32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
-void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
-void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
-void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
-void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
-void vmovsh(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_MUST_EVEX | T_M_K, 0x11); }
-void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_MUST_EVEX, 0x10); }
-void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_MUST_EVEX, 0x10); }
-void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2 | T_66 | T_MAP5 | T_MUST_EVEX, 0x7E); }
-void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2 | T_66 | T_MAP5 | T_MUST_EVEX, 0x7E); }
-void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2 | T_66 | T_MAP5 | T_MUST_EVEX, 0x6E); }
+void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66|T_0F|T_EW0|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_M_K, 0x7F); }
+void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW0|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX, 0x6F); }
+void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66|T_0F|T_EW1|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_M_K, 0x7F); }
+void vmovdqa64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX, 0x6F); }
+void vmovdqu16(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2|T_0F|T_EW1|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_M_K, 0x7F); }
+void vmovdqu16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_0F|T_EW1|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX, 0x6F); }
+void vmovdqu32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3|T_0F|T_EW0|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_M_K, 0x7F); }
+void vmovdqu32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3|T_0F|T_EW0|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX, 0x6F); }
+void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3|T_0F|T_EW1|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_M_K, 0x7F); }
+void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3|T_0F|T_EW1|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX, 0x6F); }
+void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2|T_0F|T_EW0|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_M_K, 0x7F); }
+void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_0F|T_EW0|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX, 0x6F); }
+void vmovsh(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX|T_M_K, 0x11); }
+void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); }
+void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); }
+void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
+void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
+void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x6E); }
void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); }
void vmulsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x59); }
void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
@@ -2148,210 +2365,209 @@ void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.get
void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }
void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }
void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }
-void vpandd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xDB); }
-void vpandnd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xDF); }
-void vpandnq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xDF); }
-void vpandq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xDB); }
-void vpblendmb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x66); }
-void vpblendmd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x64); }
-void vpblendmq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x64); }
-void vpblendmw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x66); }
-void vpbroadcastb(const Xmm& x, const Reg8& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7A); }
-void vpbroadcastd(const Xmm& x, const Reg32& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7C); }
+void vpandd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0xDB); }
+void vpandnd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0xDF); }
+void vpandnq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xDF); }
+void vpandq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xDB); }
+void vpblendmb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x66); }
+void vpblendmd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x64); }
+void vpblendmq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x64); }
+void vpblendmw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x66); }
+void vpbroadcastb(const Xmm& x, const Reg8& r) { opVex(x, 0, r, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x7A); }
+void vpbroadcastd(const Xmm& x, const Reg32& r) { opVex(x, 0, r, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x7C); }
void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }
void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }
-void vpbroadcastw(const Xmm& x, const Reg16& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7B); }
-void vpcmpb(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3F, imm); }
-void vpcmpd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1F, imm); }
-void vpcmpeqb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x74); }
-void vpcmpeqd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_B32, 0x76); }
-void vpcmpeqq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x29); }
-void vpcmpeqw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x75); }
-void vpcmpgtb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x64); }
-void vpcmpgtd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x66); }
-void vpcmpgtq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x37); }
-void vpcmpgtw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x65); }
-void vpcmpq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1F, imm); }
-void vpcmpub(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3E, imm); }
-void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1E, imm); }
-void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1E, imm); }
-void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3E, imm); }
-void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3F, imm); }
-void vpcompressd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8B); }
-void vpcompressq(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8B); }
-void vpconflictd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xC4); }
-void vpconflictq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xC4); }
-void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8D); }
-void vpermi2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x75); }
-void vpermi2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x76); }
-void vpermi2pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x77); }
-void vpermi2ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x77); }
-void vpermi2q(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x76); }
-void vpermi2w(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x75); }
-void vpermt2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7D); }
-void vpermt2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x7E); }
-void vpermt2pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x7F); }
-void vpermt2ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x7F); }
-void vpermt2q(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x7E); }
-void vpermt2w(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7D); }
-void vpermw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8D); }
-void vpexpandb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); }
-void vpexpandd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x89); }
-void vpexpandq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x89); }
-void vpexpandw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); }
-void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 0); }
-void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 1); }
-void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 2); }
-void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 0); }
-void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); }
-void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); }
-void vpmaxsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3D); }
-void vpmaxuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3F); }
-void vpminsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x39); }
-void vpminuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3B); }
+void vpbroadcastw(const Xmm& x, const Reg16& r) { opVex(x, 0, r, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x7B); }
+void vpcmpb(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x3F, imm); }
+void vpcmpd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x1F, imm); }
+void vpcmpeqb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_YMM|T_MUST_EVEX, 0x74); }
+void vpcmpeqd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_YMM|T_MUST_EVEX|T_B32, 0x76); }
+void vpcmpeqq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x29); }
+void vpcmpeqw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_YMM|T_MUST_EVEX, 0x75); }
+void vpcmpgtb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_YMM|T_MUST_EVEX, 0x64); }
+void vpcmpgtd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x66); }
+void vpcmpgtq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x37); }
+void vpcmpgtw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_YMM|T_MUST_EVEX, 0x65); }
+void vpcmpq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x1F, imm); }
+void vpcmpub(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x3E, imm); }
+void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x1E, imm); }
+void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x1E, imm); }
+void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3E, imm); }
+void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3F, imm); }
+void vpcompressd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8B); }
+void vpcompressq(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8B); }
+void vpconflictd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0xC4); }
+void vpconflictq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xC4); }
+void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8D); }
+void vpermi2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x75); }
+void vpermi2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x76); }
+void vpermi2pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x77); }
+void vpermi2ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x77); }
+void vpermi2q(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x76); }
+void vpermi2w(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x75); }
+void vpermt2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x7D); }
+void vpermt2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x7E); }
+void vpermt2pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x7F); }
+void vpermt2ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x7F); }
+void vpermt2q(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x7E); }
+void vpermt2w(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x7D); }
+void vpermw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8D); }
+void vpexpandb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N1|T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x62); }
+void vpexpandd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x89); }
+void vpexpandq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x89); }
+void vpexpandw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x62); }
+void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_VSIB, 0x90, 0); }
+void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_VSIB, 0x90, 1); }
+void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_VSIB, 0x91, 2); }
+void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_VSIB, 0x91, 0); }
+void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x44); }
+void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x44); }
+void vpmaxsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x3D); }
+void vpmaxuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x3F); }
+void vpminsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x39); }
+void vpminuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x3B); }
void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }
void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }
-void vpmovdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x31, false); }
-void vpmovdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x33, true); }
+void vpmovdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x31, false); }
+void vpmovdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x33, true); }
void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }
void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }
void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }
void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }
void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }
-void vpmovqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x32, false); }
-void vpmovqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x35, true); }
-void vpmovqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x34, false); }
-void vpmovsdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x21, false); }
-void vpmovsdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x23, true); }
-void vpmovsqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x22, false); }
-void vpmovsqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x25, true); }
-void vpmovsqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x24, false); }
-void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x20, true); }
-void vpmovusdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x11, false); }
-void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x13, true); }
-void vpmovusqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x12, false); }
-void vpmovusqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x15, true); }
-void vpmovusqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x14, false); }
-void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x10, true); }
+void vpmovqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x32, false); }
+void vpmovqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x35, true); }
+void vpmovqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x34, false); }
+void vpmovsdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x21, false); }
+void vpmovsdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x23, true); }
+void vpmovsqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x22, false); }
+void vpmovsqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x25, true); }
+void vpmovsqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x24, false); }
+void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x20, true); }
+void vpmovusdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x11, false); }
+void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x13, true); }
+void vpmovusqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x12, false); }
+void vpmovusqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x15, true); }
+void vpmovusqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x14, false); }
+void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x10, true); }
void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }
-void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x30, true); }
-void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
-void vpmultishiftqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x83); }
-void vpopcntb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); }
-void vpopcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x55); }
-void vpopcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x55); }
-void vpopcntw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); }
-void vpord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEB); }
-void vporq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEB); }
-void vprold(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); }
-void vprolq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
-void vprolvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x15); }
-void vprolvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x15); }
-void vprord(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); }
-void vprorq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
-void vprorvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x14); }
-void vprorvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x14); }
-void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 0); }
-void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 1); }
-void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 2); }
-void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 0); }
-void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); }
-void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); }
-void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71); }
-void vpshldvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71); }
-void vpshldvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70); }
-void vpshldw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70, imm); }
-void vpshrdd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73, imm); }
-void vpshrdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73, imm); }
-void vpshrdvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73); }
-void vpshrdvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73); }
-void vpshrdvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72); }
-void vpshrdw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72, imm); }
+void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K, 0x30, true); }
+void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x40); }
+void vpmultishiftqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x83); }
+void vpopcntb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x54); }
+void vpopcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x55); }
+void vpopcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x55); }
+void vpopcntw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x54); }
+void vpord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0xEB); }
+void vporq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xEB); }
+void vprold(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66|T_0F|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x72, imm); }
+void vprolq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x72, imm); }
+void vprolvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x15); }
+void vprolvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x15); }
+void vprord(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66|T_0F|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x72, imm); }
+void vprorq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x72, imm); }
+void vprorvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x14); }
+void vprorvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x14); }
+void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA0, 0); }
+void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA0, 1); }
+void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA1, 2); }
+void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA1, 0); }
+void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x71, imm); }
+void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x71, imm); }
+void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x71); }
+void vpshldvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x71); }
+void vpshldvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x70); }
+void vpshldw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x70, imm); }
+void vpshrdd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x73, imm); }
+void vpshrdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x73, imm); }
+void vpshrdvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x73); }
+void vpshrdvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x73); }
+void vpshrdvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x72); }
+void vpshrdw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x72, imm); }
void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }
-void vpsllvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x12); }
-void vpsraq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
-void vpsraq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX, 0xE2); }
-void vpsravq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x46); }
-void vpsravw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x11); }
-void vpsrlvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x10); }
-void vpternlogd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x25, imm); }
-void vpternlogq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x25, imm); }
-void vptestmb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x26); }
-void vptestmd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x27); }
-void vptestmq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x27); }
-void vptestmw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x26); }
-void vptestnmb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x26); }
-void vptestnmd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x27); }
-void vptestnmq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x27); }
-void vptestnmw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x26); }
-void vpxord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEF); }
-void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEF); }
-void vrangepd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x50, imm); }
-void vrangeps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x50, imm); }
-void vrangesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x51, imm); }
-void vrangess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x51, imm); }
-void vrcp14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4C); }
-void vrcp14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4C); }
-void vrcp14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX, 0x4D); }
-void vrcp14ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX, 0x4D); }
+void vpsllvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x12); }
+void vpsraq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x72, imm); }
+void vpsraq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX, 0xE2); }
+void vpsravq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x46); }
+void vpsravw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x11); }
+void vpsrlvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x10); }
+void vpternlogd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x25, imm); }
+void vpternlogq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x25, imm); }
+void vptestmb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x26); }
+void vptestmd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x27); }
+void vptestmq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x27); }
+void vptestmw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x26); }
+void vptestnmb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x26); }
+void vptestnmd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x27); }
+void vptestnmq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x27); }
+void vptestnmw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x26); }
+void vpxord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0xEF); }
+void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xEF); }
+void vrangepd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x50, imm); }
+void vrangeps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x50, imm); }
+void vrangesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_X|T_MUST_EVEX, 0x51, imm); }
+void vrangess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x51, imm); }
+void vrcp14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x4C); }
+void vrcp14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x4C); }
+void vrcp14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX, 0x4D); }
+void vrcp14ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_EW0|T_MUST_EVEX, 0x4D); }
void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCA); }
void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }
-void vrcp28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCB); }
-void vrcp28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCB); }
-void vrcpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_MUST_EVEX | T_B16, 0x4C); }
-void vrcpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX, 0x4D); }
-void vreducepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x56, imm); }
-void vreduceph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x56, imm); }
-void vreduceps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x56, imm); }
-void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
-void vreducesh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
-void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
-void vrndscalepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x09, imm); }
-void vrndscaleph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x08, imm); }
-void vrndscaleps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x08, imm); }
-void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x0B, imm); }
-void vrndscalesh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x0A, imm); }
-void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x0A, imm); }
-void vrsqrt14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4E); }
-void vrsqrt14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4E); }
-void vrsqrt14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x4F); }
-void vrsqrt14ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x4F); }
+void vrcp28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_SAE_X|T_MUST_EVEX, 0xCB); }
+void vrcp28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_EW0|T_SAE_X|T_MUST_EVEX, 0xCB); }
+void vrcpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x4C); }
+void vrcpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_MUST_EVEX, 0x4D); }
+void vreducepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x56, imm); }
+void vreduceph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x56, imm); }
+void vreduceps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x56, imm); }
+void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_X|T_MUST_EVEX, 0x57, imm); }
+void vreducesh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x57, imm); }
+void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x57, imm); }
+void vrndscalepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x09, imm); }
+void vrndscaleph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x08, imm); }
+void vrndscaleps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x08, imm); }
+void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_X|T_MUST_EVEX, 0x0B, imm); }
+void vrndscalesh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x0A, imm); }
+void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x0A, imm); }
+void vrsqrt14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x4E); }
+void vrsqrt14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x4E); }
+void vrsqrt14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x4F); }
+void vrsqrt14ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x4F); }
void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCC); }
void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }
-void vrsqrt28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCD); }
-void vrsqrt28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCD); }
-void vrsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_MUST_EVEX | T_B16, 0x4E); }
-void vrsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX, 0x4F); }
-void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x2C); }
-void vscalefph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x2C); }
-void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); }
-void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); }
-void vscalefsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
-void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
-void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 1); }
-void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 0); }
-void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
-void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
-void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
-void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
-void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
-void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
-void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
-void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
-void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 0); }
-void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 2); }
+void vrsqrt28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_SAE_X|T_MUST_EVEX, 0xCD); }
+void vrsqrt28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_EW0|T_SAE_X|T_MUST_EVEX, 0xCD); }
+void vrsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x4E); }
+void vrsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_MUST_EVEX, 0x4F); }
+void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x2C); }
+void vscalefph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x2C); }
+void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x2C); }
+void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_ER_X|T_MUST_EVEX, 0x2D); }
+void vscalefsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0x2D); }
+void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_EW0|T_ER_X|T_MUST_EVEX, 0x2D); }
+void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA2, 1); }
+void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA2, 0); }
+void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::YMM); }
+void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_N4|T_66|T_0F38|T_EW0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::ZMM); }
+void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); }
+void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_N4|T_66|T_0F38|T_EW0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); }
+void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::YMM); }
+void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_N4|T_66|T_0F38|T_EW0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::ZMM); }
+void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); }
+void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_N4|T_66|T_0F38|T_EW0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); }
+void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA3, 0); }
+void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA3, 2); }
void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
-void vsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x51); }
-void vsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x51); }
+void vsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x51); }
+void vsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_F3|T_MAP5|T_EW0|T_ER_X|T_MUST_EVEX, 0x51); }
void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5C); }
void vsubsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5C); }
void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }
#ifdef XBYAK64
-void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
-void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }
-void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7C); }
+void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }
+void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x7C); }
#endif
#endif