about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: MITSUNARI Shigeo <[email protected]> 2024-10-31 09:18:58 +0900
committer: MITSUNARI Shigeo <[email protected]> 2024-10-31 09:18:58 +0900
commit: 2c02730b822d461aa2010f5b9cec6a8595243866 (patch)
tree: 609d40ad2a33fa1dffa190a6e7754dfe6addaf6e
parent: 97b66116ffd26fdbbe27cb6b56236e7078dbd3a0 (diff)
parent: e8ba033af9eaaf3ae94981652efba1ce861c552a (diff)
download: xbyak-2c02730b822d461aa2010f5b9cec6a8595243866.tar.gz
xbyak-2c02730b822d461aa2010f5b9cec6a8595243866.zip
Merge branch 'dev' (v7.21)
-rw-r--r-- CMakeLists.txt 2
-rw-r--r-- doc/changelog.md 1
-rw-r--r-- gen/gen_code.cpp 41
-rw-r--r-- meson.build 2
-rw-r--r-- readme.md 2
-rw-r--r-- readme.txt 3
-rw-r--r-- test/misc.cpp 36
-rw-r--r-- xbyak/xbyak.h 8
-rw-r--r-- xbyak/xbyak_mnemonic.h 61
9 files changed, 94 insertions, 62 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5b06512..d38ec83 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.5)
-project(xbyak LANGUAGES CXX VERSION 7.20.1)
+project(xbyak LANGUAGES CXX VERSION 7.21)
file(GLOB headers xbyak/*.h)
diff --git a/doc/changelog.md b/doc/changelog.md
index 47117d8..4406559 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -1,5 +1,6 @@
# History
+* 2024/Oct/31 ver 7.21 Enhance XMM register validation in SSE instructions
* 2024/Oct/17 ver 7.20.1 Updated to comply with AVX10.2 specification rev 2.0
* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10.
* 2024/Oct/15 ver 7.11 Added full support for AVX10.2
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index c2db4ac..65bd933 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -413,7 +413,7 @@ void put()
for (size_t i = 0; i < NUM_OF_ARRAY(mmxTbl6); i++) {
const MmxTbl6 *p = &mmxTbl6[i];
printf("void %s(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x%02X, T_0F, %s); }\n", p->name, p->code, p->pref);
- printf("void %s(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|%s, 0x%02X); }\n", p->name, p->pref, p->code2);
+ printf("void %s(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|%s, 0x%02X); }\n", p->name, p->pref, p->code2);
}
}
{
@@ -484,7 +484,7 @@ void put()
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string s = type2String(p->type);
- printf("void %s(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
+ printf("void %s(const Xmm& reg1, const Xmm& reg2) { opSSE(reg1, reg2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
}
}
{
@@ -1095,7 +1095,7 @@ void put()
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
// cast xmm register to 16bit register to put 0x66
- printf("void %s(const Address& addr, const Xmm& reg) { opMR(addr, Reg16(reg.getIdx()), T_0F, 0x%02X); }\n", p->name, p->code);
+ printf("void %s(const Address& addr, const Xmm& reg) { if (reg.getIdx() >= 16) XBYAK_THROW(ERR_BAD_PARAMETER) opSSE(Reg16(reg.getIdx()), addr, T_0F, 0x%02X); }\n", p->name, p->code);
}
}
{
@@ -1165,24 +1165,22 @@ void put()
puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x20, isXMM_REG32orMEM, imm); }");
puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x22, isXMM_REG32orMEM, imm); }");
- puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(reg, mmx, T_0F, 0xD7); }");
- puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opRR(reg1, reg2, T_0F, 0xF7); }");
- puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opRR(reg, xmm, T_0F, 0x50); }");
+ puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(reg, mmx, T_0F, 0xD7); }");
+ puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { opSSE(reg1, reg2, T_0F, 0xF7); }");
+ puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opSSE(reg, xmm, T_0F, 0x50); }");
puts("void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }");
- puts("void movntps(const Address& addr, const Xmm& xmm) { opMR(addr, Mmx(xmm.getIdx()), T_0F, 0x2B); }");
- puts("void movntdqa(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_66 | T_0F38, 0x2A); }");
- puts("void lddqu(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_F2 | T_0F, 0xF0); }");
+ puts("void movntps(const Address& addr, const Xmm& xmm) { opSSE(Xmm(xmm.getIdx()), addr, T_0F, 0x2B); }");
+ puts("void movntdqa(const Xmm& xmm, const Address& addr) { opSSE(xmm, addr, T_66 | T_0F38, 0x2A); }");
+ puts("void lddqu(const Xmm& xmm, const Address& addr) { opSSE(xmm, addr, T_F2 | T_0F, 0xF0); }");
puts("void movnti(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F, 0xC3); }");
- puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opMR(addr, mmx, T_0F, 0xE7); }");
-
- puts("void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x7E); }");
- puts("void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }");
- puts("void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x6E); }");
- puts("void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }");
- puts("void movq2dq(const Xmm& xmm, const Mmx& mmx) { opRR(xmm, mmx, T_F3 | T_0F, 0xD6); }");
- puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { opRR(mmx, xmm, T_F2 | T_0F, 0xD6); }");
- puts("void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opRO(mmx, op, T_0F, mmx.isXMM() ? 0x7E : 0x6F, mmx.getKind() == op.getKind()); }");
- puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, mmx.isXMM() ? 0xD6 : 0x7F); }");
+ puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(mmx, addr, T_0F, 0xE7); }");
+
+ puts("void movd(const Operand& op, const Mmx& mmx) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opSSE(mmx, op, T_0F, 0x7E); }");
+ puts("void movd(const Mmx& mmx, const Operand& op) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opSSE(mmx, op, T_0F, 0x6E); }");
+ puts("void movq2dq(const Xmm& xmm, const Mmx& mmx) { opSSE(xmm, mmx, T_F3 | T_0F, 0xD6); }");
+ puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { opSSE(mmx, xmm, T_F2 | T_0F, 0xD6); }");
+ puts("void movq(const Mmx& mmx, const Operand& op) { if (!op.isMEM() && mmx.getKind() != op.getKind()) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0xF3); opSSE(mmx, op, T_0F, mmx.isXMM() ? 0x7E : 0x6F); }");
+ puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, addr, T_0F, mmx.isXMM() ? 0xD6 : 0x7F); }");
puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(6, Operand::REG, r.getBit()), r, T_0F, 0xC7); }");
puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(7, Operand::REG, r.getBit()), r, T_0F, 0xC7); }");
puts("void crc32(const Reg32e& r, const Operand& op) { if (!((r.isBit(32) && op.isBit(8|16|32)) || (r.isBit(64) && op.isBit(8|64)))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) int code = 0xF0 | (op.isBit(8) ? 0 : 1); uint64_t type = op.isBit(16) ? T_66:0; if (opROO(Reg(), op, static_cast<const Reg&>(r), T_APX|type, code)) return; opRO(r, op, T_F2|T_0F38|type, code); }");
@@ -1443,7 +1441,6 @@ void put()
printf("void %s(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0x%02X, T_MUST_EVEX, 0x%02X); }\n", p->name, p->code, p->code2);
}
puts("void sha1rnds4(const Xmm& x, const Operand& op, uint8_t imm) { opSSE_APX(x, op, T_0F3A, 0xCC, T_MUST_EVEX, 0xD4, imm); }");
- puts("void sha1msg12(const Xmm& x, const Operand& op) { opROO(Reg(), op, x, T_MUST_EVEX, 0xD9); }");
}
// (m, x), (m, y)
{
@@ -1952,8 +1949,8 @@ void put64()
putMemOp("cmpxchg16b", "T_0F", 1, 0xC7, 64);
putMemOp("fxrstor64", "T_0F", 1, 0xAE, 64);
- puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }");
- puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }");
+ puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x7E); }");
+ puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x6E); }");
puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opRO(reg, op, 0, 0x63); }");
puts("void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x16, 0, imm); }");
puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x22, 0, imm); }");
diff --git a/meson.build b/meson.build
index 5bb3b78..b7b465e 100644
--- a/meson.build
+++ b/meson.build
@@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
- version: '7.20.1',
+ version: '7.21',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)
diff --git a/readme.md b/readme.md
index a5dabdd..63a0c00 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
-# Xbyak 7.20.1 [![Badge Build]][Build Status]
+# Xbyak 7.21 [![Badge Build]][Build Status]
*A JIT assembler for x86/x64 architectures supporting advanced instruction sets up to AVX10.2*
diff --git a/readme.txt b/readme.txt
index 82083cd..d9fef26 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
- C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.20.1
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.21
-----------------------------------------------------------------------------
◎概要
@@ -404,6 +404,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴
+2024/10/31 ver 7.21 SSE命令のXMMレジスタのチェックを厳密化
2024/10/17 ver 7.20.1 AVX10.2 rev 2.0仕様書の変更に追従
2024/10/15 ver 7.20 setDefaultEncoding/setDefaultEncodingAVX10の仕様確定
2024/10/15 ver 7.11 AVX10.2完全サポート
diff --git a/test/misc.cpp b/test/misc.cpp
index 3ebb74c..c4b53fe 100644
--- a/test/misc.cpp
+++ b/test/misc.cpp
@@ -35,6 +35,42 @@ CYBOZU_TEST_AUTO(badSSE)
CYBOZU_TEST_EXCEPTION(movapd(xm16, xm1), Xbyak::Error);
CYBOZU_TEST_EXCEPTION(movhpd(xm16, ptr[eax]), Xbyak::Error);
CYBOZU_TEST_EXCEPTION(pextrb(eax, xm16, 1), Xbyak::Error);
+
+ CYBOZU_TEST_EXCEPTION(lddqu(xm16, ptr[rax]), Error);
+ CYBOZU_TEST_EXCEPTION(maskmovdqu(xm16, xm1), Error);
+ CYBOZU_TEST_EXCEPTION(maskmovq(xm16, xm1), Error);
+ CYBOZU_TEST_EXCEPTION(movapd(ptr[rax], xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movaps(ptr[rax], xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movd(ptr[rax], xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movd(xm16, ptr[rax]), Error);
+ CYBOZU_TEST_EXCEPTION(movd(eax, xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movd(xm16, eax), Error);
+ CYBOZU_TEST_EXCEPTION(movdq2q(mm1, xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movdqa(ptr[rax], xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movdqu(ptr[rax], xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movhlps(xm16, xm1), Error);
+ CYBOZU_TEST_EXCEPTION(movlhps(xm16, xm1), Error);
+ CYBOZU_TEST_EXCEPTION(movmskpd(rax, xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movmskps(rax, xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movntdq(ptr[rax], xmm16), Error);
+ CYBOZU_TEST_EXCEPTION(movntdqa(xm16, ptr[rax]), Error);
+ CYBOZU_TEST_EXCEPTION(movntpd(ptr[rax], xmm16), Error);
+ CYBOZU_TEST_EXCEPTION(movntps(ptr[rax], xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movntq(ptr[rax], xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movq(ptr[rax], xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movq(xm16, ptr[rax]), Error);
+ CYBOZU_TEST_EXCEPTION(movq(rax, xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movq(xm16, rax), Error);
+ CYBOZU_TEST_EXCEPTION(movq2dq(xm16, mm1), Error);
+ CYBOZU_TEST_EXCEPTION(movsd(ptr[rax], xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movss(ptr[rax], xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movupd(ptr[rax], xm16), Error);
+ CYBOZU_TEST_EXCEPTION(movups(ptr[rax], xm16), Error);
+ CYBOZU_TEST_EXCEPTION(extractps(ptr[rax], xm16, 3), Error);
+ CYBOZU_TEST_EXCEPTION(pextrb(ptr[rax], xm16, 3), Error);
+ CYBOZU_TEST_EXCEPTION(pextrd(ptr[rax], xm16, 3), Error);
+ CYBOZU_TEST_EXCEPTION(pextrw(ptr[rax], xm16, 3), Error);
+ CYBOZU_TEST_EXCEPTION(pmovmskb(eax, xm16), Error);
}
} code;
}
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 5982a5d..28af005 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -155,7 +155,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
- VERSION = 0x7201 /* 0xABCD = A.BC(.D) */
+ VERSION = 0x7210 /* 0xABCD = A.BC(.D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@@ -1734,10 +1734,10 @@ private:
{
return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM());
}
- static inline bool isValidSSE(const Operand& op1)
+ static inline bool isValidSSE(const Operand& op)
{
// SSE instructions do not support XMM16 - XMM31
- return !(op1.isXMM() && op1.getIdx() >= 16);
+ return !(op.isXMM() && op.getIdx() >= 16);
}
static inline uint8_t rexRXB(int bit, int bit3, const Reg& r, const Reg& b, const Reg& x = Reg())
{
@@ -2172,7 +2172,7 @@ private:
}
}
}
- void opSSE(const Reg& r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&), int imm8 = NONE)
+ void opSSE(const Reg& r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&) = 0, int imm8 = NONE)
{
if (isValid && !isValid(r, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
if (!isValidSSE(r) || !isValidSSE(op)) XBYAK_THROW(ERR_NOT_SUPPORTED)
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 4854e0c..64930ab 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "7.20.1"; }
+const char *getVersionString() const { return "7.21"; }
void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); }
void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); }
void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
@@ -645,7 +645,7 @@ void jz(const char *label, LabelType type = T_AUTO) { jz(std::string(label), typ
void jz(const void *addr) { opJmpAbs(addr, T_NEAR, 0x74, 0x84, 0x0F); }//-V524
void jz(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }//-V524
void lahf() { db(0x9F); }
-void lddqu(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_F2 | T_0F, 0xF0); }
+void lddqu(const Xmm& xmm, const Address& addr) { opSSE(xmm, addr, T_F2 | T_0F, 0xF0); }
void ldmxcsr(const Address& addr) { opMR(addr, Reg32(2), T_0F, 0xAE); }
void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opMR(addr, reg, 0, 0x8D); }
void leave() { db(0xC9); }
@@ -667,8 +667,8 @@ void loopne(const char *label) { loopne(std::string(label)); }
void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, T_0F, 0xB2); }
void lzcnt(const Reg&reg, const Operand& op) { if (opROO(Reg(), op, reg, T_APX|T_NF, 0xF5)) return; opCnt(reg, op, 0xBD); }
-void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, T_66|T_0F, 0xF7); }
-void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opRR(reg1, reg2, T_0F, 0xF7); }
+void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { opSSE(reg1, reg2, T_66|T_0F, 0xF7); }
+void maskmovq(const Mmx& reg1, const Mmx& reg2) { opSSE(reg1, reg2, T_0F, 0xF7); }
void maxpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x5F, isXMM_XMMorMEM); }
void maxps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5F, isXMM_XMMorMEM); }
void maxsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x5F, isXMM_XMMorMEM); }
@@ -680,54 +680,52 @@ void minsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x5D
void minss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x5D, isXMM_XMMorMEM); }
void monitor() { db(0x0F); db(0x01); db(0xC8); }
void monitorx() { db(0x0F); db(0x01); db(0xFA); }
-void movapd(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_66, 0x29); }
+void movapd(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_66, 0x29); }
void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, T_0F, T_66); }
-void movaps(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_NONE, 0x29); }
+void movaps(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_NONE, 0x29); }
void movaps(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, T_0F, T_NONE); }
void movbe(const Address& addr, const Reg& reg) { opMR(addr, reg, T_0F38, 0xF1, T_APX, 0x61); }
void movbe(const Reg& reg, const Address& addr) { opMR(addr, reg, T_0F38, 0xF0, T_APX, 0x60); }
-void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x7E); }
-void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x6E); }
-void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }
-void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }
+void movd(const Mmx& mmx, const Operand& op) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opSSE(mmx, op, T_0F, 0x6E); }
+void movd(const Operand& op, const Mmx& mmx) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opSSE(mmx, op, T_0F, 0x7E); }
void movddup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_DUP|T_F2|T_0F|T_EW1|T_YMM|T_EVEX|T_ER_X|T_ER_Y|T_ER_Z, 0x12, isXMM_XMMorMEM, NONE); }
void movdir64b(const Reg& reg, const Address& addr) { opMR(addr, reg.cvt32(), T_66|T_0F38, 0xF8, T_APX|T_66); }
void movdiri(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F38, 0xF9, T_APX); }
-void movdq2q(const Mmx& mmx, const Xmm& xmm) { opRR(mmx, xmm, T_F2 | T_0F, 0xD6); }
-void movdqa(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_66, 0x7F); }
+void movdq2q(const Mmx& mmx, const Xmm& xmm) { opSSE(mmx, xmm, T_F2 | T_0F, 0xD6); }
+void movdqa(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_66, 0x7F); }
void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, T_0F, T_66); }
-void movdqu(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_F3, 0x7F); }
+void movdqu(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_F3, 0x7F); }
void movdqu(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, T_0F, T_F3); }
-void movhlps(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, T_0F, 0x12); }
+void movhlps(const Xmm& reg1, const Xmm& reg2) { opSSE(reg1, reg2, T_0F, 0x12); }
void movhpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_66|T_0F, 0x16); }
void movhps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_0F, 0x16); }
-void movlhps(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, T_0F, 0x16); }
+void movlhps(const Xmm& reg1, const Xmm& reg2) { opSSE(reg1, reg2, T_0F, 0x16); }
void movlpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_66|T_0F, 0x12); }
void movlps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_0F, 0x12); }
void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }
-void movmskps(const Reg32e& reg, const Xmm& xmm) { opRR(reg, xmm, T_0F, 0x50); }
-void movntdq(const Address& addr, const Xmm& reg) { opMR(addr, Reg16(reg.getIdx()), T_0F, 0xE7); }
-void movntdqa(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_66 | T_0F38, 0x2A); }
+void movmskps(const Reg32e& reg, const Xmm& xmm) { opSSE(reg, xmm, T_0F, 0x50); }
+void movntdq(const Address& addr, const Xmm& reg) { if (reg.getIdx() >= 16) XBYAK_THROW(ERR_BAD_PARAMETER) opSSE(Reg16(reg.getIdx()), addr, T_0F, 0xE7); }
+void movntdqa(const Xmm& xmm, const Address& addr) { opSSE(xmm, addr, T_66 | T_0F38, 0x2A); }
void movnti(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F, 0xC3); }
-void movntpd(const Address& addr, const Xmm& reg) { opMR(addr, Reg16(reg.getIdx()), T_0F, 0x2B); }
-void movntps(const Address& addr, const Xmm& xmm) { opMR(addr, Mmx(xmm.getIdx()), T_0F, 0x2B); }
-void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opMR(addr, mmx, T_0F, 0xE7); }
-void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, mmx.isXMM() ? 0xD6 : 0x7F); }
-void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opRO(mmx, op, T_0F, mmx.isXMM() ? 0x7E : 0x6F, mmx.getKind() == op.getKind()); }
-void movq2dq(const Xmm& xmm, const Mmx& mmx) { opRR(xmm, mmx, T_F3 | T_0F, 0xD6); }
+void movntpd(const Address& addr, const Xmm& reg) { if (reg.getIdx() >= 16) XBYAK_THROW(ERR_BAD_PARAMETER) opSSE(Reg16(reg.getIdx()), addr, T_0F, 0x2B); }
+void movntps(const Address& addr, const Xmm& xmm) { opSSE(Xmm(xmm.getIdx()), addr, T_0F, 0x2B); }
+void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(mmx, addr, T_0F, 0xE7); }
+void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, addr, T_0F, mmx.isXMM() ? 0xD6 : 0x7F); }
+void movq(const Mmx& mmx, const Operand& op) { if (!op.isMEM() && mmx.getKind() != op.getKind()) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0xF3); opSSE(mmx, op, T_0F, mmx.isXMM() ? 0x7E : 0x6F); }
+void movq2dq(const Xmm& xmm, const Mmx& mmx) { opSSE(xmm, mmx, T_F3 | T_0F, 0xD6); }
void movsb() { db(0xA4); }
void movsd() { db(0xA5); }
-void movsd(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_F2, 0x11); }
+void movsd(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_F2, 0x11); }
void movsd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_F2); }
void movshdup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F|T_EW0|T_YMM|T_EVEX, 0x16, isXMM_XMMorMEM, NONE); }
void movsldup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F|T_EW0|T_YMM|T_EVEX, 0x12, isXMM_XMMorMEM, NONE); }
-void movss(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_F3, 0x11); }
+void movss(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_F3, 0x11); }
void movss(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_F3); }
void movsw() { db(0x66); db(0xA5); }
void movsx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xBE); }
-void movupd(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_66, 0x11); }
+void movupd(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_66, 0x11); }
void movupd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_66); }
-void movups(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_NONE, 0x11); }
+void movups(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_NONE, 0x11); }
void movups(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_NONE); }
void movzx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xB6); }
void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x42, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
@@ -823,7 +821,7 @@ void pminsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEA); }
void pminub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDA); }
void pminud(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3B, isXMM_XMMorMEM); }
void pminuw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3A, isXMM_XMMorMEM); }
-void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(reg, mmx, T_0F, 0xD7); }
+void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(reg, mmx, T_0F, 0xD7); }
void pmovsxbd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x21, isXMM_XMMorMEM, NONE); }
void pmovsxbq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N2|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x22, isXMM_XMMorMEM, NONE); }
void pmovsxbw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x20, isXMM_XMMorMEM, NONE); }
@@ -988,7 +986,6 @@ void sets(const Operand& op) { opSetCC(op, 8); }//-V524
void setz(const Operand& op) { opSetCC(op, 4); }//-V524
void sfence() { db(0x0F); db(0xAE); db(0xF8); }
void sha1msg1(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC9, T_MUST_EVEX, 0xD9); }
-void sha1msg12(const Xmm& x, const Operand& op) { opROO(Reg(), op, x, T_MUST_EVEX, 0xD9); }
void sha1msg2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCA, T_MUST_EVEX, 0xDA); }
void sha1nexte(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC8, T_MUST_EVEX, 0xD8); }
void sha1rnds4(const Xmm& x, const Operand& op, uint8_t imm) { opSSE_APX(x, op, T_0F3A, 0xCC, T_MUST_EVEX, 0xD4, imm); }
@@ -1877,8 +1874,8 @@ void testui() { db(0xF3); db(0x0F); db(0x01); db(0xED); }
void uiret() { db(0xF3); db(0x0F); db(0x01); db(0xEC); }
void cmpxchg16b(const Address& addr) { opMR(addr, Reg64(1), T_0F, 0xC7); }
void fxrstor64(const Address& addr) { opMR(addr, Reg64(1), T_0F, 0xAE); }
-void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }
-void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }
+void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x7E); }
+void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x6E); }
void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opRO(reg, op, 0, 0x63); }
void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x16, 0, imm); }
void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x22, 0, imm); }