aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author MITSUNARI Shigeo <[email protected]> 2018-09-19 15:45:15 +0900
committer MITSUNARI Shigeo <[email protected]> 2018-09-19 15:45:15 +0900
commit 42462ef922893f0d3f2156d005fa27ba6898498b (patch)
tree 4f1cd4d28f6136130ab51967a0b6aa78ff887719
parent da9117a93f4d3a9a00288b2acc8452f72151aad1 (diff)
download xbyak-42462ef922893f0d3f2156d005fa27ba6898498b.tar.gz
xbyak-42462ef922893f0d3f2156d005fa27ba6898498b.zip
use evex encoding for vpslld/vpslldq/vpsraw/...(reg, mem, imm);v5.73
-rw-r--r-- gen/avx_type.hpp 5
-rw-r--r-- gen/gen_code.cpp 20
-rw-r--r-- readme.md 3
-rw-r--r-- readme.txt 3
-rw-r--r-- test/make_512.cpp 244
-rw-r--r-- xbyak/xbyak.h 5
-rw-r--r-- xbyak/xbyak_mnemonic.h 22
7 files changed, 156 insertions, 146 deletions
diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp
index 6f51166..a659699 100644
--- a/gen/avx_type.hpp
+++ b/gen/avx_type.hpp
@@ -37,6 +37,7 @@
T_B64 = 1 << 27, // m64bcst
T_M_K = 1 << 28, // mem{k}
T_VSIB = 1 << 29,
+ T_MEM_EVEX = 1 << 30, // use evex if mem
T_XXX
};
@@ -161,5 +162,9 @@ std::string type2String(int type)
if (!str.empty()) str += " | ";
str += "T_VSIB";
}
+ if (type & T_MEM_EVEX) {
+ if (!str.empty()) str += " | ";
+ str += "T_MEM_EVEX";
+ }
return str;
}
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 70db4fe..43984c0 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1491,16 +1491,16 @@ void put()
int idx;
int type;
} tbl[] = {
- { "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX },
- { "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX },
- { "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX },
- { "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 },
- { "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 },
- { "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX },
- { "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 },
- { "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX },
- { "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 },
- { "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 },
+ { "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
+ { "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
+ { "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
+ { "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
+ { "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 },
+ { "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
+ { "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
+ { "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
+ { "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
+ { "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
diff --git a/readme.md b/readme.md
index 704c6da..52f2d26 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
-# Xbyak 5.72 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
+# Xbyak 5.73 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
## Abstract
@@ -392,6 +392,7 @@ modified new BSD License
http://opensource.org/licenses/BSD-3-Clause
## History
+* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc. for (reg, mem, imm8)
* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
* 2018/Sep/04 ver 5.71 L() returns a new label instance
* 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting
diff --git a/readme.txt b/readme.txt
index 3b43f1a..7bdde28 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
- C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.72
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.73
-----------------------------------------------------------------------------
◎概要
@@ -373,6 +373,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴
+2018/09/19 ver 5.73 vpslld, vpslldq, vpsllwなどの(reg, mem, imm8)に対するevexエンコーディング修正
2018/09/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
2018/08/27 ver 5.71 新しいlabelインスタンスを返すL()を追加
2018/08/27 ver 5.70 read/exec設定のためのsetProtectMode()とDontUseProtectの追加
diff --git a/test/make_512.cpp b/test/make_512.cpp
index 38f989c..49d082c 100644
--- a/test/make_512.cpp
+++ b/test/make_512.cpp
@@ -73,7 +73,6 @@ const uint64 YMM_ER = 1ULL << 36;
const uint64 VM32Y_K = 1ULL << 37;
const uint64 IMM_2 = 1ULL << 38;
const uint64 IMM = IMM_1 | IMM_2;
-const uint64 XMM = _XMM | _XMM2;
const uint64 YMM = _YMM | _YMM2;
const uint64 K = 1ULL << 43;
const uint64 _ZMM = 1ULL << 44;
@@ -90,7 +89,10 @@ const uint64 ZMM_SAE = 1ULL << 48;
const uint64 ZMM_ER = 1ULL << 49;
#ifdef XBYAK64
const uint64 _XMM3 = 1ULL << 50;
+#else
+const uint64 _XMM3 = 0;
#endif
+const uint64 XMM = _XMM | _XMM2 | _XMM3;
const uint64 XMM_SAE = 1ULL << 51;
#ifdef XBYAK64
const uint64 XMM_KZ = 1ULL << 52;
@@ -608,7 +610,7 @@ public:
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- put(p->name, K, _XMM, _XMM | MEM, IMM8);
+ put(p->name, K, XMM, _XMM | MEM, IMM8);
if (!p->supportYMM) continue;
put(p->name, K, _YMM, _YMM | MEM, IMM8);
put(p->name, K, _ZMM, _ZMM | MEM, IMM8);
@@ -627,10 +629,10 @@ public:
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
- put(p->name, XMM | _XMM3, XMM_SAE | XMM | MEM);
+ put(p->name, XMM, XMM_SAE | XMM | MEM);
}
}
- put("vcomiss", _XMM3, XMM | MEM);
+ put("vcomiss", XMM, _XMM3 | MEM);
put("vcomiss", XMM, XMM_SAE);
#endif
}
@@ -674,10 +676,10 @@ public:
"vpbroadcastq",
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
- put(tbl[i], XMM_KZ | ZMM_KZ, _XMM | _MEM);
+ put(tbl[i], XMM_KZ | ZMM_KZ, XMM | _MEM);
}
}
- put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, _XMM | _MEM);
+ put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, XMM | _MEM);
put("vbroadcasti32x4", YMM_KZ | ZMM_KZ, _MEM);
put("vbroadcasti64x2", YMM_KZ | ZMM_KZ, _MEM);
put("vbroadcasti32x8", ZMM_KZ, _MEM);
@@ -685,14 +687,14 @@ public:
}
void putMisc1()
{
- put("vmaskmovps", XMM, XMM, MEM);
+ put("vmaskmovps", _XMM, _XMM, MEM);
put("vmaskmovps", YMM, YMM, MEM);
put("vmaskmovpd", YMM, YMM, MEM);
- put("vmaskmovpd", XMM, XMM, MEM);
+ put("vmaskmovpd", _XMM, _XMM, MEM);
- put("vmaskmovps", MEM, XMM, XMM);
- put("vmaskmovpd", MEM, XMM, XMM);
+ put("vmaskmovps", MEM, _XMM, _XMM);
+ put("vmaskmovpd", MEM, _XMM, _XMM);
put("vbroadcastf128", YMM, MEM);
put("vbroadcasti128", YMM, MEM);
@@ -711,8 +713,8 @@ public:
}
}
- put("vinsertf128", YMM, YMM, XMM | MEM, IMM8);
- put("vinserti128", YMM, YMM, XMM | MEM, IMM8);
+ put("vinsertf128", YMM, YMM, _XMM | _XMM2 | MEM, IMM8);
+ put("vinserti128", YMM, YMM, _XMM | _XMM2 | MEM, IMM8);
put("vperm2f128", YMM, YMM, YMM | MEM, IMM8);
put("vperm2i128", YMM, YMM, YMM | MEM, IMM8);
@@ -722,9 +724,9 @@ public:
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const char *name = tbl[i];
- put(name, XMM, XMM, MEM);
+ put(name, _XMM, _XMM, MEM);
put(name, YMM, YMM, MEM);
- put(name, MEM, XMM, XMM);
+ put(name, MEM, _XMM, _XMM);
put(name, MEM, YMM, YMM);
}
}
@@ -761,29 +763,29 @@ public:
put(name, MEM, ZMM);
put(name, ZMM, MEM);
#ifdef XBYAK64
- put(name, MEM, _XMM3);
- put(name, _XMM3, MEM);
+ put(name, MEM, XMM);
+ put(name, XMM, MEM);
#endif
}
}
void put_vmov()
{
#ifdef XBYAK64
- put("vmovd", _XMM3, MEM|REG32);
- put("vmovd", MEM|REG32, _XMM3);
- put("vmovq", _XMM3, MEM|REG64|XMM);
- put("vmovq", MEM|REG64|XMM, _XMM3);
- put("vmovhlps", _XMM3, _XMM3, _XMM3);
- put("vmovlhps", _XMM3, _XMM3, _XMM3);
- put("vmovntdqa", _XMM3|_YMM3|ZMM, MEM);
- put("vmovntdq", MEM, _XMM3 | _YMM3 | ZMM);
- put("vmovntpd", MEM, _XMM3 | _YMM3 | ZMM);
- put("vmovntps", MEM, _XMM3 | _YMM3 | ZMM);
-
- put("vmovsd", XMM_KZ, _XMM3, _XMM3);
+ put("vmovd", XMM, MEM|REG32);
+ put("vmovd", MEM|REG32, XMM);
+ put("vmovq", XMM, MEM|REG64|XMM);
+ put("vmovq", MEM|REG64|XMM, XMM);
+ put("vmovhlps", XMM, _XMM3, _XMM3);
+ put("vmovlhps", XMM, _XMM3, _XMM3);
+ put("vmovntdqa", XMM|_YMM3|ZMM, MEM);
+ put("vmovntdq", MEM, XMM | _YMM3 | ZMM);
+ put("vmovntpd", MEM, XMM | _YMM3 | ZMM);
+ put("vmovntps", MEM, XMM | _YMM3 | ZMM);
+
+ put("vmovsd", XMM_KZ, XMM, _XMM3);
put("vmovsd", XMM_KZ, MEM);
put("vmovsd", MEM_K, XMM);
- put("vmovss", XMM_KZ, _XMM3, _XMM3);
+ put("vmovss", XMM_KZ, XMM, _XMM3);
put("vmovss", XMM_KZ, MEM);
put("vmovss", MEM_K, XMM);
@@ -798,7 +800,7 @@ public:
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const char *name = tbl[i];
- put(name, XMM_KZ, _XMM, _XMM | MEM, IMM);
+ put(name, XMM_KZ, XMM, _XMM | MEM, IMM);
put(name, _YMM3, _YMM3, _YMM3 | _MEM, IMM);
put(name, _ZMM, _ZMM, _ZMM | _MEM, IMM);
}
@@ -811,7 +813,7 @@ public:
"vmovlps",
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
- put(tbl[i], _XMM3, _XMM3, MEM);
+ put(tbl[i], XMM, _XMM3, MEM);
put(tbl[i], MEM, _XMM3);
}
}
@@ -837,11 +839,11 @@ public:
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- put(p.name, _XMM|XMM_KZ, _XMM|MEM);
+ put(p.name, XMM|XMM_KZ, _XMM|MEM);
put(p.name, _YMM|YMM_KZ, _YMM|MEM);
put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM);
if (!p.M_X) continue;
- put(p.name, MEM|MEM_K, _XMM);
+ put(p.name, MEM|MEM_K, XMM);
put(p.name, MEM|MEM_K, _YMM);
put(p.name, MEM|MEM_K, _ZMM);
}
@@ -858,7 +860,7 @@ public:
put("vpabsd", ZMM_KZ, M_1to16 | _MEM);
put("vpabsq", ZMM_KZ, M_1to8 | _MEM);
- put("vbroadcastf32x2", YMM_KZ | ZMM_KZ, _XMM | _MEM);
+ put("vbroadcastf32x2", YMM_KZ | ZMM_KZ, XMM | _MEM);
put("vbroadcastf32x4", YMM_KZ | ZMM_KZ, _MEM);
put("vbroadcastf64x2", YMM_KZ | ZMM_KZ, _MEM);
@@ -880,7 +882,7 @@ public:
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
- put(p.name, XMM_KZ, _XMM, _XMM|p.mem);
+ put(p.name, XMM_KZ, XMM, _XMM|p.mem);
}
}
void put512_X3()
@@ -892,54 +894,54 @@ public:
uint64_t x2;
uint64_t xm;
} tbl[] = {
- { "vpacksswb", XMM_KZ, _XMM, _XMM | _MEM },
+ { "vpacksswb", XMM_KZ, XMM, _XMM | _MEM },
{ "vpacksswb", YMM_KZ, _YMM, _YMM | _MEM },
{ "vpacksswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
- { "vpackssdw", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
+ { "vpackssdw", XMM_KZ, XMM, _XMM | M_1to4 | _MEM },
{ "vpackssdw", YMM_KZ, _YMM, _YMM | M_1to8 | _MEM },
{ "vpackssdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
- { "vpackusdw", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
+ { "vpackusdw", XMM_KZ, XMM, _XMM | M_1to4 | _MEM },
{ "vpackusdw", YMM_KZ, _YMM, _YMM | M_1to8 | _MEM },
{ "vpackusdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
- { "vpackuswb", XMM_KZ, _XMM, _XMM | _MEM },
+ { "vpackuswb", XMM_KZ, XMM, _XMM | _MEM },
{ "vpackuswb", YMM_KZ, _YMM, _YMM | _MEM },
{ "vpackuswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
- { "vpaddb", XMM_KZ, _XMM, _XMM | _MEM },
+ { "vpaddb", XMM_KZ, XMM, _XMM | _MEM },
{ "vpaddw", XMM_KZ, _XMM, _XMM | _MEM },
{ "vpaddd", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
{ "vpaddq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
- { "vpaddsb", XMM_KZ, _XMM, _XMM | _MEM },
+ { "vpaddsb", XMM_KZ, XMM, _XMM | _MEM },
{ "vpaddsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
- { "vpaddsw", XMM_KZ, _XMM, _XMM | _MEM },
+ { "vpaddsw", XMM_KZ, XMM, _XMM | _MEM },
{ "vpaddsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
- { "vpaddusb", XMM_KZ, _XMM, _XMM | MEM },
+ { "vpaddusb", XMM_KZ, XMM, _XMM | MEM },
{ "vpaddusb", ZMM_KZ, _ZMM, _ZMM | MEM },
- { "vpaddusw", XMM_KZ, _XMM, _XMM | MEM },
+ { "vpaddusw", XMM_KZ, XMM, _XMM | MEM },
{ "vpaddusw", ZMM_KZ, _ZMM, _ZMM | MEM },
- { "vpsubb", XMM_KZ, _XMM, _XMM | _MEM },
- { "vpsubw", XMM_KZ, _XMM, _XMM | _MEM },
- { "vpsubd", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
+ { "vpsubb", XMM_KZ, XMM, _XMM | _MEM },
+ { "vpsubw", XMM_KZ, XMM, _XMM | _MEM },
+ { "vpsubd", XMM_KZ, XMM, _XMM | M_1to4 | _MEM },
{ "vpsubq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
- { "vpsubsb", XMM_KZ, _XMM, _XMM | _MEM },
+ { "vpsubsb", XMM_KZ, XMM, _XMM | _MEM },
{ "vpsubsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
- { "vpsubsw", XMM_KZ, _XMM, _XMM | _MEM },
+ { "vpsubsw", XMM_KZ, XMM, _XMM | _MEM },
{ "vpsubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
- { "vpsubusb", XMM_KZ, _XMM, _XMM | MEM },
+ { "vpsubusb", XMM_KZ, XMM, _XMM | MEM },
{ "vpsubusb", ZMM_KZ, _ZMM, _ZMM | MEM },
- { "vpsubusw", XMM_KZ, _XMM, _XMM | MEM },
+ { "vpsubusw", XMM_KZ, XMM, _XMM | MEM },
{ "vpsubusw", ZMM_KZ, _ZMM, _ZMM | MEM },
{ "vpandd", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
@@ -984,137 +986,137 @@ public:
{ "vpminud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
{ "vpminuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
- { "vpslldq", _XMM3, _XMM3 | _MEM, IMM8 },
+ { "vpslldq", XMM, _XMM3 | _MEM, IMM8 },
{ "vpslldq", _YMM3, _YMM3 | _MEM, IMM8 },
{ "vpslldq", _ZMM, _ZMM | _MEM, IMM8 },
- { "vpsrldq", _XMM3, _XMM3 | _MEM, IMM8 },
+ { "vpsrldq", XMM, _XMM3 | _MEM, IMM8 },
{ "vpsrldq", _YMM3, _YMM3 | _MEM, IMM8 },
{ "vpsrldq", _ZMM, _ZMM | _MEM, IMM8 },
- { "vpsraw", XMM_KZ, _XMM | _MEM, IMM8 },
+ { "vpsraw", XMM_KZ, XMM | _MEM, IMM8 },
{ "vpsraw", ZMM_KZ, _ZMM | _MEM, IMM8 },
- { "vpsrad", XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
+ { "vpsrad", XMM_KZ, XMM | M_1to4 | _MEM, IMM8 },
{ "vpsrad", ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
{ "vpsraq", XMM, XMM, IMM8 },
- { "vpsraq", XMM_KZ, _XMM | M_1to2 | _MEM, IMM8 },
+ { "vpsraq", XMM_KZ, XMM | M_1to2 | _MEM, IMM8 },
{ "vpsraq", ZMM_KZ, _ZMM | M_1to8 | _MEM, IMM8 },
- { "vpsllw", _XMM3, _XMM3 | _MEM, IMM8 },
- { "vpslld", _XMM3, _XMM3 | _MEM | M_1to4, IMM8 },
- { "vpsllq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },
+ { "vpsllw", XMM, _XMM3 | _MEM, IMM8 },
+ { "vpslld", XMM, _XMM3 | _MEM | M_1to4, IMM8 },
+ { "vpsllq", XMM, _XMM3 | _MEM | M_1to2, IMM8 },
- { "vpsrlw", XMM_KZ, _XMM | _MEM, IMM8 },
+ { "vpsrlw", XMM_KZ, XMM | _MEM, IMM8 },
{ "vpsrlw", ZMM_KZ, _ZMM | _MEM, IMM8 },
- { "vpsrld", XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
+ { "vpsrld", XMM_KZ, XMM | M_1to4 | _MEM, IMM8 },
{ "vpsrld", ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
- { "vpsrlq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },
+ { "vpsrlq", XMM, _XMM3 | _MEM | M_1to2, IMM8 },
{ "vpsrlq", _ZMM, _ZMM | _MEM | M_1to8, IMM8 },
- { "vpsravw", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
+ { "vpsravw", XMM_KZ | XMM, _XMM, _XMM | _MEM },
{ "vpsravw", _ZMM, _ZMM, _MEM },
- { "vpsravd", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
+ { "vpsravd", XMM_KZ | XMM, _XMM, _XMM | _MEM },
{ "vpsravd", _ZMM, _ZMM, M_1to16 | _MEM },
- { "vpsravq", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
+ { "vpsravq", XMM_KZ | XMM, _XMM, _XMM | _MEM },
{ "vpsravq", _ZMM, _ZMM, M_1to8 | _MEM },
- { "vpsllvw", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
+ { "vpsllvw", XMM_KZ | XMM, _XMM, _XMM | _MEM },
{ "vpsllvw", _ZMM, _ZMM, _MEM },
- { "vpsllvd", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
+ { "vpsllvd", XMM_KZ | XMM, _XMM, _XMM | _MEM },
{ "vpsllvd", _ZMM, _ZMM, M_1to16 | _MEM },
- { "vpsllvq", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
+ { "vpsllvq", XMM_KZ | XMM, _XMM, _XMM | _MEM },
{ "vpsllvq", _ZMM, _ZMM, M_1to8 | _MEM },
- { "vpsrlvw", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
+ { "vpsrlvw", XMM_KZ | XMM, _XMM, _XMM | _MEM },
{ "vpsrlvw", _ZMM, _ZMM, _MEM },
- { "vpsrlvd", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
+ { "vpsrlvd", XMM_KZ | XMM, _XMM, _XMM | _MEM },
{ "vpsrlvd", _ZMM, _ZMM, M_1to16 | _MEM },
- { "vpsrlvq", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
+ { "vpsrlvq", XMM_KZ | XMM, _XMM, _XMM | _MEM },
{ "vpsrlvq", _ZMM, _ZMM, M_1to8 | _MEM },
- { "vpshufb", _XMM | XMM_KZ, _XMM, _XMM | _MEM },
+ { "vpshufb", XMM | XMM_KZ, _XMM, _XMM | _MEM },
{ "vpshufb", ZMM_KZ, _ZMM, _MEM },
- { "vpshufhw", _XMM | XMM_KZ, _XMM | _MEM, IMM8 },
+ { "vpshufhw", XMM | XMM_KZ, _XMM | _MEM, IMM8 },
{ "vpshufhw", ZMM_KZ, _MEM, IMM8 },
- { "vpshuflw", _XMM | XMM_KZ, _XMM | _MEM, IMM8 },
+ { "vpshuflw", XMM | XMM_KZ, _XMM | _MEM, IMM8 },
{ "vpshuflw", ZMM_KZ, _MEM, IMM8 },
- { "vpshufd", _XMM | XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
+ { "vpshufd", XMM | XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
{ "vpshufd", _ZMM | ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
- { "vpord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
+ { "vpord", XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
{ "vpord", _ZMM | ZMM_KZ, _ZMM, M_1to16 | _MEM },
- { "vporq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
+ { "vporq", XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
{ "vporq", _ZMM | ZMM_KZ, _ZMM, M_1to8 | _MEM },
- { "vpxord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
+ { "vpxord", XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
{ "vpxord", _ZMM | ZMM_KZ, _ZMM, M_1to16 | _MEM },
- { "vpxorq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
+ { "vpxorq", XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
{ "vpxorq", _ZMM | ZMM_KZ, _ZMM, M_1to8 | _MEM },
- { "vpsadbw", _XMM3, _XMM, _XMM | _MEM },
+ { "vpsadbw", XMM, _XMM, _XMM | _MEM },
{ "vpsadbw", _ZMM, _ZMM, _MEM },
- { "vpmuldq", _XMM3, _XMM, _XMM | M_1to2 | _MEM },
+ { "vpmuldq", XMM, _XMM, _XMM | M_1to2 | _MEM },
{ "vpmuldq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
- { "vpmulhrsw", _XMM3, _XMM, _XMM | _MEM },
+ { "vpmulhrsw", XMM, _XMM, _XMM | _MEM },
{ "vpmulhrsw", ZMM_KZ, _ZMM, _MEM },
- { "vpmulhuw", _XMM3, _XMM, _XMM | _MEM },
+ { "vpmulhuw", XMM, _XMM, _XMM | _MEM },
{ "vpmulhuw", ZMM_KZ, _ZMM, _MEM },
- { "vpmulhw", _XMM3, _XMM, _XMM | _MEM },
+ { "vpmulhw", XMM, _XMM, _XMM | _MEM },
{ "vpmulhw", ZMM_KZ, _ZMM, _MEM },
- { "vpmullw", _XMM3, _XMM, _XMM | _MEM },
+ { "vpmullw", XMM, _XMM, _XMM | _MEM },
{ "vpmullw", ZMM_KZ, _ZMM, _MEM },
- { "vpmulld", _XMM3, _XMM, M_1to4 | _MEM },
+ { "vpmulld", XMM, _XMM, M_1to4 | _MEM },
{ "vpmulld", ZMM_KZ, _ZMM, M_1to16 | _MEM },
- { "vpmullq", _XMM3, _XMM, M_1to2 | _MEM },
+ { "vpmullq", XMM, _XMM, M_1to2 | _MEM },
{ "vpmullq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
- { "vpmuludq", _XMM3, _XMM, M_1to2 | _MEM },
+ { "vpmuludq", XMM, _XMM, M_1to2 | _MEM },
{ "vpmuludq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
- { "vpunpckhbw", _XMM3, _XMM, _XMM | _MEM },
+ { "vpunpckhbw", XMM, _XMM, _XMM | _MEM },
{ "vpunpckhbw", _ZMM, _ZMM, _MEM },
- { "vpunpckhwd", _XMM3, _XMM, _XMM | _MEM },
+ { "vpunpckhwd", XMM, _XMM, _XMM | _MEM },
{ "vpunpckhwd", _ZMM, _ZMM, _MEM },
- { "vpunpckhdq", _XMM3, _XMM, M_1to4 | _MEM },
+ { "vpunpckhdq", XMM, _XMM, M_1to4 | _MEM },
{ "vpunpckhdq", _ZMM, _ZMM, M_1to16 | _MEM },
- { "vpunpckhqdq", _XMM3, _XMM, M_1to2 | _MEM },
+ { "vpunpckhqdq", XMM, _XMM, M_1to2 | _MEM },
{ "vpunpckhqdq", _ZMM, _ZMM, M_1to8 | _MEM },
- { "vpunpcklbw", _XMM3, _XMM, _XMM | _MEM },
+ { "vpunpcklbw", XMM, _XMM, _XMM | _MEM },
{ "vpunpcklbw", _ZMM, _ZMM, _MEM },
- { "vpunpcklwd", _XMM3, _XMM, _XMM | _MEM },
+ { "vpunpcklwd", XMM, _XMM, _XMM | _MEM },
{ "vpunpcklwd", _ZMM, _ZMM, _MEM },
- { "vpunpckldq", _XMM3, _XMM, M_1to4 | _MEM },
+ { "vpunpckldq", XMM, _XMM, M_1to4 | _MEM },
{ "vpunpckldq", _ZMM, _ZMM, M_1to16 | _MEM },
- { "vpunpcklqdq", _XMM3, _XMM, M_1to2 | _MEM },
+ { "vpunpcklqdq", XMM, _XMM, M_1to2 | _MEM },
{ "vpunpcklqdq", _ZMM, _ZMM, M_1to8 | _MEM },
{ "vextractf32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
@@ -1127,7 +1129,7 @@ public:
{ "vextracti32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
{ "vextracti64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
- { "vextractps", REG32 | _MEM, _XMM3, IMM8 },
+ { "vextractps", REG32 | _MEM, XMM, IMM8 },
{ "vpermb", XMM_KZ, _XMM, _XMM | _MEM },
{ "vpermb", ZMM_KZ, _ZMM, _ZMM | _MEM },
@@ -1176,7 +1178,7 @@ public:
uint64_t xm;
} tbl[] = {
#ifdef XBYAK64
- { "vinsertps", _XMM3, _XMM, _XMM3 | _MEM },
+ { "vinsertps", XMM, _XMM, _XMM3 | _MEM },
{ "vshufpd", XMM_KZ, _XMM, M_1to2 | _MEM },
{ "vshufpd", ZMM_KZ, _ZMM, M_1to8 | _MEM },
@@ -1209,14 +1211,14 @@ public:
put(p.name, p.x1, p.x2, p.xm, IMM8);
}
#ifdef XBYAK64
- put("vpextrb", _REG64 | _MEM, _XMM3, IMM8);
- put("vpextrw", _REG64 | _MEM, _XMM3, IMM8);
- put("vpextrd", _REG32 | _MEM, _XMM3, IMM8);
- put("vpextrq", _REG64 | _MEM, _XMM3, IMM8);
- put("vpinsrb", _XMM3, _XMM3, _REG32 | _MEM, IMM8);
- put("vpinsrw", _XMM3, _XMM3, _REG32 | _MEM, IMM8);
- put("vpinsrd", _XMM3, _XMM3, _REG32 | _MEM, IMM8);
- put("vpinsrq", _XMM3, _XMM3, _REG64 | _MEM, IMM8);
+ put("vpextrb", _REG64 | _MEM, XMM, IMM8);
+ put("vpextrw", _REG64 | _MEM, XMM, IMM8);
+ put("vpextrd", _REG32 | _MEM, XMM, IMM8);
+ put("vpextrq", _REG64 | _MEM, XMM, IMM8);
+ put("vpinsrb", XMM, _XMM3, _REG32 | _MEM, IMM8);
+ put("vpinsrw", XMM, _XMM3, _REG32 | _MEM, IMM8);
+ put("vpinsrd", XMM, _XMM3, _REG32 | _MEM, IMM8);
+ put("vpinsrq", XMM, _XMM3, _REG64 | _MEM, IMM8);
#endif
}
void put512_FMA()
@@ -1346,7 +1348,7 @@ public:
} else if (suf == "ps") {
mem = M_1to4;
}
- put(p, _XMM3 | XMM_KZ, _XMM, mem | _MEM);
+ put(p, XMM | XMM_KZ, _XMM, mem | _MEM);
if (!sufTbl[j].supportYMM) continue;
mem = 0;
if (suf == "pd") {
@@ -1467,23 +1469,23 @@ public:
put("vcvtqq2ps", XMM_KZ, _YMM | M_yword | MY_1to4);
put("vcvtqq2ps", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER);
- put("vcvtsd2si", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
+ put("vcvtsd2si", REG32 | REG64, XMM | _MEM | XMM_ER);
- put("vcvtsd2usi", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
+ put("vcvtsd2usi", REG32 | REG64, XMM | _MEM | XMM_ER);
- put("vcvtsd2ss", XMM_KZ, _XMM3, _XMM3 | _MEM | XMM_ER);
+ put("vcvtsd2ss", XMM_KZ, XMM, _XMM3 | _MEM | XMM_ER);
- put("vcvtsi2sd", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
+ put("vcvtsi2sd", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64);
put("vcvtsi2sd", XMM, XMM_ER, REG64);
- put("vcvtsi2ss", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
+ put("vcvtsi2ss", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64);
put("vcvtsi2ss", XMM, XMM_ER, REG32 | REG64);
- put("vcvtss2sd", XMM_KZ, _XMM3, _XMM3 | _MEM | XMM_SAE);
+ put("vcvtss2sd", XMM_KZ, XMM, _XMM3 | _MEM | XMM_SAE);
- put("vcvtss2si", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
+ put("vcvtss2si", REG32 | REG64, XMM | _MEM | XMM_ER);
- put("vcvtss2usi", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
+ put("vcvtss2usi", REG32 | REG64, XMM | _MEM | XMM_ER);
put("vcvtpd2dq", XMM_KZ, _XMM | M_xword | M_1to2);
put("vcvtpd2dq", XMM_KZ, _YMM | M_yword | MY_1to4);
@@ -1517,13 +1519,13 @@ public:
put("vcvttps2uqq", YMM_KZ, _XMM | _MEM | M_1to4);
put("vcvttps2uqq", ZMM_KZ, _YMM | _MEM | M_1to8 | YMM_SAE);
- put("vcvttsd2si", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
+ put("vcvttsd2si", REG32 | REG64, XMM | _MEM | XMM_SAE);
- put("vcvttsd2usi", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
+ put("vcvttsd2usi", REG32 | REG64, XMM | _MEM | XMM_SAE);
- put("vcvttss2si", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
+ put("vcvttss2si", REG32 | REG64, XMM | _MEM | XMM_SAE);
- put("vcvttss2usi", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
+ put("vcvttss2usi", REG32 | REG64, XMM | _MEM | XMM_SAE);
put("vcvtudq2pd", XMM_KZ, _XMM | _MEM | M_1to2);
put("vcvtudq2pd", YMM_KZ, _XMM | _MEM | M_1to4);
@@ -1541,10 +1543,10 @@ public:
put("vcvtuqq2ps", XMM_KZ, _YMM | M_yword | MY_1to4);
put("vcvtuqq2ps", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER);
- put("vcvtusi2sd", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
+ put("vcvtusi2sd", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64);
put("vcvtusi2sd", XMM, XMM_ER, REG64);
- put("vcvtusi2ss", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
+ put("vcvtusi2ss", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64);
put("vcvtusi2ss", XMM, XMM_ER, REG32 | REG64);
#endif
}
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index cf52e32..42974e3 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -105,7 +105,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
- VERSION = 0x5720 /* 0xABCD = A.BC(D) */
+ VERSION = 0x5730 /* 0xABCD = A.BC(D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@@ -1476,6 +1476,7 @@ private:
T_B64 = 1 << 27, // m64bcst
T_M_K = 1 << 28, // mem{k}
T_VSIB = 1 << 29,
+ T_MEM_EVEX = 1 << 30, // use evex if mem
T_XXX
};
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
@@ -1952,7 +1953,7 @@ private:
if (BIT == 64 && addr.is32bit()) db(0x67);
int disp8N = 0;
bool x = index.isExtIdx();
- if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
+ if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
int aaa = addr.getOpmaskIdx();
if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY);
bool b = false;
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 6111619..92a31f9 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "5.72"; }
+const char *getVersionString() const { return "5.73"; }
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@@ -1206,28 +1206,28 @@ void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm,
void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); }
void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); }
void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); }
-void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
+void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); }
-void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
-void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
+void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
+void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); }
void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); }
void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); }
-void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
+void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); }
-void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
+void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); }
void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); }
-void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
+void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); }
-void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
+void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); }
-void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
-void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
+void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
+void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); }
void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); }
void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); }
-void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
+void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); }
void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); }
void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); }