diff options
author | MITSUNARI Shigeo <[email protected]> | 2018-09-19 15:45:15 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2018-09-19 15:45:15 +0900 |
commit | 42462ef922893f0d3f2156d005fa27ba6898498b (patch) | |
tree | 4f1cd4d28f6136130ab51967a0b6aa78ff887719 | |
parent | da9117a93f4d3a9a00288b2acc8452f72151aad1 (diff) | |
download | xbyak-42462ef922893f0d3f2156d005fa27ba6898498b.tar.gz xbyak-42462ef922893f0d3f2156d005fa27ba6898498b.zip |
use evex encoding for vpslld/vpslldq/vpsraw/... (reg, mem, imm); v5.73
-rw-r--r-- | gen/avx_type.hpp | 5 | ||||
-rw-r--r-- | gen/gen_code.cpp | 20 | ||||
-rw-r--r-- | readme.md | 3 | ||||
-rw-r--r-- | readme.txt | 3 | ||||
-rw-r--r-- | test/make_512.cpp | 244 | ||||
-rw-r--r-- | xbyak/xbyak.h | 5 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 22 |
7 files changed, 156 insertions, 146 deletions
diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp index 6f51166..a659699 100644 --- a/gen/avx_type.hpp +++ b/gen/avx_type.hpp @@ -37,6 +37,7 @@ T_B64 = 1 << 27, // m64bcst T_M_K = 1 << 28, // mem{k} T_VSIB = 1 << 29, + T_MEM_EVEX = 1 << 30, // use evex if mem T_XXX }; @@ -161,5 +162,9 @@ std::string type2String(int type) if (!str.empty()) str += " | "; str += "T_VSIB"; } + if (type & T_MEM_EVEX) { + if (!str.empty()) str += " | "; + str += "T_MEM_EVEX"; + } return str; } diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 70db4fe..43984c0 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1491,16 +1491,16 @@ void put() int idx; int type; } tbl[] = { - { "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX }, - { "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX }, - { "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX }, - { "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 }, - { "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 }, - { "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX }, - { "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 }, - { "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX }, - { "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 }, - { "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 }, + { "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, + { "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, + { "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, + { "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 }, + { "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 }, + { "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, + { "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 }, + { "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, + { "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 }, + { "psrlq", 0x73, 2, T_0F | T_66 | T_YMM 
| T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; @@ -1,5 +1,5 @@ -# Xbyak 5.72 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ +# Xbyak 5.73 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ ## Abstract @@ -392,6 +392,7 @@ modified new BSD License http://opensource.org/licenses/BSD-3-Clause ## History +* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8) * 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday) * 2018/Sep/04 ver 5.71 L() returns a new label instance * 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.72
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.73
-----------------------------------------------------------------------------
◎概要
@@ -373,6 +373,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から -----------------------------------------------------------------------------
◎履歴
+2018/09/19 ver 5.73 vpslld, vpslldq, vpsllwなどの(reg, mem, imm8)に対するevexエンコーディング修正
2018/09/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
2018/09/04 ver 5.71 新しいlabelインスタンスを返すL()を追加
2018/08/27 ver 5.70 read/exec設定のためのsetProtectMode()とDontUseProtectの追加
diff --git a/test/make_512.cpp b/test/make_512.cpp index 38f989c..49d082c 100644 --- a/test/make_512.cpp +++ b/test/make_512.cpp @@ -73,7 +73,6 @@ const uint64 YMM_ER = 1ULL << 36; const uint64 VM32Y_K = 1ULL << 37; const uint64 IMM_2 = 1ULL << 38; const uint64 IMM = IMM_1 | IMM_2; -const uint64 XMM = _XMM | _XMM2; const uint64 YMM = _YMM | _YMM2; const uint64 K = 1ULL << 43; const uint64 _ZMM = 1ULL << 44; @@ -90,7 +89,10 @@ const uint64 ZMM_SAE = 1ULL << 48; const uint64 ZMM_ER = 1ULL << 49; #ifdef XBYAK64 const uint64 _XMM3 = 1ULL << 50; +#else +const uint64 _XMM3 = 0; #endif +const uint64 XMM = _XMM | _XMM2 | _XMM3; const uint64 XMM_SAE = 1ULL << 51; #ifdef XBYAK64 const uint64 XMM_KZ = 1ULL << 52; @@ -608,7 +610,7 @@ public: }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - put(p->name, K, _XMM, _XMM | MEM, IMM8); + put(p->name, K, XMM, _XMM | MEM, IMM8); if (!p->supportYMM) continue; put(p->name, K, _YMM, _YMM | MEM, IMM8); put(p->name, K, _ZMM, _ZMM | MEM, IMM8); @@ -627,10 +629,10 @@ public: }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - put(p->name, XMM | _XMM3, XMM_SAE | XMM | MEM); + put(p->name, XMM, XMM_SAE | XMM | MEM); } } - put("vcomiss", _XMM3, XMM | MEM); + put("vcomiss", XMM, _XMM3 | MEM); put("vcomiss", XMM, XMM_SAE); #endif } @@ -674,10 +676,10 @@ public: "vpbroadcastq", }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - put(tbl[i], XMM_KZ | ZMM_KZ, _XMM | _MEM); + put(tbl[i], XMM_KZ | ZMM_KZ, XMM | _MEM); } } - put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, _XMM | _MEM); + put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, XMM | _MEM); put("vbroadcasti32x4", YMM_KZ | ZMM_KZ, _MEM); put("vbroadcasti64x2", YMM_KZ | ZMM_KZ, _MEM); put("vbroadcasti32x8", ZMM_KZ, _MEM); @@ -685,14 +687,14 @@ public: } void putMisc1() { - put("vmaskmovps", XMM, XMM, MEM); + put("vmaskmovps", _XMM, _XMM, MEM); put("vmaskmovps", YMM, YMM, MEM); put("vmaskmovpd", YMM, YMM, MEM); - put("vmaskmovpd", XMM, XMM, 
MEM); + put("vmaskmovpd", _XMM, _XMM, MEM); - put("vmaskmovps", MEM, XMM, XMM); - put("vmaskmovpd", MEM, XMM, XMM); + put("vmaskmovps", MEM, _XMM, _XMM); + put("vmaskmovpd", MEM, _XMM, _XMM); put("vbroadcastf128", YMM, MEM); put("vbroadcasti128", YMM, MEM); @@ -711,8 +713,8 @@ public: } } - put("vinsertf128", YMM, YMM, XMM | MEM, IMM8); - put("vinserti128", YMM, YMM, XMM | MEM, IMM8); + put("vinsertf128", YMM, YMM, _XMM | _XMM2 | MEM, IMM8); + put("vinserti128", YMM, YMM, _XMM | _XMM2 | MEM, IMM8); put("vperm2f128", YMM, YMM, YMM | MEM, IMM8); put("vperm2i128", YMM, YMM, YMM | MEM, IMM8); @@ -722,9 +724,9 @@ public: }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const char *name = tbl[i]; - put(name, XMM, XMM, MEM); + put(name, _XMM, _XMM, MEM); put(name, YMM, YMM, MEM); - put(name, MEM, XMM, XMM); + put(name, MEM, _XMM, _XMM); put(name, MEM, YMM, YMM); } } @@ -761,29 +763,29 @@ public: put(name, MEM, ZMM); put(name, ZMM, MEM); #ifdef XBYAK64 - put(name, MEM, _XMM3); - put(name, _XMM3, MEM); + put(name, MEM, XMM); + put(name, XMM, MEM); #endif } } void put_vmov() { #ifdef XBYAK64 - put("vmovd", _XMM3, MEM|REG32); - put("vmovd", MEM|REG32, _XMM3); - put("vmovq", _XMM3, MEM|REG64|XMM); - put("vmovq", MEM|REG64|XMM, _XMM3); - put("vmovhlps", _XMM3, _XMM3, _XMM3); - put("vmovlhps", _XMM3, _XMM3, _XMM3); - put("vmovntdqa", _XMM3|_YMM3|ZMM, MEM); - put("vmovntdq", MEM, _XMM3 | _YMM3 | ZMM); - put("vmovntpd", MEM, _XMM3 | _YMM3 | ZMM); - put("vmovntps", MEM, _XMM3 | _YMM3 | ZMM); - - put("vmovsd", XMM_KZ, _XMM3, _XMM3); + put("vmovd", XMM, MEM|REG32); + put("vmovd", MEM|REG32, XMM); + put("vmovq", XMM, MEM|REG64|XMM); + put("vmovq", MEM|REG64|XMM, XMM); + put("vmovhlps", XMM, _XMM3, _XMM3); + put("vmovlhps", XMM, _XMM3, _XMM3); + put("vmovntdqa", XMM|_YMM3|ZMM, MEM); + put("vmovntdq", MEM, XMM | _YMM3 | ZMM); + put("vmovntpd", MEM, XMM | _YMM3 | ZMM); + put("vmovntps", MEM, XMM | _YMM3 | ZMM); + + put("vmovsd", XMM_KZ, XMM, _XMM3); put("vmovsd", XMM_KZ, MEM); 
put("vmovsd", MEM_K, XMM); - put("vmovss", XMM_KZ, _XMM3, _XMM3); + put("vmovss", XMM_KZ, XMM, _XMM3); put("vmovss", XMM_KZ, MEM); put("vmovss", MEM_K, XMM); @@ -798,7 +800,7 @@ public: }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const char *name = tbl[i]; - put(name, XMM_KZ, _XMM, _XMM | MEM, IMM); + put(name, XMM_KZ, XMM, _XMM | MEM, IMM); put(name, _YMM3, _YMM3, _YMM3 | _MEM, IMM); put(name, _ZMM, _ZMM, _ZMM | _MEM, IMM); } @@ -811,7 +813,7 @@ public: "vmovlps", }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - put(tbl[i], _XMM3, _XMM3, MEM); + put(tbl[i], XMM, _XMM3, MEM); put(tbl[i], MEM, _XMM3); } } @@ -837,11 +839,11 @@ public: }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - put(p.name, _XMM|XMM_KZ, _XMM|MEM); + put(p.name, XMM|XMM_KZ, _XMM|MEM); put(p.name, _YMM|YMM_KZ, _YMM|MEM); put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM); if (!p.M_X) continue; - put(p.name, MEM|MEM_K, _XMM); + put(p.name, MEM|MEM_K, XMM); put(p.name, MEM|MEM_K, _YMM); put(p.name, MEM|MEM_K, _ZMM); } @@ -858,7 +860,7 @@ public: put("vpabsd", ZMM_KZ, M_1to16 | _MEM); put("vpabsq", ZMM_KZ, M_1to8 | _MEM); - put("vbroadcastf32x2", YMM_KZ | ZMM_KZ, _XMM | _MEM); + put("vbroadcastf32x2", YMM_KZ | ZMM_KZ, XMM | _MEM); put("vbroadcastf32x4", YMM_KZ | ZMM_KZ, _MEM); put("vbroadcastf64x2", YMM_KZ | ZMM_KZ, _MEM); @@ -880,7 +882,7 @@ public: }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - put(p.name, XMM_KZ, _XMM, _XMM|p.mem); + put(p.name, XMM_KZ, XMM, _XMM|p.mem); } } void put512_X3() @@ -892,54 +894,54 @@ public: uint64_t x2; uint64_t xm; } tbl[] = { - { "vpacksswb", XMM_KZ, _XMM, _XMM | _MEM }, + { "vpacksswb", XMM_KZ, XMM, _XMM | _MEM }, { "vpacksswb", YMM_KZ, _YMM, _YMM | _MEM }, { "vpacksswb", ZMM_KZ, _ZMM, _ZMM | _MEM }, - { "vpackssdw", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM }, + { "vpackssdw", XMM_KZ, XMM, _XMM | M_1to4 | _MEM }, { "vpackssdw", YMM_KZ, _YMM, _YMM | M_1to8 | _MEM }, { "vpackssdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM 
}, - { "vpackusdw", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM }, + { "vpackusdw", XMM_KZ, XMM, _XMM | M_1to4 | _MEM }, { "vpackusdw", YMM_KZ, _YMM, _YMM | M_1to8 | _MEM }, { "vpackusdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM }, - { "vpackuswb", XMM_KZ, _XMM, _XMM | _MEM }, + { "vpackuswb", XMM_KZ, XMM, _XMM | _MEM }, { "vpackuswb", YMM_KZ, _YMM, _YMM | _MEM }, { "vpackuswb", ZMM_KZ, _ZMM, _ZMM | _MEM }, - { "vpaddb", XMM_KZ, _XMM, _XMM | _MEM }, + { "vpaddb", XMM_KZ, XMM, _XMM | _MEM }, { "vpaddw", XMM_KZ, _XMM, _XMM | _MEM }, { "vpaddd", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM }, { "vpaddq", ZMM_KZ, _ZMM, M_1to8 | _MEM }, - { "vpaddsb", XMM_KZ, _XMM, _XMM | _MEM }, + { "vpaddsb", XMM_KZ, XMM, _XMM | _MEM }, { "vpaddsb", ZMM_KZ, _ZMM, _ZMM | _MEM }, - { "vpaddsw", XMM_KZ, _XMM, _XMM | _MEM }, + { "vpaddsw", XMM_KZ, XMM, _XMM | _MEM }, { "vpaddsw", ZMM_KZ, _ZMM, _ZMM | _MEM }, - { "vpaddusb", XMM_KZ, _XMM, _XMM | MEM }, + { "vpaddusb", XMM_KZ, XMM, _XMM | MEM }, { "vpaddusb", ZMM_KZ, _ZMM, _ZMM | MEM }, - { "vpaddusw", XMM_KZ, _XMM, _XMM | MEM }, + { "vpaddusw", XMM_KZ, XMM, _XMM | MEM }, { "vpaddusw", ZMM_KZ, _ZMM, _ZMM | MEM }, - { "vpsubb", XMM_KZ, _XMM, _XMM | _MEM }, - { "vpsubw", XMM_KZ, _XMM, _XMM | _MEM }, - { "vpsubd", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM }, + { "vpsubb", XMM_KZ, XMM, _XMM | _MEM }, + { "vpsubw", XMM_KZ, XMM, _XMM | _MEM }, + { "vpsubd", XMM_KZ, XMM, _XMM | M_1to4 | _MEM }, { "vpsubq", ZMM_KZ, _ZMM, M_1to8 | _MEM }, - { "vpsubsb", XMM_KZ, _XMM, _XMM | _MEM }, + { "vpsubsb", XMM_KZ, XMM, _XMM | _MEM }, { "vpsubsb", ZMM_KZ, _ZMM, _ZMM | _MEM }, - { "vpsubsw", XMM_KZ, _XMM, _XMM | _MEM }, + { "vpsubsw", XMM_KZ, XMM, _XMM | _MEM }, { "vpsubsw", ZMM_KZ, _ZMM, _ZMM | _MEM }, - { "vpsubusb", XMM_KZ, _XMM, _XMM | MEM }, + { "vpsubusb", XMM_KZ, XMM, _XMM | MEM }, { "vpsubusb", ZMM_KZ, _ZMM, _ZMM | MEM }, - { "vpsubusw", XMM_KZ, _XMM, _XMM | MEM }, + { "vpsubusw", XMM_KZ, XMM, _XMM | MEM }, { "vpsubusw", ZMM_KZ, _ZMM, _ZMM | MEM }, { "vpandd", ZMM_KZ, _ZMM, _ZMM | 
M_1to16 | _MEM }, @@ -984,137 +986,137 @@ public: { "vpminud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 }, { "vpminuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 }, - { "vpslldq", _XMM3, _XMM3 | _MEM, IMM8 }, + { "vpslldq", XMM, _XMM3 | _MEM, IMM8 }, { "vpslldq", _YMM3, _YMM3 | _MEM, IMM8 }, { "vpslldq", _ZMM, _ZMM | _MEM, IMM8 }, - { "vpsrldq", _XMM3, _XMM3 | _MEM, IMM8 }, + { "vpsrldq", XMM, _XMM3 | _MEM, IMM8 }, { "vpsrldq", _YMM3, _YMM3 | _MEM, IMM8 }, { "vpsrldq", _ZMM, _ZMM | _MEM, IMM8 }, - { "vpsraw", XMM_KZ, _XMM | _MEM, IMM8 }, + { "vpsraw", XMM_KZ, XMM | _MEM, IMM8 }, { "vpsraw", ZMM_KZ, _ZMM | _MEM, IMM8 }, - { "vpsrad", XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 }, + { "vpsrad", XMM_KZ, XMM | M_1to4 | _MEM, IMM8 }, { "vpsrad", ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 }, { "vpsraq", XMM, XMM, IMM8 }, - { "vpsraq", XMM_KZ, _XMM | M_1to2 | _MEM, IMM8 }, + { "vpsraq", XMM_KZ, XMM | M_1to2 | _MEM, IMM8 }, { "vpsraq", ZMM_KZ, _ZMM | M_1to8 | _MEM, IMM8 }, - { "vpsllw", _XMM3, _XMM3 | _MEM, IMM8 }, - { "vpslld", _XMM3, _XMM3 | _MEM | M_1to4, IMM8 }, - { "vpsllq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 }, + { "vpsllw", XMM, _XMM3 | _MEM, IMM8 }, + { "vpslld", XMM, _XMM3 | _MEM | M_1to4, IMM8 }, + { "vpsllq", XMM, _XMM3 | _MEM | M_1to2, IMM8 }, - { "vpsrlw", XMM_KZ, _XMM | _MEM, IMM8 }, + { "vpsrlw", XMM_KZ, XMM | _MEM, IMM8 }, { "vpsrlw", ZMM_KZ, _ZMM | _MEM, IMM8 }, - { "vpsrld", XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 }, + { "vpsrld", XMM_KZ, XMM | M_1to4 | _MEM, IMM8 }, { "vpsrld", ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 }, - { "vpsrlq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 }, + { "vpsrlq", XMM, _XMM3 | _MEM | M_1to2, IMM8 }, { "vpsrlq", _ZMM, _ZMM | _MEM | M_1to8, IMM8 }, - { "vpsravw", XMM_KZ | _XMM, _XMM, _XMM | _MEM }, + { "vpsravw", XMM_KZ | XMM, _XMM, _XMM | _MEM }, { "vpsravw", _ZMM, _ZMM, _MEM }, - { "vpsravd", XMM_KZ | _XMM, _XMM, _XMM | _MEM }, + { "vpsravd", XMM_KZ | XMM, _XMM, _XMM | _MEM }, { "vpsravd", _ZMM, _ZMM, M_1to16 | _MEM }, - { "vpsravq", XMM_KZ | _XMM, _XMM, _XMM | _MEM }, 
+ { "vpsravq", XMM_KZ | XMM, _XMM, _XMM | _MEM }, { "vpsravq", _ZMM, _ZMM, M_1to8 | _MEM }, - { "vpsllvw", XMM_KZ | _XMM, _XMM, _XMM | _MEM }, + { "vpsllvw", XMM_KZ | XMM, _XMM, _XMM | _MEM }, { "vpsllvw", _ZMM, _ZMM, _MEM }, - { "vpsllvd", XMM_KZ | _XMM, _XMM, _XMM | _MEM }, + { "vpsllvd", XMM_KZ | XMM, _XMM, _XMM | _MEM }, { "vpsllvd", _ZMM, _ZMM, M_1to16 | _MEM }, - { "vpsllvq", XMM_KZ | _XMM, _XMM, _XMM | _MEM }, + { "vpsllvq", XMM_KZ | XMM, _XMM, _XMM | _MEM }, { "vpsllvq", _ZMM, _ZMM, M_1to8 | _MEM }, - { "vpsrlvw", XMM_KZ | _XMM, _XMM, _XMM | _MEM }, + { "vpsrlvw", XMM_KZ | XMM, _XMM, _XMM | _MEM }, { "vpsrlvw", _ZMM, _ZMM, _MEM }, - { "vpsrlvd", XMM_KZ | _XMM, _XMM, _XMM | _MEM }, + { "vpsrlvd", XMM_KZ | XMM, _XMM, _XMM | _MEM }, { "vpsrlvd", _ZMM, _ZMM, M_1to16 | _MEM }, - { "vpsrlvq", XMM_KZ | _XMM, _XMM, _XMM | _MEM }, + { "vpsrlvq", XMM_KZ | XMM, _XMM, _XMM | _MEM }, { "vpsrlvq", _ZMM, _ZMM, M_1to8 | _MEM }, - { "vpshufb", _XMM | XMM_KZ, _XMM, _XMM | _MEM }, + { "vpshufb", XMM | XMM_KZ, _XMM, _XMM | _MEM }, { "vpshufb", ZMM_KZ, _ZMM, _MEM }, - { "vpshufhw", _XMM | XMM_KZ, _XMM | _MEM, IMM8 }, + { "vpshufhw", XMM | XMM_KZ, _XMM | _MEM, IMM8 }, { "vpshufhw", ZMM_KZ, _MEM, IMM8 }, - { "vpshuflw", _XMM | XMM_KZ, _XMM | _MEM, IMM8 }, + { "vpshuflw", XMM | XMM_KZ, _XMM | _MEM, IMM8 }, { "vpshuflw", ZMM_KZ, _MEM, IMM8 }, - { "vpshufd", _XMM | XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 }, + { "vpshufd", XMM | XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 }, { "vpshufd", _ZMM | ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 }, - { "vpord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM }, + { "vpord", XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM }, { "vpord", _ZMM | ZMM_KZ, _ZMM, M_1to16 | _MEM }, - { "vporq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM }, + { "vporq", XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM }, { "vporq", _ZMM | ZMM_KZ, _ZMM, M_1to8 | _MEM }, - { "vpxord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM }, + { "vpxord", XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM }, { "vpxord", _ZMM | ZMM_KZ, 
_ZMM, M_1to16 | _MEM }, - { "vpxorq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM }, + { "vpxorq", XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM }, { "vpxorq", _ZMM | ZMM_KZ, _ZMM, M_1to8 | _MEM }, - { "vpsadbw", _XMM3, _XMM, _XMM | _MEM }, + { "vpsadbw", XMM, _XMM, _XMM | _MEM }, { "vpsadbw", _ZMM, _ZMM, _MEM }, - { "vpmuldq", _XMM3, _XMM, _XMM | M_1to2 | _MEM }, + { "vpmuldq", XMM, _XMM, _XMM | M_1to2 | _MEM }, { "vpmuldq", ZMM_KZ, _ZMM, M_1to8 | _MEM }, - { "vpmulhrsw", _XMM3, _XMM, _XMM | _MEM }, + { "vpmulhrsw", XMM, _XMM, _XMM | _MEM }, { "vpmulhrsw", ZMM_KZ, _ZMM, _MEM }, - { "vpmulhuw", _XMM3, _XMM, _XMM | _MEM }, + { "vpmulhuw", XMM, _XMM, _XMM | _MEM }, { "vpmulhuw", ZMM_KZ, _ZMM, _MEM }, - { "vpmulhw", _XMM3, _XMM, _XMM | _MEM }, + { "vpmulhw", XMM, _XMM, _XMM | _MEM }, { "vpmulhw", ZMM_KZ, _ZMM, _MEM }, - { "vpmullw", _XMM3, _XMM, _XMM | _MEM }, + { "vpmullw", XMM, _XMM, _XMM | _MEM }, { "vpmullw", ZMM_KZ, _ZMM, _MEM }, - { "vpmulld", _XMM3, _XMM, M_1to4 | _MEM }, + { "vpmulld", XMM, _XMM, M_1to4 | _MEM }, { "vpmulld", ZMM_KZ, _ZMM, M_1to16 | _MEM }, - { "vpmullq", _XMM3, _XMM, M_1to2 | _MEM }, + { "vpmullq", XMM, _XMM, M_1to2 | _MEM }, { "vpmullq", ZMM_KZ, _ZMM, M_1to8 | _MEM }, - { "vpmuludq", _XMM3, _XMM, M_1to2 | _MEM }, + { "vpmuludq", XMM, _XMM, M_1to2 | _MEM }, { "vpmuludq", ZMM_KZ, _ZMM, M_1to8 | _MEM }, - { "vpunpckhbw", _XMM3, _XMM, _XMM | _MEM }, + { "vpunpckhbw", XMM, _XMM, _XMM | _MEM }, { "vpunpckhbw", _ZMM, _ZMM, _MEM }, - { "vpunpckhwd", _XMM3, _XMM, _XMM | _MEM }, + { "vpunpckhwd", XMM, _XMM, _XMM | _MEM }, { "vpunpckhwd", _ZMM, _ZMM, _MEM }, - { "vpunpckhdq", _XMM3, _XMM, M_1to4 | _MEM }, + { "vpunpckhdq", XMM, _XMM, M_1to4 | _MEM }, { "vpunpckhdq", _ZMM, _ZMM, M_1to16 | _MEM }, - { "vpunpckhqdq", _XMM3, _XMM, M_1to2 | _MEM }, + { "vpunpckhqdq", XMM, _XMM, M_1to2 | _MEM }, { "vpunpckhqdq", _ZMM, _ZMM, M_1to8 | _MEM }, - { "vpunpcklbw", _XMM3, _XMM, _XMM | _MEM }, + { "vpunpcklbw", XMM, _XMM, _XMM | _MEM }, { "vpunpcklbw", _ZMM, _ZMM, _MEM }, 
- { "vpunpcklwd", _XMM3, _XMM, _XMM | _MEM }, + { "vpunpcklwd", XMM, _XMM, _XMM | _MEM }, { "vpunpcklwd", _ZMM, _ZMM, _MEM }, - { "vpunpckldq", _XMM3, _XMM, M_1to4 | _MEM }, + { "vpunpckldq", XMM, _XMM, M_1to4 | _MEM }, { "vpunpckldq", _ZMM, _ZMM, M_1to16 | _MEM }, - { "vpunpcklqdq", _XMM3, _XMM, M_1to2 | _MEM }, + { "vpunpcklqdq", XMM, _XMM, M_1to2 | _MEM }, { "vpunpcklqdq", _ZMM, _ZMM, M_1to8 | _MEM }, { "vextractf32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 }, @@ -1127,7 +1129,7 @@ public: { "vextracti32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 }, { "vextracti64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 }, - { "vextractps", REG32 | _MEM, _XMM3, IMM8 }, + { "vextractps", REG32 | _MEM, XMM, IMM8 }, { "vpermb", XMM_KZ, _XMM, _XMM | _MEM }, { "vpermb", ZMM_KZ, _ZMM, _ZMM | _MEM }, @@ -1176,7 +1178,7 @@ public: uint64_t xm; } tbl[] = { #ifdef XBYAK64 - { "vinsertps", _XMM3, _XMM, _XMM3 | _MEM }, + { "vinsertps", XMM, _XMM, _XMM3 | _MEM }, { "vshufpd", XMM_KZ, _XMM, M_1to2 | _MEM }, { "vshufpd", ZMM_KZ, _ZMM, M_1to8 | _MEM }, @@ -1209,14 +1211,14 @@ public: put(p.name, p.x1, p.x2, p.xm, IMM8); } #ifdef XBYAK64 - put("vpextrb", _REG64 | _MEM, _XMM3, IMM8); - put("vpextrw", _REG64 | _MEM, _XMM3, IMM8); - put("vpextrd", _REG32 | _MEM, _XMM3, IMM8); - put("vpextrq", _REG64 | _MEM, _XMM3, IMM8); - put("vpinsrb", _XMM3, _XMM3, _REG32 | _MEM, IMM8); - put("vpinsrw", _XMM3, _XMM3, _REG32 | _MEM, IMM8); - put("vpinsrd", _XMM3, _XMM3, _REG32 | _MEM, IMM8); - put("vpinsrq", _XMM3, _XMM3, _REG64 | _MEM, IMM8); + put("vpextrb", _REG64 | _MEM, XMM, IMM8); + put("vpextrw", _REG64 | _MEM, XMM, IMM8); + put("vpextrd", _REG32 | _MEM, XMM, IMM8); + put("vpextrq", _REG64 | _MEM, XMM, IMM8); + put("vpinsrb", XMM, _XMM3, _REG32 | _MEM, IMM8); + put("vpinsrw", XMM, _XMM3, _REG32 | _MEM, IMM8); + put("vpinsrd", XMM, _XMM3, _REG32 | _MEM, IMM8); + put("vpinsrq", XMM, _XMM3, _REG64 | _MEM, IMM8); #endif } void put512_FMA() @@ -1346,7 +1348,7 @@ public: } else if (suf == "ps") { mem = M_1to4; } - 
put(p, _XMM3 | XMM_KZ, _XMM, mem | _MEM); + put(p, XMM | XMM_KZ, _XMM, mem | _MEM); if (!sufTbl[j].supportYMM) continue; mem = 0; if (suf == "pd") { @@ -1467,23 +1469,23 @@ public: put("vcvtqq2ps", XMM_KZ, _YMM | M_yword | MY_1to4); put("vcvtqq2ps", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER); - put("vcvtsd2si", REG32 | REG64, _XMM3 | _MEM | XMM_ER); + put("vcvtsd2si", REG32 | REG64, XMM | _MEM | XMM_ER); - put("vcvtsd2usi", REG32 | REG64, _XMM3 | _MEM | XMM_ER); + put("vcvtsd2usi", REG32 | REG64, XMM | _MEM | XMM_ER); - put("vcvtsd2ss", XMM_KZ, _XMM3, _XMM3 | _MEM | XMM_ER); + put("vcvtsd2ss", XMM_KZ, XMM, _XMM3 | _MEM | XMM_ER); - put("vcvtsi2sd", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64); + put("vcvtsi2sd", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64); put("vcvtsi2sd", XMM, XMM_ER, REG64); - put("vcvtsi2ss", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64); + put("vcvtsi2ss", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64); put("vcvtsi2ss", XMM, XMM_ER, REG32 | REG64); - put("vcvtss2sd", XMM_KZ, _XMM3, _XMM3 | _MEM | XMM_SAE); + put("vcvtss2sd", XMM_KZ, XMM, _XMM3 | _MEM | XMM_SAE); - put("vcvtss2si", REG32 | REG64, _XMM3 | _MEM | XMM_ER); + put("vcvtss2si", REG32 | REG64, XMM | _MEM | XMM_ER); - put("vcvtss2usi", REG32 | REG64, _XMM3 | _MEM | XMM_ER); + put("vcvtss2usi", REG32 | REG64, XMM | _MEM | XMM_ER); put("vcvtpd2dq", XMM_KZ, _XMM | M_xword | M_1to2); put("vcvtpd2dq", XMM_KZ, _YMM | M_yword | MY_1to4); @@ -1517,13 +1519,13 @@ public: put("vcvttps2uqq", YMM_KZ, _XMM | _MEM | M_1to4); put("vcvttps2uqq", ZMM_KZ, _YMM | _MEM | M_1to8 | YMM_SAE); - put("vcvttsd2si", REG32 | REG64, _XMM3 | _MEM | XMM_SAE); + put("vcvttsd2si", REG32 | REG64, XMM | _MEM | XMM_SAE); - put("vcvttsd2usi", REG32 | REG64, _XMM3 | _MEM | XMM_SAE); + put("vcvttsd2usi", REG32 | REG64, XMM | _MEM | XMM_SAE); - put("vcvttss2si", REG32 | REG64, _XMM3 | _MEM | XMM_SAE); + put("vcvttss2si", REG32 | REG64, XMM | _MEM | XMM_SAE); - put("vcvttss2usi", REG32 | REG64, _XMM3 | _MEM | XMM_SAE); + 
put("vcvttss2usi", REG32 | REG64, XMM | _MEM | XMM_SAE); put("vcvtudq2pd", XMM_KZ, _XMM | _MEM | M_1to2); put("vcvtudq2pd", YMM_KZ, _XMM | _MEM | M_1to4); @@ -1541,10 +1543,10 @@ public: put("vcvtuqq2ps", XMM_KZ, _YMM | M_yword | MY_1to4); put("vcvtuqq2ps", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER); - put("vcvtusi2sd", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64); + put("vcvtusi2sd", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64); put("vcvtusi2sd", XMM, XMM_ER, REG64); - put("vcvtusi2ss", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64); + put("vcvtusi2ss", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64); put("vcvtusi2ss", XMM, XMM_ER, REG32 | REG64); #endif } diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index cf52e32..42974e3 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -105,7 +105,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x5720 /* 0xABCD = A.BC(D) */ + VERSION = 0x5730 /* 0xABCD = A.BC(D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED @@ -1476,6 +1476,7 @@ private: T_B64 = 1 << 27, // m64bcst T_M_K = 1 << 28, // mem{k} T_VSIB = 1 << 29, + T_MEM_EVEX = 1 << 30, // use evex if mem T_XXX }; void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false) @@ -1952,7 +1953,7 @@ private: if (BIT == 64 && addr.is32bit()) db(0x67); int disp8N = 0; bool x = index.isExtIdx(); - if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) { + if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) { int aaa = addr.getOpmaskIdx(); if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY); bool b = false; diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 6111619..92a31f9 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,4 +1,4 @@ -const char *getVersionString() const { return "5.72"; } +const char *getVersionString() const { return "5.73"; } void adc(const Operand& op, 
uint32 imm) { opRM_I(op, imm, 0x10, 2); } void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); } @@ -1206,28 +1206,28 @@ void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); } void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); } void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); } -void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); } +void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); } -void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); } -void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); } +void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); } +void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); } void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); } void vpsllvd(const Xmm& x1, const Xmm& x2, 
const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); } void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); } -void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); } +void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); } -void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); } +void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); } void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); } -void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); } +void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); } -void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); } +void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { 
opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); } -void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); } -void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); } +void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); } +void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); } void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); } void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); } void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); } -void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); } +void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); } void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); } void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | 
T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); } |