aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author MITSUNARI Shigeo <[email protected]> 2016-07-22 07:28:53 +0900
committer MITSUNARI Shigeo <[email protected]> 2016-07-22 07:28:53 +0900
commit fe6349a07ebc67190f350d799be32d179b2dbe8c (patch)
tree 0c70ac06801844d6b2d62a43df1607a649718e7f
parent 3a91688d1ff1166f4178806bc094a5a700bc809f (diff)
download xbyak-fe6349a07ebc67190f350d799be32d179b2dbe8c.tar.gz
xbyak-fe6349a07ebc67190f350d799be32d179b2dbe8c.zip
add vgetmant*, vscale* vrsqrt*
-rw-r--r-- gen/gen_avx512.cpp 54
-rw-r--r-- test/make_512.cpp 100
-rw-r--r-- xbyak/xbyak_avx512.h 28
3 files changed, 171 insertions, 11 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 42b2709..715ed65 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -174,6 +174,8 @@ void putX_XM()
{ 0x89, "vpexpandd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
{ 0x89, "vpexpandq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
+ { 0x42, "vgetexppd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
+ { 0x42, "vgetexpps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
@@ -280,6 +282,30 @@ void putX_X_XM_IMM()
{ 0x25, "vpternlogd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
{ 0x25, "vpternlogq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },
+
+ { 0x43, "vgetexpsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, false },
+ { 0x43, "vgetexpss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, false },
+ { 0x27, "vgetmantsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
+ { 0x27, "vgetmantss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
+
+ { 0x54, "vfixupimmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, true },
+ { 0x54, "vfixupimmps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, true },
+ { 0x55, "vfixupimmsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N8, true },
+ { 0x55, "vfixupimmss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N4, true },
+
+ { 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false },
+ { 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false },
+
+ { 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false },
+ { 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false },
+
+ { 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, true },
+ { 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, true },
+
+ { 0x2C, "vscalefpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, false },
+ { 0x2C, "vscalefps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z, false },
+ { 0x2D, "vscalefsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false },
+ { 0x2D, "vscalefss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
@@ -512,6 +538,33 @@ void putMov()
}
}
+void putX_XM_IMM() // Prints, for every row of the table below, a one-line wrapper definition that forwards to opAVX_X_XM_IMM.
+{
+ const struct Tbl {
+ uint8 code; // opcode byte; printed as 0x%02X in the generated call
+ const char *name; // mnemonic; becomes the name of the generated function
+ int type; // T_* flag mask; converted to text by type2String() before printing
+ bool hasIMM; // true: the generated wrapper takes `uint8 imm` and passes it on as the last argument
+ } tbl[] = {
+ { 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
+ { 0x26, "vgetmantps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
+ { 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
+ { 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
+
+ { 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
+ { 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
+
+ { 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },
+ { 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
+
+ };
+ for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+ const Tbl *p = &tbl[i];
+ std::string type = type2String(p->type); // type2String is defined outside this hunk; presumably yields the `T_66 | T_0F3A | ...` spelling seen in xbyak_avx512.h
+ printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n"
+ , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); // both optional pieces are empty strings when hasIMM is false
+ }
+}
int main()
{
puts("#ifndef XBYAK_DISABLE_AVX512");
@@ -530,5 +583,6 @@ int main()
putGather();
putShuff();
putMov();
+ putX_XM_IMM();
puts("#endif");
}
diff --git a/test/make_512.cpp b/test/make_512.cpp
index db2d9d7..589ed6a 100644
--- a/test/make_512.cpp
+++ b/test/make_512.cpp
@@ -1742,16 +1742,6 @@ public:
put("vshufi64x2", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
put("vshufi64x2", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
}
- void putMisc2()
- {
- put("vpternlogd", XMM_KZ, _XMM, _XMM | _MEM | M_1to4, IMM8);
- put("vpternlogd", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
- put("vpternlogd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16, IMM8);
-
- put("vpternlogq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2, IMM8);
- put("vpternlogq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
- put("vpternlogq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
- }
void putMov()
{
put("vpmovm2b", _XMM | _YMM | _ZMM, K);
@@ -1803,10 +1793,98 @@ public:
put("vpmovuswb", XMM_KZ | _MEM, _XMM | _YMM);
put("vpmovuswb", YMM_KZ | _MEM, _ZMM);
}
+ void putMisc2() // Passes put() the operand combinations for vpternlog*, vgetexp*, vgetmant*, vfixupimm*, vrcp14*, vrsqrt14*, vrndscale* and packed vscalef*.
+ {
+ put("vpternlogd", XMM_KZ, _XMM, _XMM | _MEM | M_1to4, IMM8);
+ put("vpternlogd", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
+ put("vpternlogd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16, IMM8);
+
+ put("vpternlogq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2, IMM8);
+ put("vpternlogq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
+ put("vpternlogq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
+
+ put("vgetexppd", XMM_KZ, _XMM | MEM | M_1to2); // NOTE(review): the six packed vgetexp rows use MEM where every other row in this function uses _MEM - confirm this is intended
+ put("vgetexppd", YMM_KZ, _YMM | MEM | M_1to4);
+ put("vgetexppd", ZMM_KZ, _ZMM | MEM | M_1to8 | ZMM_SAE);
+
+ put("vgetexpps", XMM_KZ, _XMM | MEM | M_1to4);
+ put("vgetexpps", YMM_KZ, _YMM | MEM | M_1to8);
+ put("vgetexpps", ZMM_KZ, _ZMM | MEM | M_1to16 | ZMM_SAE);
+
+ put("vgetexpsd", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE);
+ put("vgetexpss", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE);
+
+ put("vgetmantpd", XMM_KZ, _XMM | _MEM | M_1to2, IMM8);
+ put("vgetmantpd", YMM_KZ, _YMM | _MEM | M_1to4, IMM8);
+ put("vgetmantpd", ZMM_KZ, _ZMM | _MEM | M_1to8, IMM8); // NOTE(review): no ZMM_SAE operand here or for vgetmantps, although gen/gen_avx512.cpp marks both T_SAE_Z - confirm coverage
+
+ put("vgetmantps", XMM_KZ, _XMM | _MEM | M_1to4, IMM8);
+ put("vgetmantps", YMM_KZ, _YMM | _MEM | M_1to8, IMM8);
+ put("vgetmantps", ZMM_KZ, _ZMM | _MEM | M_1to16, IMM8);
+
+ put("vgetmantsd", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE, IMM8);
+ put("vgetmantss", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE, IMM8);
+
+ put("vfixupimmpd", XMM_KZ, _XMM, _XMM | _MEM | M_1to2, IMM8);
+ put("vfixupimmpd", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
+ put("vfixupimmpd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8); // NOTE(review): the vfixupimm rows (packed and scalar) carry no SAE operand, while the generator table marks all four T_SAE_Z - confirm
+
+ put("vfixupimmps", XMM_KZ, _XMM, _XMM | _MEM | M_1to4, IMM8);
+ put("vfixupimmps", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
+ put("vfixupimmps", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16, IMM8);
+
+ put("vfixupimmsd", XMM_KZ, _XMM, _XMM | _MEM, IMM8);
+ put("vfixupimmss", XMM_KZ, _XMM, _XMM | _MEM, IMM8);
+
+ put("vrcp14pd", XMM_KZ, _XMM | _MEM | M_1to2);
+ put("vrcp14pd", YMM_KZ, _YMM | _MEM | M_1to4);
+ put("vrcp14pd", ZMM_KZ, _ZMM | _MEM | M_1to8);
+
+ put("vrcp14ps", XMM_KZ, _XMM | _MEM | M_1to4);
+ put("vrcp14ps", YMM_KZ, _YMM | _MEM | M_1to8);
+ put("vrcp14ps", ZMM_KZ, _ZMM | _MEM | M_1to16);
+
+ put("vrcp14sd", XMM_KZ, _XMM, _XMM | _MEM);
+
+ put("vrcp14ss", XMM_KZ, _XMM, _XMM | _MEM);
+
+ put("vrsqrt14pd", XMM_KZ, _XMM | _MEM | M_1to2);
+ put("vrsqrt14pd", YMM_KZ, _YMM | _MEM | M_1to4);
+ put("vrsqrt14pd", ZMM_KZ, _ZMM | _MEM | M_1to8);
+
+ put("vrsqrt14ps", XMM_KZ, _XMM | _MEM | M_1to4);
+ put("vrsqrt14ps", YMM_KZ, _YMM | _MEM | M_1to8);
+ put("vrsqrt14ps", ZMM_KZ, _ZMM | _MEM | M_1to16);
+
+ put("vrsqrt14sd", XMM_KZ, _XMM, _XMM | _MEM);
+
+ put("vrsqrt14ss", XMM_KZ, _XMM, _XMM | _MEM);
+
+ put("vrndscalepd", XMM_KZ, _XMM | _MEM | M_1to2, IMM8);
+ put("vrndscalepd", YMM_KZ, _YMM | _MEM | M_1to4, IMM8);
+ put("vrndscalepd", ZMM_KZ, _ZMM | _MEM | M_1to8, IMM8);
+
+ put("vrndscaleps", XMM_KZ, _XMM | _MEM | M_1to4, IMM8);
+ put("vrndscaleps", YMM_KZ, _YMM | _MEM | M_1to8, IMM8);
+ put("vrndscaleps", ZMM_KZ, _ZMM | _MEM | M_1to16, IMM8);
+
+ put("vrndscalesd", XMM_KZ, _XMM, _XMM | _MEM, IMM8);
+
+ put("vrndscaless", XMM_KZ, _XMM, _XMM | _MEM, IMM8);
+
+ put("vscalefpd", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
+ put("vscalefpd", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
+ put("vscalefpd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 | ZMM_ER); // only the ZMM rows of vscalefpd/ps add the ZMM_ER operand
+
+ put("vscalefps", XMM_KZ, _XMM, _XMM | _MEM | M_1to4);
+ put("vscalefps", YMM_KZ, _YMM, _YMM | _MEM | M_1to8);
+ put("vscalefps", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 | ZMM_ER);
+ }
void putMin()
{
#ifdef XBYAK64
- putMov();
+ put("vscalefsd", XMM_KZ, _XMM, _XMM | _MEM | XMM_ER); // scalar vscalef rows with the XMM_ER operand; these two are not listed in putMisc2()
+ put("vscalefss", XMM_KZ, _XMM, _XMM | _MEM | XMM_ER);
#endif
}
void putAVX512()
diff --git a/xbyak/xbyak_avx512.h b/xbyak/xbyak_avx512.h
index 1d74a3a..9db2092 100644
--- a/xbyak/xbyak_avx512.h
+++ b/xbyak/xbyak_avx512.h
@@ -111,6 +111,8 @@ void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T
void vexpandps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N4, 0x88); }
void vpexpandd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N4, 0x89); }
void vpexpandq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x89); }
+void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x42); } // corresponds to the vgetexppd/vgetexpps rows added to the putX_XM() table in gen/gen_avx512.cpp
+void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); }
void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }
void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x7F); }
@@ -163,6 +165,24 @@ void vpermi2ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x
void vpermi2pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x77); }
void vpternlogd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x25, imm); }
void vpternlogq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x25, imm); }
+void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX | T_N8, 0x43); } // this run of wrappers corresponds to the rows added to the putX_X_XM_IMM() table in gen/gen_avx512.cpp
+void vgetexpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX | T_N4, 0x43); }
+void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX | T_N8, 0x27, imm); }
+void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX | T_N4, 0x27, imm); }
+void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); }
+void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
+void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX | T_N8, 0x55, imm); }
+void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX | T_N4, 0x55, imm); }
+void vrcp14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8, 0x4D); }
+void vrcp14ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4, 0x4D); }
+void vrsqrt14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x4F); } // NOTE(review): carries T_YMM, unlike the sibling scalar vrcp14sd/vrcp14ss - confirm; if unintended, fix the generator table row rather than this line
+void vrsqrt14ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N4, 0x4F); } // NOTE(review): same T_YMM question as vrsqrt14sd
+void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_MUST_EVEX | T_N8, 0x0B, imm); }
+void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_MUST_EVEX | T_N4, 0x0A, imm); }
+void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x2C); }
+void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); }
+void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX | T_N8, 0x2D); }
+void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX | T_N4, 0x2D); }
void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
void vextractf32x4(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); }
void vextractf64x2(const Operand& op, const Ymm& r, uint8 imm) { opAVX_X_X_XMcvt(r, true, cvtIdx0(r), op, op.isXMM(), Operand::YMM, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_N16, 0x19, imm); }
@@ -254,4 +274,12 @@ void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 |
void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N8 | T_N_VL, 0x30, true); }
void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N8 | T_N_VL, 0x20, true); }
void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N8 | T_N_VL, 0x10, true); }
+void vgetmantpd(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x26, imm); } // these eight wrappers match the putX_XM_IMM() table in gen/gen_avx512.cpp, which main() calls last before printing #endif
+void vgetmantps(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x26, imm); }
+void vrcp14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4C); }
+void vrcp14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4C); }
+void vrsqrt14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4E); }
+void vrsqrt14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4E); }
+void vrndscalepd(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x09, imm); }
+void vrndscaleps(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x08, imm); }
#endif