about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- gen/gen_avx512.cpp 3
-rw-r--r-- gen/gen_code.cpp 2
-rw-r--r-- test/avx10/misc.txt 9
-rw-r--r-- test/avx10_test.cpp 24
-rw-r--r-- test/test_by_xed.cpp 3
-rw-r--r-- test/test_by_xed.py 5
-rw-r--r-- xbyak/xbyak.h 16
-rw-r--r-- xbyak/xbyak_mnemonic.h 2
8 files changed, 51 insertions, 13 deletions
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 109afc6..9840844 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -447,7 +447,6 @@ void putX_X_XM_IMM()
{ 0x1B, "vcvtne2ph2hf8s", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_B16 | T_N1, false },
{ 0x52, "vdpphps", T_MUST_EVEX | T_0F38 | T_EW0 | T_YMM | T_B32, false },
-// { 0x42, "vmpsadbw", T_MUST_EVEX | T_F3 | T_0F3A | T_EW0 | T_YMM | T_B32, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
@@ -455,7 +454,7 @@ void putX_X_XM_IMM()
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
, p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
-// puts("void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A | T_YMM, 0x42, encoding, imm, T_66 | T_W0 | T_YMM, T_F3 | T_EW0 | T_B32); }");
+ puts("void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A | T_YMM, 0x42, encoding, imm, T_66 | T_W0 | T_YMM, T_F3 | T_0F3A | T_EW0 | T_B32, 1); }");
}
void putShift()
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index 764d118..58c176a 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -57,7 +57,7 @@ void putX_X_XM(bool omitOnly)
{ 0x0C, "blendps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
{ 0x41, "dppd", T_0F3A | T_66 | T_W0, true, true, 3 },
{ 0x40, "dpps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
- { 0x42, "mpsadbw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
+ { 0x42, "mpsadbw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 1 },
{ 0x0E, "pblendw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
{ 0x02, "pblendd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 2 },
{ 0x0B, "roundsd", T_0F3A | T_66 | T_W0, true, true, 3 },
diff --git a/test/avx10/misc.txt b/test/avx10/misc.txt
index f7d1351..5c39e81 100644
--- a/test/avx10/misc.txt
+++ b/test/avx10/misc.txt
@@ -10,4 +10,11 @@ vdpphps(zm1, zm2, zm3);
vdpphps(zm1, zm2, ptr[rax+128]);
vdpphps(zm1, zm2, ptr_b[rax+128]);
-// skip vmpsadbw
+vmpsadbw(xm1, xm3, xm15, 3);
+vmpsadbw(xm1|T_z, xm4, ptr[rax+128], 5);
+
+vmpsadbw(ym1|k4, ym3, ym15, 3);
+vmpsadbw(ym1, ym4, ptr[rax+128], 5);
+
+vmpsadbw(zm1|k4, zm3, zm15, 3);
+vmpsadbw(zm1, zm4, ptr[rax+128], 5);
diff --git a/test/avx10_test.cpp b/test/avx10_test.cpp
index 9a4a848..5f742fe 100644
--- a/test/avx10_test.cpp
+++ b/test/avx10_test.cpp
@@ -228,3 +228,27 @@ CYBOZU_TEST_AUTO(ymm_with_sae)
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
+
+CYBOZU_TEST_AUTO(vmpsadbw)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ setDefaultEncoding();
+ vmpsadbw(xm1, xm3, xm15, 3); // vex(avx)
+ vmpsadbw(ym1, ym3, ptr[rax+128], 3); // vex(avx2)
+ setDefaultEncoding(VexEncoding, EvexEncoding);
+ vmpsadbw(ym1, ym3, ym15, 3); // evex(avx10.2)
+ vmpsadbw(ym1, ym3, ptr[rax+128], 3); // evex(avx10.2)
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0xc4, 0xc3, 0x61, 0x42, 0xcf, 0x03,
+ 0xc4, 0xe3, 0x65, 0x42, 0x88, 0x80, 0x00, 0x00, 0x00, 0x03,
+ 0x62, 0xd3, 0x66, 0x28, 0x42, 0xcf, 0x03,
+ 0x62, 0xf3, 0x66, 0x28, 0x42, 0x48, 0x04, 0x03,
+ };
+ const size_t n = sizeof(tbl) / sizeof(tbl[0]);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
diff --git a/test/test_by_xed.cpp b/test/test_by_xed.cpp
index 93c370c..ddac779 100644
--- a/test/test_by_xed.cpp
+++ b/test/test_by_xed.cpp
@@ -1,10 +1,13 @@
#include <stdio.h>
#include <xbyak/xbyak.h>
+using namespace Xbyak;
+
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(4096*8)
{
+ setDefaultEncoding(VexEncoding, EvexEncoding);
#include "tmp.cpp"
}
};
diff --git a/test/test_by_xed.py b/test/test_by_xed.py
index 5b84995..afd77d8 100644
--- a/test/test_by_xed.py
+++ b/test/test_by_xed.py
@@ -210,6 +210,11 @@ def parseNmemonic(s):
args = []
attrs = []
+ # remove Xbyak::{Evex,Vex}Encoding
+ r = re.search(r'(,[^,]*Encoding)', s)
+ if r:
+ s = s.replace(r.group(1), '')
+
(s, broadcast) = parseBroadcast(s)
# replace xm0 with xmm0
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 1642290..c5de008 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -2661,11 +2661,11 @@ private:
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
opVex(x, 0, addr, type, code);
}
- void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding encoding, int sel = 0)
+ void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding encoding, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0)
{
- opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding, sel), code);
+ opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding, typeVex, typeEvex, sel), code, imm);
}
- int orEvexIf(PreferredEncoding encoding, int sel = 0) {
+ int orEvexIf(PreferredEncoding encoding, uint64_t typeVex, uint64_t typeEvex, int sel) {
if (encoding == DefaultEncoding) {
encoding = defaultEncoding_[sel];
}
@@ -2673,9 +2673,9 @@ private:
#ifdef XBYAK_DISABLE_AVX512
XBYAK_THROW(ERR_EVEX_IS_INVALID)
#endif
- return T_MUST_EVEX;
+ return T_MUST_EVEX | typeEvex;
}
- return 0;
+ return typeVex;
}
void opInOut(const Reg& a, const Reg& d, uint8_t code)
{
@@ -3132,8 +3132,8 @@ public:
#endif
, isDefaultJmpNEAR_(false)
{
- defaultEncoding_[0] = EvexEncoding; // use avx512-vnni not avx-vnni
- defaultEncoding_[1] = VexEncoding; // use vmpsadbw(avx) not avx10.2
+ // select avx512-vnni, vmpsadbw(avx)
+ setDefaultEncoding();
labelMgr_.set(this);
}
void reset()
@@ -3171,7 +3171,7 @@ public:
#endif
// set default encoding to select Vex or Evex
- void setDefaultEncoding(PreferredEncoding vnniEnc, PreferredEncoding mpsadbwEnc = VexEncoding)
+ void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding mpsadbwEnc = VexEncoding)
{ defaultEncoding_[0] = vnniEnc; defaultEncoding_[1] = mpsadbwEnc; }
void sha1msg12(const Xmm& x, const Operand& op)
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index daafcd1..8515e41 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1369,7 +1369,6 @@ void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_
void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0x10); }
void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F|T_EW0|T_YMM|T_EVEX|T_M_K, 0x11); }
void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_EW0|T_YMM|T_EVEX, 0x10); }
-void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x42, imm); }
void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); }
void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x59); }
void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x59); }
@@ -2408,6 +2407,7 @@ void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2,
void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x6E); }
+void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A | T_YMM, 0x42, encoding, imm, T_66 | T_W0 | T_YMM, T_F3 | T_0F3A | T_EW0 | T_B32, 1); }
void vmulnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x59); }
void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); }
void vmulsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x59); }