about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author MITSUNARI Shigeo <[email protected]> 2024-10-19 08:54:22 +0900
committer MITSUNARI Shigeo <[email protected]> 2024-10-19 08:54:22 +0900
commit 4e2efab94ef6302cce5f4f5b6eaec3312cd4eb00 (patch)
tree f595d11d8b7db5903838ee522b1f512d41cd02bf
parent d7ed9fdefd39f0e3829758d8135f74aa4bd426e2 (diff)
download xbyak-4e2efab94ef6302cce5f4f5b6eaec3312cd4eb00.tar.gz
xbyak-4e2efab94ef6302cce5f4f5b6eaec3312cd4eb00.zip
The encoding of vmovd(w) with REG-to-XMM operands is not affected by the encoding flag.
-rw-r--r-- test/misc.cpp 44
-rw-r--r-- xbyak/xbyak.h 9
2 files changed, 49 insertions, 4 deletions
diff --git a/test/misc.cpp b/test/misc.cpp
index bc5083b..b4874a7 100644
--- a/test/misc.cpp
+++ b/test/misc.cpp
@@ -2284,4 +2284,48 @@ CYBOZU_TEST_AUTO(avx_vnni_int)
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
+CYBOZU_TEST_AUTO(vmovd) // vmovd output must equal tbl under both default AVX10 encoding settings
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ setDefaultEncodingAVX10(PreAVX10v2Encoding);
+ vmovd(eax, xm1); // always AVX10.1: expected bytes are identical in both halves of tbl
+ vmovd(xm3, xm1); // always AVX10.2: expected bytes are identical in both halves of tbl
+ // memory operands while PreAVX10v2Encoding is selected: AVX-512 (AVX10.1)
+ vmovd(ptr[rax+128], xm1);
+ vmovd(xm1, ptr[rax+128]);
+ vmovd(ptr[rax+128], xm30);
+ vmovd(xm30, ptr[rax+128]);
+
+ setDefaultEncodingAVX10(AVX10v2Encoding);
+ vmovd(eax, xm1); // always AVX10.1: repeated to show the setting does not change it
+ vmovd(xm3, xm1); // always AVX10.2: repeated to show the setting does not change it
+ // the same four memory forms while AVX10v2Encoding is selected: AVX10.2
+ vmovd(ptr[rax+128], xm1);
+ vmovd(xm1, ptr[rax+128]);
+ vmovd(ptr[rax+128], xm30);
+ vmovd(xm30, ptr[rax+128]);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0xc5, 0xf9, 0x7e, 0xc8, // vmovd(eax, xm1): avx10.1 -- NOTE(review): 0xc5 lead byte looks like a VEX (AVX) prefix; confirm the tag
+ 0x62, 0xf1, 0x7e, 0x08, 0x7e, 0xd9, // vmovd(xm3, xm1): avx10.2
+ 0xc5, 0xf9, 0x7e, 0x88, 0x80, 0x00, 0x00, 0x00, // vmovd(ptr[rax+128], xm1): avx
+ 0xc5, 0xf9, 0x6e, 0x88, 0x80, 0x00, 0x00, 0x00, // vmovd(xm1, ptr[rax+128]): avx
+ 0x62, 0x61, 0x7d, 0x08, 0x7e, 0x70, 0x20, // vmovd(ptr[rax+128], xm30): avx10.1
+ 0x62, 0x61, 0x7d, 0x08, 0x6e, 0x70, 0x20, // vmovd(xm30, ptr[rax+128]): avx10.1
+
+ 0xc5, 0xf9, 0x7e, 0xc8, // vmovd(eax, xm1): avx10.1, same bytes as the first row
+ 0x62, 0xf1, 0x7e, 0x08, 0x7e, 0xd9, // vmovd(xm3, xm1): avx10.2, same bytes as the second row
+ 0x62, 0xf1, 0x7d, 0x08, 0xd6, 0x48, 0x20, // vmovd(ptr[rax+128], xm1): avx10.2
+ 0x62, 0xf1, 0x7e, 0x08, 0x7e, 0x48, 0x20, // vmovd(xm1, ptr[rax+128]): avx10.2
+ 0x62, 0x61, 0x7d, 0x08, 0xd6, 0x70, 0x20, // vmovd(ptr[rax+128], xm30): avx10.2
+ 0x62, 0x61, 0x7e, 0x08, 0x7e, 0x70, 0x20, // vmovd(xm30, ptr[rax+128]): avx10.2
+ };
+ const size_t n = sizeof(tbl) / sizeof(tbl[0]); // number of expected bytes
+ CYBOZU_TEST_EQUAL(c.getSize(), n); // generated size is compared with the table length
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); // generated bytes are compared with tbl over n entries
+}
+
#endif
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index b3a4df8..001f5ae 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -2809,11 +2809,12 @@ private:
std::swap(p1, p2);
rev = !rev;
}
+ enc = getEncoding(enc, 1);
int sel = -1;
- if (getEncoding(enc, 1) == AVX10v2Encoding) {
- if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev);
- } else {
- if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev);
+ if (p1->isXMM() || (p1->isMEM() && enc == AVX10v2Encoding)) {
+ sel = 2 + int(rev);
+ } else if (p1->isREG(bit) || p1->isMEM()) {
+ sel = int(rev);
}
if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);