about summary refs log tree commit diff homepage
path: root/test
diff options
context:
space:
mode:
author MITSUNARI Shigeo <[email protected]> 2024-10-30 06:39:15 +0900
committer MITSUNARI Shigeo <[email protected]> 2024-10-30 06:39:15 +0900
commit 97b66116ffd26fdbbe27cb6b56236e7078dbd3a0 (patch)
tree 75b388c9f93df2ab1336a6d3f28d06207844157c /test
parent cf209c915b849141ed9821fea883fd04bcc34859 (diff)
parent 565ad4e809c1aa80e295613347420812b3b5ac1a (diff)
download xbyak-97b66116ffd26fdbbe27cb6b56236e7078dbd3a0.tar.gz
xbyak-97b66116ffd26fdbbe27cb6b56236e7078dbd3a0.zip
Merge branch 'dev' v7.20.1
Diffstat (limited to 'test')
-rw-r--r-- test/Makefile 3
-rw-r--r-- test/avx10/bf16.txt 18
-rw-r--r-- test/misc.cpp 96
-rw-r--r-- test/test_by_xed.bat 6
-rw-r--r-- test/test_by_xed.py 76
-rw-r--r-- test/test_by_xed_all.bat 5
6 files changed, 177 insertions, 27 deletions
diff --git a/test/Makefile b/test/Makefile
index cf5c716..a61895f 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -60,7 +60,8 @@ apx: apx.cpp $(XBYAK_INC)
avx10_test: avx10_test.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64
-TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt
+#TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt
+TEST_FILES=old.txt new-ymm.txt bf16.txt misc.txt convert.txt minmax.txt saturation.txt
xed_test:
@set -e; \
for target in $(addprefix avx10/, $(TEST_FILES)); do \
diff --git a/test/avx10/bf16.txt b/test/avx10/bf16.txt
index c544e02..a387c61 100644
--- a/test/avx10/bf16.txt
+++ b/test/avx10/bf16.txt
@@ -113,17 +113,17 @@ vfpclasspbf16(k7|k5, zword_b[rax+128], 13);
vcomsbf16(xm2, xm3);
vcomsbf16(xm2, ptr[rax+128]);
-vgetexppbf16(xm1|k3, xmm2);
-vgetexppbf16(xm1|k3, ptr[rax+128]);
-vgetexppbf16(xm1|k3, ptr_b[rax+128]);
+//vgetexppbf16(xm1|k3, xmm2);
+//vgetexppbf16(xm1|k3, ptr[rax+128]);
+//vgetexppbf16(xm1|k3, ptr_b[rax+128]);
-vgetexppbf16(ym1|k3, ymm2);
-vgetexppbf16(ym1|k3, ptr[rax+128]);
-vgetexppbf16(ym1|k3, ptr_b[rax+128]);
+//vgetexppbf16(ym1|k3, ymm2);
+//vgetexppbf16(ym1|k3, ptr[rax+128]);
+//vgetexppbf16(ym1|k3, ptr_b[rax+128]);
-vgetexppbf16(zm1|k3, zmm2);
-vgetexppbf16(zm1|k3, ptr[rax+128]);
-vgetexppbf16(zm1|k3, ptr_b[rax+128]);
+//vgetexppbf16(zm1|k3, zmm2);
+//vgetexppbf16(zm1|k3, ptr[rax+128]);
+//vgetexppbf16(zm1|k3, ptr_b[rax+128]);
vgetmantpbf16(xm1|k3, xmm2, 3);
vgetmantpbf16(xm1|k3, ptr[rax+128], 5);
diff --git a/test/misc.cpp b/test/misc.cpp
index bc5083b..3ebb74c 100644
--- a/test/misc.cpp
+++ b/test/misc.cpp
@@ -2284,4 +2284,100 @@ CYBOZU_TEST_AUTO(avx_vnni_int)
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
+CYBOZU_TEST_AUTO(vmovd)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ setDefaultEncodingAVX10(PreAVX10v2Encoding);
+ vmovd(eax, xm1); // always AVX10.1
+ vmovd(xm1, eax); // always AVX10.1
+ vmovd(xm3, xm1); // always AVX10.2
+ // AVX-512 (AVX10.1)
+ vmovd(ptr[rax+128], xm1);
+ vmovd(xm1, ptr[rax+128]);
+ vmovd(ptr[rax+128], xm30);
+ vmovd(xm30, ptr[rax+128]);
+
+ setDefaultEncodingAVX10(AVX10v2Encoding);
+ vmovd(eax, xm1); // always AVX10.1
+ vmovd(xm1, eax); // always AVX10.1
+ vmovd(xm3, xm1); // always AVX10.2
+ // AVX10.2
+ vmovd(ptr[rax+128], xm1);
+ vmovd(xm1, ptr[rax+128]);
+ vmovd(ptr[rax+128], xm30);
+ vmovd(xm30, ptr[rax+128]);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0xc5, 0xf9, 0x7e, 0xc8, // avx10.1
+ 0xc5, 0xf9, 0x6e, 0xc8, // avx10.1
+ 0x62, 0xf1, 0x7e, 0x08, 0x7e, 0xd9, // avx10.2
+ 0xc5, 0xf9, 0x7e, 0x88, 0x80, 0x00, 0x00, 0x00, // avx
+ 0xc5, 0xf9, 0x6e, 0x88, 0x80, 0x00, 0x00, 0x00, // avx
+ 0x62, 0x61, 0x7d, 0x08, 0x7e, 0x70, 0x20, // avx10.1
+ 0x62, 0x61, 0x7d, 0x08, 0x6e, 0x70, 0x20, // avx10.1
+
+ 0xc5, 0xf9, 0x7e, 0xc8, // avx10.1
+ 0xc5, 0xf9, 0x6e, 0xc8, // avx10.1
+ 0x62, 0xf1, 0x7e, 0x08, 0x7e, 0xd9, // avx10.2
+ 0x62, 0xf1, 0x7d, 0x08, 0xd6, 0x48, 0x20, // avx10.2
+ 0x62, 0xf1, 0x7e, 0x08, 0x7e, 0x48, 0x20, // avx10.2
+ 0x62, 0x61, 0x7d, 0x08, 0xd6, 0x70, 0x20, // avx10.2
+ 0x62, 0x61, 0x7e, 0x08, 0x7e, 0x70, 0x20, // avx10.2
+ };
+ const size_t n = sizeof(tbl) / sizeof(tbl[0]);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
+CYBOZU_TEST_AUTO(vmovw)
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+ setDefaultEncodingAVX10(PreAVX10v2Encoding);
+ vmovw(eax, xm1); // always avx10.1
+ vmovw(xm1, eax); // always avx10.1
+ vmovw(xm3, xm1); // always avx10.2
+ // AVX10.1
+ vmovw(ptr[rax+128], xm1);
+ vmovw(xm1, ptr[rax+128]);
+ vmovw(ptr[rax+128], xm30);
+ vmovw(xm30, ptr[rax+128]);
+
+ setDefaultEncodingAVX10(AVX10v2Encoding);
+ vmovw(eax, xm1); // always avx10.1
+ vmovw(xm1, eax); // always avx10.1
+ vmovw(xm3, xm1); // always avx10.2
+ // AVX10.2
+ vmovw(ptr[rax+128], xm1);
+ vmovw(xm1, ptr[rax+128]);
+ vmovw(ptr[rax+128], xm30);
+ vmovw(xm30, ptr[rax+128]);
+ }
+ } c;
+ const uint8_t tbl[] = {
+ 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8,
+ 0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8,
+ 0x62, 0xf5, 0x7e, 0x08, 0x6e, 0xd9,
+ 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x48, 0x40,
+ 0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x48, 0x40,
+ 0x62, 0x65, 0x7d, 0x08, 0x7e, 0x70, 0x40,
+ 0x62, 0x65, 0x7d, 0x08, 0x6e, 0x70, 0x40,
+
+ 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8,
+ 0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8,
+ 0x62, 0xf5, 0x7e, 0x08, 0x6e, 0xd9,
+ 0x62, 0xf5, 0x7e, 0x08, 0x7e, 0x48, 0x40,
+ 0x62, 0xf5, 0x7e, 0x08, 0x6e, 0x48, 0x40,
+ 0x62, 0x65, 0x7e, 0x08, 0x7e, 0x70, 0x40,
+ 0x62, 0x65, 0x7e, 0x08, 0x6e, 0x70, 0x40,
+ };
+ const size_t n = sizeof(tbl) / sizeof(tbl[0]);
+ CYBOZU_TEST_EQUAL(c.getSize(), n);
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
+}
+
#endif
diff --git a/test/test_by_xed.bat b/test/test_by_xed.bat
new file mode 100644
index 0000000..bf6ee5e
--- /dev/null
+++ b/test/test_by_xed.bat
@@ -0,0 +1,6 @@
+@echo off
+set CFLAGS=-I ../ /EHsc /nologo
+copy %1% tmp.cpp
+cl %CFLAGS% test_by_xed.cpp && test_by_xed.exe
+%XED% -64 -ir bin > out.txt
+python3 test_by_xed.py %1% out.txt
diff --git a/test/test_by_xed.py b/test/test_by_xed.py
index 1e84c6a..13d695c 100644
--- a/test/test_by_xed.py
+++ b/test/test_by_xed.py
@@ -76,7 +76,7 @@ def newReg(s):
return s
class Memory:
- def __init__(self, size=0, base=None, index=None, scale=0, disp=0, broadcast=False):
+ def __init__(self, size=0, base=None, index=None, scale=0, disp=0, broadcast=0):
self.size = size
self.base = newReg(base)
self.index = newReg(index)
@@ -85,8 +85,12 @@ class Memory:
self.broadcast = broadcast
def __str__(self):
- s = 'ptr' if self.size == 0 else g_sizeTbl[int(math.log2(self.size))]
- if self.broadcast:
+ if self.size == 0:
+ s = 'ptr'
+ else:
+ idx = self.size * max(self.broadcast, 1)
+ s = g_sizeTbl[int(math.log2(idx))]
+ if self.broadcast > 0:
s += '_b'
s += ' ['
needPlus = False
@@ -107,23 +111,36 @@ class Memory:
s += ']'
return s
+ # Xbyak uses 'ptr' when it can be automatically detected, so we should consider this in the comparison.
def __eq__(self, rhs):
- # xbyak uses ptr if it is automatically detected, so xword == ptr is true
- if self.broadcast != rhs.broadcast: return False
-# if not self.broadcast and 0 < self.size <= 8 and 0 < rhs.size <= 8 and self.size != rhs.size: return False
- if not self.broadcast and self.size > 0 and rhs.size > 0 and self.size != rhs.size: return False
+ if self.broadcast > rhs.broadcast:
+ return rhs == self
+ assert(self.broadcast <= rhs.broadcast)
+ if self.broadcast == 0:
+ if rhs.broadcast > 0: return False
+ # Xbyak uses 'ptr' when it is automatically detected.
+ # Therefore, the comparison is true if 'ptr' (i.e., size = 0) is used.
+ if 0 < self.size and 0 < rhs.size and self.size != rhs.size: return False
+ if self.broadcast == 1: # _b
+ if rhs.broadcast == 1: # compare ptr_b with ptr_b
+ if self.size != rhs.size:
+ return False
+ if self.size > 0 and (self.size != rhs.size * rhs.broadcast): # compare ptr_b with {1toX}
+ return False
+ else:
+ if self.broadcast != rhs.broadcast: return False
r = self.base == rhs.base and self.index == rhs.index and self.scale == rhs.scale and self.disp == rhs.disp
return r
def parseBroadcast(s):
if '_b' in s:
- return (s.replace('_b', ''), True)
- r = re.search(r'({1to\d+})', s)
+ return (s.replace('_b', ''), 1)
+ r = re.search(r'({1to(\d+)})', s)
if not r:
- return (s, False)
- return (s.replace(r.group(1), ''), True)
+ return (s, 0)
+ return (s.replace(r.group(1), ''), int(r.group(2)))
-def parseMemory(s, broadcast=False):
+def parseMemory(s, broadcast=0):
org_s = s
s = s.replace(' ', '').lower()
@@ -133,7 +150,7 @@ def parseMemory(s, broadcast=False):
scale = 0
disp = 0
- if not broadcast:
+ if broadcast == 0:
(s, broadcast) = parseBroadcast(s)
# Parse size
@@ -157,7 +174,7 @@ def parseMemory(s, broadcast=False):
s = s[3:]
if s.startswith('_b'):
- broadcast = True
+ broadcast = 1
s = s[2:]
# Extract the content inside brackets
@@ -335,7 +352,7 @@ def parseMemoryTest():
('[]', Memory()),
('[rax]', Memory(0, rax)),
('ptr[rax]', Memory(0, rax)),
- ('ptr_b[rax]', Memory(0, rax, broadcast=True)),
+ ('ptr_b[rax]', Memory(0, rax, broadcast=1)),
('dword[rbx]', Memory(4, rbx)),
('xword ptr[rcx]', Memory(16, rcx)),
('xmmword ptr[rcx]', Memory(16, rcx)),
@@ -344,11 +361,36 @@ def parseMemoryTest():
('[0x12345]', Memory(0, None, None, 0, 0x12345)),
('yword [rax+rdx*4]', Memory(32, rax, rdx, 4)),
('zword [rax+rdx*4+123]', Memory(64, rax, rdx, 4, 123)),
+ ('xword_b [rax]', Memory(16, rax, None, 0, 0, 1)),
+ ('dword [rax]{1to4}', Memory(16, rax, None, 0, 0, 1)),
+ ('yword_b [rax]', Memory(32, rax, None, 0, 0, 1)),
+ ('dword [rax]{1to8}', Memory(32, rax, None, 0, 0, 1)),
]
for (s, expected) in tbl:
my = parseMemory(s)
assertEqualStr(my, expected)
+ print('compare test')
+ tbl = [
+ ('ptr[rax]', 'dword[rax]', True),
+ ('byte[rax]', 'dword[rax]', False),
+ ('yword_b[rax]', 'dword [rax]{1to8}', True),
+ ('yword_b[rax]', 'word [rax]{1to16}', True),
+ ('zword_b[rax]', 'word [rax]{1to32}', True),
+ ('zword_b[rax]', 'word [rax]{1to16}', False),
+ ('dword [rax]{1to2}', 'dword [rax] {1to4}', False),
+ ('zword_b[rax]', 'xword_b [rax]', False),
+ ('ptr_b[rax]', 'word [rax]{1to32}', True), # ignore size
+ ]
+ for (lhs, rhs, eq) in tbl:
+ a = parseMemory(lhs)
+ b = parseMemory(rhs)
+ if eq:
+ assertEqual(a, b)
+ assertEqual(b, a)
+ else:
+ assert(parseMemory(lhs) != parseMemory(rhs))
+
def parseNmemonicTest():
print('parseNmemonicTest')
tbl = [
@@ -364,8 +406,8 @@ def parseNmemonicTest():
('vpcompressw(zmm30 | k2 |T_z, zmm1);', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])),
('vpcompressw zmm30{k2}{z}, zmm1', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])),
('vpshldw(xmm9|k3|T_z, xmm2, ptr [rax + 0x40], 5);', Nmemonic('vpshldw', [xmm9, xmm2, Memory(0, rax, None, 0, 0x40), 5], [k3, T_z])),
- ('vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, True), 5], [k3, T_z])),
- ('vpshrdd xmm5{k3}{z}, xmm2, dword ptr [rax+0x40]{1to4}, 0x5', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, True), 5], [k3, T_z])),
+ ('vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, 1), 5], [k3, T_z])),
+ ('vpshrdd xmm5{k3}{z}, xmm2, dword ptr [rax+0x40]{1to4}, 0x5', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, 4), 5], [k3, T_z])),
('vcmpph(k1, xmm15, ptr[rax+64], 1);', Nmemonic('vcmpph', [k1, xmm15, Memory(0, rax, None, 0, 64), 1])),
]
for (s, expected) in tbl:
diff --git a/test/test_by_xed_all.bat b/test/test_by_xed_all.bat
new file mode 100644
index 0000000..bb57cb4
--- /dev/null
+++ b/test/test_by_xed_all.bat
@@ -0,0 +1,5 @@
+set TARGETS=old.txt new-ymm.txt bf16.txt misc.txt convert.txt minmax.txt saturation.txt
+for %%f in (%TARGETS%) do (
+ echo %%f
+ call test_by_xed.bat avx10\%%f
+)
\ No newline at end of file