diff options
author | MITSUNARI Shigeo <[email protected]> | 2024-10-30 06:39:15 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2024-10-30 06:39:15 +0900 |
commit | 97b66116ffd26fdbbe27cb6b56236e7078dbd3a0 (patch) | |
tree | 75b388c9f93df2ab1336a6d3f28d06207844157c /test | |
parent | cf209c915b849141ed9821fea883fd04bcc34859 (diff) | |
parent | 565ad4e809c1aa80e295613347420812b3b5ac1a (diff) | |
download | xbyak-97b66116ffd26fdbbe27cb6b56236e7078dbd3a0.tar.gz xbyak-97b66116ffd26fdbbe27cb6b56236e7078dbd3a0.zip |
Merge branch 'dev' (v7.20.1)
Diffstat (limited to 'test')
-rw-r--r-- | test/Makefile | 3 | ||||
-rw-r--r-- | test/avx10/bf16.txt | 18 | ||||
-rw-r--r-- | test/misc.cpp | 96 | ||||
-rw-r--r-- | test/test_by_xed.bat | 6 | ||||
-rw-r--r-- | test/test_by_xed.py | 76 | ||||
-rw-r--r-- | test/test_by_xed_all.bat | 5 |
6 files changed, 177 insertions, 27 deletions
diff --git a/test/Makefile b/test/Makefile index cf5c716..a61895f 100644 --- a/test/Makefile +++ b/test/Makefile @@ -60,7 +60,8 @@ apx: apx.cpp $(XBYAK_INC) avx10_test: avx10_test.cpp $(XBYAK_INC) $(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64 -TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt +#TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt +TEST_FILES=old.txt new-ymm.txt bf16.txt misc.txt convert.txt minmax.txt saturation.txt xed_test: @set -e; \ for target in $(addprefix avx10/, $(TEST_FILES)); do \ diff --git a/test/avx10/bf16.txt b/test/avx10/bf16.txt index c544e02..a387c61 100644 --- a/test/avx10/bf16.txt +++ b/test/avx10/bf16.txt @@ -113,17 +113,17 @@ vfpclasspbf16(k7|k5, zword_b[rax+128], 13); vcomsbf16(xm2, xm3); vcomsbf16(xm2, ptr[rax+128]); -vgetexppbf16(xm1|k3, xmm2); -vgetexppbf16(xm1|k3, ptr[rax+128]); -vgetexppbf16(xm1|k3, ptr_b[rax+128]); +//vgetexppbf16(xm1|k3, xmm2); +//vgetexppbf16(xm1|k3, ptr[rax+128]); +//vgetexppbf16(xm1|k3, ptr_b[rax+128]); -vgetexppbf16(ym1|k3, ymm2); -vgetexppbf16(ym1|k3, ptr[rax+128]); -vgetexppbf16(ym1|k3, ptr_b[rax+128]); +//vgetexppbf16(ym1|k3, ymm2); +//vgetexppbf16(ym1|k3, ptr[rax+128]); +//vgetexppbf16(ym1|k3, ptr_b[rax+128]); -vgetexppbf16(zm1|k3, zmm2); -vgetexppbf16(zm1|k3, ptr[rax+128]); -vgetexppbf16(zm1|k3, ptr_b[rax+128]); +//vgetexppbf16(zm1|k3, zmm2); +//vgetexppbf16(zm1|k3, ptr[rax+128]); +//vgetexppbf16(zm1|k3, ptr_b[rax+128]); vgetmantpbf16(xm1|k3, xmm2, 3); vgetmantpbf16(xm1|k3, ptr[rax+128], 5); diff --git a/test/misc.cpp b/test/misc.cpp index bc5083b..3ebb74c 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -2284,4 +2284,100 @@ CYBOZU_TEST_AUTO(avx_vnni_int) CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } +CYBOZU_TEST_AUTO(vmovd) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + setDefaultEncodingAVX10(PreAVX10v2Encoding); + vmovd(eax, xm1); // always AVX10.1 + vmovd(xm1, eax); // always AVX10.1 + 
vmovd(xm3, xm1); // always AVX10.2 + // AVX-512 (AVX10.1) + vmovd(ptr[rax+128], xm1); + vmovd(xm1, ptr[rax+128]); + vmovd(ptr[rax+128], xm30); + vmovd(xm30, ptr[rax+128]); + + setDefaultEncodingAVX10(AVX10v2Encoding); + vmovd(eax, xm1); // always AVX10.1 + vmovd(xm1, eax); // always AVX10.1 + vmovd(xm3, xm1); // always AVX10.2 + // AVX10.2 + vmovd(ptr[rax+128], xm1); + vmovd(xm1, ptr[rax+128]); + vmovd(ptr[rax+128], xm30); + vmovd(xm30, ptr[rax+128]); + } + } c; + const uint8_t tbl[] = { + 0xc5, 0xf9, 0x7e, 0xc8, // avx10.1 + 0xc5, 0xf9, 0x6e, 0xc8, // avx10.1 + 0x62, 0xf1, 0x7e, 0x08, 0x7e, 0xd9, // avx10.2 + 0xc5, 0xf9, 0x7e, 0x88, 0x80, 0x00, 0x00, 0x00, // avx + 0xc5, 0xf9, 0x6e, 0x88, 0x80, 0x00, 0x00, 0x00, // avx + 0x62, 0x61, 0x7d, 0x08, 0x7e, 0x70, 0x20, // avx10.1 + 0x62, 0x61, 0x7d, 0x08, 0x6e, 0x70, 0x20, // avx10.1 + + 0xc5, 0xf9, 0x7e, 0xc8, // avx10.1 + 0xc5, 0xf9, 0x6e, 0xc8, // avx10.1 + 0x62, 0xf1, 0x7e, 0x08, 0x7e, 0xd9, // avx10.2 + 0x62, 0xf1, 0x7d, 0x08, 0xd6, 0x48, 0x20, // avx10.2 + 0x62, 0xf1, 0x7e, 0x08, 0x7e, 0x48, 0x20, // avx10.2 + 0x62, 0x61, 0x7d, 0x08, 0xd6, 0x70, 0x20, // avx10.2 + 0x62, 0x61, 0x7e, 0x08, 0x7e, 0x70, 0x20, // avx10.2 + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} + +CYBOZU_TEST_AUTO(vmovw) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + setDefaultEncodingAVX10(PreAVX10v2Encoding); + vmovw(eax, xm1); // always avx10.1 + vmovw(xm1, eax); // always avx10.1 + vmovw(xm3, xm1); // always avx10.2 + // AVX10.1 + vmovw(ptr[rax+128], xm1); + vmovw(xm1, ptr[rax+128]); + vmovw(ptr[rax+128], xm30); + vmovw(xm30, ptr[rax+128]); + + setDefaultEncodingAVX10(AVX10v2Encoding); + vmovw(eax, xm1); // always avx10.1 + vmovw(xm1, eax); // always avx10.1 + vmovw(xm3, xm1); // always avx10.2 + // AVX10.2 + vmovw(ptr[rax+128], xm1); + vmovw(xm1, ptr[rax+128]); + vmovw(ptr[rax+128], xm30); + vmovw(xm30, ptr[rax+128]); + } + } c; + const 
uint8_t tbl[] = { + 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, + 0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, + 0x62, 0xf5, 0x7e, 0x08, 0x6e, 0xd9, + 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x48, 0x40, + 0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x48, 0x40, + 0x62, 0x65, 0x7d, 0x08, 0x7e, 0x70, 0x40, + 0x62, 0x65, 0x7d, 0x08, 0x6e, 0x70, 0x40, + + 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, + 0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, + 0x62, 0xf5, 0x7e, 0x08, 0x6e, 0xd9, + 0x62, 0xf5, 0x7e, 0x08, 0x7e, 0x48, 0x40, + 0x62, 0xf5, 0x7e, 0x08, 0x6e, 0x48, 0x40, + 0x62, 0x65, 0x7e, 0x08, 0x7e, 0x70, 0x40, + 0x62, 0x65, 0x7e, 0x08, 0x6e, 0x70, 0x40, + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} + #endif diff --git a/test/test_by_xed.bat b/test/test_by_xed.bat new file mode 100644 index 0000000..bf6ee5e --- /dev/null +++ b/test/test_by_xed.bat @@ -0,0 +1,6 @@ +@echo off +set CFLAGS=-I ../ /EHsc /nologo +copy %1% tmp.cpp +cl %CFLAGS% test_by_xed.cpp && test_by_xed.exe +%XED% -64 -ir bin > out.txt +python3 test_by_xed.py %1% out.txt diff --git a/test/test_by_xed.py b/test/test_by_xed.py index 1e84c6a..13d695c 100644 --- a/test/test_by_xed.py +++ b/test/test_by_xed.py @@ -76,7 +76,7 @@ def newReg(s): return s class Memory: - def __init__(self, size=0, base=None, index=None, scale=0, disp=0, broadcast=False): + def __init__(self, size=0, base=None, index=None, scale=0, disp=0, broadcast=0): self.size = size self.base = newReg(base) self.index = newReg(index) @@ -85,8 +85,12 @@ class Memory: self.broadcast = broadcast def __str__(self): - s = 'ptr' if self.size == 0 else g_sizeTbl[int(math.log2(self.size))] - if self.broadcast: + if self.size == 0: + s = 'ptr' + else: + idx = self.size * max(self.broadcast, 1) + s = g_sizeTbl[int(math.log2(idx))] + if self.broadcast > 0: s += '_b' s += ' [' needPlus = False @@ -107,23 +111,36 @@ class Memory: s += ']' return s + # Xbyak uses 'ptr' when it can be automatically detected, so we should 
consider this in the comparison. def __eq__(self, rhs): - # xbyak uses ptr if it is automatically detected, so xword == ptr is true - if self.broadcast != rhs.broadcast: return False -# if not self.broadcast and 0 < self.size <= 8 and 0 < rhs.size <= 8 and self.size != rhs.size: return False - if not self.broadcast and self.size > 0 and rhs.size > 0 and self.size != rhs.size: return False + if self.broadcast > rhs.broadcast: + return rhs == self + assert(self.broadcast <= rhs.broadcast) + if self.broadcast == 0: + if rhs.broadcast > 0: return False + # Xbyak uses 'ptr' when it is automatically detected. + # Therefore, the comparison is true if 'ptr' (i.e., size = 0) is used. + if 0 < self.size and 0 < rhs.size and self.size != rhs.size: return False + if self.broadcast == 1: # _b + if rhs.broadcast == 1: # compare ptr_b with ptr_b + if self.size != rhs.size: + return False + if self.size > 0 and (self.size != rhs.size * rhs.broadcast): # compare ptr_b with {1toX} + return False + else: + if self.broadcast != rhs.broadcast: return False r = self.base == rhs.base and self.index == rhs.index and self.scale == rhs.scale and self.disp == rhs.disp return r def parseBroadcast(s): if '_b' in s: - return (s.replace('_b', ''), True) - r = re.search(r'({1to\d+})', s) + return (s.replace('_b', ''), 1) + r = re.search(r'({1to(\d+)})', s) if not r: - return (s, False) - return (s.replace(r.group(1), ''), True) + return (s, 0) + return (s.replace(r.group(1), ''), int(r.group(2))) -def parseMemory(s, broadcast=False): +def parseMemory(s, broadcast=0): org_s = s s = s.replace(' ', '').lower() @@ -133,7 +150,7 @@ def parseMemory(s, broadcast=False): scale = 0 disp = 0 - if not broadcast: + if broadcast == 0: (s, broadcast) = parseBroadcast(s) # Parse size @@ -157,7 +174,7 @@ def parseMemory(s, broadcast=False): s = s[3:] if s.startswith('_b'): - broadcast = True + broadcast = 1 s = s[2:] # Extract the content inside brackets @@ -335,7 +352,7 @@ def parseMemoryTest(): ('[]', 
Memory()), ('[rax]', Memory(0, rax)), ('ptr[rax]', Memory(0, rax)), - ('ptr_b[rax]', Memory(0, rax, broadcast=True)), + ('ptr_b[rax]', Memory(0, rax, broadcast=1)), ('dword[rbx]', Memory(4, rbx)), ('xword ptr[rcx]', Memory(16, rcx)), ('xmmword ptr[rcx]', Memory(16, rcx)), @@ -344,11 +361,36 @@ def parseMemoryTest(): ('[0x12345]', Memory(0, None, None, 0, 0x12345)), ('yword [rax+rdx*4]', Memory(32, rax, rdx, 4)), ('zword [rax+rdx*4+123]', Memory(64, rax, rdx, 4, 123)), + ('xword_b [rax]', Memory(16, rax, None, 0, 0, 1)), + ('dword [rax]{1to4}', Memory(16, rax, None, 0, 0, 1)), + ('yword_b [rax]', Memory(32, rax, None, 0, 0, 1)), + ('dword [rax]{1to8}', Memory(32, rax, None, 0, 0, 1)), ] for (s, expected) in tbl: my = parseMemory(s) assertEqualStr(my, expected) + print('compare test') + tbl = [ + ('ptr[rax]', 'dword[rax]', True), + ('byte[rax]', 'dword[rax]', False), + ('yword_b[rax]', 'dword [rax]{1to8}', True), + ('yword_b[rax]', 'word [rax]{1to16}', True), + ('zword_b[rax]', 'word [rax]{1to32}', True), + ('zword_b[rax]', 'word [rax]{1to16}', False), + ('dword [rax]{1to2}', 'dword [rax] {1to4}', False), + ('zword_b[rax]', 'xword_b [rax]', False), + ('ptr_b[rax]', 'word [rax]{1to32}', True), # ignore size + ] + for (lhs, rhs, eq) in tbl: + a = parseMemory(lhs) + b = parseMemory(rhs) + if eq: + assertEqual(a, b) + assertEqual(b, a) + else: + assert(parseMemory(lhs) != parseMemory(rhs)) + def parseNmemonicTest(): print('parseNmemonicTest') tbl = [ @@ -364,8 +406,8 @@ def parseNmemonicTest(): ('vpcompressw(zmm30 | k2 |T_z, zmm1);', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])), ('vpcompressw zmm30{k2}{z}, zmm1', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])), ('vpshldw(xmm9|k3|T_z, xmm2, ptr [rax + 0x40], 5);', Nmemonic('vpshldw', [xmm9, xmm2, Memory(0, rax, None, 0, 0x40), 5], [k3, T_z])), - ('vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, True), 5], [k3, T_z])), - ('vpshrdd xmm5{k3}{z}, 
xmm2, dword ptr [rax+0x40]{1to4}, 0x5', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, True), 5], [k3, T_z])), + ('vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, 1), 5], [k3, T_z])), + ('vpshrdd xmm5{k3}{z}, xmm2, dword ptr [rax+0x40]{1to4}, 0x5', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, 4), 5], [k3, T_z])), ('vcmpph(k1, xmm15, ptr[rax+64], 1);', Nmemonic('vcmpph', [k1, xmm15, Memory(0, rax, None, 0, 64), 1])), ] for (s, expected) in tbl: diff --git a/test/test_by_xed_all.bat b/test/test_by_xed_all.bat new file mode 100644 index 0000000..bb57cb4 --- /dev/null +++ b/test/test_by_xed_all.bat @@ -0,0 +1,5 @@ +set TARGETS=old.txt new-ymm.txt bf16.txt misc.txt convert.txt minmax.txt saturation.txt +for %%f in (%TARGETS%) do ( + echo %%f + call test_by_xed.bat avx10\%%f +)
\ No newline at end of file