diff options
author | MITSUNARI Shigeo <[email protected]> | 2024-11-11 16:03:43 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2024-11-11 16:03:43 +0900 |
commit | 4e44f4614ddbf038f2a6296f5b906d5c72691e0f (patch) | |
tree | b25625180accb6ca28387f9f5087346f7144d78e | |
parent | 2c02730b822d461aa2010f5b9cec6a8595243866 (diff) | |
parent | 2d6794ca7bcd9d6dec4fbd365ed3a20a594dc041 (diff) | |
download | xbyak-master.tar.gz xbyak-master.zip |
-rw-r--r-- | .github/workflows/main.yml | 11 | ||||
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | doc/changelog.md | 1 | ||||
-rw-r--r-- | meson.build | 2 | ||||
-rw-r--r-- | readme.md | 2 | ||||
-rw-r--r-- | readme.txt | 3 | ||||
-rw-r--r-- | test/Makefile | 3 | ||||
-rw-r--r-- | test/avx10/bf16.txt | 18 | ||||
-rw-r--r-- | test/cvt_test.cpp | 88 | ||||
-rw-r--r-- | xbyak/xbyak.h | 23 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 2 |
11 files changed, 110 insertions, 45 deletions
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3d520a3..e2346bd 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,11 +19,14 @@ jobs: steps: - uses: actions/checkout@v4 - run: apt -y update - - run: apt -y install g++-multilib libboost-dev make nasm yasm wget xz-utils python3 + - run: apt -y install g++-multilib libboost-dev make nasm yasm wget python3 #xz-utils - run: make test - run: make -C sample CXXFLAGS="-DXBYAK_NO_EXCEPTION" - run: | cd test - wget https://downloadmirror.intel.com/831748/sde-external-9.44.0-2024-08-22-lin.tar.xz - tar xvf sde-external-9.44.0-2024-08-22-lin.tar.xz - env XED=sde-external-9.44.0-2024-08-22-lin/xed64 make xed_test + #wget https://downloadmirror.intel.com/831748/sde-external-9.44.0-2024-08-22-lin.tar.xz + #tar xvf sde-external-9.44.0-2024-08-22-lin.tar.xz + wget https://github.com/herumi/xed-bin/raw/refs/heads/main/xed.tgz + tar xvf xed.tgz + ./xed -version + env XED=./xed make xed_test diff --git a/CMakeLists.txt b/CMakeLists.txt index d38ec83..2892588 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.5) -project(xbyak LANGUAGES CXX VERSION 7.21) +project(xbyak LANGUAGES CXX VERSION 7.22) file(GLOB headers xbyak/*.h) diff --git a/doc/changelog.md b/doc/changelog.md index 4406559..3a7fb85 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -1,5 +1,6 @@ # History +* 2024/Nov/11 ver 7.22 add Reg::cvt{128,256,512}(). tested by xed 2024.11.04 * 2024/Oct/31 ver 7.21 Enhance XMM register validation in SSE instructions * 2024/Oct/17 ver 7.20.1 Updated to comply with AVX10.2 specification rev 2.0 * 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10. 
diff --git a/meson.build b/meson.build index b7b465e..f47e207 100644 --- a/meson.build +++ b/meson.build @@ -5,7 +5,7 @@ project( 'xbyak', 'cpp', - version: '7.21', + version: '7.22', license: 'BSD-3-Clause', default_options: 'b_ndebug=if-release' ) @@ -1,5 +1,5 @@ -# Xbyak 7.21 [![Badge Build]][Build Status] +# Xbyak 7.22 [![Badge Build]][Build Status] *A JIT assembler for x86/x64 architectures supporting advanced instruction sets up to AVX10.2* @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.21
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.22
-----------------------------------------------------------------------------
◎概要
@@ -404,6 +404,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から -----------------------------------------------------------------------------
◎履歴
+2024/11/11 ver 7.22 Reg::cvt{128,256,512}(). xed 2024.11.04でテスト
2024/10/31 ver 7.21 SSE命令のXMMレジスタのチェックを厳密化
2024/10/17 ver 7.20.1 AVX10.2 rev 2.0仕様書の変更に追従
2024/10/15 ver 7.20 setDefaultEncoding/setDefaultEncodingAVX10の仕様確定
diff --git a/test/Makefile b/test/Makefile index a61895f..cf5c716 100644 --- a/test/Makefile +++ b/test/Makefile @@ -60,8 +60,7 @@ apx: apx.cpp $(XBYAK_INC) avx10_test: avx10_test.cpp $(XBYAK_INC) $(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64 -#TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt -TEST_FILES=old.txt new-ymm.txt bf16.txt misc.txt convert.txt minmax.txt saturation.txt +TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt xed_test: @set -e; \ for target in $(addprefix avx10/, $(TEST_FILES)); do \ diff --git a/test/avx10/bf16.txt b/test/avx10/bf16.txt index a387c61..c544e02 100644 --- a/test/avx10/bf16.txt +++ b/test/avx10/bf16.txt @@ -113,17 +113,17 @@ vfpclasspbf16(k7|k5, zword_b[rax+128], 13); vcomsbf16(xm2, xm3); vcomsbf16(xm2, ptr[rax+128]); -//vgetexppbf16(xm1|k3, xmm2); -//vgetexppbf16(xm1|k3, ptr[rax+128]); -//vgetexppbf16(xm1|k3, ptr_b[rax+128]); +vgetexppbf16(xm1|k3, xmm2); +vgetexppbf16(xm1|k3, ptr[rax+128]); +vgetexppbf16(xm1|k3, ptr_b[rax+128]); -//vgetexppbf16(ym1|k3, ymm2); -//vgetexppbf16(ym1|k3, ptr[rax+128]); -//vgetexppbf16(ym1|k3, ptr_b[rax+128]); +vgetexppbf16(ym1|k3, ymm2); +vgetexppbf16(ym1|k3, ptr[rax+128]); +vgetexppbf16(ym1|k3, ptr_b[rax+128]); -//vgetexppbf16(zm1|k3, zmm2); -//vgetexppbf16(zm1|k3, ptr[rax+128]); -//vgetexppbf16(zm1|k3, ptr_b[rax+128]); +vgetexppbf16(zm1|k3, zmm2); +vgetexppbf16(zm1|k3, ptr[rax+128]); +vgetexppbf16(zm1|k3, ptr_b[rax+128]); vgetmantpbf16(xm1|k3, xmm2, 3); vgetmantpbf16(xm1|k3, ptr[rax+128], 5); diff --git a/test/cvt_test.cpp b/test/cvt_test.cpp index e5e9748..20befef 100644 --- a/test/cvt_test.cpp +++ b/test/cvt_test.cpp @@ -12,38 +12,45 @@ const struct Ptn { Reg16 reg16; Reg32 reg32; Reg64 reg64; + Xmm x; + Ymm y; + Zmm z; } tbl[] = { - { &al, ax, eax, rax }, - { &bl, bx, ebx, rbx }, - { &cl, cx, ecx, rcx }, - { &dl, dx, edx, rdx }, - { &sil, si, esi, rsi }, - { &dil, di, edi, rdi }, - { &bpl, bp, ebp, rbp 
}, - { &spl, sp, esp, rsp }, - { &r8b, r8w, r8d, r8 }, - { &r9b, r9w, r9d, r9 }, - { &r10b, r10w, r10d, r10 }, - { &r11b, r11w, r11d, r11 }, - { &r12b, r12w, r12d, r12 }, - { &r13b, r13w, r13d, r13 }, - { &r14b, r14w, r14d, r14 }, - { &r15b, r15w, r15d, r15 }, + { &al, ax, eax, rax, xmm0, ymm0, zmm0 }, + { &bl, bx, ebx, rbx, xmm3, ymm3, zmm3 }, + { &cl, cx, ecx, rcx, xmm1, ymm1, zmm1 }, + { &dl, dx, edx, rdx, xmm2, ymm2, zmm2 }, + { &sil, si, esi, rsi, xmm6, ymm6, zmm6 }, + { &dil, di, edi, rdi, xmm7, ymm7, zmm7 }, + { &bpl, bp, ebp, rbp, xmm5, ymm5, zmm5 }, + { &spl, sp, esp, rsp, xmm4, ymm4, zmm4 }, + { &r8b, r8w, r8d, r8, xmm8, ymm8, zmm8 }, + { &r9b, r9w, r9d, r9, xmm9, ymm9, zmm9 }, + { &r10b, r10w, r10d, r10, xmm10, ymm10, zmm10 }, + { &r11b, r11w, r11d, r11, xmm11, ymm11, zmm11 }, + { &r12b, r12w, r12d, r12, xmm12, ymm12, zmm12 }, + { &r13b, r13w, r13d, r13, xmm13, ymm13, zmm13 }, + { &r14b, r14w, r14d, r14, xmm14, ymm14, zmm14 }, + { &r15b, r15w, r15d, r15, xmm15, ymm15, zmm15 }, + { &r31b, r31w, r31d, r31, xmm31, ymm31, zmm31 }, }; #else const struct Ptn { const Reg8 *reg8; Reg16 reg16; Reg32 reg32; + Xmm x; + Ymm y; + Zmm z; } tbl[] = { - { &al, ax, eax }, - { &bl, bx, ebx }, - { &cl, cx, ecx }, - { &dl, dx, edx }, - { 0, si, esi }, - { 0, di, edi }, - { 0, bp, ebp }, - { 0, sp, esp }, + { &al, ax, eax, xmm0, ymm0, zmm0 }, + { &bl, bx, ebx, xmm3, ymm3, zmm3 }, + { &cl, cx, ecx, xmm1, ymm1, zmm1 }, + { &dl, dx, edx, xmm2, ymm2, zmm2 }, + { 0, si, esi, xmm6, ymm6, zmm6 }, + { 0, di, edi, xmm7, ymm7, zmm7 }, + { 0, bp, ebp, xmm5, ymm5, zmm5 }, + { 0, sp, esp, xmm4, ymm4, zmm4 }, }; #endif @@ -54,13 +61,40 @@ CYBOZU_TEST_AUTO(cvt) CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt8() == *tbl[i].reg8); CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt16() == tbl[i].reg16); CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt32() == tbl[i].reg32); + CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt128() == tbl[i].x); + CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt256() == tbl[i].y); + CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt512() == 
tbl[i].z); CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt8() == *tbl[i].reg8); CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt8() == *tbl[i].reg8); + CYBOZU_TEST_ASSERT(tbl[i].x.cvt8() == *tbl[i].reg8); + CYBOZU_TEST_ASSERT(tbl[i].y.cvt8() == *tbl[i].reg8); + CYBOZU_TEST_ASSERT(tbl[i].z.cvt8() == *tbl[i].reg8); } CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt16() == tbl[i].reg16); CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt32() == tbl[i].reg32); + CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt128() == tbl[i].x); + CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt256() == tbl[i].y); + CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt512() == tbl[i].z); CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt16() == tbl[i].reg16); CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt32() == tbl[i].reg32); + CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt128() == tbl[i].x); + CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt256() == tbl[i].y); + CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt512() == tbl[i].z); + CYBOZU_TEST_ASSERT(tbl[i].x.cvt16() == tbl[i].reg16); + CYBOZU_TEST_ASSERT(tbl[i].x.cvt32() == tbl[i].reg32); + CYBOZU_TEST_ASSERT(tbl[i].x.cvt128() == tbl[i].x); + CYBOZU_TEST_ASSERT(tbl[i].x.cvt256() == tbl[i].y); + CYBOZU_TEST_ASSERT(tbl[i].x.cvt512() == tbl[i].z); + CYBOZU_TEST_ASSERT(tbl[i].y.cvt16() == tbl[i].reg16); + CYBOZU_TEST_ASSERT(tbl[i].y.cvt32() == tbl[i].reg32); + CYBOZU_TEST_ASSERT(tbl[i].y.cvt128() == tbl[i].x); + CYBOZU_TEST_ASSERT(tbl[i].y.cvt256() == tbl[i].y); + CYBOZU_TEST_ASSERT(tbl[i].y.cvt512() == tbl[i].z); + CYBOZU_TEST_ASSERT(tbl[i].z.cvt16() == tbl[i].reg16); + CYBOZU_TEST_ASSERT(tbl[i].z.cvt32() == tbl[i].reg32); + CYBOZU_TEST_ASSERT(tbl[i].z.cvt128() == tbl[i].x); + CYBOZU_TEST_ASSERT(tbl[i].z.cvt256() == tbl[i].y); + CYBOZU_TEST_ASSERT(tbl[i].y.cvt512() == tbl[i].z); #ifdef XBYAK64 if (tbl[i].reg8) { CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt8() == *tbl[i].reg8); @@ -69,8 +103,14 @@ CYBOZU_TEST_AUTO(cvt) CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt16() == tbl[i].reg16); CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt32() == tbl[i].reg32); CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt64() == tbl[i].reg64); + 
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt128() == tbl[i].x); + CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt256() == tbl[i].y); + CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt512() == tbl[i].z); CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt64() == tbl[i].reg64); CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt64() == tbl[i].reg64); + CYBOZU_TEST_ASSERT(tbl[i].x.cvt64() == tbl[i].reg64); + CYBOZU_TEST_ASSERT(tbl[i].y.cvt64() == tbl[i].reg64); + CYBOZU_TEST_ASSERT(tbl[i].z.cvt64() == tbl[i].reg64); #endif } { diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 28af005..ed7706a 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -155,7 +155,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x7210 /* 0xABCD = A.BC(.D) */ + VERSION = 0x7220 /* 0xABCD = A.BC(.D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED @@ -786,6 +786,9 @@ class Label; struct Reg8; struct Reg16; struct Reg32; +struct Xmm; +struct Ymm; +struct Zmm; #ifdef XBYAK64 struct Reg64; #endif @@ -801,6 +804,9 @@ public: #ifdef XBYAK64 Reg64 cvt64() const; #endif + Xmm cvt128() const; + Ymm cvt256() const; + Zmm cvt512() const; Reg operator|(const ApxFlagNF&) const { Reg r(*this); r.setNF(); return r; } Reg operator|(const ApxFlagZU&) const { Reg r(*this); r.setZU(); return r; } }; @@ -938,6 +944,21 @@ inline Reg64 Reg::cvt64() const } #endif +inline Xmm Reg::cvt128() const +{ + return Xmm(changeBit(128).getIdx()); +} + +inline Ymm Reg::cvt256() const +{ + return Ymm(changeBit(256).getIdx()); +} + +inline Zmm Reg::cvt512() const +{ + return Zmm(changeBit(512).getIdx()); +} + #ifndef XBYAK_DISABLE_SEGMENT // not derived from Reg class Segment { diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 64930ab..1f49146 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,4 +1,4 @@ -const char *getVersionString() const { return "7.21"; } +const char *getVersionString() const { return "7.22"; } void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); } void aand(const Address& addr, const Reg32e 
&reg) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); } void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); } |