aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <[email protected]>2024-11-11 16:03:43 +0900
committerMITSUNARI Shigeo <[email protected]>2024-11-11 16:03:43 +0900
commit4e44f4614ddbf038f2a6296f5b906d5c72691e0f (patch)
treeb25625180accb6ca28387f9f5087346f7144d78e
parent2c02730b822d461aa2010f5b9cec6a8595243866 (diff)
parent2d6794ca7bcd9d6dec4fbd365ed3a20a594dc041 (diff)
downloadxbyak-master.tar.gz
xbyak-master.zip
Merge branch 'dev'HEADv7.22master
-rw-r--r--.github/workflows/main.yml11
-rw-r--r--CMakeLists.txt2
-rw-r--r--doc/changelog.md1
-rw-r--r--meson.build2
-rw-r--r--readme.md2
-rw-r--r--readme.txt3
-rw-r--r--test/Makefile3
-rw-r--r--test/avx10/bf16.txt18
-rw-r--r--test/cvt_test.cpp88
-rw-r--r--xbyak/xbyak.h23
-rw-r--r--xbyak/xbyak_mnemonic.h2
11 files changed, 110 insertions, 45 deletions
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 3d520a3..e2346bd 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -19,11 +19,14 @@ jobs:
steps:
- uses: actions/checkout@v4
- run: apt -y update
- - run: apt -y install g++-multilib libboost-dev make nasm yasm wget xz-utils python3
+ - run: apt -y install g++-multilib libboost-dev make nasm yasm wget python3 #xz-utils
- run: make test
- run: make -C sample CXXFLAGS="-DXBYAK_NO_EXCEPTION"
- run: |
cd test
- wget https://downloadmirror.intel.com/831748/sde-external-9.44.0-2024-08-22-lin.tar.xz
- tar xvf sde-external-9.44.0-2024-08-22-lin.tar.xz
- env XED=sde-external-9.44.0-2024-08-22-lin/xed64 make xed_test
+ #wget https://downloadmirror.intel.com/831748/sde-external-9.44.0-2024-08-22-lin.tar.xz
+ #tar xvf sde-external-9.44.0-2024-08-22-lin.tar.xz
+ wget https://github.com/herumi/xed-bin/raw/refs/heads/main/xed.tgz
+ tar xvf xed.tgz
+ ./xed -version
+ env XED=./xed make xed_test
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d38ec83..2892588 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.5)
-project(xbyak LANGUAGES CXX VERSION 7.21)
+project(xbyak LANGUAGES CXX VERSION 7.22)
file(GLOB headers xbyak/*.h)
diff --git a/doc/changelog.md b/doc/changelog.md
index 4406559..3a7fb85 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -1,5 +1,6 @@
# History
+* 2024/Nov/11 ver 7.22 Add Reg::cvt{128,256,512}(). Tested with xed 2024.11.04
* 2024/Oct/31 ver 7.21 Enhance XMM register validation in SSE instructions
* 2024/Oct/17 ver 7.20.1 Updated to comply with AVX10.2 specification rev 2.0
* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10.
diff --git a/meson.build b/meson.build
index b7b465e..f47e207 100644
--- a/meson.build
+++ b/meson.build
@@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
- version: '7.21',
+ version: '7.22',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)
diff --git a/readme.md b/readme.md
index 63a0c00..073167c 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
-# Xbyak 7.21 [![Badge Build]][Build Status]
+# Xbyak 7.22 [![Badge Build]][Build Status]
*A JIT assembler for x86/x64 architectures supporting advanced instruction sets up to AVX10.2*
diff --git a/readme.txt b/readme.txt
index d9fef26..9cf8dd3 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
- C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.21
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.22
-----------------------------------------------------------------------------
◎概要
@@ -404,6 +404,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴
+2024/11/11 ver 7.22 Reg::cvt{128,256,512}()を追加. xed 2024.11.04でテスト
2024/10/31 ver 7.21 SSE命令のXMMレジスタのチェックを厳密化
2024/10/17 ver 7.20.1 AVX10.2 rev 2.0仕様書の変更に追従
2024/10/15 ver 7.20 setDefaultEncoding/setDefaultEncodingAVX10の仕様確定
diff --git a/test/Makefile b/test/Makefile
index a61895f..cf5c716 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -60,8 +60,7 @@ apx: apx.cpp $(XBYAK_INC)
avx10_test: avx10_test.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64
-#TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt
-TEST_FILES=old.txt new-ymm.txt bf16.txt misc.txt convert.txt minmax.txt saturation.txt
+TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt
xed_test:
@set -e; \
for target in $(addprefix avx10/, $(TEST_FILES)); do \
diff --git a/test/avx10/bf16.txt b/test/avx10/bf16.txt
index a387c61..c544e02 100644
--- a/test/avx10/bf16.txt
+++ b/test/avx10/bf16.txt
@@ -113,17 +113,17 @@ vfpclasspbf16(k7|k5, zword_b[rax+128], 13);
vcomsbf16(xm2, xm3);
vcomsbf16(xm2, ptr[rax+128]);
-//vgetexppbf16(xm1|k3, xmm2);
-//vgetexppbf16(xm1|k3, ptr[rax+128]);
-//vgetexppbf16(xm1|k3, ptr_b[rax+128]);
+vgetexppbf16(xm1|k3, xmm2);
+vgetexppbf16(xm1|k3, ptr[rax+128]);
+vgetexppbf16(xm1|k3, ptr_b[rax+128]);
-//vgetexppbf16(ym1|k3, ymm2);
-//vgetexppbf16(ym1|k3, ptr[rax+128]);
-//vgetexppbf16(ym1|k3, ptr_b[rax+128]);
+vgetexppbf16(ym1|k3, ymm2);
+vgetexppbf16(ym1|k3, ptr[rax+128]);
+vgetexppbf16(ym1|k3, ptr_b[rax+128]);
-//vgetexppbf16(zm1|k3, zmm2);
-//vgetexppbf16(zm1|k3, ptr[rax+128]);
-//vgetexppbf16(zm1|k3, ptr_b[rax+128]);
+vgetexppbf16(zm1|k3, zmm2);
+vgetexppbf16(zm1|k3, ptr[rax+128]);
+vgetexppbf16(zm1|k3, ptr_b[rax+128]);
vgetmantpbf16(xm1|k3, xmm2, 3);
vgetmantpbf16(xm1|k3, ptr[rax+128], 5);
diff --git a/test/cvt_test.cpp b/test/cvt_test.cpp
index e5e9748..20befef 100644
--- a/test/cvt_test.cpp
+++ b/test/cvt_test.cpp
@@ -12,38 +12,45 @@ const struct Ptn {
Reg16 reg16;
Reg32 reg32;
Reg64 reg64;
+ Xmm x;
+ Ymm y;
+ Zmm z;
} tbl[] = {
- { &al, ax, eax, rax },
- { &bl, bx, ebx, rbx },
- { &cl, cx, ecx, rcx },
- { &dl, dx, edx, rdx },
- { &sil, si, esi, rsi },
- { &dil, di, edi, rdi },
- { &bpl, bp, ebp, rbp },
- { &spl, sp, esp, rsp },
- { &r8b, r8w, r8d, r8 },
- { &r9b, r9w, r9d, r9 },
- { &r10b, r10w, r10d, r10 },
- { &r11b, r11w, r11d, r11 },
- { &r12b, r12w, r12d, r12 },
- { &r13b, r13w, r13d, r13 },
- { &r14b, r14w, r14d, r14 },
- { &r15b, r15w, r15d, r15 },
+ { &al, ax, eax, rax, xmm0, ymm0, zmm0 },
+ { &bl, bx, ebx, rbx, xmm3, ymm3, zmm3 },
+ { &cl, cx, ecx, rcx, xmm1, ymm1, zmm1 },
+ { &dl, dx, edx, rdx, xmm2, ymm2, zmm2 },
+ { &sil, si, esi, rsi, xmm6, ymm6, zmm6 },
+ { &dil, di, edi, rdi, xmm7, ymm7, zmm7 },
+ { &bpl, bp, ebp, rbp, xmm5, ymm5, zmm5 },
+ { &spl, sp, esp, rsp, xmm4, ymm4, zmm4 },
+ { &r8b, r8w, r8d, r8, xmm8, ymm8, zmm8 },
+ { &r9b, r9w, r9d, r9, xmm9, ymm9, zmm9 },
+ { &r10b, r10w, r10d, r10, xmm10, ymm10, zmm10 },
+ { &r11b, r11w, r11d, r11, xmm11, ymm11, zmm11 },
+ { &r12b, r12w, r12d, r12, xmm12, ymm12, zmm12 },
+ { &r13b, r13w, r13d, r13, xmm13, ymm13, zmm13 },
+ { &r14b, r14w, r14d, r14, xmm14, ymm14, zmm14 },
+ { &r15b, r15w, r15d, r15, xmm15, ymm15, zmm15 },
+ { &r31b, r31w, r31d, r31, xmm31, ymm31, zmm31 },
};
#else
const struct Ptn {
const Reg8 *reg8;
Reg16 reg16;
Reg32 reg32;
+ Xmm x;
+ Ymm y;
+ Zmm z;
} tbl[] = {
- { &al, ax, eax },
- { &bl, bx, ebx },
- { &cl, cx, ecx },
- { &dl, dx, edx },
- { 0, si, esi },
- { 0, di, edi },
- { 0, bp, ebp },
- { 0, sp, esp },
+ { &al, ax, eax, xmm0, ymm0, zmm0 },
+ { &bl, bx, ebx, xmm3, ymm3, zmm3 },
+ { &cl, cx, ecx, xmm1, ymm1, zmm1 },
+ { &dl, dx, edx, xmm2, ymm2, zmm2 },
+ { 0, si, esi, xmm6, ymm6, zmm6 },
+ { 0, di, edi, xmm7, ymm7, zmm7 },
+ { 0, bp, ebp, xmm5, ymm5, zmm5 },
+ { 0, sp, esp, xmm4, ymm4, zmm4 },
};
#endif
@@ -54,13 +61,40 @@ CYBOZU_TEST_AUTO(cvt)
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt8() == *tbl[i].reg8);
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt32() == tbl[i].reg32);
+ CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt128() == tbl[i].x);
+ CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt256() == tbl[i].y);
+ CYBOZU_TEST_ASSERT(tbl[i].reg8->cvt512() == tbl[i].z);
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt8() == *tbl[i].reg8);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt8() == *tbl[i].reg8);
+ CYBOZU_TEST_ASSERT(tbl[i].x.cvt8() == *tbl[i].reg8);
+ CYBOZU_TEST_ASSERT(tbl[i].y.cvt8() == *tbl[i].reg8);
+ CYBOZU_TEST_ASSERT(tbl[i].z.cvt8() == *tbl[i].reg8);
}
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt32() == tbl[i].reg32);
+ CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt128() == tbl[i].x);
+ CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt256() == tbl[i].y);
+ CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt512() == tbl[i].z);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt32() == tbl[i].reg32);
+ CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt128() == tbl[i].x);
+ CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt256() == tbl[i].y);
+ CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt512() == tbl[i].z);
+ CYBOZU_TEST_ASSERT(tbl[i].x.cvt16() == tbl[i].reg16);
+ CYBOZU_TEST_ASSERT(tbl[i].x.cvt32() == tbl[i].reg32);
+ CYBOZU_TEST_ASSERT(tbl[i].x.cvt128() == tbl[i].x);
+ CYBOZU_TEST_ASSERT(tbl[i].x.cvt256() == tbl[i].y);
+ CYBOZU_TEST_ASSERT(tbl[i].x.cvt512() == tbl[i].z);
+ CYBOZU_TEST_ASSERT(tbl[i].y.cvt16() == tbl[i].reg16);
+ CYBOZU_TEST_ASSERT(tbl[i].y.cvt32() == tbl[i].reg32);
+ CYBOZU_TEST_ASSERT(tbl[i].y.cvt128() == tbl[i].x);
+ CYBOZU_TEST_ASSERT(tbl[i].y.cvt256() == tbl[i].y);
+ CYBOZU_TEST_ASSERT(tbl[i].y.cvt512() == tbl[i].z);
+ CYBOZU_TEST_ASSERT(tbl[i].z.cvt16() == tbl[i].reg16);
+ CYBOZU_TEST_ASSERT(tbl[i].z.cvt32() == tbl[i].reg32);
+ CYBOZU_TEST_ASSERT(tbl[i].z.cvt128() == tbl[i].x);
+ CYBOZU_TEST_ASSERT(tbl[i].z.cvt256() == tbl[i].y);
+ CYBOZU_TEST_ASSERT(tbl[i].z.cvt512() == tbl[i].z); // Zmm -> Zmm; the Ymm group above already asserts y.cvt512()
#ifdef XBYAK64
if (tbl[i].reg8) {
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt8() == *tbl[i].reg8);
@@ -69,8 +103,14 @@ CYBOZU_TEST_AUTO(cvt)
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt16() == tbl[i].reg16);
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt32() == tbl[i].reg32);
CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt64() == tbl[i].reg64);
+ CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt128() == tbl[i].x);
+ CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt256() == tbl[i].y);
+ CYBOZU_TEST_ASSERT(tbl[i].reg64.cvt512() == tbl[i].z);
CYBOZU_TEST_ASSERT(tbl[i].reg16.cvt64() == tbl[i].reg64);
CYBOZU_TEST_ASSERT(tbl[i].reg32.cvt64() == tbl[i].reg64);
+ CYBOZU_TEST_ASSERT(tbl[i].x.cvt64() == tbl[i].reg64);
+ CYBOZU_TEST_ASSERT(tbl[i].y.cvt64() == tbl[i].reg64);
+ CYBOZU_TEST_ASSERT(tbl[i].z.cvt64() == tbl[i].reg64);
#endif
}
{
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 28af005..ed7706a 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -155,7 +155,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
- VERSION = 0x7210 /* 0xABCD = A.BC(.D) */
+ VERSION = 0x7220 /* 0xABCD = A.BC(.D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@@ -786,6 +786,9 @@ class Label;
struct Reg8;
struct Reg16;
struct Reg32;
+struct Xmm;
+struct Ymm;
+struct Zmm;
#ifdef XBYAK64
struct Reg64;
#endif
@@ -801,6 +804,9 @@ public:
#ifdef XBYAK64
Reg64 cvt64() const;
#endif
+ Xmm cvt128() const;
+ Ymm cvt256() const;
+ Zmm cvt512() const;
Reg operator|(const ApxFlagNF&) const { Reg r(*this); r.setNF(); return r; }
Reg operator|(const ApxFlagZU&) const { Reg r(*this); r.setZU(); return r; }
};
@@ -938,6 +944,21 @@ inline Reg64 Reg::cvt64() const
}
#endif
+inline Xmm Reg::cvt128() const // builds the Xmm from the index of changeBit(128)'s result
+{
+ return Xmm(changeBit(128).getIdx()); // only the index is passed on to the Xmm constructor
+}
+
+inline Ymm Reg::cvt256() const // builds the Ymm from the index of changeBit(256)'s result
+{
+ return Ymm(changeBit(256).getIdx()); // only the index is passed on to the Ymm constructor
+}
+
+inline Zmm Reg::cvt512() const // builds the Zmm from the index of changeBit(512)'s result
+{
+ return Zmm(changeBit(512).getIdx()); // only the index is passed on to the Zmm constructor
+}
+
#ifndef XBYAK_DISABLE_SEGMENT
// not derived from Reg
class Segment {
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 64930ab..1f49146 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "7.21"; }
+const char *getVersionString() const { return "7.22"; }
void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); }
void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); }
void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }