aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/main.yml5
-rw-r--r--CMakeLists.txt2
-rw-r--r--doc/changelog.md2
-rw-r--r--gen/gen_avx512.cpp4
-rw-r--r--meson.build2
-rw-r--r--readme.md2
-rw-r--r--readme.txt2
-rw-r--r--test/Makefile6
-rw-r--r--test/misc.cpp28
-rw-r--r--test/target/avx10.txt149
-rw-r--r--test/test_by_xed.cpp23
-rw-r--r--test/test_by_xed.py287
-rwxr-xr-xtest/test_by_xed.sh23
-rw-r--r--xbyak/xbyak.h2
-rw-r--r--xbyak/xbyak_mnemonic.h6
15 files changed, 516 insertions, 27 deletions
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 2e72a89..814a85b 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,5 +1,8 @@
name: test
-on: [push]
+on:
+ push:
+ branches:
+ - '*'
defaults:
run:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 28b35cb..79b0f51 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.5)
-project(xbyak LANGUAGES CXX VERSION 7.09)
+project(xbyak LANGUAGES CXX VERSION 7.09.1)
file(GLOB headers xbyak/*.h)
diff --git a/doc/changelog.md b/doc/changelog.md
index 0829929..10f6a9d 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -1,6 +1,6 @@
# History
-* 2024/Oct/08 ver 7.09 support YMM embedded rounding of AVX10.2 and fix some nmemonics with {sae}/{er}.
+* 2024/Oct/08 ver 7.09 support YMM embedded rounding of AVX10.2 and fix some mnemonics with {sae}/{er}.
* 2024/Oct/07 ver 7.08 support rdfsbase etc.
* 2024/Aug/29 ver 7.07.1 adapt to NASM 2.16.03 output of xchg (The functionality stays the same.)
* 2024/Jun/11 ver 7.07 support xresldtrk/xsusldtrk
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 2b294ee..79ec79a 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -251,8 +251,8 @@ void putXM_X()
{ 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
{ 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
- { 0x63, "vcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 },
- { 0x63, "vcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 },
+ { 0x63, "vpcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 },
+ { 0x63, "vpcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
diff --git a/meson.build b/meson.build
index 663c68b..0fea416 100644
--- a/meson.build
+++ b/meson.build
@@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
- version: '7.09',
+ version: '7.09.1',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)
diff --git a/readme.md b/readme.md
index 12f5bc7..3ee7dd1 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
-# Xbyak 7.09 [![Badge Build]][Build Status]
+# Xbyak 7.09.1 [![Badge Build]][Build Status]
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
diff --git a/readme.txt b/readme.txt
index ae0f973..a82c408 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
- C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.09
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.09.1
-----------------------------------------------------------------------------
◎概要
diff --git a/test/Makefile b/test/Makefile
index 862c110..ca2f0bb 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -60,6 +60,10 @@ apx: apx.cpp $(XBYAK_INC)
avx10_test: avx10_test.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64
+# Instruction lists under target/ that xed_test feeds to test_by_xed.sh.
+TEST_FILES=avx10.txt misc.txt
+# xed_test names an action, not a file, so it is declared phony.
+.PHONY: xed_test
+# Run test_by_xed.sh once per list. A shell `for` loop only reports the status
+# of its last iteration, so `|| exit 1` stops at the first failing list and
+# makes the recipe (and therefore make) fail.
+xed_test:
+	@for target in $(addprefix target/, $(TEST_FILES)); do ./test_by_xed.sh $$target || exit 1; done
+
test_nm: normalize_prefix $(TARGET)
$(MAKE) -C ../gen
ifneq ($(ONLY_64BIT),1)
@@ -118,7 +122,7 @@ test: detect_x32
$(MAKE) test_avx512
clean:
- $(RM) a.asm *.lst *.obj *.o $(TARGET) lib_run nm.cpp nm_frame make_512 avx10_test
+ $(RM) a.asm *.lst *.obj *.o $(TARGET) lib_run nm.cpp nm_frame make_512 avx10_test detect_x32
lib_run: lib_test.cpp lib_run.cpp lib.h
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
diff --git a/test/misc.cpp b/test/misc.cpp
index 18760d4..bc5083b 100644
--- a/test/misc.cpp
+++ b/test/misc.cpp
@@ -285,24 +285,24 @@ CYBOZU_TEST_AUTO(vpclmulqdq)
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
-CYBOZU_TEST_AUTO(vcompressb_w)
+CYBOZU_TEST_AUTO(vpcompressb_w)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
- vcompressb(ptr[rax + 64], xmm1);
- vcompressb(xmm30 | k5, xmm1);
- vcompressb(ptr[rax + 64], ymm1);
- vcompressb(ymm30 | k3 |T_z, ymm1);
- vcompressb(ptr[rax + 64], zmm1);
- vcompressb(zmm30 | k2 |T_z, zmm1);
-
- vcompressw(ptr[rax + 64], xmm1);
- vcompressw(xmm30 | k5, xmm1);
- vcompressw(ptr[rax + 64], ymm1);
- vcompressw(ymm30 | k3 |T_z, ymm1);
- vcompressw(ptr[rax + 64], zmm1);
- vcompressw(zmm30 | k2 |T_z, zmm1);
+ vpcompressb(ptr[rax + 64], xmm1);
+ vpcompressb(xmm30 | k5, xmm1);
+ vpcompressb(ptr[rax + 64], ymm1);
+ vpcompressb(ymm30 | k3 |T_z, ymm1);
+ vpcompressb(ptr[rax + 64], zmm1);
+ vpcompressb(zmm30 | k2 |T_z, zmm1);
+
+ vpcompressw(ptr[rax + 64], xmm1);
+ vpcompressw(xmm30 | k5, xmm1);
+ vpcompressw(ptr[rax + 64], ymm1);
+ vpcompressw(ymm30 | k3 |T_z, ymm1);
+ vpcompressw(ptr[rax + 64], zmm1);
+ vpcompressw(zmm30 | k2 |T_z, zmm1);
}
} c;
const uint8_t tbl[] = {
diff --git a/test/target/avx10.txt b/test/target/avx10.txt
new file mode 100644
index 0000000..8ee52ca
--- /dev/null
+++ b/test/target/avx10.txt
@@ -0,0 +1,149 @@
+vaddpd(ymm1, ymm2, ymm3 |T_rn_sae);
+vaddph(ymm1, ymm2, ymm3 |T_rn_sae);
+vaddps(ymm1, ymm2, ymm3 |T_rn_sae);
+vcmppd(k1, ymm2, ymm3 |T_sae, 3);
+vcmpph(k1, ymm2, ymm3 |T_sae, 3);
+vcmpps(k1, ymm2, ymm3 |T_sae, 3);
+vcvtdq2ph(xmm1, ymm2 |T_rn_sae);
+vcvtdq2ps(ymm1, ymm2 |T_rn_sae);
+vcvtpd2dq(xmm1, ymm2 |T_rn_sae);
+vcvtpd2ph(xmm1, ymm2 |T_rn_sae);
+vcvtpd2ps(xmm1, ymm2 |T_rn_sae);
+vcvtpd2qq(ymm1, ymm2 |T_rn_sae);
+vcvtpd2udq(xmm1, ymm2 |T_rn_sae);
+vcvtpd2uqq(ymm1, ymm2 |T_rn_sae);
+vcvtph2dq(ymm1, xmm2 |T_rn_sae);
+vcvtph2pd(ymm1, xmm2 |T_sae);
+vcvtph2ps(ymm1, xmm2 |T_sae);
+vcvtph2psx(ymm1, xmm2 |T_sae);
+vcvtph2qq(ymm1, xmm2 |T_rn_sae);
+vcvtph2udq(ymm1, xmm2 |T_rn_sae);
+vcvtph2uqq(ymm1, xmm2 |T_rn_sae);
+vcvtph2uw(ymm1, ymm2 |T_rn_sae);
+vcvtph2w(ymm1, ymm2 |T_rn_sae);
+vcvtps2dq(ymm1, ymm2 |T_rn_sae);
+vcvtps2pd(ymm1, xmm2 |T_sae);
+vcvtps2ph(xmm1, ymm2 |T_sae, 3);
+vcvtps2phx(xmm1, ymm2 |T_rn_sae);
+vcvtps2qq(ymm1, xmm2 |T_rn_sae);
+vcvtps2udq(ymm1, ymm2 |T_rn_sae);
+vcvtps2uqq(ymm1, xmm2 |T_rn_sae);
+vcvtqq2pd(ymm1, ymm2 |T_rn_sae);
+vcvtqq2ph(xmm1, ymm2 |T_rn_sae);
+vcvtqq2ps(xmm1, ymm2 |T_rn_sae);
+vcvttpd2dq(xmm1, ymm2 |T_sae);
+vcvttpd2qq(ymm1, ymm2 |T_sae);
+vcvttpd2udq(xmm1, ymm2 |T_sae);
+vcvttpd2uqq(ymm1, ymm2 |T_sae);
+vcvttph2dq(ymm1, xmm2 |T_sae);
+vcvttph2qq(ymm1, xmm2 |T_sae);
+vcvttph2udq(ymm1, xmm2 |T_sae);
+vcvttph2uqq(ymm1, xmm2 |T_sae);
+vcvttph2uw(ymm1, ymm2 |T_sae);
+vcvttph2w(ymm1, ymm2 |T_sae);
+vcvttps2dq(ymm1, ymm2 |T_sae);
+vcvttps2qq(ymm1, xmm2 |T_sae);
+vcvttps2udq(ymm1, ymm2 |T_sae);
+vcvttps2uqq(ymm1, xmm2 |T_sae);
+vcvtudq2ph(xmm1, ymm2 |T_rn_sae);
+vcvtudq2ps(ymm1, ymm2 |T_rn_sae);
+vcvtuqq2pd(ymm1, ymm2 |T_rn_sae);
+vcvtuqq2ph(xmm1, ymm2 |T_rn_sae);
+vcvtuqq2ps(xmm1, ymm2 |T_rn_sae);
+vcvtuw2ph(ymm1, ymm2 |T_rn_sae);
+vcvtw2ph(ymm1, ymm2 |T_rn_sae);
+vdivpd(ymm1, ymm2, ymm3 |T_rn_sae);
+vdivph(ymm1, ymm2, ymm3 |T_rn_sae);
+vdivps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfcmaddcph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfcmulcph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfixupimmpd(ymm1, ymm2, ymm3 |T_sae, 3);
+vfixupimmps(ymm1, ymm2, ymm3 |T_sae, 3);
+vfmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmaddcph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmaddsub132pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmaddsub132ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmaddsub132ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmaddsub213pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmaddsub213ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmaddsub213ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmaddsub231pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmaddsub231ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmaddsub231ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsubadd132pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsubadd132ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsubadd132ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsubadd213pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsubadd213ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsubadd213ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsubadd231pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsubadd231ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmsubadd231ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfmulcph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae);
+vfnmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae);
+vgetexppd(ymm1, ymm2 |T_sae);
+vgetexpph(ymm1, ymm2 |T_sae);
+vgetexpps(ymm1, ymm2 |T_sae);
+vgetmantpd(ymm1, ymm2 |T_sae, 3);
+vgetmantph(ymm1, ymm2 |T_sae, 3);
+vgetmantps(ymm1, ymm2 |T_sae, 3);
+vmaxpd(ymm1, ymm2, ymm3 |T_sae);
+vmaxph(ymm1, ymm2, ymm3 |T_sae);
+vmaxps(ymm1, ymm2, ymm3 |T_sae);
+vminpd(ymm1, ymm2, ymm3 |T_sae);
+vminph(ymm1, ymm2, ymm3 |T_sae);
+vminps(ymm1, ymm2, ymm3 |T_sae);
+vmulpd(ymm1, ymm2, ymm3 |T_rn_sae);
+vmulph(ymm1, ymm2, ymm3 |T_rn_sae);
+vmulps(ymm1, ymm2, ymm3 |T_rn_sae);
+vrangepd(ymm1, ymm2, ymm3 |T_sae, 3);
+vrangeps(ymm1, ymm2, ymm3 |T_sae, 3);
+vreducepd(ymm1, ymm2 |T_sae, 3);
+vreduceph(ymm1, ymm2 |T_sae, 3);
+vreduceps(ymm1, ymm2 |T_sae, 3);
+vrndscalepd(ymm1, ymm2 |T_sae, 3);
+vrndscaleph(ymm1, ymm2 |T_sae, 3);
+vrndscaleps(ymm1, ymm2 |T_sae, 3);
+vscalefpd(ymm1, ymm2, ymm3 |T_rn_sae);
+vscalefph(ymm1, ymm2, ymm3 |T_rn_sae);
+vscalefps(ymm1, ymm2, ymm3 |T_rn_sae);
+vsqrtpd(ymm1, ymm2 |T_rn_sae);
+vsqrtph(ymm1, ymm2 |T_rn_sae);
+vsqrtps(ymm1, ymm2 |T_rn_sae);
+vsubpd(ymm1, ymm2, ymm3 |T_rn_sae);
+vsubph(ymm1, ymm2, ymm3 |T_rn_sae);
+vsubps(ymm1, ymm2, ymm3 |T_rn_sae);
diff --git a/test/test_by_xed.cpp b/test/test_by_xed.cpp
new file mode 100644
index 0000000..08dc8af
--- /dev/null
+++ b/test/test_by_xed.cpp
@@ -0,0 +1,23 @@
+#include <stdio.h>
+#include <xbyak/xbyak.h>
+
+// Code generator whose constructor body is the text of "cpp.txt".
+// test_by_xed.sh copies the instruction list under test to cpp.txt before
+// compiling, so every line of that list is an xbyak call emitted here.
+struct Code : Xbyak::CodeGenerator {
+ Code()
+ {
+#include "cpp.txt"
+ }
+};
+
+// Generate the code described by Code and dump its raw bytes to the file "bin".
+// Returns 0 on success and 1 when code generation throws or "bin" cannot be
+// written, so a caller checking the exit status never goes on to read a stale
+// or truncated file.
+int main()
+	try
+{
+	Code c;
+	FILE *fp = fopen("bin", "wb");
+	if (!fp) {
+		perror("bin");
+		return 1;
+	}
+	const size_t size = c.getSize();
+	const bool written = fwrite(c.getCode(), 1, size, fp) == size;
+	// fclose() is always called to release the handle; it can also report a
+	// write error that was deferred by buffering.
+	const bool closed = fclose(fp) == 0;
+	if (!written || !closed) {
+		fprintf(stderr, "ERR can't write bin\n");
+		return 1;
+	}
+	return 0;
+} catch (const std::exception& e) {
+	printf("ERR %s\n", e.what());
+	return 1;
+}
diff --git a/test/test_by_xed.py b/test/test_by_xed.py
new file mode 100644
index 0000000..f24d7f6
--- /dev/null
+++ b/test/test_by_xed.py
@@ -0,0 +1,287 @@
+import re
+import math
+import sys
+
+class Reg:
+    """Register operand that is identified only by its textual name."""
+
+    def __init__(self, s):
+        # s is the register name exactly as written in the listing, e.g. 'rax'
+        self.name = s
+
+    def __str__(self):
+        return self.name
+
+# Names of all registers the parser recognises, one list entry per name:
+# general-purpose registers in their 64/32/16/8-bit spellings (including
+# r16-r31), opmask registers k1-k7, and xmm/ymm/zmm 0-31.
+g_regTbl = '''
+eax ecx edx ebx esp ebp esi edi
+ax cx dx bx sp bp si di
+al cl dl bl ah ch dh bh
+k1 k2 k3 k4 k5 k6 k7
+rax rcx rdx rbx rsp rbp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15
+r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29 r30 r31
+r8d r9d r10d r11d r12d r13d r14d r15d
+r16d r17d r18d r19d r20d r21d r22d r23d r24d r25d r26d r27d r28d r29d r30d r31d
+r8w r9w r10w r11w r12w r13w r14w r15w
+r16w r17w r18w r19w r20w r21w r22w r23w r24w r25w r26w r27w r28w r29w r30w r31w
+r8b r9b r10b r11b r12b r13b r14b r15b
+r16b r17b r18b r19b r20b r21b r22b r23b r24b r25b r26b r27b r28b r29b r30b r31b
+spl bpl sil dil
+xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7
+xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15
+xmm16 xmm17 xmm18 xmm19 xmm20 xmm21 xmm22 xmm23
+xmm24 xmm25 xmm26 xmm27 xmm28 xmm29 xmm30 xmm31
+ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm7
+ymm8 ymm9 ymm10 ymm11 ymm12 ymm13 ymm14 ymm15
+ymm16 ymm17 ymm18 ymm19 ymm20 ymm21 ymm22 ymm23
+ymm24 ymm25 ymm26 ymm27 ymm28 ymm29 ymm30 ymm31
+zmm0 zmm1 zmm2 zmm3 zmm4 zmm5 zmm6 zmm7
+zmm8 zmm9 zmm10 zmm11 zmm12 zmm13 zmm14 zmm15
+zmm16 zmm17 zmm18 zmm19 zmm20 zmm21 zmm22 zmm23
+zmm24 zmm25 zmm26 zmm27 zmm28 zmm29 zmm30 zmm31
+'''.split()
+
+# Define one module-level Reg constant per register name, so the self-test
+# tables can write `rax` instead of Reg('rax').
+for e in g_regTbl:
+ globals()[e] = Reg(e)
+
+# Punctuation characters that parseNmemonic turns into spaces before it
+# splits a line into tokens.
+g_replaceCharTbl = '{}();|,'
+# str.translate() table mapping each character above to a single space.
+g_replaceChar = str.maketrans(g_replaceCharTbl, ' '*len(g_replaceCharTbl))
+# Size keywords ordered so that entry i stands for a size of 1<<i.
+g_sizeTbl = ['byte', 'word', 'dword', 'qword', 'xword', 'yword', 'zword']
+# Rounding/SAE attribute names as written in the xbyak call syntax.
+g_attrTbl = ['T_sae', 'T_rn_sae', 'T_rd_sae', 'T_ru_sae', 'T_rz_sae'] #, 'T_z']
+# The same attributes as spelled in xed output; entry i corresponds to
+# g_attrTbl[i] (parseNmemonic translates by index).
+g_attrXedTbl = ['sae', 'rne-sae', 'rd-sae', 'ru-sae', 'rz-sae']
+
+class Attr:
+    """Instruction attribute (rounding / SAE marker) identified by its name."""
+
+    def __init__(self, s):
+        # s is the attribute name in xbyak spelling, e.g. 'T_rn_sae'
+        self.name = s
+
+    def __str__(self):
+        return self.name
+
+# Define one module-level Attr constant per attribute name (e.g. T_sae), so
+# the self-test tables can refer to attributes by bare identifier.
+for e in g_attrTbl:
+ globals()[e] = Attr(e)
+
+class Memory:
+    """Memory operand: optional size keyword plus [base + index*scale + disp].
+
+    size  -- operand size as a power of two (1 for byte ... 64 for zword);
+             0 means unsized and is printed as 'ptr'
+    base  -- base register (anything str() can render) or None
+    index -- index register or None
+    scale -- multiplier of index; printed only when greater than 1
+    disp  -- integer displacement; 0 is omitted from the output
+    """
+
+    def __init__(self, size=0, base=None, index=None, scale=0, disp=0):
+        self.size = size
+        self.base = base
+        self.index = index
+        self.scale = scale
+        self.disp = disp
+
+    def __str__(self):
+        """Render in canonical form, e.g. 'qword [rax+rcx*4+0x5]'."""
+        # log2 of the size is its position in g_sizeTbl
+        s = 'ptr' if self.size == 0 else g_sizeTbl[int(math.log2(self.size))]
+        s += ' ['
+        needPlus = False
+        if self.base:
+            s += str(self.base)
+            needPlus = True
+        if self.index:
+            if needPlus:
+                s += '+'
+            s += str(self.index)
+            if self.scale > 1:
+                s += f'*{self.scale}'
+            needPlus = True
+        if self.disp:
+            # hex() of a negative number already starts with '-', so only a
+            # positive displacement needs an explicit '+' ('rax-0x5', not
+            # 'rax+-0x5').
+            if needPlus and self.disp > 0:
+                s += '+'
+            s += hex(self.disp)
+        s += ']'
+        return s
+
+    def __eq__(self, rhs):
+        """Operands compare equal when their canonical strings are equal."""
+        return str(self) == str(rhs)
+
+def parseMemory(s):
+    """Parse a textual memory operand into a Memory object.
+
+    The text may start with a size keyword (byte ... zword) and/or 'ptr',
+    followed by a bracketed expression made of registers, 'reg*scale' terms
+    and decimal or 0x-prefixed numbers joined by '+' or '-'. Spaces and
+    letter case are ignored.
+
+    The first register without a scale (or with scale 1) becomes the base;
+    the next register becomes the index. All numbers are summed into disp.
+
+    Raises ValueError when no bracketed expression is found, when a register
+    can be placed neither as base nor as index, or when a numeric term is not
+    a valid number.
+    """
+    sizeTbl = {
+        'byte': 1, 'word': 2, 'dword': 4, 'qword': 8,
+        'xword': 16, 'yword': 32, 'zword': 64
+    }
+
+    s = s.replace(' ', '').lower()
+
+    # Parse size: at most one keyword can prefix the operand
+    size = 0
+    for keyword, nbytes in sizeTbl.items():
+        if s.startswith(keyword):
+            size = nbytes
+            s = s[len(keyword):]
+            break
+
+    # Remove 'ptr' if present
+    if s.startswith('ptr'):
+        s = s[3:]
+
+    # Extract the content inside brackets
+    r = re.match(r'\[(.*)\]', s)
+    if not r:
+        raise ValueError(f'bad format {s=}')
+
+    # Each element is (term, scale, sign); exactly one of term/sign is set
+    elems = re.findall(r'([a-z0-9]+)(?:\*([0-9]+))?|([+-])', r.group(1))
+
+    base = index = None
+    scale = 0
+    disp = 0
+
+    for i, e in enumerate(elems):
+        if e[2]:  # This is a '+' or '-' sign
+            continue
+
+        # Registers start with a letter, numbers (including 0x...) with a
+        # digit. Testing only the first character keeps names that contain
+        # digits, such as r8 or xmm1, on the register path.
+        if e[0][0].isalpha():
+            if base is None and (not e[1] or int(e[1]) == 1):
+                base = e[0]
+            elif index is None:
+                index = e[0]
+                scale = int(e[1]) if e[1] else 1
+            else:
+                raise ValueError(f'bad format2 {s=}')
+        else:
+            sign = -1 if i > 0 and elems[i-1][2] == '-' else 1
+            b = 16 if e[0].startswith('0x') else 10
+            disp += sign * int(e[0], b)
+
+    return Memory(size, base, index, scale, disp)
+
+class Nmemonic:
+    """One instruction: mnemonic name, operand list and attribute list.
+
+    name  -- mnemonic, e.g. 'vaddpd'
+    args  -- operands in order (registers, Memory objects, integers);
+             defaults to a fresh empty list
+    attrs -- attributes such as T_rn_sae; defaults to a fresh empty list
+    """
+
+    def __init__(self, name, args=None, attrs=None):
+        # None is used as the default instead of []: a list literal in the
+        # signature is created once and would be shared by every instance
+        # constructed without explicit operands.
+        self.name = name
+        self.args = [] if args is None else args
+        self.attrs = [] if attrs is None else attrs
+
+    def __str__(self):
+        """Render as 'name(arg0, arg1|attr);'."""
+        s = f'{self.name}('
+        s += ', '.join(str(a) for a in self.args)
+        for e in self.attrs:
+            s += f'|{e}'
+        s += ');'
+        return s
+
+def parseNmemonic(s):
+    """Parse one instruction line into a Nmemonic.
+
+    Handles both the xbyak call syntax ('vaddpd(ymm1, ymm2, ymm3 |T_rn_sae);')
+    and the xed listing syntax ('vaddpd ymm1{rne-sae}, ymm2, ymm3'): the
+    punctuation in g_replaceCharTbl is blanked out, then the line is split on
+    whitespace. The first token is the mnemonic; the rest are classified as
+    integer, attribute, register or memory operand.
+    """
+    s = s.translate(g_replaceChar)
+
+    # Glue the pieces of a memory operand back into a single token: after a
+    # size keyword or 'ptr', keep appending until a piece contains ']'.
+    tokens = []
+    inMemory = False
+    for piece in s.split():
+        if inMemory:
+            tokens[-1] += piece
+            if ']' in piece:
+                inMemory = False
+            continue
+        tokens.append(piece)
+        if piece in g_sizeTbl or piece == 'ptr':
+            tokens[-1] += ' '  # to avoid 'byteptr'
+            inMemory = True
+
+    name = tokens[0]
+    args = []
+    attrs = []
+    for tok in tokens[1:]:
+        if tok.startswith('0x'):
+            args.append(int(tok, 16))
+        elif tok[0] in '0123456789':
+            args.append(int(tok))
+        elif tok in g_attrTbl:
+            attrs.append(Attr(tok))
+        elif tok in g_attrXedTbl:
+            # translate the xed spelling to the xbyak one at the same index
+            attrs.append(Attr(g_attrTbl[g_attrXedTbl.index(tok)]))
+        elif tok in g_regTbl:
+            args.append(tok)
+        else:
+            args.append(parseMemory(tok))
+    return Nmemonic(name, args, attrs)
+
+def loadFile(name):
+    """Return the lines of file `name`, without empty lines and without
+    lines whose first character is '#'."""
+    with open(name) as f:
+        return [
+            line
+            for line in f.read().split('\n')
+            if line and line[0] != '#'
+        ]
+
+# Drop the first five whitespace-separated fields of an xed output line and
+# return the rest joined by single spaces.
+# e.g. XDIS 0: AVX512 AVX512EVEX 62F1E91858CB vaddpd ymm1{rne-sae}, ymm2, ymm3
+#   -> vaddpd ymm1{rne-sae}, ymm2, ymm3
+def removeExtraInfo(s):
+    fields = s.split()
+    del fields[:5]
+    return ' '.join(fields)
+
+def run(cppText, xedText):
+    """Check that each instruction in cppText matches its xed disassembly.
+
+    cppText -- path of the xbyak instruction list (one call per line)
+    xedText -- path of the xed output produced from the generated code
+
+    Line i of cppText is compared with line i of xedText after both have been
+    parsed and rendered in the same canonical form. Raises Exception on the
+    first mismatch, or when xedText has fewer usable lines than cppText.
+    Prints 'run ok' when every instruction matches.
+    """
+    cpp = loadFile(cppText)
+    xed = loadFile(xedText)
+    # Without this check a truncated xed listing would surface as a bare
+    # IndexError. Extra trailing lines in the xed listing are still ignored.
+    if len(xed) < len(cpp):
+        raise Exception(f'xed output too short: {len(cpp)} instructions but {len(xed)} disassembled lines')
+    for i in range(len(cpp)):
+        line1 = cpp[i]
+        line2 = removeExtraInfo(xed[i])
+        m1 = parseNmemonic(line1)
+        m2 = parseNmemonic(line2)
+
+        assertEqualStr(m1, m2, f'{i}')
+    print('run ok')
+
+def assertEqualStr(a, b, msg=None):
+    """Raise Exception unless str(a) equals str(b); msg labels the failure."""
+    if str(a) == str(b):
+        return
+    raise Exception(f'assert fail {msg}:', str(a), str(b))
+
+def MemoryTest():
+    """Check Memory.__str__ against hand-written expected strings."""
+    cases = [
+        (Memory(0, rax), 'ptr [rax]'),
+        (Memory(4, rax), 'dword [rax]'),
+        (Memory(8, rax, rcx), 'qword [rax+rcx]'),
+        (Memory(8, rax, rcx, 4), 'qword [rax+rcx*4]'),
+        (Memory(8, None, rcx, 4), 'qword [rcx*4]'),
+        (Memory(8, rax, None, 0, 5), 'qword [rax+0x5]'),
+        (Memory(8, None, None, 0, 255), 'qword [0xff]'),
+    ]
+    for operand, want in cases:
+        assertEqualStr(operand, want)
+
+def parseMemoryTest():
+    """Check parseMemory on operand strings with and without size, 'ptr',
+    scale and displacement."""
+    print('parseMemoryTest')
+    cases = [
+        ('[]', Memory()),
+        ('[rax]', Memory(0, rax)),
+        ('ptr[rax]', Memory(0, rax)),
+        ('dword[rbx]', Memory(4, rbx)),
+        ('xword ptr[rcx]', Memory(16, rcx)),
+        ('xword ptr[rdx*8]', Memory(16, None, rdx, 8)),
+        ('[12345]', Memory(0, None, None, 0, 12345)),
+        ('[0x12345]', Memory(0, None, None, 0, 0x12345)),
+        ('yword [rax+rdx*4]', Memory(32, rax, rdx, 4)),
+        ('zword [rax+rdx*4+123]', Memory(64, rax, rdx, 4, 123)),
+    ]
+    for text, want in cases:
+        got = parseMemory(text)
+        assertEqualStr(got, want)
+
+def parseNmemonicTest():
+    """Check parseNmemonic on xbyak-style and xed-style instruction lines."""
+    print('parseNmemonicTest')
+    cases = [
+        ('vaddpd(ymm1, ymm2, ymm3 |T_rn_sae);', Nmemonic('vaddpd', [ymm1, ymm2, ymm3], [T_rn_sae])),
+        ('vaddpd ymm1{rne-sae}, ymm2, ymm3', Nmemonic('vaddpd', [ymm1, ymm2, ymm3], [T_rn_sae])),
+        ('mov(rax, dword ptr [rcx + rdx * 8 ] );', Nmemonic('mov', [rax, Memory(4, rcx, rdx, 8)])),
+        ('mov(rax, ptr [rcx + rdx * 8 ] );', Nmemonic('mov', [rax, Memory(0, rcx, rdx, 8)])),
+        ('vcmppd(k1, ymm2, ymm3 |T_sae, 3);', Nmemonic('vcmppd', [k1, ymm2, ymm3, 3], [T_sae])),
+        ('vcmppd k1{sae}, ymm2, ymm3, 0x3', Nmemonic('vcmppd', [k1, ymm2, ymm3, 3], [T_sae])),
+    ]
+    for text, want in cases:
+        got = parseNmemonic(text)
+        assertEqualStr(got, want)
+
+def test():
+    """Run the built-in self-tests in order, bracketed by start/end messages."""
+    print('test start')
+    for check in (MemoryTest, parseMemoryTest, parseNmemonicTest):
+        check()
+    print('test end')
+
+def main():
+    """Command-line entry point.
+
+    <script> test                   -- run the built-in self-tests
+    <script> <cpp-text> <xed-text>  -- compare the two files with run()
+
+    Any other argument list prints the usage and exits with status 1.
+    """
+    argc = len(sys.argv)
+    if argc == 2 and sys.argv[1] == 'test':
+        test()
+    elif argc == 3:
+        run(sys.argv[1], sys.argv[2])
+    else:
+        # sys.argv[0] is the script path; __name__ would print '__main__'
+        prog = sys.argv[0]
+        print(f'{prog} <cpp-text> <xed-text> # compare cpp-text and xed-text generated by xed')
+        print(f'{prog} test # for test')
+        sys.exit(1)
+
+if __name__ == '__main__':
+    main()
diff --git a/test/test_by_xed.sh b/test/test_by_xed.sh
new file mode 100755
index 0000000..6d820bd
--- /dev/null
+++ b/test/test_by_xed.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# Check the code xbyak generates for one instruction list against xed:
+#   1. copy the list to cpp.txt, which test_by_xed.cpp #includes
+#   2. build and run test_by_xed, which writes the machine code to ./bin
+#   3. disassemble ./bin with xed into out.txt
+#   4. let test_by_xed.py compare cpp.txt with out.txt
+# XED, CXX and PYTHON may be overridden from the environment.
+
+set -e
+XED=${XED:=xed}
+CXX=${CXX:=g++}
+PYTHON=${PYTHON:=python3}
+
+if [ $# -ne 1 ]; then
+	echo "./test_by_xed.sh <xbyak-cpp>" >&2
+	exit 1
+fi
+
+TARGET=$1
+
+CFLAGS="-Wall -Wextra -I ../"
+
+# TARGET is quoted so that a path containing spaces or glob characters is
+# passed through as a single argument.
+echo "test:" "$TARGET"
+cp -- "$TARGET" cpp.txt
+# CXX, CFLAGS, XED and PYTHON stay unquoted on purpose: each may hold a
+# command plus options that must be split into separate words.
+$CXX $CFLAGS test_by_xed.cpp -o test_by_xed
+./test_by_xed
+$XED -64 -ir bin > out.txt
+$PYTHON test_by_xed.py cpp.txt out.txt
+
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index c9b6269..f0d99db 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -155,7 +155,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
- VERSION = 0x7090 /* 0xABCD = A.BC(.D) */
+ VERSION = 0x7091 /* 0xABCD = A.BC(.D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index a1b61db..8316bd9 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "7.09"; }
+const char *getVersionString() const { return "7.09.1"; }
void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); }
void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); }
void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
@@ -2198,10 +2198,8 @@ void vcmpunordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x
void vcmpunordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 3); }
void vcmpunordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 3); }
void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }
-void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x63); }
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8A); }
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8A); }
-void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x63); }
void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x5B); }
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x72); }
void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16|T_N_VL|T_66|T_MAP5|T_EW1|T_ER_Z|T_MUST_EVEX|T_B64, 0x5A); }
@@ -2409,8 +2407,10 @@ void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { op
void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x1E, imm); }
void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3E, imm); }
void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3F, imm); }
+void vpcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x63); }
void vpcompressd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8B); }
void vpcompressq(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8B); }
+void vpcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x63); }
void vpconflictd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0xC4); }
void vpconflictq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xC4); }
void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8D); }