diff options
author | MITSUNARI Shigeo <[email protected]> | 2022-11-25 18:44:43 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2022-11-25 18:44:43 +0900 |
commit | b5ac7b0f0a71dbb8d1adbbf3c1c0ec88df458531 (patch) | |
tree | 9069f6a9768ce87831af5bfd2f4b8add7ec7c793 | |
parent | 0ecef5c2847959192ca677a27faf08255055db42 (diff) | |
parent | 05dd400e02e06e80df68d276e3710741ede74a14 (diff) | |
download | xbyak-b5ac7b0f0a71dbb8d1adbbf3c1c0ec88df458531.tar.gz xbyak-b5ac7b0f0a71dbb8d1adbbf3c1c0ec88df458531.zip |
Merge branch 'dev' v6.66
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | doc/changelog.md | 1 | ||||
-rw-r--r-- | gen/Makefile | 2 | ||||
-rw-r--r-- | gen/gen_code.cpp | 17 | ||||
-rw-r--r-- | meson.build | 2 | ||||
-rw-r--r-- | readme.md | 2 | ||||
-rw-r--r-- | readme.txt | 3 | ||||
-rw-r--r-- | sample/Makefile | 7 | ||||
-rw-r--r-- | sample/quantize.cpp | 2 | ||||
-rw-r--r-- | sample/test_util.cpp | 1 | ||||
-rw-r--r-- | sample/toyvm.cpp | 8 | ||||
-rw-r--r-- | test/Makefile | 33 | ||||
-rw-r--r-- | test/make_nm.cpp | 8 | ||||
-rw-r--r-- | test/misc.cpp | 63 | ||||
-rw-r--r-- | test/noexception.cpp | 2 | ||||
-rwxr-xr-x | test/test_address.sh | 8 | ||||
-rwxr-xr-x | test/test_avx.sh | 7 | ||||
-rwxr-xr-x | test/test_avx512.sh | 7 | ||||
-rwxr-xr-x | test/test_nm.sh | 7 | ||||
-rw-r--r-- | xbyak/xbyak.h | 2 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 6 | ||||
-rw-r--r-- | xbyak/xbyak_util.h | 2 |
22 files changed, 143 insertions, 49 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index d786752..1d91b0e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 2.6...3.0.2) -project(xbyak LANGUAGES CXX VERSION 6.65) +project(xbyak LANGUAGES CXX VERSION 6.66) file(GLOB headers xbyak/*.h) diff --git a/doc/changelog.md b/doc/changelog.md index b976c90..5e6fa9a 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -1,5 +1,6 @@ # History +* 2022/Nov/25 ver 6.66 support RAO-INT * 2022/Nov/22 ver 6.65 consider x32 * 2022/Nov/04 ver 6.64 some vmov* support addressing with mask * 2022/Oct/06 ver 6.63 vpmadd52{h,l}uq support AVX-IFMA diff --git a/gen/Makefile b/gen/Makefile index 97a6846..f254d71 100644 --- a/gen/Makefile +++ b/gen/Makefile @@ -1,6 +1,6 @@ TARGET=../xbyak/xbyak_mnemonic.h BIN=sortline gen_code gen_avx512 -CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) +CFLAGS=-I../ -O2 -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) all: $(TARGET) ../CMakeLists.txt ../meson.build ../readme.md ../readme.txt sortline: sortline.cpp $(CXX) $(CFLAGS) $< -o $@ diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index b89009b..7a5c575 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -807,6 +807,23 @@ void put() printf("void %s(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext); } } + { + const struct Tbl { + const char *name; + uint8_t prefix; + } tbl[] = { + { "aadd", 0 }, + { "aand", 0x66 }, + { "aor", 0xF2 }, + { "axor", 0xF3 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Address& addr, const Reg32e &reg) { ", p->name); + if (p->prefix) printf("db(0x%02X); ", p->prefix); + printf("opModM(addr, reg, 0x0F, 0x38, 0x0FC); }\n"); + } + } { const struct Tbl { diff --git a/meson.build b/meson.build index 971126b..f296532 100644 --- a/meson.build +++ b/meson.build @@ -5,7 
+5,7 @@ project( 'xbyak', 'cpp', - version: '6.65', + version: '6.66', license: 'BSD-3-Clause', default_options: 'b_ndebug=if-release' ) @@ -1,5 +1,5 @@ -# Xbyak 6.65 [![Badge Build]][Build Status] +# Xbyak 6.66 [![Badge Build]][Build Status] *A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)* @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.65
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.66
-----------------------------------------------------------------------------
◎概要
@@ -402,6 +402,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から -----------------------------------------------------------------------------
◎履歴
+2022/11/25 ver 6.66 RAO-INTサポート
2022/11/22 ver 6.65 x32動作確認
2022/11/04 ver 6.64 vmov*命令をmaskつきアドレッシング対応修正
2022/10/06 ver 6.63 AVX-IFMA用のvpmadd52{h,l}uq対応
diff --git a/sample/Makefile b/sample/Makefile index 7c910bb..7e8ab7b 100644 --- a/sample/Makefile +++ b/sample/Makefile @@ -1,6 +1,9 @@ XBYAK_INC=../xbyak/xbyak.h +CXX?=g++ -BOOST_EXIST=$(shell echo "\#include <boost/spirit/core.hpp>" | (gcc -E - 2>/dev/null) | grep "boost/spirit/core.hpp" >/dev/null && echo "1") +#BOOST_EXIST=$(shell echo "#include <boost/spirit/core.hpp>" | $CXX -x c++ -c - 2>/dev/null && echo 1) +# I don't know why the above code causes an error on GitHub action. +BOOST_EXIST?=0 UNAME_M=$(shell uname -m) ONLY_64BIT=0 @@ -104,7 +107,7 @@ profiler-vtune: profiler.cpp ../xbyak/xbyak_util.h $(CXX) $(CFLAGS) profiler.cpp -o $@ -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl clean: - rm -rf *.o $(TARGET) *.exe profiler profiler-vtune + rm -rf $(TARGET) profiler profiler-vtune test : test0.cpp $(XBYAK_INC) test64: test0.cpp $(XBYAK_INC) diff --git a/sample/quantize.cpp b/sample/quantize.cpp index 6bdf0d0..ba0fd22 100644 --- a/sample/quantize.cpp +++ b/sample/quantize.cpp @@ -199,7 +199,7 @@ int main(int argc, char *argv[]) quantize2(dest2, src, qTbl); for (int i = 0; i < N; i++) { if (dest[i] != dest2[i]) { - printf("err[%d] %d %d\n", i, dest[i], dest2[i]); + printf("err[%d] %u %u\n", i, dest[i], dest2[i]); } } diff --git a/sample/test_util.cpp b/sample/test_util.cpp index a20d2df..ef6e3fa 100644 --- a/sample/test_util.cpp +++ b/sample/test_util.cpp @@ -93,6 +93,7 @@ void putCPUinfo(bool onlyCpuidFeature) { Cpu::tAVX_VNNI_INT8, "avx_vnni_int8" }, { Cpu::tAVX_NE_CONVERT, "avx_ne_convert" }, { Cpu::tAVX_IFMA, "avx_ifma" }, + { Cpu::tRAO_INT, "rao-int" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str); diff --git a/sample/toyvm.cpp b/sample/toyvm.cpp index 1e558ff..dff0cb7 100644 --- a/sample/toyvm.cpp +++ b/sample/toyvm.cpp @@ -5,8 +5,8 @@ mem_ 4byte x 65536 - ���٤Ƥ�̿���4byte���� - ¨�ͤ�����16bit + all instructions are fixed at 4 
bytes. + all immediate values are 16-bit. R = A or B vldiR, imm ; R = imm @@ -109,7 +109,7 @@ public: reg[r] -= imm; break; case PUT: - printf("%c %8d(0x%08x)\n", 'A' + r, reg[r], reg[r]); + printf("%c %8u(0x%08x)\n", 'A' + r, reg[r], reg[r]); break; case JNZ: if (reg[r] != 0) pc += static_cast<signed short>(imm); @@ -294,7 +294,7 @@ lp: p = t; n--; if (n != 0) goto lp; - printf("c=%d(0x%08x)\n", c, c); + printf("c=%u(0x%08x)\n", c, c); } int main() diff --git a/test/Makefile b/test/Makefile index 2b9bd1a..feef445 100644 --- a/test/Makefile +++ b/test/Makefile @@ -23,9 +23,9 @@ endif all: $(TARGET) -CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith +CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wwrite-strings -Wfloat-equal -Wpointer-arith -CFLAGS=-O2 -fomit-frame-pointer -Wall -fno-operator-names -I../ -I./ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x +CFLAGS=-O2 -Wall -I../ -I./ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x make_nm: $(CXX) $(CFLAGS) make_nm.cpp -o $@ normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h @@ -56,12 +56,11 @@ noexception: noexception.cpp ../xbyak/xbyak.h test_nm: normalize_prefix $(TARGET) $(MAKE) -C ../gen ifneq ($(ONLY_64BIT),1) - ./test_nm.sh - ./test_nm.sh noexcept - ./noexception - ./test_nm.sh Y - ./test_nm.sh avx512 - ./test_address.sh + env CXX=$(CXX) sh -e ./test_nm.sh + env CXX=$(CXX) sh -e ./test_nm.sh noexcept + env CXX=$(CXX) sh -e ./test_nm.sh Y + env CXX=$(CXX) sh -e ./test_nm.sh avx512 + env CXX=$(CXX) sh -e ./test_address.sh ./jmp ./cvt_test32 endif @@ -70,32 +69,32 @@ endif ./misc32 ./cvt_test ifeq ($(BIT),64) - ./test_address.sh 64 + env CXX=$(CXX) sh -e ./test_address.sh 64 ifneq ($(X32),1) - ./test_nm.sh 64 - ./test_nm.sh Y64 + env CXX=$(CXX) sh -e ./test_nm.sh 64 + env CXX=$(CXX) sh -e ./test_nm.sh Y64 endif ./jmp64 endif test_avx: normalize_prefix ifneq ($(ONLY_64BIT),0) - ./test_avx.sh - ./test_avx.sh Y + env 
CXX=$(CXX) sh -e ./test_avx.sh + env CXX=$(CXX) sh -e ./test_avx.sh Y endif ifeq ($(BIT),64) - ./test_avx.sh 64 + env CXX=$(CXX) sh -e ./test_avx.sh 64 ifneq ($(X32),1) - ./test_avx.sh Y64 + env CXX=$(CXX) sh -e ./test_avx.sh Y64 endif endif test_avx512: normalize_prefix ifneq ($(ONLY_64BIT),0) - ./test_avx512.sh + env CXX=$(CXX) sh -e ./test_avx512.sh endif ifeq ($(BIT),64) - ./test_avx512.sh 64 + env CXX=$(CXX) sh -e ./test_avx512.sh 64 endif detect_x32: detect_x32.c diff --git a/test/make_nm.cpp b/test/make_nm.cpp index 5106bf9..e5939eb 100644 --- a/test/make_nm.cpp +++ b/test/make_nm.cpp @@ -1018,9 +1018,7 @@ class Test { } void putCmov() const { - const struct { - const char *s; - } tbl[] = { + const char tbl[][4] = { "o", "no", "b", @@ -1054,11 +1052,11 @@ class Test { }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { char buf[32]; - snprintf(buf, sizeof(buf), "cmov%s", tbl[i].s); + snprintf(buf, sizeof(buf), "cmov%s", tbl[i]); put(buf, REG16, REG16|MEM); put(buf, REG32, REG32|MEM); put(buf, REG64, REG64|MEM); - snprintf(buf, sizeof(buf), "set%s", tbl[i].s); + snprintf(buf, sizeof(buf), "set%s", tbl[i]); put(buf, REG8|REG8_3|MEM); } } diff --git a/test/misc.cpp b/test/misc.cpp index 6225690..7653673 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -1993,3 +1993,66 @@ CYBOZU_TEST_AUTO(minmax) CYBOZU_TEST_EQUAL((std::min)(3, 4), local::min_(3, 4)); CYBOZU_TEST_EQUAL((std::max)(3, 4), local::max_(3, 4)); } + +CYBOZU_TEST_AUTO(rao_int) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { +#ifdef XBYAK64 + aadd(ptr[rax], ecx); + aadd(ptr[eax], ecx); + aadd(ptr[rax], r10); + aand(ptr[rax], ecx); + aand(ptr[eax], ecx); + aand(ptr[rax], r10); + aor(ptr[rax], ecx); + aor(ptr[eax], ecx); + aor(ptr[rax], r10); + axor(ptr[rax], ecx); + axor(ptr[eax], ecx); + axor(ptr[rax], r10); +#else + aadd(ptr[eax], ecx); + aand(ptr[eax], ecx); + aor(ptr[eax], ecx); + axor(ptr[eax], ecx); +#endif + } + } c; + const uint8_t tbl[] = { +#ifdef XBYAK64 + // aadd + 0x0f, 0x38, 0xfc, 
0x08, + 0x67, 0x0f, 0x38, 0xfc, 0x08, + 0x4c, 0x0f, 0x38, 0xfc, 0x10, + + // aand + 0x66, 0x0f, 0x38, 0xfc, 0x08, + 0x66, 0x67, 0x0f, 0x38, 0xfc, 0x08, + 0x66, 0x4c, 0x0f, 0x38, 0xfc, 0x10, + + // aor + 0xf2, 0x0f, 0x38, 0xfc, 0x08, + 0xf2, 0x67, 0x0f, 0x38, 0xfc, 0x08, + 0xf2, 0x4c, 0x0f, 0x38, 0xfc, 0x10, + + // axor + 0xf3, 0x0f, 0x38, 0xfc, 0x08, + 0xf3, 0x67, 0x0f, 0x38, 0xfc, 0x08, + 0xf3, 0x4c, 0x0f, 0x38, 0xfc, 0x10, +#else + // aadd + 0x0f, 0x38, 0xfc, 0x08, + // aand + 0x66, 0x0f, 0x38, 0xfc, 0x08, + // aor + 0xf2, 0x0f, 0x38, 0xfc, 0x08, + // axor + 0xf3, 0x0f, 0x38, 0xfc, 0x08, +#endif + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} diff --git a/test/noexception.cpp b/test/noexception.cpp index 04a6dbc..9ef0ee8 100644 --- a/test/noexception.cpp +++ b/test/noexception.cpp @@ -56,7 +56,7 @@ void test2() void test3() { static struct EmptyAllocator : Xbyak::Allocator { - uint8_t *alloc() { return 0; } + uint8_t *alloc(size_t) { return 0; } } emptyAllocator; struct Code : CodeGenerator { Code() : CodeGenerator(8, 0, &emptyAllocator) diff --git a/test/test_address.sh b/test/test_address.sh index d283a5f..7960700 100755 --- a/test/test_address.sh +++ b/test/test_address.sh @@ -5,9 +5,11 @@ FILTER="grep -v warning" sub() { -CFLAGS="-Wall -fno-operator-names -I../ $OPT2" +CFLAGS="-Wall -I../ $OPT2" +CXX=${CXX:=g++} + echo "compile address.cpp" -g++ $CFLAGS address.cpp -o address +$CXX $CFLAGS address.cpp -o address ./address $1 > a.asm echo "asm" @@ -17,7 +19,7 @@ awk '{printf "%s", sub(/-$/, "", $3) ? 
$3 : $3 ORS}' a.lst | $FILTER > ok.lst echo "xbyak" ./address $1 jit > nm.cpp echo "compile nm_frame.cpp" -g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame +$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame ./nm_frame > x.lst diff ok.lst x.lst && echo "ok" diff --git a/test/test_avx.sh b/test/test_avx.sh index 34dc1e5..35087cd 100755 --- a/test/test_avx.sh +++ b/test/test_avx.sh @@ -1,6 +1,7 @@ #!/bin/sh FILTER="grep -v warning" +CXX=${CXX:=g++} case $1 in Y) @@ -31,9 +32,9 @@ Y64) ;; esac -CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX" +CFLAGS="-Wall -I../ $OPT2 -DUSE_AVX" echo "compile make_nm.cpp" -g++ $CFLAGS make_nm.cpp -o make_nm +$CXX $CFLAGS make_nm.cpp -o make_nm ./make_nm > a.asm echo "asm" @@ -43,6 +44,6 @@ awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER echo "xbyak" ./make_nm jit > nm.cpp echo "compile nm_frame.cpp" -g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame +$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame ./nm_frame | $FILTER > x.lst diff -B ok.lst x.lst && echo "ok" diff --git a/test/test_avx512.sh b/test/test_avx512.sh index 17edfee..90d14df 100755 --- a/test/test_avx512.sh +++ b/test/test_avx512.sh @@ -1,6 +1,7 @@ #!/bin/sh FILTER="grep -v warning" +CXX=${CXX:=g++} case $1 in 64) @@ -18,9 +19,9 @@ case $1 in ;; esac -CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX512" +CFLAGS="-Wall -I../ $OPT2 -DUSE_AVX512" echo "compile make_512.cpp" -g++ $CFLAGS make_512.cpp -o make_512 +$CXX $CFLAGS make_512.cpp -o make_512 ./make_512 > a.asm echo "asm" @@ -30,6 +31,6 @@ awk '{printf "%s", sub(/-$/, "", $3) ? 
$3 : $3 ORS}' a.lst | $FILTER > ok.lst echo "xbyak" ./make_512 jit > nm.cpp echo "compile nm_frame.cpp" -g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512 +$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512 ./nm_frame | $FILTER > x.lst diff -B ok.lst x.lst && echo "ok" diff --git a/test/test_nm.sh b/test/test_nm.sh index afa2b1e..019f278 100755 --- a/test/test_nm.sh +++ b/test/test_nm.sh @@ -1,6 +1,7 @@ #!/bin/sh FILTER=cat +CXX=${CXX:=g++} case $1 in Y) @@ -44,9 +45,9 @@ noexcept) ;; esac -CFLAGS="-Wall -fno-operator-names -I../ $OPT2" +CFLAGS="-Wall -I../ $OPT2" echo "compile make_nm.cpp with $CFLAGS" -g++ $CFLAGS make_nm.cpp -o make_nm +$CXX $CFLAGS make_nm.cpp -o make_nm ./make_nm > a.asm echo "asm" @@ -56,6 +57,6 @@ awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER echo "xbyak" ./make_nm jit > nm.cpp echo "compile nm_frame.cpp" -g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame +$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame ./nm_frame | $FILTER > x.lst diff -B ok.lst x.lst && echo "ok" diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 2a9c031..86372a0 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -155,7 +155,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x6650 /* 0xABCD = A.BC(.D) */ + VERSION = 0x6660 /* 0xABCD = A.BC(.D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 33bd5c0..13e52e1 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,4 +1,6 @@ -const char *getVersionString() const { return "6.65"; } +const char *getVersionString() const { return "6.66"; } +void aadd(const Address& addr, const Reg32e &reg) { opModM(addr, reg, 0x0F, 0x38, 0x0FC); } +void aand(const Address& addr, const Reg32e &reg) { db(0x66); opModM(addr, reg, 0x0F, 0x38, 0x0FC); } void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); } void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); 
} void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); } @@ -24,6 +26,8 @@ void andnpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x66, isXM void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isXMM_XMMorMEM); } void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); } void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); } +void aor(const Address& addr, const Reg32e &reg) { db(0xF2); opModM(addr, reg, 0x0F, 0x38, 0x0FC); } +void axor(const Address& addr, const Reg32e &reg) { db(0xF3); opModM(addr, reg, 0x0F, 0x38, 0x0FC); } void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf7, false); } void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); } void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); } diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h index 8431925..e1e4476 100644 --- a/xbyak/xbyak_util.h +++ b/xbyak/xbyak_util.h @@ -414,6 +414,7 @@ public: XBYAK_DEFINE_TYPE(69, tAVX_VNNI_INT8); XBYAK_DEFINE_TYPE(70, tAVX_NE_CONVERT); XBYAK_DEFINE_TYPE(71, tAVX_IFMA); + XBYAK_DEFINE_TYPE(72, tRAO_INT); #undef XBYAK_SPLIT_ID #undef XBYAK_DEFINE_TYPE @@ -553,6 +554,7 @@ public: if (EDX & (1U << 22)) type_ |= tAMX_BF16; if (maxNumSubLeaves >= 1) { getCpuidEx(7, 1, data); + if (EAX & (1U << 3)) type_ |= tRAO_INT; if (EAX & (1U << 4)) type_ |= tAVX_VNNI; if (type_ & tAVX512F) { if (EAX & (1U << 5)) type_ |= tAVX512_BF16; |