diff options
author | MITSUNARI Shigeo <[email protected]> | 2024-10-11 11:22:35 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2024-10-13 13:51:06 +0900 |
commit | 864fd0c49ce07fc534b16250758987c445bb9c70 (patch) | |
tree | 4615ae3631cf2e9ca7325c39b45a1bc121b926fb | |
parent | 183e17f94d4ae2f72d6bc84063c1c01cf2512eb5 (diff) | |
download | xbyak-864fd0c49ce07fc534b16250758987c445bb9c70.tar.gz xbyak-864fd0c49ce07fc534b16250758987c445bb9c70.zip |
add vaddnepbf16
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | gen/gen_avx512.cpp | 22 | ||||
-rw-r--r-- | test/Makefile | 4 | ||||
-rw-r--r-- | test/avx10/bf16.txt | 4 | ||||
-rw-r--r-- | test/avx10/new-ymm.txt (renamed from test/target/avx10.txt) | 0 | ||||
-rw-r--r-- | test/avx10/old.txt (renamed from test/target/misc.txt) | 0 | ||||
-rw-r--r-- | test/test_by_xed.py | 5 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 1 |
8 files changed, 34 insertions, 3 deletions
@@ -1 +1,2 @@ /build* # cmake +*CVS diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 79ec79a..23923b0 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -944,6 +944,22 @@ void putFP16_2() } } +void putAVX10_BF16() +{ + const struct Tbl { + const char *name; + uint64_t type; + uint8_t code; + } tbl[] = { + { "vaddnepbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x58 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + std::string s = type2String(p.type | T_MUST_EVEX); + printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%2X); }\n" , p.name, s.c_str(), p.code); + } +} + void putFP16() { putFP16_1(); @@ -952,6 +968,11 @@ void putFP16() putFP16_2(); } +void putAVX10() +{ + putAVX10_BF16(); +} + int main(int argc, char *[]) { bool only64bit = argc == 2; @@ -977,4 +998,5 @@ int main(int argc, char *[]) putScatter(); putV4FMA(); putFP16(); + putAVX10(); } diff --git a/test/Makefile b/test/Makefile index ca2f0bb..4d0b85d 100644 --- a/test/Makefile +++ b/test/Makefile @@ -60,9 +60,9 @@ apx: apx.cpp $(XBYAK_INC) avx10_test: avx10_test.cpp $(XBYAK_INC) $(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64 -TEST_FILES=avx10.txt misc.txt +TEST_FILES=old.txt new-ymm.txt bf16.txt xed_test: - @for target in $(addprefix target/, $(TEST_FILES)); do ./test_by_xed.sh $$target; done + @for target in $(addprefix avx10/, $(TEST_FILES)); do ./test_by_xed.sh $$target; done test_nm: normalize_prefix $(TARGET) $(MAKE) -C ../gen diff --git a/test/avx10/bf16.txt b/test/avx10/bf16.txt new file mode 100644 index 0000000..1c77f93 --- /dev/null +++ b/test/avx10/bf16.txt @@ -0,0 +1,4 @@ +vaddnepbf16(xm1, xm2, xm3); +vaddnepbf16(ym1|k1, ym2, ptr[rax+128]); +vaddnepbf16(ym1|k1, ym2, ptr_b[rax+128]); +vaddnepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); diff --git a/test/target/avx10.txt b/test/avx10/new-ymm.txt index 8ee52ca..8ee52ca 100644 --- a/test/target/avx10.txt +++ b/test/avx10/new-ymm.txt diff --git a/test/target/misc.txt b/test/avx10/old.txt index 9e4f097..9e4f097 100644 --- a/test/target/misc.txt +++ b/test/avx10/old.txt diff --git a/test/test_by_xed.py b/test/test_by_xed.py index 3e4b98f..cd6b7bb 100644 --- a/test/test_by_xed.py +++ b/test/test_by_xed.py @@ -273,7 +273,7 @@ def loadFile(name): r = [] for line in f.read().split('\n'): if line: - if line[0] == '#': + if line[0] == '#' or line.startswith('//'): continue r.append(line) return r @@ -287,6 +287,9 @@ def removeExtraInfo(s): def run(cppText, xedText): cpp = loadFile(cppText) xed = loadFile(xedText) + if len(cpp) != len(xed): + raise Exception(f'different line {len(cpp)} {len(xed)}') + for i in range(len(cpp)): line1 = cpp[i] line2 = removeExtraInfo(xed[i]) diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 8316bd9..f98e001 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -2047,6 +2047,7 @@ void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); } void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); } void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); } +void vaddnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x58); } void vaddph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x58); } void vaddsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x58); } void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x03, imm); } |