aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author MITSUNARI Shigeo <[email protected]> 2024-10-11 11:22:35 +0900
committer MITSUNARI Shigeo <[email protected]> 2024-10-13 13:51:06 +0900
commit 864fd0c49ce07fc534b16250758987c445bb9c70 (patch)
tree 4615ae3631cf2e9ca7325c39b45a1bc121b926fb
parent 183e17f94d4ae2f72d6bc84063c1c01cf2512eb5 (diff)
download xbyak-864fd0c49ce07fc534b16250758987c445bb9c70.tar.gz
xbyak-864fd0c49ce07fc534b16250758987c445bb9c70.zip
add vaddnepbf16
-rw-r--r-- .gitignore 1
-rw-r--r-- gen/gen_avx512.cpp 22
-rw-r--r-- test/Makefile 4
-rw-r--r-- test/avx10/bf16.txt 4
-rw-r--r-- test/avx10/new-ymm.txt (renamed from test/target/avx10.txt) 0
-rw-r--r-- test/avx10/old.txt (renamed from test/target/misc.txt) 0
-rw-r--r-- test/test_by_xed.py 5
-rw-r--r-- xbyak/xbyak_mnemonic.h 1
8 files changed, 34 insertions, 3 deletions
diff --git a/.gitignore b/.gitignore
index 24b0b1d..507091e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
/build* # cmake
+*CVS
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp
index 79ec79a..23923b0 100644
--- a/gen/gen_avx512.cpp
+++ b/gen/gen_avx512.cpp
@@ -944,6 +944,22 @@ void putFP16_2()
}
}
+void putAVX10_BF16() // prints one xbyak mnemonic wrapper definition per tbl[] entry to stdout
+{
+ const struct Tbl {
+ const char *name; // mnemonic; used verbatim as the generated function name
+ uint64_t type; // T_* attribute flags; T_MUST_EVEX is OR-ed in below for every entry
+ uint8_t code; // byte value printed as the last opAVX_X_X_XM argument
+ } tbl[] = {
+ { "vaddnepbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x58 },
+ };
+ for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { // code is printed with %02X so a value below 0x10 stays a valid literal (0x05, not "0x 5")
+ const Tbl& p = tbl[i];
+ std::string s = type2String(p.type | T_MUST_EVEX); // flags rendered as source text for the generated call
+ printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
+ }
+}
+
void putFP16()
{
putFP16_1();
@@ -952,6 +968,11 @@ void putFP16()
putFP16_2();
}
+void putAVX10() // entry point for the AVX10 generators; invoked from main()
+{
+ putAVX10_BF16(); // the only AVX10 group emitted here
+}
+
int main(int argc, char *[])
{
bool only64bit = argc == 2;
@@ -977,4 +998,5 @@ int main(int argc, char *[])
putScatter();
putV4FMA();
putFP16();
+ putAVX10();
}
diff --git a/test/Makefile b/test/Makefile
index ca2f0bb..4d0b85d 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -60,9 +60,9 @@ apx: apx.cpp $(XBYAK_INC)
avx10_test: avx10_test.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64
-TEST_FILES=avx10.txt misc.txt
+TEST_FILES=old.txt new-ymm.txt bf16.txt
xed_test:
- @for target in $(addprefix target/, $(TEST_FILES)); do ./test_by_xed.sh $$target; done
+ @for target in $(addprefix avx10/, $(TEST_FILES)); do ./test_by_xed.sh $$target; done
test_nm: normalize_prefix $(TARGET)
$(MAKE) -C ../gen
diff --git a/test/avx10/bf16.txt b/test/avx10/bf16.txt
new file mode 100644
index 0000000..1c77f93
--- /dev/null
+++ b/test/avx10/bf16.txt
@@ -0,0 +1,4 @@
+vaddnepbf16(xm1, xm2, xm3);
+vaddnepbf16(ym1|k1, ym2, ptr[rax+128]);
+vaddnepbf16(ym1|k1, ym2, ptr_b[rax+128]);
+vaddnepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
diff --git a/test/target/avx10.txt b/test/avx10/new-ymm.txt
index 8ee52ca..8ee52ca 100644
--- a/test/target/avx10.txt
+++ b/test/avx10/new-ymm.txt
diff --git a/test/target/misc.txt b/test/avx10/old.txt
index 9e4f097..9e4f097 100644
--- a/test/target/misc.txt
+++ b/test/avx10/old.txt
diff --git a/test/test_by_xed.py b/test/test_by_xed.py
index 3e4b98f..cd6b7bb 100644
--- a/test/test_by_xed.py
+++ b/test/test_by_xed.py
@@ -273,7 +273,7 @@ def loadFile(name):
r = []
for line in f.read().split('\n'):
if line:
- if line[0] == '#':
+ if line[0] == '#' or line.startswith('//'):
continue
r.append(line)
return r
@@ -287,6 +287,9 @@ def removeExtraInfo(s):
def run(cppText, xedText):
cpp = loadFile(cppText)
xed = loadFile(xedText)
+ if len(cpp) != len(xed):
+ raise Exception(f'different line {len(cpp)} {len(xed)}')
+
for i in range(len(cpp)):
line1 = cpp[i]
line2 = removeExtraInfo(xed[i])
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 8316bd9..f98e001 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -2047,6 +2047,7 @@ void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM
void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }
void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }
void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }
+void vaddnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x58); }
void vaddph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x58); }
void vaddsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x58); }
void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x03, imm); }