diff options
author | MITSUNARI Shigeo <[email protected]> | 2024-10-14 05:15:24 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2024-10-14 05:15:24 +0900 |
commit | 5f942b59145c66b514085216320ac0de96f841cf (patch) | |
tree | 1784e2b56e19ba9395c9c435cfd4be6f51ac908d /test | |
parent | 86e532fe1a1027f8b13e4833d1558697c6512f76 (diff) | |
download | xbyak-5f942b59145c66b514085216320ac0de96f841cf.tar.gz xbyak-5f942b59145c66b514085216320ac0de96f841cf.zip |
under developing saturation
Diffstat (limited to 'test')
-rw-r--r-- | test/Makefile | 2 | ||||
-rw-r--r-- | test/avx10/saturation.txt | 202 |
2 files changed, 203 insertions, 1 deletions
diff --git a/test/Makefile b/test/Makefile index 336dcaf..d5613e4 100644 --- a/test/Makefile +++ b/test/Makefile @@ -60,7 +60,7 @@ apx: apx.cpp $(XBYAK_INC) avx10_test: avx10_test.cpp $(XBYAK_INC) $(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64 -TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt convert.txt +TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt convert.txt minmax.txt saturation.txt xed_test: @for target in $(addprefix avx10/, $(TEST_FILES)); do ./test_by_xed.sh $$target; done diff --git a/test/avx10/saturation.txt b/test/avx10/saturation.txt new file mode 100644 index 0000000..591960e --- /dev/null +++ b/test/avx10/saturation.txt @@ -0,0 +1,202 @@ +// +vcvtnebf162ibs(xm1, xm2); +vcvtnebf162ibs(xm1, ptr[rax+128]); +vcvtnebf162ibs(xm1, ptr_b[rax+128]); + +vcvtnebf162ibs(ym1, ym2); +vcvtnebf162ibs(ym1, ptr[rax+128]); +vcvtnebf162ibs(ym1, ptr_b[rax+128]); + +vcvtnebf162ibs(zm1, zm2); +vcvtnebf162ibs(zm1, ptr[rax+128]); +vcvtnebf162ibs(zm1, ptr_b[rax+128]); +// +vcvtnebf162iubs(xm1, xm2); +vcvtnebf162iubs(xm1, ptr[rax+128]); +vcvtnebf162iubs(xm1, ptr_b[rax+128]); + +vcvtnebf162iubs(ym1, ym2); +vcvtnebf162iubs(ym1, ptr[rax+128]); +vcvtnebf162iubs(ym1, ptr_b[rax+128]); + +vcvtnebf162iubs(zm1, zm2); +vcvtnebf162iubs(zm1, ptr[rax+128]); +vcvtnebf162iubs(zm1, ptr_b[rax+128]); +// +vcvttnebf162ibs(xm1, xm2); +vcvttnebf162ibs(xm1, ptr[rax+128]); +vcvttnebf162ibs(xm1, ptr_b[rax+128]); + +vcvttnebf162ibs(ym1, ym2); +vcvttnebf162ibs(ym1, ptr[rax+128]); +vcvttnebf162ibs(ym1, ptr_b[rax+128]); + +vcvttnebf162ibs(zm1, zm2); +vcvttnebf162ibs(zm1, ptr[rax+128]); +vcvttnebf162ibs(zm1, ptr_b[rax+128]); +// +vcvttnebf162iubs(xm1, xm2); +vcvttnebf162iubs(xm1, ptr[rax+128]); +vcvttnebf162iubs(xm1, ptr_b[rax+128]); + +vcvttnebf162iubs(ym1, ym2); +vcvttnebf162iubs(ym1, ptr[rax+128]); +vcvttnebf162iubs(ym1, ptr_b[rax+128]); + +vcvttnebf162iubs(zm1, zm2); +vcvttnebf162iubs(zm1, ptr[rax+128]); +vcvttnebf162iubs(zm1, ptr_b[rax+128]); +// +vcvttpd2qqs(xm1, xm2); +vcvttpd2qqs(xm1, ptr[rax+128]); +vcvttpd2qqs(xm1, ptr_b[rax+128]); + +vcvttpd2qqs(ym1, ym2); +vcvttpd2qqs(ym1, ym2|T_sae); +vcvttpd2qqs(ym1, ptr[rax+128]); +vcvttpd2qqs(ym1, ptr_b[rax+128]); + +vcvttpd2qqs(zm1, zm2); +vcvttpd2qqs(zm1, zm2|T_sae); +vcvttpd2qqs(zm1, ptr[rax+128]); +vcvttpd2qqs(zm1, ptr_b[rax+128]); +// +vcvttpd2uqqs(xm1, xm2); +vcvttpd2uqqs(xm1, ptr[rax+128]); +vcvttpd2uqqs(xm1, ptr_b[rax+128]); + +vcvttpd2uqqs(ym1, ym2); +vcvttpd2uqqs(ym1, ym2|T_sae); +vcvttpd2uqqs(ym1, ptr[rax+128]); +vcvttpd2uqqs(ym1, ptr_b[rax+128]); + +vcvttpd2uqqs(zm1, zm2); +vcvttpd2uqqs(zm1, zm2|T_sae); +vcvttpd2uqqs(zm1, ptr[rax+128]); +vcvttpd2uqqs(zm1, ptr_b[rax+128]); +// +vcvtph2ibs(xm1, xm2); +vcvtph2ibs(xm1, ptr[rax+128]); +vcvtph2ibs(xm1, ptr_b[rax+128]); + +vcvtph2ibs(ym1, ym2); +vcvtph2ibs(ym1, ym2|T_rd_sae); +vcvtph2ibs(ym1, ptr[rax+128]); +vcvtph2ibs(ym1, ptr_b[rax+128]); + +vcvtph2ibs(zm1, zm2); +vcvtph2ibs(zm1, zm2|T_ru_sae); +vcvtph2ibs(zm1, ptr[rax+128]); +vcvtph2ibs(zm1, ptr_b[rax+128]); +// +vcvtph2iubs(xm1, xm2); +vcvtph2iubs(xm1, ptr[rax+128]); +vcvtph2iubs(xm1, ptr_b[rax+128]); + +vcvtph2iubs(ym1, ym2); +vcvtph2iubs(ym1, ym2|T_rd_sae); +vcvtph2iubs(ym1, ptr[rax+128]); +vcvtph2iubs(ym1, ptr_b[rax+128]); + +vcvtph2iubs(zm1, zm2); +vcvtph2iubs(zm1, zm2|T_ru_sae); +vcvtph2iubs(zm1, ptr[rax+128]); +vcvtph2iubs(zm1, ptr_b[rax+128]); +// +vcvttph2ibs(xm1, xm2); +vcvttph2ibs(xm1, ptr[rax+128]); +vcvttph2ibs(xm1, ptr_b[rax+128]); + +vcvttph2ibs(ym1, ym2); +vcvttph2ibs(ym1, ym2|T_rd_sae); +vcvttph2ibs(ym1, ptr[rax+128]); +vcvttph2ibs(ym1, ptr_b[rax+128]); + +vcvttph2ibs(zm1, zm2); +vcvttph2ibs(zm1, zm2|T_ru_sae); +vcvttph2ibs(zm1, ptr[rax+128]); +vcvttph2ibs(zm1, ptr_b[rax+128]); +// +vcvttph2iubs(xm1, xm2); +vcvttph2iubs(xm1, ptr[rax+128]); +vcvttph2iubs(xm1, ptr_b[rax+128]); + +vcvttph2iubs(ym1, ym2); +vcvttph2iubs(ym1, ym2|T_rd_sae); +vcvttph2iubs(ym1, ptr[rax+128]); +vcvttph2iubs(ym1, ptr_b[rax+128]); + +vcvttph2iubs(zm1, zm2); +vcvttph2iubs(zm1, zm2|T_ru_sae); +vcvttph2iubs(zm1, ptr[rax+128]); +vcvttph2iubs(zm1, ptr_b[rax+128]); +// +vcvttps2dqs(xm1, xm2); +vcvttps2dqs(xm1, ptr[rax+128]); +vcvttps2dqs(xm1, ptr_b[rax+128]); + +vcvttps2dqs(ym1, ym2); +vcvttps2dqs(ym1, ym2|T_sae); +vcvttps2dqs(ym1, ptr[rax+128]); +vcvttps2dqs(ym1, ptr_b[rax+128]); + +vcvttps2dqs(zm1, zm2); +vcvttps2dqs(zm1, zm2|T_sae); +vcvttps2dqs(zm1, ptr[rax+128]); +vcvttps2dqs(zm1, ptr_b[rax+128]); +// +vcvtps2ibs(xm1, xm2); +vcvtps2ibs(xm1, ptr[rax+128]); +vcvtps2ibs(xm1, ptr_b[rax+128]); + +vcvtps2ibs(ym1, ym2); +vcvtps2ibs(ym1, ym2|T_rd_sae); +vcvtps2ibs(ym1, ptr[rax+128]); +vcvtps2ibs(ym1, ptr_b[rax+128]); + +vcvtps2ibs(zm1, zm2); +vcvtps2ibs(zm1, zm2|T_ru_sae); +vcvtps2ibs(zm1, ptr[rax+128]); +vcvtps2ibs(zm1, ptr_b[rax+128]); +// +vcvtps2iubs(xm1, xm2); +vcvtps2iubs(xm1, ptr[rax+128]); +vcvtps2iubs(xm1, ptr_b[rax+128]); + +vcvtps2iubs(ym1, ym2); +vcvtps2iubs(ym1, ym2|T_rd_sae); +vcvtps2iubs(ym1, ptr[rax+128]); +vcvtps2iubs(ym1, ptr_b[rax+128]); + +vcvtps2iubs(zm1, zm2); +vcvtps2iubs(zm1, zm2|T_ru_sae); +vcvtps2iubs(zm1, ptr[rax+128]); +vcvtps2iubs(zm1, ptr_b[rax+128]); +// +vcvttps2ibs(xm1, xm2); +vcvttps2ibs(xm1, ptr[rax+128]); +vcvttps2ibs(xm1, ptr_b[rax+128]); + +vcvttps2ibs(ym1, ym2); +vcvttps2ibs(ym1, ym2|T_rd_sae); +vcvttps2ibs(ym1, ptr[rax+128]); +vcvttps2ibs(ym1, ptr_b[rax+128]); + +vcvttps2ibs(zm1, zm2); +vcvttps2ibs(zm1, zm2|T_ru_sae); +vcvttps2ibs(zm1, ptr[rax+128]); +vcvttps2ibs(zm1, ptr_b[rax+128]); +// +vcvttps2iubs(xm1, xm2); +vcvttps2iubs(xm1, ptr[rax+128]); +vcvttps2iubs(xm1, ptr_b[rax+128]); + +vcvttps2iubs(ym1, ym2); +vcvttps2iubs(ym1, ym2|T_rd_sae); +vcvttps2iubs(ym1, ptr[rax+128]); +vcvttps2iubs(ym1, ptr_b[rax+128]); + +vcvttps2iubs(zm1, zm2); +vcvttps2iubs(zm1, zm2|T_ru_sae); +vcvttps2iubs(zm1, ptr[rax+128]); +vcvttps2iubs(zm1, ptr_b[rax+128]); |