diff options
author | MITSUNARI Shigeo <[email protected]> | 2024-10-11 12:21:48 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2024-10-13 13:51:06 +0900 |
commit | a84866bcbc8411416e51f53b210c7d9f06e3e763 (patch) | |
tree | 73ca1567c1bdc39b53cea1b1ae216f1e09c5096f /test/avx10/bf16.txt | |
parent | 3ca7e64c63daac8c3dd1c3cbafdc26ac011fa6ab (diff) | |
download | xbyak-a84866bcbc8411416e51f53b210c7d9f06e3e763.tar.gz xbyak-a84866bcbc8411416e51f53b210c7d9f06e3e763.zip |
add vf[,n]m[add,sub][132,213,231]nepbf16
Diffstat (limited to 'test/avx10/bf16.txt')
-rw-r--r-- | test/avx10/bf16.txt | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/test/avx10/bf16.txt b/test/avx10/bf16.txt index d8f4c5a..7dcdb25 100644 --- a/test/avx10/bf16.txt +++ b/test/avx10/bf16.txt @@ -32,3 +32,63 @@ vsubnepbf16(xm1, xm2, xm3); vsubnepbf16(ym1|k1, ym2, ptr[rax+128]); vsubnepbf16(ym1|k1, ym2, ptr_b[rax+128]); vsubnepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); +// madd +vfmadd132nepbf16(xm1, xm2, xm3); +vfmadd132nepbf16(ym1|k1, ym2, ptr[rax+128]); +vfmadd132nepbf16(ym1|k1, ym2, ptr_b[rax+128]); +vfmadd132nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); + +vfmadd213nepbf16(xm1, xm2, xm3); +vfmadd213nepbf16(ym1|k1, ym2, ptr[rax+128]); +vfmadd213nepbf16(ym1|k1, ym2, ptr_b[rax+128]); +vfmadd213nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); + +vfmadd231nepbf16(xm1, xm2, xm3); +vfmadd231nepbf16(ym1|k1, ym2, ptr[rax+128]); +vfmadd231nepbf16(ym1|k1, ym2, ptr_b[rax+128]); +vfmadd231nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); +// nmadd +vfnmadd132nepbf16(xm1, xm2, xm3); +vfnmadd132nepbf16(ym1|k1, ym2, ptr[rax+128]); +vfnmadd132nepbf16(ym1|k1, ym2, ptr_b[rax+128]); +vfnmadd132nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); + +vfnmadd213nepbf16(xm1, xm2, xm3); +vfnmadd213nepbf16(ym1|k1, ym2, ptr[rax+128]); +vfnmadd213nepbf16(ym1|k1, ym2, ptr_b[rax+128]); +vfnmadd213nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); + +vfnmadd231nepbf16(xm1, xm2, xm3); +vfnmadd231nepbf16(ym1|k1, ym2, ptr[rax+128]); +vfnmadd231nepbf16(ym1|k1, ym2, ptr_b[rax+128]); +vfnmadd231nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); +// msub +vfmsub132nepbf16(xm1, xm2, xm3); +vfmsub132nepbf16(ym1|k1, ym2, ptr[rax+128]); +vfmsub132nepbf16(ym1|k1, ym2, ptr_b[rax+128]); +vfmsub132nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); + +vfmsub213nepbf16(xm1, xm2, xm3); +vfmsub213nepbf16(ym1|k1, ym2, ptr[rax+128]); +vfmsub213nepbf16(ym1|k1, ym2, ptr_b[rax+128]); +vfmsub213nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); + +vfmsub231nepbf16(xm1, xm2, xm3); +vfmsub231nepbf16(ym1|k1, ym2, ptr[rax+128]); +vfmsub231nepbf16(ym1|k1, ym2, ptr_b[rax+128]); +vfmsub231nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); +// nmsub 
+vfnmsub132nepbf16(xm1, xm2, xm3); +vfnmsub132nepbf16(ym1|k1, ym2, ptr[rax+128]); +vfnmsub132nepbf16(ym1|k1, ym2, ptr_b[rax+128]); +vfnmsub132nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); + +vfnmsub213nepbf16(xm1, xm2, xm3); +vfnmsub213nepbf16(ym1|k1, ym2, ptr[rax+128]); +vfnmsub213nepbf16(ym1|k1, ym2, ptr_b[rax+128]); +vfnmsub213nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); + +vfnmsub231nepbf16(xm1, xm2, xm3); +vfnmsub231nepbf16(ym1|k1, ym2, ptr[rax+128]); +vfnmsub231nepbf16(ym1|k1, ym2, ptr_b[rax+128]); +vfnmsub231nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]); |