about | summary | refs | log | tree | commit | diff | homepage
path: root/test/avx10/bf16.txt
diff options
context:
space:
mode:
author MITSUNARI Shigeo <[email protected]> 2024-10-11 12:21:48 +0900
committer MITSUNARI Shigeo <[email protected]> 2024-10-13 13:51:06 +0900
commit a84866bcbc8411416e51f53b210c7d9f06e3e763 (patch)
tree 73ca1567c1bdc39b53cea1b1ae216f1e09c5096f /test/avx10/bf16.txt
parent 3ca7e64c63daac8c3dd1c3cbafdc26ac011fa6ab (diff)
download xbyak-a84866bcbc8411416e51f53b210c7d9f06e3e763.tar.gz
xbyak-a84866bcbc8411416e51f53b210c7d9f06e3e763.zip
add vf[,n]m[add,sub][132,213,231]nebf16
Diffstat (limited to 'test/avx10/bf16.txt')
-rw-r--r-- test/avx10/bf16.txt 60
1 files changed, 60 insertions, 0 deletions
diff --git a/test/avx10/bf16.txt b/test/avx10/bf16.txt
index d8f4c5a..7dcdb25 100644
--- a/test/avx10/bf16.txt
+++ b/test/avx10/bf16.txt
@@ -32,3 +32,63 @@ vsubnepbf16(xm1, xm2, xm3);
vsubnepbf16(ym1|k1, ym2, ptr[rax+128]);
vsubnepbf16(ym1|k1, ym2, ptr_b[rax+128]);
vsubnepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
+// madd
+vfmadd132nepbf16(xm1, xm2, xm3);
+vfmadd132nepbf16(ym1|k1, ym2, ptr[rax+128]);
+vfmadd132nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
+vfmadd132nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
+
+vfmadd213nepbf16(xm1, xm2, xm3);
+vfmadd213nepbf16(ym1|k1, ym2, ptr[rax+128]);
+vfmadd213nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
+vfmadd213nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
+
+vfmadd231nepbf16(xm1, xm2, xm3);
+vfmadd231nepbf16(ym1|k1, ym2, ptr[rax+128]);
+vfmadd231nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
+vfmadd231nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
+// nmadd
+vfnmadd132nepbf16(xm1, xm2, xm3);
+vfnmadd132nepbf16(ym1|k1, ym2, ptr[rax+128]);
+vfnmadd132nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
+vfnmadd132nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
+
+vfnmadd213nepbf16(xm1, xm2, xm3);
+vfnmadd213nepbf16(ym1|k1, ym2, ptr[rax+128]);
+vfnmadd213nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
+vfnmadd213nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
+
+vfnmadd231nepbf16(xm1, xm2, xm3);
+vfnmadd231nepbf16(ym1|k1, ym2, ptr[rax+128]);
+vfnmadd231nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
+vfnmadd231nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
+// msub
+vfmsub132nepbf16(xm1, xm2, xm3);
+vfmsub132nepbf16(ym1|k1, ym2, ptr[rax+128]);
+vfmsub132nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
+vfmsub132nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
+
+vfmsub213nepbf16(xm1, xm2, xm3);
+vfmsub213nepbf16(ym1|k1, ym2, ptr[rax+128]);
+vfmsub213nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
+vfmsub213nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
+
+vfmsub231nepbf16(xm1, xm2, xm3);
+vfmsub231nepbf16(ym1|k1, ym2, ptr[rax+128]);
+vfmsub231nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
+vfmsub231nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
+// nmsub
+vfnmsub132nepbf16(xm1, xm2, xm3);
+vfnmsub132nepbf16(ym1|k1, ym2, ptr[rax+128]);
+vfnmsub132nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
+vfnmsub132nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
+
+vfnmsub213nepbf16(xm1, xm2, xm3);
+vfnmsub213nepbf16(ym1|k1, ym2, ptr[rax+128]);
+vfnmsub213nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
+vfnmsub213nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
+
+vfnmsub231nepbf16(xm1, xm2, xm3);
+vfnmsub231nepbf16(ym1|k1, ym2, ptr[rax+128]);
+vfnmsub231nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
+vfnmsub231nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);