about | summary | refs | log | tree | commit | diff | homepage
path: root/test
diff options
context:
space:
mode:
author: MITSUNARI Shigeo <[email protected]> 2024-10-11 14:40:00 +0900
committer: MITSUNARI Shigeo <[email protected]> 2024-10-13 13:51:06 +0900
commit: 842c3cc83f209c0789f15a809ae45cf30a8b2f5f (patch)
tree: 7f0ca97100c6eae52d6394681607cae34537675a /test
parent: 6dc564185b85d16e240f9f15aad5be08036176d9 (diff)
download: xbyak-842c3cc83f209c0789f15a809ae45cf30a8b2f5f.tar.gz
download: xbyak-842c3cc83f209c0789f15a809ae45cf30a8b2f5f.zip
support all avx10 bf16 instructions
Diffstat (limited to 'test')
-rw-r--r-- test/avx10/bf16.txt 99
1 files changed, 99 insertions, 0 deletions
diff --git a/test/avx10/bf16.txt b/test/avx10/bf16.txt
index f3e44a6..c544e02 100644
--- a/test/avx10/bf16.txt
+++ b/test/avx10/bf16.txt
@@ -109,3 +109,102 @@ vfpclasspbf16(k5|k5, yword[rax+128], 10);
vfpclasspbf16(k6|k5, yword_b[rax+128], 11);
vfpclasspbf16(k7|k5, zword[rax+128], 12);
vfpclasspbf16(k7|k5, zword_b[rax+128], 13);
+
+vcomsbf16(xm2, xm3);
+vcomsbf16(xm2, ptr[rax+128]);
+
+vgetexppbf16(xm1|k3, xmm2);
+vgetexppbf16(xm1|k3, ptr[rax+128]);
+vgetexppbf16(xm1|k3, ptr_b[rax+128]);
+
+vgetexppbf16(ym1|k3, ymm2);
+vgetexppbf16(ym1|k3, ptr[rax+128]);
+vgetexppbf16(ym1|k3, ptr_b[rax+128]);
+
+vgetexppbf16(zm1|k3, zmm2);
+vgetexppbf16(zm1|k3, ptr[rax+128]);
+vgetexppbf16(zm1|k3, ptr_b[rax+128]);
+
+vgetmantpbf16(xm1|k3, xmm2, 3);
+vgetmantpbf16(xm1|k3, ptr[rax+128], 5);
+vgetmantpbf16(xm1|k3, ptr_b[rax+128], 9);
+
+vgetmantpbf16(ym1|k3, ymm2, 3);
+vgetmantpbf16(ym1|k3, ptr[rax+128], 5);
+vgetmantpbf16(ym1|k3, ptr_b[rax+128], 9);
+
+vgetmantpbf16(zm1|k3, zmm2, 3);
+vgetmantpbf16(zm1|k3, ptr[rax+128], 5);
+vgetmantpbf16(zm1|k3, ptr_b[rax+128], 9);
+
+vrcppbf16(xm1|k5, xm2);
+vrcppbf16(xm1|k5, ptr[rcx+128]);
+vrcppbf16(xm1|k5, ptr_b[rcx+128]);
+
+vrcppbf16(ym1|k5, ym2);
+vrcppbf16(ym1|k5, ptr[rcx+128]);
+vrcppbf16(ym1|k5, ptr_b[rcx+128]);
+
+vrcppbf16(zm1|k5, zm2);
+vrcppbf16(zm1|k5, ptr[rcx+128]);
+vrcppbf16(zm1|k5, ptr_b[rcx+128]);
+
+vreducenepbf16(xm1|k4, xm2, 1);
+vreducenepbf16(xm1|k4, ptr[rax+128], 1);
+vreducenepbf16(xm1|k4, ptr_b[rax+128], 1);
+
+vreducenepbf16(ym1|k4, ym2, 1);
+vreducenepbf16(ym1|k4, ptr[rax+128], 1);
+vreducenepbf16(ym1|k4, ptr_b[rax+128], 1);
+
+vreducenepbf16(zm1|k4, zm2, 1);
+vreducenepbf16(zm1|k4, ptr[rax+128], 1);
+vreducenepbf16(zm1|k4, ptr_b[rax+128], 1);
+
+vrndscalenepbf16(xm1|k4, xm2, 1);
+vrndscalenepbf16(xm1|k4, ptr[rax+128], 1);
+vrndscalenepbf16(xm1|k4, ptr_b[rax+128], 1);
+
+vrndscalenepbf16(ym1|k4, ym2, 1);
+vrndscalenepbf16(ym1|k4, ptr[rax+128], 1);
+vrndscalenepbf16(ym1|k4, ptr_b[rax+128], 1);
+
+vrndscalenepbf16(zm1|k4, zm2, 1);
+vrndscalenepbf16(zm1|k4, ptr[rax+128], 1);
+vrndscalenepbf16(zm1|k4, ptr_b[rax+128], 1);
+
+vrsqrtpbf16(xm1|k5, xm2);
+vrsqrtpbf16(xm1|k5, ptr[rcx+128]);
+vrsqrtpbf16(xm1|k5, ptr_b[rcx+128]);
+
+vrsqrtpbf16(ym1|k5, ym2);
+vrsqrtpbf16(ym1|k5, ptr[rcx+128]);
+vrsqrtpbf16(ym1|k5, ptr_b[rcx+128]);
+
+vrsqrtpbf16(zm1|k5, zm2);
+vrsqrtpbf16(zm1|k5, ptr[rcx+128]);
+vrsqrtpbf16(zm1|k5, ptr_b[rcx+128]);
+
+vscalefpbf16(xm1|k5, xm5, xm2);
+vscalefpbf16(xm1|k5, xm5, ptr[rcx+128]);
+vscalefpbf16(xm1|k5, xm5, ptr_b[rcx+128]);
+
+vscalefpbf16(ym1|k5, ym9, ym2);
+vscalefpbf16(ym1|k5, ym9, ptr[rcx+128]);
+vscalefpbf16(ym1|k5, ym9, ptr_b[rcx+128]);
+
+vscalefpbf16(zm1|k5, zm30, zm2);
+vscalefpbf16(zm1|k5, zm30, ptr[rcx+128]);
+vscalefpbf16(zm1|k5, zm30, ptr_b[rcx+128]);
+
+vsqrtnepbf16(xm5|k3, xmm4);
+vsqrtnepbf16(xm5|k3, ptr[rax+128]);
+vsqrtnepbf16(xm5|k3, ptr_b[rax+128]);
+
+vsqrtnepbf16(ym5|k3, ymm4);
+vsqrtnepbf16(ym5|k3, ptr[rax+128]);
+vsqrtnepbf16(ym5|k3, ptr_b[rax+128]);
+
+vsqrtnepbf16(zm5|k3, zmm4);
+vsqrtnepbf16(zm5|k3, ptr[rax+128]);
+vsqrtnepbf16(zm5|k3, ptr_b[rax+128]);