aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorSrinivas Putta <[email protected]>2022-10-03 14:09:09 -0700
committerSrinivas Putta <[email protected]>2022-10-03 14:09:09 -0700
commitcd14d07b1cf30638261116873c5c2b2e7c691d98 (patch)
tree2ae255c088715f08430154db68830e10d9852438
parent7811f593c0b1114266060cc5fda8d96d9d649b83 (diff)
downloadxbyak-cd14d07b1cf30638261116873c5c2b2e7c691d98.tar.gz
xbyak-cd14d07b1cf30638261116873c5c2b2e7c691d98.zip
add AMX-FP16 instruction
-rw-r--r--xbyak/xbyak_mnemonic.h1
-rw-r--r--xbyak/xbyak_util.h2
2 files changed, 3 insertions, 0 deletions
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index c63ca2b..f8c098e 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1655,6 +1655,7 @@ void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T
void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }
void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }
void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }
+void tdpfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5c); } // AMX-FP16; same opcode 0x5c as tdpbf16ps, but with the T_F2 prefix instead of T_F3
#else
void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
void jcxz(const Label& label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h
index 12c0068..8ab92e9 100644
--- a/xbyak/xbyak_util.h
+++ b/xbyak/xbyak_util.h
@@ -410,6 +410,7 @@ public:
XBYAK_DEFINE_TYPE(65, tMOVDIRI);
XBYAK_DEFINE_TYPE(66, tMOVDIR64B);
XBYAK_DEFINE_TYPE(67, tCLZERO); // AMD Zen
+ XBYAK_DEFINE_TYPE(68, tAMX_FP16); // AMX-FP16; set when bit 21 of EAX is reported by the CPUID check below
#undef XBYAK_SPLIT_ID
#undef XBYAK_DEFINE_TYPE
@@ -553,6 +554,7 @@ public:
if (type_ & tAVX512F) {
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
}
+ if (EAX & (1U << 21)) type_ |= tAMX_FP16;
}
}
setFamily();