diff options
author | Srinivas Putta <[email protected]> | 2022-10-03 14:09:09 -0700 |
---|---|---|
committer | Srinivas Putta <[email protected]> | 2022-10-03 14:09:09 -0700 |
commit | cd14d07b1cf30638261116873c5c2b2e7c691d98 (patch) | |
tree | 2ae255c088715f08430154db68830e10d9852438 | |
parent | 7811f593c0b1114266060cc5fda8d96d9d649b83 (diff) | |
download | xbyak-cd14d07b1cf30638261116873c5c2b2e7c691d98.tar.gz xbyak-cd14d07b1cf30638261116873c5c2b2e7c691d98.zip |
add AMX-FP16 instruction
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 1 | ||||
-rw-r--r-- | xbyak/xbyak_util.h | 2 |
2 files changed, 3 insertions, 0 deletions
```diff
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index c63ca2b..f8c098e 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1655,6 +1655,7 @@ void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T
 void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }
 void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }
 void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }
+void tdpfp16ps(const Tmm &x1, const Tmm &x2, const Tmm &x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5c); }
 #else
 void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
 void jcxz(const Label& label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h
index 12c0068..8ab92e9 100644
--- a/xbyak/xbyak_util.h
+++ b/xbyak/xbyak_util.h
@@ -410,6 +410,7 @@ public:
 XBYAK_DEFINE_TYPE(65, tMOVDIRI);
 XBYAK_DEFINE_TYPE(66, tMOVDIR64B);
 XBYAK_DEFINE_TYPE(67, tCLZERO); // AMD Zen
+XBYAK_DEFINE_TYPE(68, tAMX_FP16);
 
 #undef XBYAK_SPLIT_ID
 #undef XBYAK_DEFINE_TYPE
@@ -553,6 +554,7 @@ public:
 if (type_ & tAVX512F) {
 if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
 }
+if (EAX & (1U << 21)) type_ |= tAMX_FP16;
 }
 }
 setFamily();
```