diff options
author | Srinivas Putta <[email protected]> | 2022-10-03 14:09:09 -0700 |
---|---|---|
committer | Srinivas Putta <[email protected]> | 2022-10-03 14:09:09 -0700 |
commit | 564fe9acd37cfe3fa68f01c8cf6517a28b532e28 (patch) | |
tree | 521a4ab767b3fa6ee03cd65d1411e4695783fcd5 | |
parent | cd14d07b1cf30638261116873c5c2b2e7c691d98 (diff) | |
download | xbyak-564fe9acd37cfe3fa68f01c8cf6517a28b532e28.tar.gz xbyak-564fe9acd37cfe3fa68f01c8cf6517a28b532e28.zip |
add AVX-VNNI-INT8 instructions
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 6 | ||||
-rw-r--r-- | xbyak/xbyak_util.h | 2 |
2 files changed, 8 insertions, 0 deletions
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index f8c098e..ed25e8c 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1192,10 +1192,16 @@ void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1 void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x65); } void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); } void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); } +void vpdpbssd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_W0 | T_YMM, 0x50); } +void vpdpbssds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_W0 | T_YMM, 0x51); } +void vpdpbsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0x50); } +void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0x51); } void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x50, encoding); } void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x51, encoding); } void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x52, encoding); } void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x53, encoding); } +void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x50); } +void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x51); } void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); } void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); } void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); } diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h index 8ab92e9..bde9c5f 100644 --- a/xbyak/xbyak_util.h +++ b/xbyak/xbyak_util.h @@ -411,6 +411,7 @@ public: XBYAK_DEFINE_TYPE(66, tMOVDIR64B); XBYAK_DEFINE_TYPE(67, tCLZERO); // AMD Zen XBYAK_DEFINE_TYPE(68, tAMX_FP16); + XBYAK_DEFINE_TYPE(69, tAVX_VNNI_INT8); #undef XBYAK_SPLIT_ID #undef XBYAK_DEFINE_TYPE @@ -555,6 +556,7 @@ public: if (EAX & (1U << 5)) type_ |= tAVX512_BF16; } if (EAX & (1U << 21)) type_ |= tAMX_FP16; + if (EDX & (1U << 4)) type_ |= tAVX_VNNI_INT8; } } setFamily(); |