aboutsummaryrefslogtreecommitdiffhomepage
path: root/xbyak/xbyak.h
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <[email protected]>2024-10-15 10:02:23 +0900
committerMITSUNARI Shigeo <[email protected]>2024-10-15 10:02:23 +0900
commit3ee31be62de4d3cc07289724da8baaddb3083834 (patch)
tree3256a36e612750cdc6a1e3f9db2e07edb4ae747f /xbyak/xbyak.h
parentd067f0d3f55696ae8bc9a25ad7012ee80f221d54 (diff)
parent2d70c949056ef78d0ffe9b7231544fdab6c3fdc0 (diff)
downloadxbyak-3ee31be62de4d3cc07289724da8baaddb3083834.tar.gz
xbyak-3ee31be62de4d3cc07289724da8baaddb3083834.zip
Merge branch 'dev'v7.20
Diffstat (limited to 'xbyak/xbyak.h')
-rw-r--r--xbyak/xbyak.h95
1 files changed, 74 insertions, 21 deletions
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 552e451..c0bd83e 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -155,7 +155,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
- VERSION = 0x7100 /* 0xABCD = A.BC(.D) */
+ VERSION = 0x7200 /* 0xABCD = A.BC(.D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@@ -232,6 +232,7 @@ enum {
ERR_CANT_USE_REX2,
ERR_INVALID_DFV,
ERR_INVALID_REG_IDX,
+ ERR_BAD_ENCODING_MODE,
ERR_INTERNAL // Put it at last.
};
@@ -290,6 +291,7 @@ inline const char *ConvertErrorToString(int err)
"can't use rex2",
"invalid dfv",
"invalid reg index",
+ "bad encoding mode",
"internal error"
};
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
@@ -1673,7 +1675,9 @@ inline const uint8_t* Label::getAddress() const
typedef enum {
DefaultEncoding,
VexEncoding,
- EvexEncoding
+ EvexEncoding,
+ PreAVX10v2Encoding,
+ AVX10v2Encoding
} PreferredEncoding;
class CodeGenerator : public CodeArray {
@@ -2661,21 +2665,24 @@ private:
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
opVex(x, 0, addr, type, code);
}
- void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding encoding, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0)
+ void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding enc, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0)
{
- opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding, typeVex, typeEvex, sel), code, imm);
+ opAVX_X_X_XM(x1, x2, op, type | orEvexIf(enc, typeVex, typeEvex, sel), code, imm);
}
- int orEvexIf(PreferredEncoding encoding, uint64_t typeVex, uint64_t typeEvex, int sel) {
- if (encoding == DefaultEncoding) {
- encoding = defaultEncoding_[sel];
+ PreferredEncoding getEncoding(PreferredEncoding enc, int sel) const
+ {
+ if (enc == DefaultEncoding) {
+ enc = defaultEncoding_[sel];
}
- if (encoding == EvexEncoding) {
+ if ((sel == 0 && enc != VexEncoding && enc != EvexEncoding) || (sel == 1 && enc != PreAVX10v2Encoding && enc != AVX10v2Encoding)) XBYAK_THROW_RET(ERR_BAD_ENCODING_MODE, VexEncoding)
#ifdef XBYAK_DISABLE_AVX512
- XBYAK_THROW(ERR_EVEX_IS_INVALID)
+ if (enc == EvexEncoding || enc == AVX10v2Encoding) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, enc) // value-returning function: use the _RET form, like the mode check above
#endif
- return T_MUST_EVEX | typeEvex;
- }
- return typeVex;
+ return enc;
+ }
+ uint64_t orEvexIf(PreferredEncoding enc, uint64_t typeVex, uint64_t typeEvex, int sel) {
+ enc = getEncoding(enc, sel);
+ return ((sel == 0 && enc == VexEncoding) || (sel == 1 && enc != AVX10v2Encoding)) ? typeVex : (T_MUST_EVEX | typeEvex);
}
void opInOut(const Reg& a, const Reg& d, uint8_t code)
{
@@ -3132,8 +3139,8 @@ public:
#endif
, isDefaultJmpNEAR_(false)
{
- // select avx512-vnni, vmpsadbw(avx)
setDefaultEncoding();
+ setDefaultEncodingAVX10();
labelMgr_.set(this);
}
void reset()
@@ -3170,16 +3177,20 @@ public:
#undef jnl
#endif
- // set default encoding
- // vnniEnc : control AVX512_VNNI (evex:default) or AVX-VNNI (vex)
- // avx10Enc : control mpsadbw, AVX-VNNI-INT8 (vex:default) or AVX10.2 (evex)
- void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = VexEncoding)
- { defaultEncoding_[0] = vnniEnc; defaultEncoding_[1] = avx10Enc; }
-
- void sha1msg12(const Xmm& x, const Operand& op)
+ // set default encoding of VNNI
+ // EvexEncoding : AVX512_VNNI, VexEncoding : AVX-VNNI
+ void setDefaultEncoding(PreferredEncoding enc = EvexEncoding)
+ {
+ if (enc != VexEncoding && enc != EvexEncoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE)
+ defaultEncoding_[0] = enc;
+ }
+ // set default encoding of AVX10 : PreAVX10v2Encoding (default) : AVX-VNNI-INT8/AVX512-FP16, AVX10v2Encoding : AVX10.2
+ void setDefaultEncodingAVX10(PreferredEncoding enc = PreAVX10v2Encoding)
{
- opROO(Reg(), op, x, T_MUST_EVEX, 0xD9);
+ if (enc != PreAVX10v2Encoding && enc != AVX10v2Encoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE)
+ defaultEncoding_[1] = enc;
}
+
void bswap(const Reg32e& r)
{
int idx = r.getIdx();
@@ -3192,6 +3203,48 @@ public:
}
db(0xC8 + (idx & 7));
}
+ // AVX10 zero-extending for vmovd, vmovw
+ void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding enc, int bit)
+ {
+ const Operand *p1 = &op1;
+ const Operand *p2 = &op2;
+ bool rev = false;
+ if (p1->isMEM()) {
+ std::swap(p1, p2);
+ rev = true;
+ }
+ if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
+ if (p1->isXMM()) {
+ std::swap(p1, p2);
+ rev = !rev;
+ }
+ int sel = -1;
+ if (getEncoding(enc, 1) == AVX10v2Encoding) {
+ if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev);
+ } else {
+ if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev);
+ }
+ if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
+ opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);
+ }
+ void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
+ {
+ const uint64_t typeTbl[] = {
+ T_EVEX|T_66|T_0F|T_W0|T_N4, T_EVEX|T_66|T_0F|T_W0|T_N4, // legacy, avx, avx512
+ T_MUST_EVEX|T_66|T_0F|T_EW0|T_N4, T_MUST_EVEX|T_F3|T_0F|T_EW0|T_N4, // avx10.2
+ };
+ const int codeTbl[] = { 0x7E, 0x6E, 0xD6, 0x7E };
+ opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 32);
+ }
+ void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
+ {
+ const uint64_t typeTbl[] = {
+ T_MUST_EVEX|T_66|T_MAP5|T_N2, T_MUST_EVEX|T_66|T_MAP5|T_N2, // avx512-fp16
+ T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, // avx10.2
+ };
+ const int codeTbl[] = { 0x7E, 0x6E, 0x7E, 0x6E };
+ opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 16|32|64);
+ }
/*
use single byte nop if useMultiByteNop = false
*/