aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <[email protected]>2024-10-15 08:51:01 +0900
committerMITSUNARI Shigeo <[email protected]>2024-10-15 09:33:14 +0900
commitae76be35ac33f1fc1b94b866c6d85549969682a7 (patch)
treeeac1055fae07aba8349b9c01988cc91b0f739a48
parent0c2f7fc6dbd713b2d690a5859f562746b4dd568d (diff)
downloadxbyak-ae76be35ac33f1fc1b94b866c6d85549969682a7.tar.gz
xbyak-ae76be35ac33f1fc1b94b866c6d85549969682a7.zip
setDefaultEncoding has changed.
-rw-r--r--doc/changelog.md1
-rw-r--r--doc/usage.md10
-rw-r--r--readme.txt5
-rw-r--r--test/avx10_test.cpp4
-rw-r--r--test/test_by_xed.cpp2
-rw-r--r--xbyak/xbyak.h59
6 files changed, 46 insertions, 35 deletions
diff --git a/doc/changelog.md b/doc/changelog.md
index 5e25c2d..1d39ae6 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -1,5 +1,6 @@
# History
+* 2024/Oct/15 ver 7.11 Added full support for AVX10.2
* 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended.
* 2024/Oct/10 ver 7.09.1 fix the names of vpcompressb and vpcompressw
* 2024/Oct/08 ver 7.09 support YMM embedded rounding of AVX10.2 and fix some mnemonics with {sae}/{er}.
diff --git a/doc/usage.md b/doc/usage.md
index ef38d63..9015bff 100644
--- a/doc/usage.md
+++ b/doc/usage.md
@@ -111,13 +111,13 @@ vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64],
## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8 etc.
Some mnemonics have some types of encodings: VEX, EVEX, AVX10.2.
The functions for these mnemonics include an optional parameter as the last argument to specify the encoding.
-The default behavior depends on the order in which the instruction was introduced (whether VEX or EVEX came first),
+The default behavior depends on the order in which the instruction was introduced (whether VEX, EVEX or AVX10.2 came first),
and can be specified using setDefaultEncoding.
```
vpdpbusd(xm0, xm1, xm2); // default encoding: EVEX (AVX512-VNNI)
-vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
-vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX (AVX-VNNI)
+vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
+vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX (AVX-VNNI)
setDefaultEncoding(VexEncoding); // default encoding is VEX
vpdpbusd(xm0, xm1, xm2); // VEX
@@ -128,7 +128,7 @@ setDefaultEncoding(VexEncoding, AVX10v2Encoding); // use 2nd argument.
vmpsadbw(xm1, xm3, xm15, 3); // EVEX (AVX10.2)
```
-- `setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = VexEncoding)`
+- `setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = PreAVX10v2Encoding)`
Control the default encoding of mnemonics with `Xbyak::PreferredEncoding` param.
param|vnniEnc|avx10Enc
@@ -137,7 +137,7 @@ VexEncoding|AVX-VNNI|-
EvexEncoding|AVX512-VNNI|-
PreAVX10v2Encoding|-|AVX-VNNI-INT8, AVX512-FP16
AVX10v2Encoding|-|AVX10.2
-default|EvexEncoding|VexEncoding
+default|EvexEncoding|PreAVX10v2Encoding
mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud vpdpwsuds vpdpwusd vpdpwusds vpdpwuud, vpdpwuuds, vmovd, vmovw
### Remark
diff --git a/readme.txt b/readme.txt
index 417c50e..2fb242b 100644
--- a/readme.txt
+++ b/readme.txt
@@ -14,7 +14,7 @@
xbyak.hをインクルードするだけですぐ利用することができます。
C++の枠組み内で閉じているため、外部アセンブラは不要です。
32bit/64bit両対応です。
- 対応ニーモニック:特権命令除くx86, MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(一部)/AVX/AVX2/FMA/VEX-encoded GPR
+ 対応ニーモニック:特権命令除くx86, MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(一部)/AVX/AVX2/FMA/AVX-512/APX/AVX10.2
・Windows Xp(32bit, 64bit), Windows 7/Linux(32bit, 64bit)/Intel Mac対応
Windows Xp, Windows 7上ではVC2008, VC2010, VC2012
@@ -46,7 +46,7 @@ Linuxではmake installで/usr/local/include/xbyakにコピーされます。
-----------------------------------------------------------------------------
◎新機能
-APX/AVX10対応
+APX/AVX10.2対応
例外なしモード追加
XBYAK_NO_EXCEPTIONを定義してコンパイルするとgcc/clangで-fno-exceptionsオプションでコンパイルできます。
@@ -404,6 +404,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴
+2024/10/15 ver 7.11 AVX10.2完全サポート
2024/10/13 ver 7.10 AVX10 integer and fp16 vnni, mediaの新命令対応. setDefaultEncodingの拡張.
2024/10/10 ver 7.09.1 vpcompressbとvpcompresswの名前修正
2024/10/08 ver 7.09 AVX10.2のYMMレジスタの埋め込み丸め対応
diff --git a/test/avx10_test.cpp b/test/avx10_test.cpp
index 5f742fe..1ceb52a 100644
--- a/test/avx10_test.cpp
+++ b/test/avx10_test.cpp
@@ -234,10 +234,10 @@ CYBOZU_TEST_AUTO(vmpsadbw)
struct Code : Xbyak::CodeGenerator {
Code()
{
- setDefaultEncoding();
+ setDefaultEncodingAVX10();
vmpsadbw(xm1, xm3, xm15, 3); // vex(avx)
vmpsadbw(ym1, ym3, ptr[rax+128], 3); // vex(avx2)
- setDefaultEncoding(VexEncoding, EvexEncoding);
+ setDefaultEncodingAVX10(AVX10v2Encoding);
vmpsadbw(ym1, ym3, ym15, 3); // evex(avx10.2)
vmpsadbw(ym1, ym3, ptr[rax+128], 3); // evex(avx10.2)
}
diff --git a/test/test_by_xed.cpp b/test/test_by_xed.cpp
index af39296..9be9199 100644
--- a/test/test_by_xed.cpp
+++ b/test/test_by_xed.cpp
@@ -7,7 +7,7 @@ struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(4096*8)
{
- setDefaultEncoding(EvexEncoding, AVX10v2Encoding);
+ setDefaultEncodingAVX10(AVX10v2Encoding);
#include "tmp.cpp"
}
};
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 5367d83..b56bfb4 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -232,6 +232,7 @@ enum {
ERR_CANT_USE_REX2,
ERR_INVALID_DFV,
ERR_INVALID_REG_IDX,
+ ERR_BAD_ENCODING_MODE,
ERR_INTERNAL // Put it at last.
};
@@ -290,6 +291,7 @@ inline const char *ConvertErrorToString(int err)
"can't use rex2",
"invalid dfv",
"invalid reg index",
+ "bad encoding mode",
"internal error"
};
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
@@ -1674,7 +1676,7 @@ typedef enum {
DefaultEncoding,
VexEncoding,
EvexEncoding,
- PreAVX10v2Encoding = EvexEncoding,
+ PreAVX10v2Encoding,
AVX10v2Encoding
} PreferredEncoding;
@@ -2663,25 +2665,24 @@ private:
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
opVex(x, 0, addr, type, code);
}
- void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding encoding, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0)
+ void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding enc, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0)
{
- opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding, typeVex, typeEvex, sel), code, imm);
+ opAVX_X_X_XM(x1, x2, op, type | orEvexIf(enc, typeVex, typeEvex, sel), code, imm);
}
- PreferredEncoding getEncoding(PreferredEncoding encoding, int sel) const
+ PreferredEncoding getEncoding(PreferredEncoding enc, int sel) const
{
- if (encoding == DefaultEncoding) {
- encoding = defaultEncoding_[sel];
+ if (enc == DefaultEncoding) {
+ enc = defaultEncoding_[sel];
}
- if (encoding == EvexEncoding) {
+ if ((sel == 0 && enc != VexEncoding && enc != EvexEncoding) || (sel == 1 && enc != PreAVX10v2Encoding && enc != AVX10v2Encoding)) XBYAK_THROW_RET(ERR_BAD_ENCODING_MODE, VexEncoding)
#ifdef XBYAK_DISABLE_AVX512
- XBYAK_THROW(ERR_EVEX_IS_INVALID)
+ if (enc == EvexEncoding || enc == AVX10v2Encoding) XBYAK_THROW(ERR_EVEX_IS_INVALID)
#endif
- }
- return encoding;
+ return enc;
}
- uint64_t orEvexIf(PreferredEncoding encoding, uint64_t typeVex, uint64_t typeEvex, int sel) {
- bool isVex = getEncoding(encoding, sel) == VexEncoding;
- return isVex ? typeVex : T_MUST_EVEX | typeEvex;
+ uint64_t orEvexIf(PreferredEncoding enc, uint64_t typeVex, uint64_t typeEvex, int sel) {
+ enc = getEncoding(enc, sel);
+ return ((sel == 0 && enc == VexEncoding) || (sel == 1 && enc != AVX10v2Encoding)) ? typeVex : (T_MUST_EVEX | typeEvex);
}
void opInOut(const Reg& a, const Reg& d, uint8_t code)
{
@@ -3138,8 +3139,8 @@ public:
#endif
, isDefaultJmpNEAR_(false)
{
- // select avx512-vnni, vmpsadbw(avx)
setDefaultEncoding();
+ setDefaultEncodingAVX10();
labelMgr_.set(this);
}
void reset()
@@ -3176,11 +3177,19 @@ public:
#undef jnl
#endif
- // set default encoding
- // vnniEnc : AVX512_VNNI (default:EvexEncoding) or AVX-VNNI (VexEncoding)
- // avx10Enc : mpsadbw etc., AVX-VNNI-INT8/AVX512-FP16 (default:PreAVX10v2Encoding) or AVX10.2 (AVX10v2Encoding)
- void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = PreAVX10v2Encoding)
- { defaultEncoding_[0] = vnniEnc; defaultEncoding_[1] = avx10Enc; }
+ // set default encoding of VNNI
+ // EvexEncoding : AVX512_VNNI, VexEncoding : AVX-VNNI
+ void setDefaultEncoding(PreferredEncoding enc = EvexEncoding)
+ {
+ if (enc != VexEncoding && enc != EvexEncoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE)
+ defaultEncoding_[0] = enc;
+ }
+ // set default encoding of AVX10: PreAVX10v2Encoding (default) : AVX-VNNI-INT8/AVX512-FP16, AVX10v2Encoding : AVX10.2
+ void setDefaultEncodingAVX10(PreferredEncoding enc = PreAVX10v2Encoding)
+ {
+ if (enc != PreAVX10v2Encoding && enc != AVX10v2Encoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE)
+ defaultEncoding_[1] = enc;
+ }
void bswap(const Reg32e& r)
{
@@ -3195,7 +3204,7 @@ public:
db(0xC8 + (idx & 7));
}
// AVX10 zero-extending for vmovd, vmovw
- void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding encoding, int bit)
+ void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding enc, int bit)
{
const Operand *p1 = &op1;
const Operand *p2 = &op2;
@@ -3210,7 +3219,7 @@ public:
rev = !rev;
}
int sel = -1;
- if (getEncoding(encoding, 1) == AVX10v2Encoding) {
+ if (getEncoding(enc, 1) == AVX10v2Encoding) {
if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev);
} else {
if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev);
@@ -3218,23 +3227,23 @@ public:
if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);
}
- void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding)
+ void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
{
const uint64_t typeTbl[] = {
T_EVEX|T_66|T_0F|T_W0|T_N4, T_EVEX|T_66|T_0F|T_W0|T_N4, // legacy, avx, avx512
T_MUST_EVEX|T_66|T_0F|T_EW0|T_N4, T_MUST_EVEX|T_F3|T_0F|T_EW0|T_N4, // avx10.2
};
const int codeTbl[] = { 0x7E, 0x6E, 0xD6, 0x7E };
- opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 32);
+ opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 32);
}
- void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding)
+ void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
{
const uint64_t typeTbl[] = {
T_MUST_EVEX|T_66|T_MAP5|T_N2, T_MUST_EVEX|T_66|T_MAP5|T_N2, // avx512-fp16
T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, // avx10.2
};
const int codeTbl[] = { 0x7E, 0x6E, 0x7E, 0x6E };
- opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 16|32|64);
+ opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 16|32|64);
}
/*
use single byte nop if useMultiByteNop = false