aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <[email protected]>2022-05-12 15:22:33 +0900
committerMITSUNARI Shigeo <[email protected]>2022-05-12 15:22:33 +0900
commitec15751df2fdf9d436c8acb154a0cfbcf7f26c0f (patch)
tree558953ac14d989ee3c54bd0287c6b67e9f7e98a4
parent3ea8e45d33bbe22b4130d1bf01edc91b8aadf7fa (diff)
parent4831b3fb3d0e10c5f22f1d62a3ffc93635a85781 (diff)
downloadxbyak-ec15751df2fdf9d436c8acb154a0cfbcf7f26c0f.tar.gz
xbyak-ec15751df2fdf9d436c8acb154a0cfbcf7f26c0f.zip
Merge branch 'dev'v6.05
-rw-r--r--CMakeLists.txt2
-rw-r--r--doc/changelog.md1
-rw-r--r--gen/Makefile2
-rw-r--r--gen/gen_code.cpp4
-rw-r--r--meson.build2
-rw-r--r--readme.md2
-rw-r--r--readme.txt3
-rw-r--r--sample/test_util.cpp4
-rw-r--r--test/make_nm.cpp1
-rw-r--r--test/misc.cpp43
-rw-r--r--xbyak/xbyak.h2
-rw-r--r--xbyak/xbyak_mnemonic.h6
-rw-r--r--xbyak/xbyak_util.h278
13 files changed, 267 insertions, 83 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8045d6e..d970fc9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 2.6...3.0.2)
-project(xbyak LANGUAGES CXX VERSION 6.041)
+project(xbyak LANGUAGES CXX VERSION 6.05)
file(GLOB headers xbyak/*.h)
diff --git a/doc/changelog.md b/doc/changelog.md
index c586e1d..c913d1d 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -1,5 +1,6 @@
# History
+* 2022/May/12 ver 6.05 add movdiri, movdir64b, clwb, cldemote
* 2022/Apr/22 ver 6.041 consider Android and mingw
* 2022/Apr/05 ver 6.04 add tpause, umonitor, umwait
* 2022/Mar/08 ver 6.03 MmapAllocator supports memfd with user-defined strings.
diff --git a/gen/Makefile b/gen/Makefile
index ae7605e..97a6846 100644
--- a/gen/Makefile
+++ b/gen/Makefile
@@ -30,7 +30,7 @@ VER=$(shell head -n 1 ../xbyak/xbyak_mnemonic.h|grep -o "[0-9.]*")
sed -i -e "s/version: '[0-9.]*',/version: '$(VER)',/" $@
../readme.md: $(TARGET)
- sed -l 2 -i -e "s/Xbyak [0-9.]*/Xbyak $(VER)/" $@
+ sed -l 2 -i -e "s/# Xbyak [0-9.]*/# Xbyak $(VER)/" $@
../readme.txt: $(TARGET)
sed -l 2 -i -e "s/Xbyak [0-9.]*/Xbyak $(VER)/" $@
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
index e23b552..4cfcc1e 100644
--- a/gen/gen_code.cpp
+++ b/gen/gen_code.cpp
@@ -1051,6 +1051,8 @@ void put()
puts("void cmpxchg(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xB0 | (reg.isBit(8) ? 0 : 1)); }");
puts("void movbe(const Reg& reg, const Address& addr) { opModM(addr, reg, 0x0F, 0x38, 0xF0); }");
puts("void movbe(const Address& addr, const Reg& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF1); }");
+ puts("void movdiri(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF9); }");
+ puts("void movdir64b(const Reg& reg, const Address& addr) { db(0x66); opModM(addr, reg.cvt32(), 0x0F, 0x38, 0xF8); }");
puts("void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }");
puts("void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); }");
puts("void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xC7); }");
@@ -1088,6 +1090,8 @@ void put()
puts("void tpause(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x66); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
puts("void umonitor(const Reg& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) int bit = r.getBit(); if (BIT != bit) { if ((BIT == 32 && bit == 16) || (BIT == 64 && bit == 32)) { db(0x67); } else { XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) } } db(0xF3); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
puts("void umwait(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xF2); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
+ puts("void clwb(const Address& addr) { db(0x66); opMIB(addr, esi, 0x0F, 0xAE); }");
+ puts("void cldemote(const Address& addr) { opMIB(addr, eax, 0x0F, 0x1C); }");
}
{
const struct Tbl {
diff --git a/meson.build b/meson.build
index 065f2fa..da7ce84 100644
--- a/meson.build
+++ b/meson.build
@@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
- version: '6.041',
+ version: '6.05',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)
diff --git a/readme.md b/readme.md
index 1e64d05..ac39de3 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,5 @@
-# Xbyak 6.041 [![Badge Build]][Build Status]
+# Xbyak 6.05 [![Badge Build]][Build Status]
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
diff --git a/readme.txt b/readme.txt
index 3c3c9b6..dd40097 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,5 +1,5 @@
- C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.041
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.05
-----------------------------------------------------------------------------
◎概要
@@ -400,6 +400,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴
+2022/05/12 ver 6.05 movdiri, movdir64b, clwb, cldemoteを追加
2022/04/05 ver 6.04 tpause, umonitor, umwaitを追加
2022/03/08 ver 6.03 MmapAllocatorがmemfd用のユーザ定義文字列をサポート
2022/01/28 ver 6.02 dispacementの32bit範囲チェックの厳密化
diff --git a/sample/test_util.cpp b/sample/test_util.cpp
index 60cf77a..7c930f0 100644
--- a/sample/test_util.cpp
+++ b/sample/test_util.cpp
@@ -84,6 +84,10 @@ void putCPUinfo()
{ Cpu::tAVX_VNNI, "avx_vnni" },
{ Cpu::tAVX512_FP16, "avx512_fp16" },
{ Cpu::tWAITPKG, "waitpkg" },
+ { Cpu::tCLFLUSHOPT, "clflushopt" },
+ { Cpu::tCLDEMOTE, "cldemote" },
+ { Cpu::tMOVDIRI, "movdiri" },
+ { Cpu::tMOVDIR64B, "movdir64b" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
diff --git a/test/make_nm.cpp b/test/make_nm.cpp
index e1cf112..5cfd441 100644
--- a/test/make_nm.cpp
+++ b/test/make_nm.cpp
@@ -637,6 +637,7 @@ class Test {
"fstsw",
"fnstsw",
"fxrstor",
+ "clwb",
};
for (size_t i = 0; i < NUM_OF_ARRAY(memTbl); i++) {
put(memTbl[i], MEM);
diff --git a/test/misc.cpp b/test/misc.cpp
index f46239b..ec08e49 100644
--- a/test/misc.cpp
+++ b/test/misc.cpp
@@ -1924,3 +1924,46 @@ CYBOZU_TEST_AUTO(waitpkg)
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
+
+CYBOZU_TEST_AUTO(misc) // compares the bytes generated for cldemote/movdiri/movdir64b with a fixed table
+{
+ struct Code : Xbyak::CodeGenerator {
+ Code() // emits the instructions under test, in the same order as the rows of tbl below
+ {
+ cldemote(ptr[eax+esi*4+0x12]);
+ movdiri(ptr[edx+esi*2+4], eax);
+ movdir64b(eax, ptr[edx]);
+#ifdef XBYAK64
+ cldemote(ptr[rax+rdi*8+0x123]);
+ movdiri(ptr[rax+r12], r9);
+ movdiri(ptr[rax+r12*2+4], r9d);
+ movdir64b(r10, ptr[r8]);
+#endif
+ }
+ } c;
+ const uint8_t tbl[] = { // expected machine code, one row per instruction emitted above
+#ifdef XBYAK64
+ 0x67, // extra byte expected only in XBYAK64 builds (presumably the address-size prefix for the 32-bit address registers - confirm)
+#endif
+ 0x0f, 0x1c, 0x44, 0xb0, 0x12, // cldemote(ptr[eax+esi*4+0x12])
+#ifdef XBYAK64
+ 0x67, // same XBYAK64-only byte as above
+#endif
+ 0x0f, 0x38, 0xf9, 0x44, 0x72, 0x04, // movdiri(ptr[edx+esi*2+4], eax)
+
+ 0x66, // first byte of the movdir64b encoding, expected in both build modes
+#ifdef XBYAK64
+ 0x67, // same XBYAK64-only byte as above
+#endif
+ 0x0f, 0x38, 0xf8, 0x02, // movdir64b(eax, ptr[edx])
+#ifdef XBYAK64
+ 0x0f, 0x1c, 0x84, 0xf8, 0x23, 0x01, 0x00, 0x00, // cldemote(ptr[rax+rdi*8+0x123])
+ 0x4e, 0x0f, 0x38, 0xf9, 0x0c, 0x20, // movdiri(ptr[rax+r12], r9)
+ 0x46, 0x0f, 0x38, 0xf9, 0x4c, 0x60, 0x04, // movdiri(ptr[rax+r12*2+4], r9d)
+ 0x66, 0x45, 0x0f, 0x38, 0xf8, 0x10, // movdir64b(r10, ptr[r8])
+#endif
+ };
+ const size_t n = sizeof(tbl) / sizeof(tbl[0]); // number of expected bytes
+ CYBOZU_TEST_EQUAL(c.getSize(), n); // generated code must have exactly the expected length
+ CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); // and match the table byte for byte
+}
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index 071d730..4f87826 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -144,7 +144,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
- VERSION = 0x6041 /* 0xABCD = A.BC(D) */
+ VERSION = 0x6050 /* 0xABCD = A.BC(D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h
index 24b8c3f..18bac94 100644
--- a/xbyak/xbyak_mnemonic.h
+++ b/xbyak/xbyak_mnemonic.h
@@ -1,4 +1,4 @@
-const char *getVersionString() const { return "6.041"; }
+const char *getVersionString() const { return "6.05"; }
void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@@ -57,9 +57,11 @@ void cbw() { db(0x66); db(0x98); }
void cdq() { db(0x99); }
void clc() { db(0xF8); }
void cld() { db(0xFC); }
+void cldemote(const Address& addr) { opMIB(addr, eax, 0x0F, 0x1C); }
void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
void clflushopt(const Address& addr) { db(0x66); opModM(addr, Reg32(7), 0x0F, 0xAE); }
void cli() { db(0xFA); }
+void clwb(const Address& addr) { db(0x66); opMIB(addr, esi, 0x0F, 0xAE); }
void clzero() { db(0x0F); db(0x01); db(0xFC); }
void cmc() { db(0xF5); }
void cmova(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }//-V524
@@ -501,6 +503,8 @@ void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opMo
void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
void movddup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF2, isXMM_XMMorMEM, NONE, NONE); }
+void movdir64b(const Reg& reg, const Address& addr) { db(0x66); opModM(addr, reg.cvt32(), 0x0F, 0x38, 0xF8); }
+void movdiri(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF9); }
void movdq2q(const Mmx& mmx, const Xmm& xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); }
void movdqa(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x7F); }
void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0x66); }
diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h
index 0f4548b..ab0bd57 100644
--- a/xbyak/xbyak_util.h
+++ b/xbyak/xbyak_util.h
@@ -87,11 +87,53 @@ typedef enum {
CoreLevel = 2
} IntelCpuTopologyLevel;
+namespace local {
+
+class Type { // bit mask made of two 64-bit words, combined word by word with & and |
+ uint64_t L; // word set by the first constructor argument
+ uint64_t H; // word set by the second constructor argument
+public:
+ Type(uint64_t L = 0, uint64_t H = 0) : L(L), H(H) { } // not explicit: a plain integer converts to Type with H = 0
+ Type& operator&=(const Type& rhs) // in-place AND of both words
+ {
+ L &= rhs.L;
+ H &= rhs.H;
+ return *this;
+ }
+ Type& operator|=(const Type& rhs) // in-place OR of both words
+ {
+ L |= rhs.L;
+ H |= rhs.H;
+ return *this;
+ }
+ Type operator&(const Type& rhs) const // returns a new mask; *this is not modified
+ {
+ Type t = *this;
+ t &= rhs;
+ return t;
+ }
+ Type operator|(const Type& rhs) const // returns a new mask; *this is not modified
+ {
+ Type t = *this;
+ t |= rhs;
+ return t;
+ }
+ // deliberately not explicit, for backward compatibility; true when any bit of either word is set
+ operator bool() const { return (H | L) != 0; }
+ uint64_t getL() const { return L; } // raw value of the L word
+ uint64_t getH() const { return H; } // raw value of the H word
+};
+
/**
CPU detection class
+ @note static inline const member is supported by c++17 or later, so use template hack
*/
-class Cpu {
- uint64_t type_;
+template<int dummy=0>
+class CpuT {
+public:
+ typedef local::Type Type;
+private:
+ Type type_;
//system topology
bool x2APIC_supported_;
static const size_t maxTopologyLevels = 2;
@@ -297,80 +339,82 @@ public:
return 0;
#endif
}
- typedef uint64_t Type;
-
- static const Type NONE = 0;
- static const Type tMMX = 1 << 0;
- static const Type tMMX2 = 1 << 1;
- static const Type tCMOV = 1 << 2;
- static const Type tSSE = 1 << 3;
- static const Type tSSE2 = 1 << 4;
- static const Type tSSE3 = 1 << 5;
- static const Type tSSSE3 = 1 << 6;
- static const Type tSSE41 = 1 << 7;
- static const Type tSSE42 = 1 << 8;
- static const Type tPOPCNT = 1 << 9;
- static const Type tAESNI = 1 << 10;
- static const Type tAVX512_FP16 = 1 << 11;
- static const Type tOSXSAVE = 1 << 12;
- static const Type tPCLMULQDQ = 1 << 13;
- static const Type tAVX = 1 << 14;
- static const Type tFMA = 1 << 15;
-
- static const Type t3DN = 1 << 16;
- static const Type tE3DN = 1 << 17;
- static const Type tWAITPKG = 1 << 18;
- static const Type tRDTSCP = 1 << 19;
- static const Type tAVX2 = 1 << 20;
- static const Type tBMI1 = 1 << 21; // andn, bextr, blsi, blsmsk, blsr, tzcnt
- static const Type tBMI2 = 1 << 22; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx
- static const Type tLZCNT = 1 << 23;
-
- static const Type tINTEL = 1 << 24;
- static const Type tAMD = 1 << 25;
-
- static const Type tENHANCED_REP = 1 << 26; // enhanced rep movsb/stosb
- static const Type tRDRAND = 1 << 27;
- static const Type tADX = 1 << 28; // adcx, adox
- static const Type tRDSEED = 1 << 29; // rdseed
- static const Type tSMAP = 1 << 30; // stac
- static const Type tHLE = uint64_t(1) << 31; // xacquire, xrelease, xtest
- static const Type tRTM = uint64_t(1) << 32; // xbegin, xend, xabort
- static const Type tF16C = uint64_t(1) << 33; // vcvtph2ps, vcvtps2ph
- static const Type tMOVBE = uint64_t(1) << 34; // mobve
- static const Type tAVX512F = uint64_t(1) << 35;
- static const Type tAVX512DQ = uint64_t(1) << 36;
- static const Type tAVX512_IFMA = uint64_t(1) << 37;
- static const Type tAVX512IFMA = tAVX512_IFMA;
- static const Type tAVX512PF = uint64_t(1) << 38;
- static const Type tAVX512ER = uint64_t(1) << 39;
- static const Type tAVX512CD = uint64_t(1) << 40;
- static const Type tAVX512BW = uint64_t(1) << 41;
- static const Type tAVX512VL = uint64_t(1) << 42;
- static const Type tAVX512_VBMI = uint64_t(1) << 43;
- static const Type tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual
- static const Type tAVX512_4VNNIW = uint64_t(1) << 44;
- static const Type tAVX512_4FMAPS = uint64_t(1) << 45;
- static const Type tPREFETCHWT1 = uint64_t(1) << 46;
- static const Type tPREFETCHW = uint64_t(1) << 47;
- static const Type tSHA = uint64_t(1) << 48;
- static const Type tMPX = uint64_t(1) << 49;
- static const Type tAVX512_VBMI2 = uint64_t(1) << 50;
- static const Type tGFNI = uint64_t(1) << 51;
- static const Type tVAES = uint64_t(1) << 52;
- static const Type tVPCLMULQDQ = uint64_t(1) << 53;
- static const Type tAVX512_VNNI = uint64_t(1) << 54;
- static const Type tAVX512_BITALG = uint64_t(1) << 55;
- static const Type tAVX512_VPOPCNTDQ = uint64_t(1) << 56;
- static const Type tAVX512_BF16 = uint64_t(1) << 57;
- static const Type tAVX512_VP2INTERSECT = uint64_t(1) << 58;
- static const Type tAMX_TILE = uint64_t(1) << 59;
- static const Type tAMX_INT8 = uint64_t(1) << 60;
- static const Type tAMX_BF16 = uint64_t(1) << 61;
- static const Type tAVX_VNNI = uint64_t(1) << 62;
- // 18, 63
-
- Cpu()
+
+ static const Type NONE;
+ static const Type tMMX;
+ static const Type tMMX2;
+ static const Type tCMOV;
+ static const Type tSSE;
+ static const Type tSSE2;
+ static const Type tSSE3;
+ static const Type tSSSE3;
+ static const Type tSSE41;
+ static const Type tSSE42;
+ static const Type tPOPCNT;
+ static const Type tAESNI;
+ static const Type tAVX512_FP16;
+ static const Type tOSXSAVE;
+ static const Type tPCLMULQDQ;
+ static const Type tAVX;
+ static const Type tFMA;
+
+ static const Type t3DN;
+ static const Type tE3DN;
+ static const Type tWAITPKG;
+ static const Type tRDTSCP;
+ static const Type tAVX2;
+ static const Type tBMI1; // andn, bextr, blsi, blsmsk, blsr, tzcnt
+ static const Type tBMI2; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx
+ static const Type tLZCNT;
+
+ static const Type tINTEL;
+ static const Type tAMD;
+
+ static const Type tENHANCED_REP; // enhanced rep movsb/stosb
+ static const Type tRDRAND;
+ static const Type tADX; // adcx, adox
+ static const Type tRDSEED; // rdseed
+ static const Type tSMAP; // stac
+ static const Type tHLE; // xacquire, xrelease, xtest
+ static const Type tRTM; // xbegin, xend, xabort
+ static const Type tF16C; // vcvtph2ps, vcvtps2ph
+ static const Type tMOVBE; // movbe
+ static const Type tAVX512F;
+ static const Type tAVX512DQ;
+ static const Type tAVX512_IFMA;
+ static const Type tAVX512IFMA;
+ static const Type tAVX512PF;
+ static const Type tAVX512ER;
+ static const Type tAVX512CD;
+ static const Type tAVX512BW;
+ static const Type tAVX512VL;
+ static const Type tAVX512_VBMI;
+ static const Type tAVX512VBMI; // changed by Intel's manual
+ static const Type tAVX512_4VNNIW;
+ static const Type tAVX512_4FMAPS;
+ static const Type tPREFETCHWT1;
+ static const Type tPREFETCHW;
+ static const Type tSHA;
+ static const Type tMPX;
+ static const Type tAVX512_VBMI2;
+ static const Type tGFNI;
+ static const Type tVAES;
+ static const Type tVPCLMULQDQ;
+ static const Type tAVX512_VNNI;
+ static const Type tAVX512_BITALG;
+ static const Type tAVX512_VPOPCNTDQ;
+ static const Type tAVX512_BF16;
+ static const Type tAVX512_VP2INTERSECT;
+ static const Type tAMX_TILE;
+ static const Type tAMX_INT8;
+ static const Type tAMX_BF16;
+ static const Type tAVX_VNNI;
+ static const Type tCLFLUSHOPT;
+ static const Type tCLDEMOTE;
+ static const Type tMOVDIRI;
+ static const Type tMOVDIR64B;
+
+ CpuT()
: type_(NONE)
, x2APIC_supported_(false)
, numCores_()
@@ -484,12 +528,16 @@ public:
if (EBX & (1U << 18)) type_ |= tRDSEED;
if (EBX & (1U << 19)) type_ |= tADX;
if (EBX & (1U << 20)) type_ |= tSMAP;
+ if (EBX & (1U << 23)) type_ |= tCLFLUSHOPT;
if (EBX & (1U << 4)) type_ |= tHLE;
if (EBX & (1U << 11)) type_ |= tRTM;
if (EBX & (1U << 14)) type_ |= tMPX;
if (EBX & (1U << 29)) type_ |= tSHA;
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
if (ECX & (1U << 5)) type_ |= tWAITPKG;
+ if (ECX & (1U << 25)) type_ |= tCLDEMOTE;
+ if (ECX & (1U << 27)) type_ |= tMOVDIRI;
+ if (ECX & (1U << 28)) type_ |= tMOVDIR64B;
if (EDX & (1U << 24)) type_ |= tAMX_TILE;
if (EDX & (1U << 25)) type_ |= tAMX_INT8;
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
@@ -519,6 +567,84 @@ public:
}
};
+template<int dummy> const Type CpuT<dummy>::NONE = 0; // no feature bit set
+template<int dummy> const Type CpuT<dummy>::tMMX = 1 << 0; // integer initializers convert to Type with only the first word set
+template<int dummy> const Type CpuT<dummy>::tMMX2 = 1 << 1;
+template<int dummy> const Type CpuT<dummy>::tCMOV = 1 << 2;
+template<int dummy> const Type CpuT<dummy>::tSSE = 1 << 3;
+template<int dummy> const Type CpuT<dummy>::tSSE2 = 1 << 4;
+template<int dummy> const Type CpuT<dummy>::tSSE3 = 1 << 5;
+template<int dummy> const Type CpuT<dummy>::tSSSE3 = 1 << 6;
+template<int dummy> const Type CpuT<dummy>::tSSE41 = 1 << 7;
+template<int dummy> const Type CpuT<dummy>::tSSE42 = 1 << 8;
+template<int dummy> const Type CpuT<dummy>::tPOPCNT = 1 << 9;
+template<int dummy> const Type CpuT<dummy>::tAESNI = 1 << 10;
+template<int dummy> const Type CpuT<dummy>::tAVX512_FP16 = 1 << 11;
+template<int dummy> const Type CpuT<dummy>::tOSXSAVE = 1 << 12;
+template<int dummy> const Type CpuT<dummy>::tPCLMULQDQ = 1 << 13;
+template<int dummy> const Type CpuT<dummy>::tAVX = 1 << 14;
+template<int dummy> const Type CpuT<dummy>::tFMA = 1 << 15;
+
+template<int dummy> const Type CpuT<dummy>::t3DN = 1 << 16;
+template<int dummy> const Type CpuT<dummy>::tE3DN = 1 << 17;
+template<int dummy> const Type CpuT<dummy>::tWAITPKG = 1 << 18;
+template<int dummy> const Type CpuT<dummy>::tRDTSCP = 1 << 19;
+template<int dummy> const Type CpuT<dummy>::tAVX2 = 1 << 20;
+template<int dummy> const Type CpuT<dummy>::tBMI1 = 1 << 21; // andn, bextr, blsi, blsmsk, blsr, tzcnt
+template<int dummy> const Type CpuT<dummy>::tBMI2 = 1 << 22; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx
+template<int dummy> const Type CpuT<dummy>::tLZCNT = 1 << 23;
+
+template<int dummy> const Type CpuT<dummy>::tINTEL = 1 << 24;
+template<int dummy> const Type CpuT<dummy>::tAMD = 1 << 25;
+
+template<int dummy> const Type CpuT<dummy>::tENHANCED_REP = 1 << 26; // enhanced rep movsb/stosb
+template<int dummy> const Type CpuT<dummy>::tRDRAND = 1 << 27;
+template<int dummy> const Type CpuT<dummy>::tADX = 1 << 28; // adcx, adox
+template<int dummy> const Type CpuT<dummy>::tRDSEED = 1 << 29; // rdseed
+template<int dummy> const Type CpuT<dummy>::tSMAP = 1 << 30; // stac
+template<int dummy> const Type CpuT<dummy>::tHLE = uint64_t(1) << 31; // xacquire, xrelease, xtest
+template<int dummy> const Type CpuT<dummy>::tRTM = uint64_t(1) << 32; // xbegin, xend, xabort
+template<int dummy> const Type CpuT<dummy>::tF16C = uint64_t(1) << 33; // vcvtph2ps, vcvtps2ph
+template<int dummy> const Type CpuT<dummy>::tMOVBE = uint64_t(1) << 34; // movbe
+template<int dummy> const Type CpuT<dummy>::tAVX512F = uint64_t(1) << 35;
+template<int dummy> const Type CpuT<dummy>::tAVX512DQ = uint64_t(1) << 36;
+template<int dummy> const Type CpuT<dummy>::tAVX512_IFMA = uint64_t(1) << 37;
+template<int dummy> const Type CpuT<dummy>::tAVX512IFMA = tAVX512_IFMA; // alias; NOTE(review): copies another static member during initialization - confirm tAVX512_IFMA is guaranteed to be initialized first
+template<int dummy> const Type CpuT<dummy>::tAVX512PF = uint64_t(1) << 38;
+template<int dummy> const Type CpuT<dummy>::tAVX512ER = uint64_t(1) << 39;
+template<int dummy> const Type CpuT<dummy>::tAVX512CD = uint64_t(1) << 40;
+template<int dummy> const Type CpuT<dummy>::tAVX512BW = uint64_t(1) << 41;
+template<int dummy> const Type CpuT<dummy>::tAVX512VL = uint64_t(1) << 42;
+template<int dummy> const Type CpuT<dummy>::tAVX512_VBMI = uint64_t(1) << 43;
+template<int dummy> const Type CpuT<dummy>::tAVX512VBMI = tAVX512_VBMI; // alias of tAVX512_VBMI (name changed by Intel's manual); NOTE(review): same initialization-order question as tAVX512IFMA
+template<int dummy> const Type CpuT<dummy>::tAVX512_4VNNIW = uint64_t(1) << 44;
+template<int dummy> const Type CpuT<dummy>::tAVX512_4FMAPS = uint64_t(1) << 45;
+template<int dummy> const Type CpuT<dummy>::tPREFETCHWT1 = uint64_t(1) << 46;
+template<int dummy> const Type CpuT<dummy>::tPREFETCHW = uint64_t(1) << 47;
+template<int dummy> const Type CpuT<dummy>::tSHA = uint64_t(1) << 48;
+template<int dummy> const Type CpuT<dummy>::tMPX = uint64_t(1) << 49;
+template<int dummy> const Type CpuT<dummy>::tAVX512_VBMI2 = uint64_t(1) << 50;
+template<int dummy> const Type CpuT<dummy>::tGFNI = uint64_t(1) << 51;
+template<int dummy> const Type CpuT<dummy>::tVAES = uint64_t(1) << 52;
+template<int dummy> const Type CpuT<dummy>::tVPCLMULQDQ = uint64_t(1) << 53;
+template<int dummy> const Type CpuT<dummy>::tAVX512_VNNI = uint64_t(1) << 54;
+template<int dummy> const Type CpuT<dummy>::tAVX512_BITALG = uint64_t(1) << 55;
+template<int dummy> const Type CpuT<dummy>::tAVX512_VPOPCNTDQ = uint64_t(1) << 56;
+template<int dummy> const Type CpuT<dummy>::tAVX512_BF16 = uint64_t(1) << 57;
+template<int dummy> const Type CpuT<dummy>::tAVX512_VP2INTERSECT = uint64_t(1) << 58;
+template<int dummy> const Type CpuT<dummy>::tAMX_TILE = uint64_t(1) << 59;
+template<int dummy> const Type CpuT<dummy>::tAMX_INT8 = uint64_t(1) << 60;
+template<int dummy> const Type CpuT<dummy>::tAMX_BF16 = uint64_t(1) << 61;
+template<int dummy> const Type CpuT<dummy>::tAVX_VNNI = uint64_t(1) << 62;
+template<int dummy> const Type CpuT<dummy>::tCLFLUSHOPT = uint64_t(1) << 63; // last bit that fits in a single uint64_t
+template<int dummy> const Type CpuT<dummy>::tCLDEMOTE = Type(0, 1 << 0); // from here on the flags use the second constructor argument
+template<int dummy> const Type CpuT<dummy>::tMOVDIRI = Type(0, 1 << 1);
+template<int dummy> const Type CpuT<dummy>::tMOVDIR64B = Type(0, 1 << 2);
+
+} // local
+
+typedef local::CpuT<> Cpu;
+
#ifndef XBYAK_ONLY_CLASS_CPU
class Clock {
public: