diff options
author | MITSUNARI Shigeo <[email protected]> | 2022-05-12 15:22:33 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2022-05-12 15:22:33 +0900 |
commit | ec15751df2fdf9d436c8acb154a0cfbcf7f26c0f (patch) | |
tree | 558953ac14d989ee3c54bd0287c6b67e9f7e98a4 | |
parent | 3ea8e45d33bbe22b4130d1bf01edc91b8aadf7fa (diff) | |
parent | 4831b3fb3d0e10c5f22f1d62a3ffc93635a85781 (diff) | |
download | xbyak-ec15751df2fdf9d436c8acb154a0cfbcf7f26c0f.tar.gz xbyak-ec15751df2fdf9d436c8acb154a0cfbcf7f26c0f.zip |
Merge branch 'dev' (tag: v6.05)
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | doc/changelog.md | 1 | ||||
-rw-r--r-- | gen/Makefile | 2 | ||||
-rw-r--r-- | gen/gen_code.cpp | 4 | ||||
-rw-r--r-- | meson.build | 2 | ||||
-rw-r--r-- | readme.md | 2 | ||||
-rw-r--r-- | readme.txt | 3 | ||||
-rw-r--r-- | sample/test_util.cpp | 4 | ||||
-rw-r--r-- | test/make_nm.cpp | 1 | ||||
-rw-r--r-- | test/misc.cpp | 43 | ||||
-rw-r--r-- | xbyak/xbyak.h | 2 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 6 | ||||
-rw-r--r-- | xbyak/xbyak_util.h | 278 |
13 files changed, 267 insertions, 83 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 8045d6e..d970fc9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 2.6...3.0.2) -project(xbyak LANGUAGES CXX VERSION 6.041) +project(xbyak LANGUAGES CXX VERSION 6.05) file(GLOB headers xbyak/*.h) diff --git a/doc/changelog.md b/doc/changelog.md index c586e1d..c913d1d 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -1,5 +1,6 @@ # History +* 2022/May/12 ver 6.05 add movdiri, movdir64b, clwb, cldemote * 2022/Apr/22 ver 6.041 consider Android and mingw * 2022/Apr/05 ver 6.04 add tpause, umonitor, umwait * 2022/Mar/08 ver 6.03 MmapAllocator supports memfd with user-defined strings. diff --git a/gen/Makefile b/gen/Makefile index ae7605e..97a6846 100644 --- a/gen/Makefile +++ b/gen/Makefile @@ -30,7 +30,7 @@ VER=$(shell head -n 1 ../xbyak/xbyak_mnemonic.h|grep -o "[0-9.]*") sed -i -e "s/version: '[0-9.]*',/version: '$(VER)',/" $@ ../readme.md: $(TARGET) - sed -l 2 -i -e "s/Xbyak [0-9.]*/Xbyak $(VER)/" $@ + sed -l 2 -i -e "s/# Xbyak [0-9.]*/# Xbyak $(VER)/" $@ ../readme.txt: $(TARGET) sed -l 2 -i -e "s/Xbyak [0-9.]*/Xbyak $(VER)/" $@ diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index e23b552..4cfcc1e 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1051,6 +1051,8 @@ void put() puts("void cmpxchg(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xB0 | (reg.isBit(8) ? 
0 : 1)); }"); puts("void movbe(const Reg& reg, const Address& addr) { opModM(addr, reg, 0x0F, 0x38, 0xF0); }"); puts("void movbe(const Address& addr, const Reg& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF1); }"); + puts("void movdiri(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF9); }"); + puts("void movdir64b(const Reg& reg, const Address& addr) { db(0x66); opModM(addr, reg.cvt32(), 0x0F, 0x38, 0xF8); }"); puts("void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }"); puts("void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); }"); puts("void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xC7); }"); @@ -1088,6 +1090,8 @@ void put() puts("void tpause(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x66); db(0x0F); db(0xAE); setModRM(3, 6, idx); }"); puts("void umonitor(const Reg& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) int bit = r.getBit(); if (BIT != bit) { if ((BIT == 32 && bit == 16) || (BIT == 64 && bit == 32)) { db(0x67); } else { XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) } } db(0xF3); db(0x0F); db(0xAE); setModRM(3, 6, idx); }"); puts("void umwait(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xF2); db(0x0F); db(0xAE); setModRM(3, 6, idx); }"); + puts("void clwb(const Address& addr) { db(0x66); opMIB(addr, esi, 0x0F, 0xAE); }"); + puts("void cldemote(const Address& addr) { opMIB(addr, eax, 0x0F, 0x1C); }"); } { const struct Tbl { diff --git a/meson.build b/meson.build index 065f2fa..da7ce84 100644 --- a/meson.build +++ b/meson.build @@ -5,7 +5,7 @@ project( 'xbyak', 'cpp', - version: '6.041', + version: '6.05', license: 'BSD-3-Clause', default_options: 'b_ndebug=if-release' ) @@ -1,5 +1,5 @@ -# Xbyak 6.041 [![Badge Build]][Build Status] +# Xbyak 6.05 [![Badge Build]][Build Status] *A C++ JIT assembler 
for x86 (IA32), x64 (AMD64, x86-64)* @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.041
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.05
-----------------------------------------------------------------------------
◎概要
@@ -400,6 +400,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から -----------------------------------------------------------------------------
◎履歴
+2022/05/12 ver 6.05 movdiri, movdir64b, clwb, cldemoteを追加
2022/04/05 ver 6.04 tpause, umonitor, umwaitを追加
2022/03/08 ver 6.03 MmapAllocatorがmemfd用のユーザ定義文字列をサポート
2022/01/28 ver 6.02 displacementの32bit範囲チェックの厳密化
diff --git a/sample/test_util.cpp b/sample/test_util.cpp index 60cf77a..7c930f0 100644 --- a/sample/test_util.cpp +++ b/sample/test_util.cpp @@ -84,6 +84,10 @@ void putCPUinfo() { Cpu::tAVX_VNNI, "avx_vnni" }, { Cpu::tAVX512_FP16, "avx512_fp16" }, { Cpu::tWAITPKG, "waitpkg" }, + { Cpu::tCLFLUSHOPT, "clflushopt" }, + { Cpu::tCLDEMOTE, "cldemote" }, + { Cpu::tMOVDIRI, "movdiri" }, + { Cpu::tMOVDIR64B, "movdir64b" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str); diff --git a/test/make_nm.cpp b/test/make_nm.cpp index e1cf112..5cfd441 100644 --- a/test/make_nm.cpp +++ b/test/make_nm.cpp @@ -637,6 +637,7 @@ class Test { "fstsw", "fnstsw", "fxrstor", + "clwb", }; for (size_t i = 0; i < NUM_OF_ARRAY(memTbl); i++) { put(memTbl[i], MEM); diff --git a/test/misc.cpp b/test/misc.cpp index f46239b..ec08e49 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -1924,3 +1924,46 @@ CYBOZU_TEST_AUTO(waitpkg) CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } + +CYBOZU_TEST_AUTO(misc) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + cldemote(ptr[eax+esi*4+0x12]); + movdiri(ptr[edx+esi*2+4], eax); + movdir64b(eax, ptr[edx]); +#ifdef XBYAK64 + cldemote(ptr[rax+rdi*8+0x123]); + movdiri(ptr[rax+r12], r9); + movdiri(ptr[rax+r12*2+4], r9d); + movdir64b(r10, ptr[r8]); +#endif + } + } c; + const uint8_t tbl[] = { +#ifdef XBYAK64 + 0x67, +#endif + 0x0f, 0x1c, 0x44, 0xb0, 0x12, // cldemote +#ifdef XBYAK64 + 0x67, +#endif + 0x0f, 0x38, 0xf9, 0x44, 0x72, 0x04, // movdiri + + 0x66, +#ifdef XBYAK64 + 0x67, +#endif + 0x0f, 0x38, 0xf8, 0x02, // movdir64b +#ifdef XBYAK64 + 0x0f, 0x1c, 0x84, 0xf8, 0x23, 0x01, 0x00, 0x00, // cldemote + 0x4e, 0x0f, 0x38, 0xf9, 0x0c, 0x20, // movdiri + 0x46, 0x0f, 0x38, 0xf9, 0x4c, 0x60, 0x04, // movdiri + 0x66, 0x45, 0x0f, 0x38, 0xf8, 0x10, // movdir64b +#endif + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + 
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 071d730..4f87826 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -144,7 +144,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x6041 /* 0xABCD = A.BC(D) */ + VERSION = 0x6050 /* 0xABCD = A.BC(D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 24b8c3f..18bac94 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,4 +1,4 @@ -const char *getVersionString() const { return "6.041"; } +const char *getVersionString() const { return "6.05"; } void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); } void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); } @@ -57,9 +57,11 @@ void cbw() { db(0x66); db(0x98); } void cdq() { db(0x99); } void clc() { db(0xF8); } void cld() { db(0xFC); } +void cldemote(const Address& addr) { opMIB(addr, eax, 0x0F, 0x1C); } void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); } void clflushopt(const Address& addr) { db(0x66); opModM(addr, Reg32(7), 0x0F, 0xAE); } void cli() { db(0xFA); } +void clwb(const Address& addr) { db(0x66); opMIB(addr, esi, 0x0F, 0xAE); } void clzero() { db(0x0F); db(0x01); db(0xFC); } void cmc() { db(0xF5); } void cmova(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }//-V524 @@ -501,6 +503,8 @@ void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opMo void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); } void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); } void movddup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF2, isXMM_XMMorMEM, NONE, NONE); } +void movdir64b(const 
Reg& reg, const Address& addr) { db(0x66); opModM(addr, reg.cvt32(), 0x0F, 0x38, 0xF8); } +void movdiri(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF9); } void movdq2q(const Mmx& mmx, const Xmm& xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); } void movdqa(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x7F); } void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0x66); } diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h index 0f4548b..ab0bd57 100644 --- a/xbyak/xbyak_util.h +++ b/xbyak/xbyak_util.h @@ -87,11 +87,53 @@ typedef enum { CoreLevel = 2 } IntelCpuTopologyLevel; +namespace local { + +class Type { + uint64_t L; + uint64_t H; +public: + Type(uint64_t L = 0, uint64_t H = 0) : L(L), H(H) { } + Type& operator&=(const Type& rhs) + { + L &= rhs.L; + H &= rhs.H; + return *this; + } + Type& operator|=(const Type& rhs) + { + L |= rhs.L; + H |= rhs.H; + return *this; + } + Type operator&(const Type& rhs) const + { + Type t = *this; + t &= rhs; + return t; + } + Type operator|(const Type& rhs) const + { + Type t = *this; + t |= rhs; + return t; + } + // without explicit because backward compatilibity + operator bool() const { return (H | L) != 0; } + uint64_t getL() const { return L; } + uint64_t getH() const { return H; } +}; + /** CPU detection class + @note static inline const member is supported by c++17 or later, so use template hack */ -class Cpu { - uint64_t type_; +template<int dummy=0> +class CpuT { +public: + typedef local::Type Type; +private: + Type type_; //system topology bool x2APIC_supported_; static const size_t maxTopologyLevels = 2; @@ -297,80 +339,82 @@ public: return 0; #endif } - typedef uint64_t Type; - - static const Type NONE = 0; - static const Type tMMX = 1 << 0; - static const Type tMMX2 = 1 << 1; - static const Type tCMOV = 1 << 2; - static const Type tSSE = 1 << 3; - static const Type tSSE2 = 1 << 4; - static const Type tSSE3 = 1 << 5; - static const Type tSSSE3 = 
1 << 6; - static const Type tSSE41 = 1 << 7; - static const Type tSSE42 = 1 << 8; - static const Type tPOPCNT = 1 << 9; - static const Type tAESNI = 1 << 10; - static const Type tAVX512_FP16 = 1 << 11; - static const Type tOSXSAVE = 1 << 12; - static const Type tPCLMULQDQ = 1 << 13; - static const Type tAVX = 1 << 14; - static const Type tFMA = 1 << 15; - - static const Type t3DN = 1 << 16; - static const Type tE3DN = 1 << 17; - static const Type tWAITPKG = 1 << 18; - static const Type tRDTSCP = 1 << 19; - static const Type tAVX2 = 1 << 20; - static const Type tBMI1 = 1 << 21; // andn, bextr, blsi, blsmsk, blsr, tzcnt - static const Type tBMI2 = 1 << 22; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx - static const Type tLZCNT = 1 << 23; - - static const Type tINTEL = 1 << 24; - static const Type tAMD = 1 << 25; - - static const Type tENHANCED_REP = 1 << 26; // enhanced rep movsb/stosb - static const Type tRDRAND = 1 << 27; - static const Type tADX = 1 << 28; // adcx, adox - static const Type tRDSEED = 1 << 29; // rdseed - static const Type tSMAP = 1 << 30; // stac - static const Type tHLE = uint64_t(1) << 31; // xacquire, xrelease, xtest - static const Type tRTM = uint64_t(1) << 32; // xbegin, xend, xabort - static const Type tF16C = uint64_t(1) << 33; // vcvtph2ps, vcvtps2ph - static const Type tMOVBE = uint64_t(1) << 34; // mobve - static const Type tAVX512F = uint64_t(1) << 35; - static const Type tAVX512DQ = uint64_t(1) << 36; - static const Type tAVX512_IFMA = uint64_t(1) << 37; - static const Type tAVX512IFMA = tAVX512_IFMA; - static const Type tAVX512PF = uint64_t(1) << 38; - static const Type tAVX512ER = uint64_t(1) << 39; - static const Type tAVX512CD = uint64_t(1) << 40; - static const Type tAVX512BW = uint64_t(1) << 41; - static const Type tAVX512VL = uint64_t(1) << 42; - static const Type tAVX512_VBMI = uint64_t(1) << 43; - static const Type tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual - static const Type tAVX512_4VNNIW = uint64_t(1) << 
44; - static const Type tAVX512_4FMAPS = uint64_t(1) << 45; - static const Type tPREFETCHWT1 = uint64_t(1) << 46; - static const Type tPREFETCHW = uint64_t(1) << 47; - static const Type tSHA = uint64_t(1) << 48; - static const Type tMPX = uint64_t(1) << 49; - static const Type tAVX512_VBMI2 = uint64_t(1) << 50; - static const Type tGFNI = uint64_t(1) << 51; - static const Type tVAES = uint64_t(1) << 52; - static const Type tVPCLMULQDQ = uint64_t(1) << 53; - static const Type tAVX512_VNNI = uint64_t(1) << 54; - static const Type tAVX512_BITALG = uint64_t(1) << 55; - static const Type tAVX512_VPOPCNTDQ = uint64_t(1) << 56; - static const Type tAVX512_BF16 = uint64_t(1) << 57; - static const Type tAVX512_VP2INTERSECT = uint64_t(1) << 58; - static const Type tAMX_TILE = uint64_t(1) << 59; - static const Type tAMX_INT8 = uint64_t(1) << 60; - static const Type tAMX_BF16 = uint64_t(1) << 61; - static const Type tAVX_VNNI = uint64_t(1) << 62; - // 18, 63 - - Cpu() + + static const Type NONE; + static const Type tMMX; + static const Type tMMX2; + static const Type tCMOV; + static const Type tSSE; + static const Type tSSE2; + static const Type tSSE3; + static const Type tSSSE3; + static const Type tSSE41; + static const Type tSSE42; + static const Type tPOPCNT; + static const Type tAESNI; + static const Type tAVX512_FP16; + static const Type tOSXSAVE; + static const Type tPCLMULQDQ; + static const Type tAVX; + static const Type tFMA; + + static const Type t3DN; + static const Type tE3DN; + static const Type tWAITPKG; + static const Type tRDTSCP; + static const Type tAVX2; + static const Type tBMI1; // andn, bextr, blsi, blsmsk, blsr, tzcnt + static const Type tBMI2; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx + static const Type tLZCNT; + + static const Type tINTEL; + static const Type tAMD; + + static const Type tENHANCED_REP; // enhanced rep movsb/stosb + static const Type tRDRAND; + static const Type tADX; // adcx, adox + static const Type tRDSEED; // rdseed + 
static const Type tSMAP; // stac + static const Type tHLE; // xacquire, xrelease, xtest + static const Type tRTM; // xbegin, xend, xabort + static const Type tF16C; // vcvtph2ps, vcvtps2ph + static const Type tMOVBE; // mobve + static const Type tAVX512F; + static const Type tAVX512DQ; + static const Type tAVX512_IFMA; + static const Type tAVX512IFMA; + static const Type tAVX512PF; + static const Type tAVX512ER; + static const Type tAVX512CD; + static const Type tAVX512BW; + static const Type tAVX512VL; + static const Type tAVX512_VBMI; + static const Type tAVX512VBMI; // changed by Intel's manual + static const Type tAVX512_4VNNIW; + static const Type tAVX512_4FMAPS; + static const Type tPREFETCHWT1; + static const Type tPREFETCHW; + static const Type tSHA; + static const Type tMPX; + static const Type tAVX512_VBMI2; + static const Type tGFNI; + static const Type tVAES; + static const Type tVPCLMULQDQ; + static const Type tAVX512_VNNI; + static const Type tAVX512_BITALG; + static const Type tAVX512_VPOPCNTDQ; + static const Type tAVX512_BF16; + static const Type tAVX512_VP2INTERSECT; + static const Type tAMX_TILE; + static const Type tAMX_INT8; + static const Type tAMX_BF16; + static const Type tAVX_VNNI; + static const Type tCLFLUSHOPT; + static const Type tCLDEMOTE; + static const Type tMOVDIRI; + static const Type tMOVDIR64B; + + CpuT() : type_(NONE) , x2APIC_supported_(false) , numCores_() @@ -484,12 +528,16 @@ public: if (EBX & (1U << 18)) type_ |= tRDSEED; if (EBX & (1U << 19)) type_ |= tADX; if (EBX & (1U << 20)) type_ |= tSMAP; + if (EBX & (1U << 23)) type_ |= tCLFLUSHOPT; if (EBX & (1U << 4)) type_ |= tHLE; if (EBX & (1U << 11)) type_ |= tRTM; if (EBX & (1U << 14)) type_ |= tMPX; if (EBX & (1U << 29)) type_ |= tSHA; if (ECX & (1U << 0)) type_ |= tPREFETCHWT1; if (ECX & (1U << 5)) type_ |= tWAITPKG; + if (ECX & (1U << 25)) type_ |= tCLDEMOTE; + if (ECX & (1U << 27)) type_ |= tMOVDIRI; + if (ECX & (1U << 28)) type_ |= tMOVDIR64B; if (EDX & (1U << 24)) type_ 
|= tAMX_TILE; if (EDX & (1U << 25)) type_ |= tAMX_INT8; if (EDX & (1U << 22)) type_ |= tAMX_BF16; @@ -519,6 +567,84 @@ public: } }; +template<int dummy> const Type CpuT<dummy>::NONE = 0; +template<int dummy> const Type CpuT<dummy>::tMMX = 1 << 0; +template<int dummy> const Type CpuT<dummy>::tMMX2 = 1 << 1; +template<int dummy> const Type CpuT<dummy>::tCMOV = 1 << 2; +template<int dummy> const Type CpuT<dummy>::tSSE = 1 << 3; +template<int dummy> const Type CpuT<dummy>::tSSE2 = 1 << 4; +template<int dummy> const Type CpuT<dummy>::tSSE3 = 1 << 5; +template<int dummy> const Type CpuT<dummy>::tSSSE3 = 1 << 6; +template<int dummy> const Type CpuT<dummy>::tSSE41 = 1 << 7; +template<int dummy> const Type CpuT<dummy>::tSSE42 = 1 << 8; +template<int dummy> const Type CpuT<dummy>::tPOPCNT = 1 << 9; +template<int dummy> const Type CpuT<dummy>::tAESNI = 1 << 10; +template<int dummy> const Type CpuT<dummy>::tAVX512_FP16 = 1 << 11; +template<int dummy> const Type CpuT<dummy>::tOSXSAVE = 1 << 12; +template<int dummy> const Type CpuT<dummy>::tPCLMULQDQ = 1 << 13; +template<int dummy> const Type CpuT<dummy>::tAVX = 1 << 14; +template<int dummy> const Type CpuT<dummy>::tFMA = 1 << 15; + +template<int dummy> const Type CpuT<dummy>::t3DN = 1 << 16; +template<int dummy> const Type CpuT<dummy>::tE3DN = 1 << 17; +template<int dummy> const Type CpuT<dummy>::tWAITPKG = 1 << 18; +template<int dummy> const Type CpuT<dummy>::tRDTSCP = 1 << 19; +template<int dummy> const Type CpuT<dummy>::tAVX2 = 1 << 20; +template<int dummy> const Type CpuT<dummy>::tBMI1 = 1 << 21; // andn, bextr, blsi, blsmsk, blsr, tzcnt +template<int dummy> const Type CpuT<dummy>::tBMI2 = 1 << 22; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx +template<int dummy> const Type CpuT<dummy>::tLZCNT = 1 << 23; + +template<int dummy> const Type CpuT<dummy>::tINTEL = 1 << 24; +template<int dummy> const Type CpuT<dummy>::tAMD = 1 << 25; + +template<int dummy> const Type CpuT<dummy>::tENHANCED_REP = 1 << 26; // enhanced rep 
movsb/stosb +template<int dummy> const Type CpuT<dummy>::tRDRAND = 1 << 27; +template<int dummy> const Type CpuT<dummy>::tADX = 1 << 28; // adcx, adox +template<int dummy> const Type CpuT<dummy>::tRDSEED = 1 << 29; // rdseed +template<int dummy> const Type CpuT<dummy>::tSMAP = 1 << 30; // stac +template<int dummy> const Type CpuT<dummy>::tHLE = uint64_t(1) << 31; // xacquire, xrelease, xtest +template<int dummy> const Type CpuT<dummy>::tRTM = uint64_t(1) << 32; // xbegin, xend, xabort +template<int dummy> const Type CpuT<dummy>::tF16C = uint64_t(1) << 33; // vcvtph2ps, vcvtps2ph +template<int dummy> const Type CpuT<dummy>::tMOVBE = uint64_t(1) << 34; // mobve +template<int dummy> const Type CpuT<dummy>::tAVX512F = uint64_t(1) << 35; +template<int dummy> const Type CpuT<dummy>::tAVX512DQ = uint64_t(1) << 36; +template<int dummy> const Type CpuT<dummy>::tAVX512_IFMA = uint64_t(1) << 37; +template<int dummy> const Type CpuT<dummy>::tAVX512IFMA = tAVX512_IFMA; +template<int dummy> const Type CpuT<dummy>::tAVX512PF = uint64_t(1) << 38; +template<int dummy> const Type CpuT<dummy>::tAVX512ER = uint64_t(1) << 39; +template<int dummy> const Type CpuT<dummy>::tAVX512CD = uint64_t(1) << 40; +template<int dummy> const Type CpuT<dummy>::tAVX512BW = uint64_t(1) << 41; +template<int dummy> const Type CpuT<dummy>::tAVX512VL = uint64_t(1) << 42; +template<int dummy> const Type CpuT<dummy>::tAVX512_VBMI = uint64_t(1) << 43; +template<int dummy> const Type CpuT<dummy>::tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual +template<int dummy> const Type CpuT<dummy>::tAVX512_4VNNIW = uint64_t(1) << 44; +template<int dummy> const Type CpuT<dummy>::tAVX512_4FMAPS = uint64_t(1) << 45; +template<int dummy> const Type CpuT<dummy>::tPREFETCHWT1 = uint64_t(1) << 46; +template<int dummy> const Type CpuT<dummy>::tPREFETCHW = uint64_t(1) << 47; +template<int dummy> const Type CpuT<dummy>::tSHA = uint64_t(1) << 48; +template<int dummy> const Type CpuT<dummy>::tMPX = uint64_t(1) << 49; 
+template<int dummy> const Type CpuT<dummy>::tAVX512_VBMI2 = uint64_t(1) << 50; +template<int dummy> const Type CpuT<dummy>::tGFNI = uint64_t(1) << 51; +template<int dummy> const Type CpuT<dummy>::tVAES = uint64_t(1) << 52; +template<int dummy> const Type CpuT<dummy>::tVPCLMULQDQ = uint64_t(1) << 53; +template<int dummy> const Type CpuT<dummy>::tAVX512_VNNI = uint64_t(1) << 54; +template<int dummy> const Type CpuT<dummy>::tAVX512_BITALG = uint64_t(1) << 55; +template<int dummy> const Type CpuT<dummy>::tAVX512_VPOPCNTDQ = uint64_t(1) << 56; +template<int dummy> const Type CpuT<dummy>::tAVX512_BF16 = uint64_t(1) << 57; +template<int dummy> const Type CpuT<dummy>::tAVX512_VP2INTERSECT = uint64_t(1) << 58; +template<int dummy> const Type CpuT<dummy>::tAMX_TILE = uint64_t(1) << 59; +template<int dummy> const Type CpuT<dummy>::tAMX_INT8 = uint64_t(1) << 60; +template<int dummy> const Type CpuT<dummy>::tAMX_BF16 = uint64_t(1) << 61; +template<int dummy> const Type CpuT<dummy>::tAVX_VNNI = uint64_t(1) << 62; +template<int dummy> const Type CpuT<dummy>::tCLFLUSHOPT = uint64_t(1) << 63; +template<int dummy> const Type CpuT<dummy>::tCLDEMOTE = Type(0, 1 << 0); +template<int dummy> const Type CpuT<dummy>::tMOVDIRI = Type(0, 1 << 1); +template<int dummy> const Type CpuT<dummy>::tMOVDIR64B = Type(0, 1 << 2); + +} // local + +typedef local::CpuT<> Cpu; + #ifndef XBYAK_ONLY_CLASS_CPU class Clock { public: |