diff options
author | MITSUNARI Shigeo <[email protected]> | 2022-05-26 18:47:11 +0900 |
---|---|---|
committer | MITSUNARI Shigeo <[email protected]> | 2022-05-26 18:47:11 +0900 |
commit | 8cf41063b31680a876c06c2a84ef5f33f0f610bc (patch) | |
tree | fddcef1928f9305baca951adfbedd945cf5a4adc | |
parent | 66d62968d4c7b647d15084d58f16beda634005a0 (diff) | |
download | xbyak-8cf41063b31680a876c06c2a84ef5f33f0f610bc.tar.gz xbyak-8cf41063b31680a876c06c2a84ef5f33f0f610bc.zip |
refactoring Cpu
-rw-r--r-- | xbyak/xbyak_util.h | 242 |
1 file changed, 85 insertions, 157 deletions
diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h index 2bb215a..342b0fb 100644 --- a/xbyak/xbyak_util.h +++ b/xbyak/xbyak_util.h @@ -89,11 +89,20 @@ typedef enum { namespace local { +template<uint64_t L, uint64_t H = 0> +struct TypeT { +}; + +template<uint64_t L1, uint64_t H1, uint64_t L2, uint64_t H2> +TypeT<L1 | L2, H1 | H2> operator|(TypeT<L1, H1>, TypeT<L2, H2>) { return TypeT<L1 | L2, H1 | H2>(); } + class Type { uint64_t L; uint64_t H; public: Type(uint64_t L = 0, uint64_t H = 0) : L(L), H(H) { } + template<uint64_t L_, uint64_t H_> + Type(TypeT<L_, H_>) : L(L_), H(H_) {} Type& operator&=(const Type& rhs) { L &= rhs.L; @@ -126,12 +135,13 @@ public: uint64_t getH() const { return H; } }; +} // local + /** CPU detection class @note static inline const member is supported by c++17 or later, so use template hack */ -template<int dummy=0> -class CpuT { +class Cpu { public: typedef local::Type Type; private: @@ -342,82 +352,79 @@ public: #endif } - static const Type NONE; - static const Type tMMX; - static const Type tMMX2; - static const Type tCMOV; - static const Type tSSE; - static const Type tSSE2; - static const Type tSSE3; - static const Type tSSSE3; - static const Type tSSE41; - static const Type tSSE42; - static const Type tPOPCNT; - static const Type tAESNI; - static const Type tAVX512_FP16; - static const Type tOSXSAVE; - static const Type tPCLMULQDQ; - static const Type tAVX; - static const Type tFMA; - - static const Type t3DN; - static const Type tE3DN; - static const Type tWAITPKG; - static const Type tRDTSCP; - static const Type tAVX2; - static const Type tBMI1; // andn, bextr, blsi, blsmsk, blsr, tzcnt - static const Type tBMI2; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx - static const Type tLZCNT; - - static const Type tINTEL; - static const Type tAMD; - - static const Type tENHANCED_REP; // enhanced rep movsb/stosb - static const Type tRDRAND; - static const Type tADX; // adcx, adox - static const Type tRDSEED; // rdseed - static const 
Type tSMAP; // stac - static const Type tHLE; // xacquire, xrelease, xtest - static const Type tRTM; // xbegin, xend, xabort - static const Type tF16C; // vcvtph2ps, vcvtps2ph - static const Type tMOVBE; // mobve - static const Type tAVX512F; - static const Type tAVX512DQ; - static const Type tAVX512_IFMA; - static const Type tAVX512IFMA; - static const Type tAVX512PF; - static const Type tAVX512ER; - static const Type tAVX512CD; - static const Type tAVX512BW; - static const Type tAVX512VL; - static const Type tAVX512_VBMI; - static const Type tAVX512VBMI; // changed by Intel's manual - static const Type tAVX512_4VNNIW; - static const Type tAVX512_4FMAPS; - static const Type tPREFETCHWT1; - static const Type tPREFETCHW; - static const Type tSHA; - static const Type tMPX; - static const Type tAVX512_VBMI2; - static const Type tGFNI; - static const Type tVAES; - static const Type tVPCLMULQDQ; - static const Type tAVX512_VNNI; - static const Type tAVX512_BITALG; - static const Type tAVX512_VPOPCNTDQ; - static const Type tAVX512_BF16; - static const Type tAVX512_VP2INTERSECT; - static const Type tAMX_TILE; - static const Type tAMX_INT8; - static const Type tAMX_BF16; - static const Type tAVX_VNNI; - static const Type tCLFLUSHOPT; - static const Type tCLDEMOTE; - static const Type tMOVDIRI; - static const Type tMOVDIR64B; - static const Type tCLZERO; - - CpuT() + static const local::TypeT<0> NONE; + static const local::TypeT<1 << 0> tMMX; + static const local::TypeT<1 << 1> tMMX2; + static const local::TypeT<1 << 2> tCMOV; + static const local::TypeT<1 << 3> tSSE; + static const local::TypeT<1 << 4> tSSE2; + static const local::TypeT<1 << 5> tSSE3; + static const local::TypeT<1 << 6> tSSSE3; + static const local::TypeT<1 << 7> tSSE41; + static const local::TypeT<1 << 8> tSSE42; + static const local::TypeT<1 << 9> tPOPCNT; + static const local::TypeT<1 << 10> tAESNI; + static const local::TypeT<1 << 11> tAVX512_FP16; + static const local::TypeT<1 << 12> tOSXSAVE; + 
static const local::TypeT<1 << 13> tPCLMULQDQ; + static const local::TypeT<1 << 14> tAVX; + static const local::TypeT<1 << 15> tFMA; + static const local::TypeT<1 << 16> t3DN; + static const local::TypeT<1 << 17> tE3DN; + static const local::TypeT<1 << 18> tWAITPKG; + static const local::TypeT<1 << 19> tRDTSCP; + static const local::TypeT<1 << 20> tAVX2; + static const local::TypeT<1 << 21> tBMI1; // andn, bextr, blsi, blsmsk, blsr, tzcnt + static const local::TypeT<1 << 22> tBMI2; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx + static const local::TypeT<1 << 23> tLZCNT; + static const local::TypeT<1 << 24> tINTEL; + static const local::TypeT<1 << 25> tAMD; + static const local::TypeT<1 << 26> tENHANCED_REP; // enhanced rep movsb/stosb + static const local::TypeT<1 << 27> tRDRAND; + static const local::TypeT<1 << 28> tADX; // adcx, adox + static const local::TypeT<1 << 29> tRDSEED; // rdseed + static const local::TypeT<1 << 30> tSMAP; // stac + static const local::TypeT<uint64_t(1) << 31> tHLE; // xacquire, xrelease, xtest + static const local::TypeT<uint64_t(1) << 32> tRTM; // xbegin, xend, xabort + static const local::TypeT<uint64_t(1) << 33> tF16C; // vcvtph2ps, vcvtps2ph + static const local::TypeT<uint64_t(1) << 34> tMOVBE; // mobve + static const local::TypeT<uint64_t(1) << 35> tAVX512F; + static const local::TypeT<uint64_t(1) << 36> tAVX512DQ; + static const local::TypeT<uint64_t(1) << 37> tAVX512_IFMA; + static const local::TypeT<uint64_t(1) << 37> tAVX512IFMA;// = tAVX512_IFMA; + static const local::TypeT<uint64_t(1) << 38> tAVX512PF; + static const local::TypeT<uint64_t(1) << 39> tAVX512ER; + static const local::TypeT<uint64_t(1) << 40> tAVX512CD; + static const local::TypeT<uint64_t(1) << 41> tAVX512BW; + static const local::TypeT<uint64_t(1) << 42> tAVX512VL; + static const local::TypeT<uint64_t(1) << 43> tAVX512_VBMI; + static const local::TypeT<uint64_t(1) << 43> tAVX512VBMI; // = tAVX512_VBMI; // changed by Intel's manual + static const 
local::TypeT<uint64_t(1) << 44> tAVX512_4VNNIW; + static const local::TypeT<uint64_t(1) << 45> tAVX512_4FMAPS; + static const local::TypeT<uint64_t(1) << 46> tPREFETCHWT1; + static const local::TypeT<uint64_t(1) << 47> tPREFETCHW; + static const local::TypeT<uint64_t(1) << 48> tSHA; + static const local::TypeT<uint64_t(1) << 49> tMPX; + static const local::TypeT<uint64_t(1) << 50> tAVX512_VBMI2; + static const local::TypeT<uint64_t(1) << 51> tGFNI; + static const local::TypeT<uint64_t(1) << 52> tVAES; + static const local::TypeT<uint64_t(1) << 53> tVPCLMULQDQ; + static const local::TypeT<uint64_t(1) << 54> tAVX512_VNNI; + static const local::TypeT<uint64_t(1) << 55> tAVX512_BITALG; + static const local::TypeT<uint64_t(1) << 56> tAVX512_VPOPCNTDQ; + static const local::TypeT<uint64_t(1) << 57> tAVX512_BF16; + static const local::TypeT<uint64_t(1) << 58> tAVX512_VP2INTERSECT; + static const local::TypeT<uint64_t(1) << 59> tAMX_TILE; + static const local::TypeT<uint64_t(1) << 60> tAMX_INT8; + static const local::TypeT<uint64_t(1) << 61> tAMX_BF16; + static const local::TypeT<uint64_t(1) << 62> tAVX_VNNI; + static const local::TypeT<uint64_t(1) << 63> tCLFLUSHOPT; + static const local::TypeT<0, 1 << 0> tCLDEMOTE; + static const local::TypeT<0, 1 << 1> tMOVDIRI; + static const local::TypeT<0, 1 << 2> tMOVDIR64B; + static const local::TypeT<0, 1 << 3> tCLZERO; // AMD Zen + + Cpu() : type_(NONE) , x2APIC_supported_(false) , numCores_() @@ -576,85 +583,6 @@ public: } }; -template<int dummy> const Type CpuT<dummy>::NONE = 0; -template<int dummy> const Type CpuT<dummy>::tMMX = 1 << 0; -template<int dummy> const Type CpuT<dummy>::tMMX2 = 1 << 1; -template<int dummy> const Type CpuT<dummy>::tCMOV = 1 << 2; -template<int dummy> const Type CpuT<dummy>::tSSE = 1 << 3; -template<int dummy> const Type CpuT<dummy>::tSSE2 = 1 << 4; -template<int dummy> const Type CpuT<dummy>::tSSE3 = 1 << 5; -template<int dummy> const Type CpuT<dummy>::tSSSE3 = 1 << 6; -template<int dummy> const Type 
CpuT<dummy>::tSSE41 = 1 << 7; -template<int dummy> const Type CpuT<dummy>::tSSE42 = 1 << 8; -template<int dummy> const Type CpuT<dummy>::tPOPCNT = 1 << 9; -template<int dummy> const Type CpuT<dummy>::tAESNI = 1 << 10; -template<int dummy> const Type CpuT<dummy>::tAVX512_FP16 = 1 << 11; -template<int dummy> const Type CpuT<dummy>::tOSXSAVE = 1 << 12; -template<int dummy> const Type CpuT<dummy>::tPCLMULQDQ = 1 << 13; -template<int dummy> const Type CpuT<dummy>::tAVX = 1 << 14; -template<int dummy> const Type CpuT<dummy>::tFMA = 1 << 15; - -template<int dummy> const Type CpuT<dummy>::t3DN = 1 << 16; -template<int dummy> const Type CpuT<dummy>::tE3DN = 1 << 17; -template<int dummy> const Type CpuT<dummy>::tWAITPKG = 1 << 18; -template<int dummy> const Type CpuT<dummy>::tRDTSCP = 1 << 19; -template<int dummy> const Type CpuT<dummy>::tAVX2 = 1 << 20; -template<int dummy> const Type CpuT<dummy>::tBMI1 = 1 << 21; // andn, bextr, blsi, blsmsk, blsr, tzcnt -template<int dummy> const Type CpuT<dummy>::tBMI2 = 1 << 22; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx -template<int dummy> const Type CpuT<dummy>::tLZCNT = 1 << 23; - -template<int dummy> const Type CpuT<dummy>::tINTEL = 1 << 24; -template<int dummy> const Type CpuT<dummy>::tAMD = 1 << 25; - -template<int dummy> const Type CpuT<dummy>::tENHANCED_REP = 1 << 26; // enhanced rep movsb/stosb -template<int dummy> const Type CpuT<dummy>::tRDRAND = 1 << 27; -template<int dummy> const Type CpuT<dummy>::tADX = 1 << 28; // adcx, adox -template<int dummy> const Type CpuT<dummy>::tRDSEED = 1 << 29; // rdseed -template<int dummy> const Type CpuT<dummy>::tSMAP = 1 << 30; // stac -template<int dummy> const Type CpuT<dummy>::tHLE = uint64_t(1) << 31; // xacquire, xrelease, xtest -template<int dummy> const Type CpuT<dummy>::tRTM = uint64_t(1) << 32; // xbegin, xend, xabort -template<int dummy> const Type CpuT<dummy>::tF16C = uint64_t(1) << 33; // vcvtph2ps, vcvtps2ph -template<int dummy> const Type CpuT<dummy>::tMOVBE = 
uint64_t(1) << 34; // mobve -template<int dummy> const Type CpuT<dummy>::tAVX512F = uint64_t(1) << 35; -template<int dummy> const Type CpuT<dummy>::tAVX512DQ = uint64_t(1) << 36; -template<int dummy> const Type CpuT<dummy>::tAVX512_IFMA = uint64_t(1) << 37; -template<int dummy> const Type CpuT<dummy>::tAVX512IFMA = tAVX512_IFMA; -template<int dummy> const Type CpuT<dummy>::tAVX512PF = uint64_t(1) << 38; -template<int dummy> const Type CpuT<dummy>::tAVX512ER = uint64_t(1) << 39; -template<int dummy> const Type CpuT<dummy>::tAVX512CD = uint64_t(1) << 40; -template<int dummy> const Type CpuT<dummy>::tAVX512BW = uint64_t(1) << 41; -template<int dummy> const Type CpuT<dummy>::tAVX512VL = uint64_t(1) << 42; -template<int dummy> const Type CpuT<dummy>::tAVX512_VBMI = uint64_t(1) << 43; -template<int dummy> const Type CpuT<dummy>::tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual -template<int dummy> const Type CpuT<dummy>::tAVX512_4VNNIW = uint64_t(1) << 44; -template<int dummy> const Type CpuT<dummy>::tAVX512_4FMAPS = uint64_t(1) << 45; -template<int dummy> const Type CpuT<dummy>::tPREFETCHWT1 = uint64_t(1) << 46; -template<int dummy> const Type CpuT<dummy>::tPREFETCHW = uint64_t(1) << 47; -template<int dummy> const Type CpuT<dummy>::tSHA = uint64_t(1) << 48; -template<int dummy> const Type CpuT<dummy>::tMPX = uint64_t(1) << 49; -template<int dummy> const Type CpuT<dummy>::tAVX512_VBMI2 = uint64_t(1) << 50; -template<int dummy> const Type CpuT<dummy>::tGFNI = uint64_t(1) << 51; -template<int dummy> const Type CpuT<dummy>::tVAES = uint64_t(1) << 52; -template<int dummy> const Type CpuT<dummy>::tVPCLMULQDQ = uint64_t(1) << 53; -template<int dummy> const Type CpuT<dummy>::tAVX512_VNNI = uint64_t(1) << 54; -template<int dummy> const Type CpuT<dummy>::tAVX512_BITALG = uint64_t(1) << 55; -template<int dummy> const Type CpuT<dummy>::tAVX512_VPOPCNTDQ = uint64_t(1) << 56; -template<int dummy> const Type CpuT<dummy>::tAVX512_BF16 = uint64_t(1) << 57; -template<int dummy> 
const Type CpuT<dummy>::tAVX512_VP2INTERSECT = uint64_t(1) << 58; -template<int dummy> const Type CpuT<dummy>::tAMX_TILE = uint64_t(1) << 59; -template<int dummy> const Type CpuT<dummy>::tAMX_INT8 = uint64_t(1) << 60; -template<int dummy> const Type CpuT<dummy>::tAMX_BF16 = uint64_t(1) << 61; -template<int dummy> const Type CpuT<dummy>::tAVX_VNNI = uint64_t(1) << 62; -template<int dummy> const Type CpuT<dummy>::tCLFLUSHOPT = uint64_t(1) << 63; -template<int dummy> const Type CpuT<dummy>::tCLDEMOTE = Type(0, 1 << 0); -template<int dummy> const Type CpuT<dummy>::tMOVDIRI = Type(0, 1 << 1); -template<int dummy> const Type CpuT<dummy>::tMOVDIR64B = Type(0, 1 << 2); -template<int dummy> const Type CpuT<dummy>::tCLZERO = Type(0, 1 << 3); // AMD Zen - -} // local - -typedef local::CpuT<> Cpu; - #ifndef XBYAK_ONLY_CLASS_CPU class Clock { public: |