diff options
-rw-r--r-- | readme.md | 3 | ||||
-rw-r--r-- | readme.txt | 3 | ||||
-rw-r--r-- | sample/test_util.cpp | 3 | ||||
-rw-r--r-- | xbyak/xbyak.h | 2 | ||||
-rw-r--r-- | xbyak/xbyak_mnemonic.h | 2 | ||||
-rw-r--r-- | xbyak/xbyak_util.h | 90 |
6 files changed, 59 insertions, 44 deletions
@@ -1,5 +1,5 @@ -Xbyak 5.61 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ +Xbyak 5.62 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ ============= Abstract @@ -333,6 +333,7 @@ The header files under xbyak/ are independent of cybozulib. History ------------- +* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso * 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it) * 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace * 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf) @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.610
+ C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.62
-----------------------------------------------------------------------------
◎概要
@@ -343,6 +343,7 @@ cybozulibは単体テストでのみ利用されていて、xbyak/ディレク� -----------------------------------------------------------------------------
◎履歴
+2018/02/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
2018/02/07 ver 5.61 vmov*がmem{k}{z}形式対応(忘れてた)
2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加
2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf)
diff --git a/sample/test_util.cpp b/sample/test_util.cpp index bb515db..9b19935 100644 --- a/sample/test_util.cpp +++ b/sample/test_util.cpp @@ -104,6 +104,9 @@ void putCPUinfo() Core i7-3930K 6 2D */ cpu.putFamily(); + for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) { + printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i)); + } } int main() diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 2af06fc..96809d8 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -105,7 +105,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x5610 /* 0xABCD = A.BC(D) */ + VERSION = 0x5620 /* 0xABCD = A.BC(D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 29233c3..4661908 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,4 +1,4 @@ -const char *getVersionString() const { return "5.61"; } +const char *getVersionString() const { return "5.62"; } void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); } void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); } diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h index d0f5c29..36fe3c0 100644 --- a/xbyak/xbyak_util.h +++ b/xbyak/xbyak_util.h @@ -84,52 +84,54 @@ class Cpu { displayModel = model; } } - unsigned int value_from_bits(unsigned int val, unsigned int base, unsigned int end) + unsigned int extractBit(unsigned int val, unsigned int base, unsigned int end) { - unsigned int shift = sizeof(val) * 8 - end - 1; - return (val << shift) >> (shift + base); + return (val >> base) & ((1u << (end - base)) - 1); } void setCacheHierarchy() { - unsigned int cache_type = 42; + if ((type_ & tINTEL) == 0) return; + const unsigned int NO_CACHE = 0; + const unsigned int DATA_CACHE = 1; +// const unsigned int INSTRUCTION_CACHE = 2; + const unsigned int UNIFIED_CACHE = 3; unsigned int smt_width = 0; - unsigned int n_cores; + unsigned int n_cores = 0; unsigned int data[4]; - if ((type_ & tINTEL) == 0) { - fprintf(stderr, "ERR cache hierarchy querying is not supported\n"); - throw Error(ERR_INTERNAL); - } - - // if leaf 11 exists, we use it to get the number of smt cores and cores on socket - // If x2APIC is supported, these are the only correct numbers. + /* + if leaf 11 exists, we use it to get the number of smt cores and cores on socket + If x2APIC is supported, these are the only correct numbers. + */ getCpuidEx(0x0, 0, data); - if(data[0] >= 11){ + if (data[0] >= 11) { getCpuidEx(0xB, 0, data); // CPUID for SMT Level - smt_width = (data[1] & 0x7FFF); + smt_width = data[1] & 0x7FFF; getCpuidEx(0xB, 1, data); // CPUID for CORE Level - n_cores = (data[1] & 0x7FFF); + n_cores = data[1] & 0x7FFF; } - /* Assumptions: - * - the first level of data cache is not shared (which is the - * case for every existing architecture) and use this to - * determine the SMT width for arch not supporting leaf 11 - * - when leaf 4 reports a number of core less than n_cores - * on socket reported by leaf 11, then it is a correct number - * of cores not an upperbound */ - for (int i = 0; ((cache_type != NO_CACHE) && (data_cache_levels < max_number_cache_levels)); i++) { + /* + Assumptions: + the first level of data cache is not shared (which is the + case for every existing architecture) and use this to + determine the SMT width for arch not supporting leaf 11. + when leaf 4 reports a number of core less than n_cores + on socket reported by leaf 11, then it is a correct number + of cores not an upperbound. + */ + for (int i = 0; data_cache_levels < maxNumberCacheLevels; i++) { getCpuidEx(0x4, i, data); - cache_type = value_from_bits(data[0], 0, 4); - if ((cache_type == DATA_CACHE) || (cache_type == UNIFIED_CACHE)) { - int nb_logical_cores = (std::min)(value_from_bits(data[0], 14, 25) + 1, - n_cores); + unsigned int cacheType = extractBit(data[0], 0, 4); + if (cacheType == NO_CACHE) break; + if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) { + unsigned int nb_logical_cores = (std::min)(extractBit(data[0], 14, 25) + 1, n_cores); data_cache_size[data_cache_levels] = - (value_from_bits(data[1], 22, 31) + 1) - * (value_from_bits(data[1], 12, 21) + 1) - * (value_from_bits(data[1], 0, 11) + 1) + (extractBit(data[1], 22, 31) + 1) + * (extractBit(data[1], 12, 21) + 1) + * (extractBit(data[1], 0, 11) + 1) * (data[2] + 1); - if ((cache_type == DATA_CACHE) && (smt_width == 0)) smt_width = nb_logical_cores; + if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores; assert(smt_width != 0); cores_sharing_data_cache[data_cache_levels] = nb_logical_cores / smt_width; data_cache_levels++; @@ -146,11 +148,24 @@ public: int displayFamily; // family + extFamily int displayModel; // model + extModel - static const unsigned int max_number_cache_levels = 10; - unsigned int data_cache_size[max_number_cache_levels]; - unsigned int cores_sharing_data_cache[max_number_cache_levels]; + // may I move these members into private? + static const unsigned int maxNumberCacheLevels = 10; + unsigned int data_cache_size[maxNumberCacheLevels]; + unsigned int cores_sharing_data_cache[maxNumberCacheLevels]; unsigned int data_cache_levels; + unsigned int getDataCacheLevels() const { return data_cache_levels; } + unsigned int getCoresSharingDataCache(unsigned int i) const + { + if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER); + return cores_sharing_data_cache[i]; + } + unsigned int getDataCacheSize(unsigned int i) const + { + if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER); + return data_cache_size[i]; + } + /* data[] = { eax, ebx, ecx, edx } */ @@ -183,10 +198,6 @@ public: #endif } typedef uint64 Type; - static const Type NO_CACHE = 0; - static const Type DATA_CACHE = 1; - static const Type INSTRUCTION_CACHE = 2; - static const Type UNIFIED_CACHE = 3; static const Type NONE = 0; static const Type tMMX = 1 << 0; @@ -346,8 +357,7 @@ public: if (ECX & (1U << 0)) type_ |= tPREFETCHWT1; } setFamily(); - if ((type_ & tINTEL) == tINTEL) - setCacheHierarchy(); + setCacheHierarchy(); } void putFamily() const { |