diff options
Diffstat (limited to 'include/oaknut')
27 files changed, 1816 insertions, 151 deletions
diff --git a/include/oaknut/code_block.hpp b/include/oaknut/code_block.hpp index 1c29ad09..bfa87d96 100644 --- a/include/oaknut/code_block.hpp +++ b/include/oaknut/code_block.hpp @@ -36,6 +36,10 @@ public: # else m_memory = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0); # endif +#elif defined(__NetBSD__) + m_memory = (std::uint32_t*)mmap(nullptr, size, PROT_MPROTECT(PROT_READ | PROT_WRITE | PROT_EXEC), MAP_ANON | MAP_PRIVATE, -1, 0); +#elif defined(__OpenBSD__) + m_memory = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0); #else m_memory = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0); #endif @@ -68,23 +72,19 @@ public: void protect() { -#if defined(__APPLE__) -# if TARGET_OS_IPHONE - mprotect(m_memory, m_size, PROT_READ | PROT_EXEC); -# else +#if defined(__APPLE__) && !TARGET_OS_IPHONE pthread_jit_write_protect_np(1); -# endif +#elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__) + mprotect(m_memory, m_size, PROT_READ | PROT_EXEC); #endif } void unprotect() { -#if defined(__APPLE__) -# if TARGET_OS_IPHONE - mprotect(m_memory, m_size, PROT_READ | PROT_WRITE); -# else +#if defined(__APPLE__) && !TARGET_OS_IPHONE pthread_jit_write_protect_np(0); -# endif +#elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__) + mprotect(m_memory, m_size, PROT_READ | PROT_WRITE); #endif } diff --git a/include/oaknut/dual_code_block.hpp b/include/oaknut/dual_code_block.hpp new file mode 100644 index 00000000..eb6e19d9 --- /dev/null +++ b/include/oaknut/dual_code_block.hpp @@ -0,0 +1,165 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <new> + +#if defined(_WIN32) +# define NOMINMAX +# include <windows.h> +#elif defined(__APPLE__) +# 
include <mach/mach.h> +# include <mach/vm_map.h> + +# include <TargetConditionals.h> +# include <libkern/OSCacheControl.h> +# include <pthread.h> +# include <sys/mman.h> +# include <unistd.h> +#else +# if !defined(_GNU_SOURCE) +# define _GNU_SOURCE +# endif +# include <sys/mman.h> +# include <sys/types.h> +# include <unistd.h> +#endif + +namespace oaknut { + +/// Owns a code buffer reachable through a writeable pointer and an executable pointer (a single RWX allocation on Windows, two mappings of the same memory elsewhere). +class DualCodeBlock { +public: + /// Allocates `size` bytes. Throws std::bad_alloc if any allocation or mapping step fails; whatever was already acquired is released first, because the destructor does not run for a throwing constructor. + explicit DualCodeBlock(std::size_t size) + : m_size(size) + { +#if defined(_WIN32) + m_wmem = m_xmem = (std::uint32_t*)VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE); + if (m_wmem == nullptr) + throw std::bad_alloc{}; +#elif defined(__APPLE__) + m_wmem = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + if (m_wmem == MAP_FAILED) + throw std::bad_alloc{}; + + vm_prot_t cur_prot, max_prot; + kern_return_t ret = vm_remap(mach_task_self(), (vm_address_t*)&m_xmem, size, 0, VM_FLAGS_ANYWHERE | VM_FLAGS_RANDOM_ADDR, mach_task_self(), (mach_vm_address_t)m_wmem, false, &cur_prot, &max_prot, VM_INHERIT_NONE); + if (ret != KERN_SUCCESS) { + munmap(m_wmem, size); + throw std::bad_alloc{}; + } + + if (mprotect(m_xmem, size, PROT_READ | PROT_EXEC) != 0) { + vm_deallocate(mach_task_self(), (vm_address_t)m_xmem, size); + munmap(m_wmem, size); + throw std::bad_alloc{}; + } +#else +# if defined(__OpenBSD__) + char tmpl[] = "oaknut_dual_code_block.XXXXXXXXXX"; + fd = shm_mkstemp(tmpl); + if (fd < 0) + throw std::bad_alloc{}; + shm_unlink(tmpl); +# else + fd = memfd_create("oaknut_dual_code_block", 0); + if (fd < 0) + throw std::bad_alloc{}; +# endif + + int ret = ftruncate(fd, size); + if (ret != 0) { + close(fd); + throw std::bad_alloc{}; + } + + m_wmem = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + m_xmem = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); + + if (m_wmem == MAP_FAILED || m_xmem == MAP_FAILED) { + if (m_wmem != MAP_FAILED) + munmap(m_wmem, size); + if (m_xmem != MAP_FAILED) + munmap(m_xmem, size); + close(fd); + throw std::bad_alloc{}; + } +#endif + } + + /// Releases both views of the buffer and, where one exists, the backing file descriptor. + ~DualCodeBlock() + { +#if defined(_WIN32) + VirtualFree((void*)m_xmem, 0, MEM_RELEASE); +#elif defined(__APPLE__) + vm_deallocate(mach_task_self(), (vm_address_t)m_xmem, m_size); + munmap(m_wmem, m_size); +#else + munmap(m_wmem, m_size); + munmap(m_xmem, m_size); + close(fd); +#endif + 
} + + DualCodeBlock(const DualCodeBlock&) = delete; + DualCodeBlock& operator=(const DualCodeBlock&) = delete; + DualCodeBlock(DualCodeBlock&&) = delete; + DualCodeBlock& operator=(DualCodeBlock&&) = delete; + + /// Pointer to executable mirror of memory (permissions: R-X) + std::uint32_t* xptr() const + { + return m_xmem; + } + + /// Pointer to writeable mirror of memory (permissions: RW-) + std::uint32_t* wptr() const + { + return m_wmem; + } + + /// Invalidate should be used with executable memory pointers. + void invalidate(std::uint32_t* mem, std::size_t size) + { +#if defined(__APPLE__) + sys_icache_invalidate(mem, size); +#elif defined(_WIN32) + FlushInstructionCache(GetCurrentProcess(), mem, size); +#else + static std::size_t icache_line_size = 0x10000, dcache_line_size = 0x10000; + + std::uint64_t ctr; + __asm__ volatile("mrs %0, ctr_el0" + : "=r"(ctr)); + + const std::size_t isize = icache_line_size = std::min<std::size_t>(icache_line_size, 4 << ((ctr >> 0) & 0xf)); + const std::size_t dsize = dcache_line_size = std::min<std::size_t>(dcache_line_size, 4 << ((ctr >> 16) & 0xf)); + + const std::uintptr_t end = (std::uintptr_t)mem + size; + + for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(dsize - 1); addr < end; addr += dsize) { + __asm__ volatile("dc cvau, %0" + : + : "r"(addr) + : "memory"); + } + __asm__ volatile("dsb ish\n" + : + : + : "memory"); + + for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(isize - 1); addr < end; addr += isize) { + __asm__ volatile("ic ivau, %0" + : + : "r"(addr) + : "memory"); + } + __asm__ volatile("dsb ish\nisb\n" + : + : + : "memory"); +#endif + } + + void invalidate_all() + { + invalidate(m_xmem, m_size); + } + +protected: +#if !defined(_WIN32) && !defined(__APPLE__) + int fd = -1; +#endif + std::uint32_t* m_xmem = nullptr; + std::uint32_t* m_wmem = nullptr; + std::size_t m_size = 0; +}; + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/cpu_feature.hpp 
b/include/oaknut/feature_detection/cpu_feature.hpp new file mode 100644 index 00000000..9f70c5b8 --- /dev/null +++ b/include/oaknut/feature_detection/cpu_feature.hpp @@ -0,0 +1,107 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <bitset> +#include <cstddef> +#include <initializer_list> + +#if defined(__cpp_lib_constexpr_bitset) && __cpp_lib_constexpr_bitset >= 202207L +# define OAKNUT_CPU_FEATURES_CONSTEXPR constexpr +#else +# define OAKNUT_CPU_FEATURES_CONSTEXPR +#endif + +namespace oaknut { + +// NOTE: This file contains code that can be compiled on non-arm64 systems. +// For run-time CPU feature detection, include feature_detection.hpp + +enum class CpuFeature { +#define OAKNUT_CPU_FEATURE(name) name, +#include "oaknut/impl/cpu_feature.inc.hpp" +#undef OAKNUT_CPU_FEATURE +}; + +constexpr std::size_t cpu_feature_count = 0 +#define OAKNUT_CPU_FEATURE(name) +1 +#include "oaknut/impl/cpu_feature.inc.hpp" +#undef OAKNUT_CPU_FEATURE + ; + +class CpuFeatures final { +public: + constexpr CpuFeatures() = default; + + OAKNUT_CPU_FEATURES_CONSTEXPR explicit CpuFeatures(std::initializer_list<CpuFeature> features) + { + for (CpuFeature f : features) { + m_bitset.set(static_cast<std::size_t>(f)); + } + } + + constexpr bool has(CpuFeature feature) const + { + if (static_cast<std::size_t>(feature) >= cpu_feature_count) + return false; + return m_bitset[static_cast<std::size_t>(feature)]; + } + + OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator&=(const CpuFeatures& other) noexcept + { + m_bitset &= other.m_bitset; + return *this; + } + + OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator|=(const CpuFeatures& other) noexcept + { + m_bitset |= other.m_bitset; + return *this; + } + + OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator^=(const CpuFeatures& other) noexcept + { + m_bitset ^= other.m_bitset; + return *this; + } + + OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator~() const 
noexcept + { + CpuFeatures result; + result.m_bitset = ~m_bitset; + return result; + } + +private: + using bitset = std::bitset<cpu_feature_count>; + + friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept; + friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept; + friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept; + + bitset m_bitset; +}; + +// The binary operators below are explicitly `inline`: OAKNUT_CPU_FEATURES_CONSTEXPR may expand to nothing, and a non-inline function defined in a header breaks the one-definition rule once two translation units include it. + +/// Features present in both `a` and `b`. +inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept +{ + CpuFeatures result; + result.m_bitset = a.m_bitset & b.m_bitset; + return result; +} + +/// Features present in `a`, in `b`, or in both. +inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept +{ + CpuFeatures result; + result.m_bitset = a.m_bitset | b.m_bitset; + return result; +} + +/// Features present in exactly one of `a` and `b`. +inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept +{ + CpuFeatures result; + result.m_bitset = a.m_bitset ^ b.m_bitset; + return result; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection.hpp b/include/oaknut/feature_detection/feature_detection.hpp new file mode 100644 index 00000000..1961864d --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection.hpp @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +#if defined(__APPLE__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 0 +# include "oaknut/feature_detection/feature_detection_apple.hpp" +#elif defined(__FreeBSD__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1 +# include "oaknut/feature_detection/feature_detection_freebsd.hpp" +#elif defined(__linux__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# define 
OAKNUT_SUPPORTS_READING_ID_REGISTERS 1 +# include "oaknut/feature_detection/feature_detection_linux.hpp" +#elif defined(__NetBSD__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 2 +# include "oaknut/feature_detection/feature_detection_netbsd.hpp" +#elif defined(__OpenBSD__) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1 +# include "oaknut/feature_detection/feature_detection_openbsd.hpp" +#elif defined(_WIN32) +# define OAKNUT_CPU_FEATURE_DETECTION 1 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 2 +# include "oaknut/feature_detection/feature_detection_w32.hpp" +#else +# define OAKNUT_CPU_FEATURE_DETECTION 0 +# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 0 +# warning "Unsupported operating system for CPU feature detection" +# include "oaknut/feature_detection/feature_detection_generic.hpp" +#endif diff --git a/include/oaknut/feature_detection/feature_detection_apple.hpp b/include/oaknut/feature_detection/feature_detection_apple.hpp new file mode 100644 index 00000000..4c17825a --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_apple.hpp @@ -0,0 +1,112 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <cstddef> +#include <optional> + +#include <sys/sysctl.h> + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut { + +// Ref: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics + +namespace detail { + +inline bool detect_feature(const char* const sysctl_name) +{ + int result = 0; + std::size_t result_size = sizeof(result); + if (::sysctlbyname(sysctl_name, &result, &result_size, nullptr, 0) == 0) { + return result != 0; + } + return false; +} + +} // namespace detail + +inline CpuFeatures detect_features_via_sysctlbyname() +{ + CpuFeatures 
result; + + if (detail::detect_feature("hw.optional.AdvSIMD") || detail::detect_feature("hw.optional.neon")) + result |= CpuFeatures{CpuFeature::ASIMD}; + if (detail::detect_feature("hw.optional.floatingpoint")) + result |= CpuFeatures{CpuFeature::FP}; + if (detail::detect_feature("hw.optional.AdvSIMD_HPFPCvt") || detail::detect_feature("hw.optional.neon_hpfp")) + result |= CpuFeatures{CpuFeature::FP16Conv}; + if (detail::detect_feature("hw.optional.arm.FEAT_BF16")) + result |= CpuFeatures{CpuFeature::BF16}; + if (detail::detect_feature("hw.optional.arm.FEAT_DotProd")) + result |= CpuFeatures{CpuFeature::DotProd}; + if (detail::detect_feature("hw.optional.arm.FEAT_FCMA") || detail::detect_feature("hw.optional.armv8_3_compnum")) + result |= CpuFeatures{CpuFeature::FCMA}; + if (detail::detect_feature("hw.optional.arm.FEAT_FHM") || detail::detect_feature("hw.optional.armv8_2_fhm")) + result |= CpuFeatures{CpuFeature::FHM}; + if (detail::detect_feature("hw.optional.arm.FEAT_FP16") || detail::detect_feature("hw.optional.neon_fp16")) + result |= CpuFeatures{CpuFeature::FP16}; + if (detail::detect_feature("hw.optional.arm.FEAT_FRINTTS")) + result |= CpuFeatures{CpuFeature::FRINTTS}; + if (detail::detect_feature("hw.optional.arm.FEAT_I8MM")) + result |= CpuFeatures{CpuFeature::I8MM}; + if (detail::detect_feature("hw.optional.arm.FEAT_JSCVT")) + result |= CpuFeatures{CpuFeature::JSCVT}; + if (detail::detect_feature("hw.optional.arm.FEAT_RDM")) + result |= CpuFeatures{CpuFeature::RDM}; + if (detail::detect_feature("hw.optional.arm.FEAT_FlagM")) + result |= CpuFeatures{CpuFeature::FlagM}; + if (detail::detect_feature("hw.optional.arm.FEAT_FlagM2")) + result |= CpuFeatures{CpuFeature::FlagM2}; + if (detail::detect_feature("hw.optional.armv8_crc32")) + result |= CpuFeatures{CpuFeature::CRC32}; + if (detail::detect_feature("hw.optional.arm.FEAT_LRCPC")) + result |= CpuFeatures{CpuFeature::LRCPC}; + if (detail::detect_feature("hw.optional.arm.FEAT_LRCPC2")) + result |= 
CpuFeatures{CpuFeature::LRCPC2}; + if (detail::detect_feature("hw.optional.arm.FEAT_LSE") || detail::detect_feature("hw.optional.armv8_1_atomics")) + result |= CpuFeatures{CpuFeature::LSE}; + if (detail::detect_feature("hw.optional.arm.FEAT_LSE2")) + result |= CpuFeatures{CpuFeature::LSE2}; + if (detail::detect_feature("hw.optional.arm.FEAT_AES")) + result |= CpuFeatures{CpuFeature::AES}; + if (detail::detect_feature("hw.optional.arm.FEAT_PMULL")) + result |= CpuFeatures{CpuFeature::PMULL}; + if (detail::detect_feature("hw.optional.arm.FEAT_SHA1")) + result |= CpuFeatures{CpuFeature::SHA1}; + if (detail::detect_feature("hw.optional.arm.FEAT_SHA256")) + result |= CpuFeatures{CpuFeature::SHA256}; + if (detail::detect_feature("hw.optional.arm.FEAT_SHA512") || detail::detect_feature("hw.optional.armv8_2_sha512")) + result |= CpuFeatures{CpuFeature::SHA512}; + if (detail::detect_feature("hw.optional.arm.FEAT_SHA3") || detail::detect_feature("hw.optional.armv8_2_sha3")) + result |= CpuFeatures{CpuFeature::SHA3}; + if (detail::detect_feature("hw.optional.arm.FEAT_BTI")) + result |= CpuFeatures{CpuFeature::BTI}; + if (detail::detect_feature("hw.optional.arm.FEAT_DPB")) + result |= CpuFeatures{CpuFeature::DPB}; + if (detail::detect_feature("hw.optional.arm.FEAT_DPB2")) + result |= CpuFeatures{CpuFeature::DPB2}; + if (detail::detect_feature("hw.optional.arm.FEAT_ECV")) + result |= CpuFeatures{CpuFeature::ECV}; + if (detail::detect_feature("hw.optional.arm.FEAT_SB")) + result |= CpuFeatures{CpuFeature::SB}; + if (detail::detect_feature("hw.optional.arm.FEAT_SSBS")) + result |= CpuFeatures{CpuFeature::SSBS}; + + return result; +} + +inline CpuFeatures detect_features() +{ + return detect_features_via_sysctlbyname(); +} + +inline std::optional<id::IdRegisters> read_id_registers() +{ + return std::nullopt; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_freebsd.hpp b/include/oaknut/feature_detection/feature_detection_freebsd.hpp new 
file mode 100644 index 00000000..efb3c669 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_freebsd.hpp @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <cstdint> +#include <optional> + +#include <sys/auxv.h> +#include <sys/param.h> + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/feature_detection_hwcaps.hpp" +#include "oaknut/feature_detection/id_registers.hpp" +#include "oaknut/feature_detection/read_id_registers_directly.hpp" + +#ifndef AT_HWCAP +# define AT_HWCAP 16 +#endif +#ifndef AT_HWCAP2 +# define AT_HWCAP2 26 +#endif + +#if __FreeBSD_version < 1300114 +# error "Incompatible ABI change (incorrect HWCAP definitions on earlier FreeBSD versions)" +#endif + +namespace oaknut { + +namespace detail { + +inline unsigned long getauxval(int aux) +{ + unsigned long result = 0; + if (::elf_aux_info(aux, &result, static_cast<int>(sizeof result)) == 0) { + return result; + } + return 0; +} + +} // namespace detail + +inline CpuFeatures detect_features_via_hwcap() +{ + const unsigned long hwcap = detail::getauxval(AT_HWCAP); + const unsigned long hwcap2 = detail::getauxval(AT_HWCAP2); + return detect_features_via_hwcap(hwcap, hwcap2); +} + +inline std::optional<id::IdRegisters> read_id_registers() +{ + // HWCAP_CPUID is falsely not set on many FreeBSD kernel versions, + // so we don't bother checking it. 
+ return id::read_id_registers_directly(); +} + +inline CpuFeatures detect_features() +{ + return detect_features_via_hwcap(); +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_generic.hpp b/include/oaknut/feature_detection/feature_detection_generic.hpp new file mode 100644 index 00000000..405a9b6a --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_generic.hpp @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <optional> + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut { + +inline CpuFeatures detect_features() +{ + return CpuFeatures{CpuFeature::FP, CpuFeature::ASIMD}; +} + +inline std::optional<id::IdRegisters> read_id_registers() +{ + return std::nullopt; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_hwcaps.hpp b/include/oaknut/feature_detection/feature_detection_hwcaps.hpp new file mode 100644 index 00000000..09855258 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_hwcaps.hpp @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <cstddef> + +#include "oaknut/feature_detection/cpu_feature.hpp" + +namespace oaknut { + +namespace detail { + +template<std::size_t... bits> +constexpr bool bit_test(unsigned long value) +{ + return (((value >> bits) & 1) && ...); +} + +} // namespace detail + +inline CpuFeatures detect_features_via_hwcap(unsigned long hwcap, unsigned long hwcap2) +{ + CpuFeatures result; + +#define OAKNUT_DETECT_CAP(FEAT, ...) \ + if (detail::bit_test<__VA_ARGS__>(hwcap)) { \ + result |= CpuFeatures{CpuFeature::FEAT}; \ + } +#define OAKNUT_DETECT_CAP2(FEAT, ...) 
\ + if (detail::bit_test<__VA_ARGS__>(hwcap2)) { \ + result |= CpuFeatures{CpuFeature::FEAT}; \ + } + + OAKNUT_DETECT_CAP(FP, 0) // HWCAP_FP + OAKNUT_DETECT_CAP(ASIMD, 1) // HWCAP_ASIMD + // HWCAP_EVTSTRM (2) + OAKNUT_DETECT_CAP(AES, 3) // HWCAP_AES + OAKNUT_DETECT_CAP(PMULL, 4) // HWCAP_PMULL + OAKNUT_DETECT_CAP(SHA1, 5) // HWCAP_SHA1 + OAKNUT_DETECT_CAP(SHA256, 6) // HWCAP_SHA2 + OAKNUT_DETECT_CAP(CRC32, 7) // HWCAP_CRC32 + OAKNUT_DETECT_CAP(LSE, 8) // HWCAP_ATOMICS + OAKNUT_DETECT_CAP(FP16Conv, 9, 10) // HWCAP_FPHP && HWCAP_ASIMDHP + OAKNUT_DETECT_CAP(FP16, 9, 10) // HWCAP_FPHP && HWCAP_ASIMDHP + // HWCAP_CPUID (11) + OAKNUT_DETECT_CAP(RDM, 12) // HWCAP_ASIMDRDM + OAKNUT_DETECT_CAP(JSCVT, 13) // HWCAP_JSCVT + OAKNUT_DETECT_CAP(FCMA, 14) // HWCAP_FCMA + OAKNUT_DETECT_CAP(LRCPC, 15) // HWCAP_LRCPC + OAKNUT_DETECT_CAP(DPB, 16) // HWCAP_DCPOP + OAKNUT_DETECT_CAP(SHA3, 17) // HWCAP_SHA3 + OAKNUT_DETECT_CAP(SM3, 18) // HWCAP_SM3 + OAKNUT_DETECT_CAP(SM4, 19) // HWCAP_SM4 + OAKNUT_DETECT_CAP(DotProd, 20) // HWCAP_ASIMDDP + OAKNUT_DETECT_CAP(SHA512, 21) // HWCAP_SHA512 + OAKNUT_DETECT_CAP(SVE, 22) // HWCAP_SVE + OAKNUT_DETECT_CAP(FHM, 23) // HWCAP_ASIMDFHM + OAKNUT_DETECT_CAP(DIT, 24) // HWCAP_DIT + OAKNUT_DETECT_CAP(LSE2, 25) // HWCAP_USCAT + OAKNUT_DETECT_CAP(LRCPC2, 26) // HWCAP_ILRCPC + OAKNUT_DETECT_CAP(FlagM, 27) // HWCAP_FLAGM + OAKNUT_DETECT_CAP(SSBS, 28) // HWCAP_SSBS + OAKNUT_DETECT_CAP(SB, 29) // HWCAP_SB + OAKNUT_DETECT_CAP(PACA, 30) // HWCAP_PACA + OAKNUT_DETECT_CAP(PACG, 31) // HWCAP_PACG + + OAKNUT_DETECT_CAP2(DPB2, 0) // HWCAP2_DCPODP + OAKNUT_DETECT_CAP2(SVE2, 1) // HWCAP2_SVE2 + OAKNUT_DETECT_CAP2(SVE_AES, 2) // HWCAP2_SVEAES + OAKNUT_DETECT_CAP2(SVE_PMULL128, 3) // HWCAP2_SVEPMULL + OAKNUT_DETECT_CAP2(SVE_BITPERM, 4) // HWCAP2_SVEBITPERM + OAKNUT_DETECT_CAP2(SVE_SHA3, 5) // HWCAP2_SVESHA3 + OAKNUT_DETECT_CAP2(SVE_SM4, 6) // HWCAP2_SVESM4 + OAKNUT_DETECT_CAP2(FlagM2, 7) // HWCAP2_FLAGM2 + OAKNUT_DETECT_CAP2(FRINTTS, 8) // HWCAP2_FRINT + 
OAKNUT_DETECT_CAP2(SVE_I8MM, 9) // HWCAP2_SVEI8MM + OAKNUT_DETECT_CAP2(SVE_F32MM, 10) // HWCAP2_SVEF32MM + OAKNUT_DETECT_CAP2(SVE_F64MM, 11) // HWCAP2_SVEF64MM + OAKNUT_DETECT_CAP2(SVE_BF16, 12) // HWCAP2_SVEBF16 + OAKNUT_DETECT_CAP2(I8MM, 13) // HWCAP2_I8MM + OAKNUT_DETECT_CAP2(BF16, 14) // HWCAP2_BF16 + OAKNUT_DETECT_CAP2(DGH, 15) // HWCAP2_DGH + OAKNUT_DETECT_CAP2(RNG, 16) // HWCAP2_RNG + OAKNUT_DETECT_CAP2(BTI, 17) // HWCAP2_BTI + OAKNUT_DETECT_CAP2(MTE, 18) // HWCAP2_MTE + OAKNUT_DETECT_CAP2(ECV, 19) // HWCAP2_ECV + OAKNUT_DETECT_CAP2(AFP, 20) // HWCAP2_AFP + OAKNUT_DETECT_CAP2(RPRES, 21) // HWCAP2_RPRES + OAKNUT_DETECT_CAP2(MTE3, 22) // HWCAP2_MTE3 + OAKNUT_DETECT_CAP2(SME, 23) // HWCAP2_SME + OAKNUT_DETECT_CAP2(SME_I16I64, 24) // HWCAP2_SME_I16I64 + OAKNUT_DETECT_CAP2(SME_F64F64, 25) // HWCAP2_SME_F64F64 + OAKNUT_DETECT_CAP2(SME_I8I32, 26) // HWCAP2_SME_I8I32 + OAKNUT_DETECT_CAP2(SME_F16F32, 27) // HWCAP2_SME_F16F32 + OAKNUT_DETECT_CAP2(SME_B16F32, 28) // HWCAP2_SME_B16F32 + OAKNUT_DETECT_CAP2(SME_F32F32, 29) // HWCAP2_SME_F32F32 + OAKNUT_DETECT_CAP2(SME_FA64, 30) // HWCAP2_SME_FA64 + OAKNUT_DETECT_CAP2(WFxT, 31) // HWCAP2_WFXT + OAKNUT_DETECT_CAP2(EBF16, 32) // HWCAP2_EBF16 + OAKNUT_DETECT_CAP2(SVE_EBF16, 33) // HWCAP2_SVE_EBF16 + OAKNUT_DETECT_CAP2(CSSC, 34) // HWCAP2_CSSC + OAKNUT_DETECT_CAP2(RPRFM, 35) // HWCAP2_RPRFM + OAKNUT_DETECT_CAP2(SVE2p1, 36) // HWCAP2_SVE2P1 + OAKNUT_DETECT_CAP2(SME2, 37) // HWCAP2_SME2 + OAKNUT_DETECT_CAP2(SME2p1, 38) // HWCAP2_SME2P1 + OAKNUT_DETECT_CAP2(SME_I16I32, 39) // HWCAP2_SME_I16I32 + OAKNUT_DETECT_CAP2(SME_BI32I32, 40) // HWCAP2_SME_BI32I32 + OAKNUT_DETECT_CAP2(SME_B16B16, 41) // HWCAP2_SME_B16B16 + OAKNUT_DETECT_CAP2(SME_F16F16, 42) // HWCAP2_SME_F16F16 + OAKNUT_DETECT_CAP2(MOPS, 43) // HWCAP2_MOPS + OAKNUT_DETECT_CAP2(HBC, 44) // HWCAP2_HBC + +#undef OAKNUT_DETECT_CAP +#undef OAKNUT_DETECT_CAP2 + + return result; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_idregs.hpp 
b/include/oaknut/feature_detection/feature_detection_idregs.hpp new file mode 100644 index 00000000..c26e7a92 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_idregs.hpp @@ -0,0 +1,167 @@ +// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut { + +/// Builds the CpuFeatures set implied by the given ID register values. Declared `inline` because this header-defined function is included from more than one platform header. +inline CpuFeatures detect_features_via_id_registers(id::IdRegisters regs) +{ + CpuFeatures result; + + if (regs.pfr0.FP() >= 0) + result |= CpuFeatures{CpuFeature::FP}; + if (regs.pfr0.AdvSIMD() >= 0) + result |= CpuFeatures{CpuFeature::ASIMD}; + if (regs.isar0.AES() >= 1) + result |= CpuFeatures{CpuFeature::AES}; + if (regs.isar0.AES() >= 2) + result |= CpuFeatures{CpuFeature::PMULL}; + if (regs.isar0.SHA1() >= 1) + result |= CpuFeatures{CpuFeature::SHA1}; + if (regs.isar0.SHA2() >= 1) + result |= CpuFeatures{CpuFeature::SHA256}; + if (regs.isar0.CRC32() >= 1) + result |= CpuFeatures{CpuFeature::CRC32}; + if (regs.isar0.Atomic() >= 2) + result |= CpuFeatures{CpuFeature::LSE}; + if (regs.pfr0.FP() >= 1 && regs.pfr0.AdvSIMD() >= 1) + result |= CpuFeatures{CpuFeature::FP16Conv, CpuFeature::FP16}; + if (regs.isar0.RDM() >= 1) + result |= CpuFeatures{CpuFeature::RDM}; + if (regs.isar1.JSCVT() >= 1) + result |= CpuFeatures{CpuFeature::JSCVT}; + if (regs.isar1.FCMA() >= 1) + result |= CpuFeatures{CpuFeature::FCMA}; + if (regs.isar1.LRCPC() >= 1) + result |= CpuFeatures{CpuFeature::LRCPC}; + if (regs.isar1.DPB() >= 1) + result |= CpuFeatures{CpuFeature::DPB}; + if (regs.isar0.SHA3() >= 1) + result |= CpuFeatures{CpuFeature::SHA3}; + if (regs.isar0.SM3() >= 1) + result |= CpuFeatures{CpuFeature::SM3}; + if (regs.isar0.SM4() >= 1) + result |= CpuFeatures{CpuFeature::SM4}; + if (regs.isar0.DP() >= 1) + result |= CpuFeatures{CpuFeature::DotProd}; + if (regs.isar0.SHA2() >= 2) + result |= CpuFeatures{CpuFeature::SHA512}; + 
if (regs.pfr0.SVE() >= 1) + result |= CpuFeatures{CpuFeature::SVE}; + if (regs.isar0.FHM() >= 1) + result |= CpuFeatures{CpuFeature::FHM}; + if (regs.pfr0.DIT() >= 1) + result |= CpuFeatures{CpuFeature::DIT}; + if (regs.mmfr2.AT() >= 1) + result |= CpuFeatures{CpuFeature::LSE2}; + if (regs.isar1.LRCPC() >= 2) + result |= CpuFeatures{CpuFeature::LRCPC2}; + if (regs.isar0.TS() >= 1) + result |= CpuFeatures{CpuFeature::FlagM}; + if (regs.pfr1.SSBS() >= 2) + result |= CpuFeatures{CpuFeature::SSBS}; + if (regs.isar1.SB() >= 1) + result |= CpuFeatures{CpuFeature::SB}; + if (regs.isar1.APA() >= 1 || regs.isar1.API() >= 1) + result |= CpuFeatures{CpuFeature::PACA}; + if (regs.isar1.GPA() >= 1 || regs.isar1.GPI() >= 1) + result |= CpuFeatures{CpuFeature::PACG}; + if (regs.isar1.DPB() >= 2) + result |= CpuFeatures{CpuFeature::DPB2}; + if (regs.zfr0.SVEver() >= 1) + result |= CpuFeatures{CpuFeature::SVE2}; + if (regs.zfr0.AES() >= 1) + result |= CpuFeatures{CpuFeature::SVE_AES}; + if (regs.zfr0.AES() >= 2) + result |= CpuFeatures{CpuFeature::SVE_PMULL128}; + if (regs.zfr0.BitPerm() >= 1) + result |= CpuFeatures{CpuFeature::SVE_BITPERM}; + if (regs.zfr0.SHA3() >= 1) + result |= CpuFeatures{CpuFeature::SVE_SHA3}; + if (regs.zfr0.SM4() >= 1) + result |= CpuFeatures{CpuFeature::SVE_SM4}; + if (regs.isar0.TS() >= 2) + result |= CpuFeatures{CpuFeature::FlagM2}; + if (regs.isar1.FRINTTS() >= 1) + result |= CpuFeatures{CpuFeature::FRINTTS}; + if (regs.zfr0.I8MM() >= 1) + result |= CpuFeatures{CpuFeature::SVE_I8MM}; + if (regs.zfr0.F32MM() >= 1) + result |= CpuFeatures{CpuFeature::SVE_F32MM}; + if (regs.zfr0.F64MM() >= 1) + result |= CpuFeatures{CpuFeature::SVE_F64MM}; + if (regs.zfr0.BF16() >= 1) + result |= CpuFeatures{CpuFeature::SVE_BF16}; + if (regs.isar1.I8MM() >= 1) + result |= CpuFeatures{CpuFeature::I8MM}; + if (regs.isar1.BF16() >= 1) + result |= CpuFeatures{CpuFeature::BF16}; + if (regs.isar1.DGH() >= 1) + result |= CpuFeatures{CpuFeature::DGH}; + if (regs.isar0.RNDR() >= 
1) + result |= CpuFeatures{CpuFeature::RNG}; + if (regs.pfr1.BT() >= 1) + result |= CpuFeatures{CpuFeature::BTI}; + if (regs.pfr1.MTE() >= 2) + result |= CpuFeatures{CpuFeature::MTE}; + if (regs.mmfr0.ECV() >= 1) + result |= CpuFeatures{CpuFeature::ECV}; + if (regs.mmfr1.AFP() >= 1) + result |= CpuFeatures{CpuFeature::AFP}; + if (regs.isar2.RPRES() >= 1) + result |= CpuFeatures{CpuFeature::RPRES}; + if (regs.pfr1.MTE() >= 3) + result |= CpuFeatures{CpuFeature::MTE3}; + if (regs.pfr1.SME() >= 1) + result |= CpuFeatures{CpuFeature::SME}; + if (regs.smfr0.I16I64() == 0b1111) + result |= CpuFeatures{CpuFeature::SME_I16I64}; + if (regs.smfr0.F64F64() == 0b1) + result |= CpuFeatures{CpuFeature::SME_F64F64}; + if (regs.smfr0.I8I32() == 0b1111) + result |= CpuFeatures{CpuFeature::SME_I8I32}; + if (regs.smfr0.F16F32() == 0b1) + result |= CpuFeatures{CpuFeature::SME_F16F32}; + if (regs.smfr0.B16F32() == 0b1) + result |= CpuFeatures{CpuFeature::SME_B16F32}; + if (regs.smfr0.F32F32() == 0b1) + result |= CpuFeatures{CpuFeature::SME_F32F32}; + if (regs.smfr0.FA64() == 0b1) + result |= CpuFeatures{CpuFeature::SME_FA64}; + if (regs.isar2.WFxT() >= 2) + result |= CpuFeatures{CpuFeature::WFxT}; + if (regs.isar1.BF16() >= 2) + result |= CpuFeatures{CpuFeature::EBF16}; + if (regs.zfr0.BF16() >= 2) + result |= CpuFeatures{CpuFeature::SVE_EBF16}; + if (regs.isar2.CSSC() >= 1) + result |= CpuFeatures{CpuFeature::CSSC}; + if (regs.isar2.RPRFM() >= 1) + result |= CpuFeatures{CpuFeature::RPRFM}; + if (regs.zfr0.SVEver() >= 2) + result |= CpuFeatures{CpuFeature::SVE2p1}; + if (regs.smfr0.SMEver() >= 1) + result |= CpuFeatures{CpuFeature::SME2}; + if (regs.smfr0.SMEver() >= 2) + result |= CpuFeatures{CpuFeature::SME2p1}; + if (regs.smfr0.I16I32() == 0b0101) + result |= CpuFeatures{CpuFeature::SME_I16I32}; + if (regs.smfr0.BI32I32() == 0b1) + result |= CpuFeatures{CpuFeature::SME_BI32I32}; + if (regs.smfr0.B16B16() == 0b1) + result |= CpuFeatures{CpuFeature::SME_B16B16}; + if 
(regs.smfr0.F16F16() == 0b1) + result |= CpuFeatures{CpuFeature::SME_F16F16}; + if (regs.isar2.MOPS() >= 1) + result |= CpuFeatures{CpuFeature::MOPS}; + if (regs.isar2.BC() >= 1) + result |= CpuFeatures{CpuFeature::HBC}; + + return result; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_linux.hpp b/include/oaknut/feature_detection/feature_detection_linux.hpp new file mode 100644 index 00000000..6310eaca --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_linux.hpp @@ -0,0 +1,45 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <optional> + +#include <sys/auxv.h> + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/feature_detection_hwcaps.hpp" +#include "oaknut/feature_detection/id_registers.hpp" +#include "oaknut/feature_detection/read_id_registers_directly.hpp" + +#ifndef AT_HWCAP +# define AT_HWCAP 16 +#endif +#ifndef AT_HWCAP2 +# define AT_HWCAP2 26 +#endif + +namespace oaknut { + +inline CpuFeatures detect_features_via_hwcap() +{ + const unsigned long hwcap = ::getauxval(AT_HWCAP); + const unsigned long hwcap2 = ::getauxval(AT_HWCAP2); + return detect_features_via_hwcap(hwcap, hwcap2); +} + +inline CpuFeatures detect_features() +{ + return detect_features_via_hwcap(); +} + +inline std::optional<id::IdRegisters> read_id_registers() +{ + constexpr unsigned long hwcap_cpuid = (1 << 11); + if (::getauxval(AT_HWCAP) & hwcap_cpuid) { + return id::read_id_registers_directly(); + } + return std::nullopt; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_netbsd.hpp b/include/oaknut/feature_detection/feature_detection_netbsd.hpp new file mode 100644 index 00000000..cdb1deb1 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_netbsd.hpp @@ -0,0 +1,81 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime 
<https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <array> +#include <cstddef> +#include <cstdint> +#include <optional> +#include <string> + +#include <aarch64/armreg.h> +#include <sys/param.h> +#include <sys/sysctl.h> + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/feature_detection_hwcaps.hpp" +#include "oaknut/feature_detection/feature_detection_idregs.hpp" +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut { + +inline std::optional<id::IdRegisters> read_id_registers(std::size_t core_index) +{ + const std::string path = "machdep.cpu" + std::to_string(core_index) + ".cpu_id"; + + aarch64_sysctl_cpu_id id; + std::size_t id_len = sizeof id; + + if (sysctlbyname(path.c_str(), &id, &id_len, nullptr, 0) < 0) + return std::nullopt; + + return id::IdRegisters{ + id.ac_midr, + id::Pfr0Register{id.ac_aa64pfr0}, + id::Pfr1Register{id.ac_aa64pfr1}, + id::Pfr2Register{0}, + id::Zfr0Register{id.ac_aa64zfr0}, + id::Smfr0Register{0}, + id::Isar0Register{id.ac_aa64isar0}, + id::Isar1Register{id.ac_aa64isar1}, + id::Isar2Register{0}, + id::Isar3Register{0}, + id::Mmfr0Register{id.ac_aa64mmfr0}, + id::Mmfr1Register{id.ac_aa64mmfr1}, + id::Mmfr2Register{id.ac_aa64mmfr2}, + id::Mmfr3Register{0}, + id::Mmfr4Register{0}, + }; +} + +inline std::size_t get_core_count() +{ + int result = 0; + size_t result_size = sizeof(result); + const std::array<int, 2> mib{CTL_HW, HW_NCPU}; + if (sysctl(mib.data(), mib.size(), &result, &result_size, nullptr, 0) < 0) + return 0; + return result; +} + +inline CpuFeatures detect_features() +{ + std::optional<CpuFeatures> result; + + const std::size_t core_count = get_core_count(); + for (std::size_t core_index = 0; core_index < core_count; core_index++) { + if (const std::optional<id::IdRegisters> id_regs = read_id_registers(core_index)) { + const CpuFeatures current_features = detect_features_via_id_registers(*id_regs); + if (result) { + result = *result & 
current_features; + } else { + result = current_features; + } + } + } + + return result.value_or(CpuFeatures{}); +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_openbsd.hpp b/include/oaknut/feature_detection/feature_detection_openbsd.hpp new file mode 100644 index 00000000..8514a2bf --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_openbsd.hpp @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <array> +#include <cstddef> +#include <cstdint> +#include <optional> + +#include <sys/sysctl.h> +#include <sys/types.h> + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/feature_detection_hwcaps.hpp" +#include "oaknut/feature_detection/feature_detection_idregs.hpp" +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut { + +namespace detail { + +inline std::uint64_t read_id_register(int index) +{ + uint64_t result = 0; + size_t result_size = sizeof(result); + std::array<int, 2> mib{CTL_MACHDEP, index}; + if (sysctl(mib.data(), mib.size(), &result, &result_size, nullptr, 0) < 0) + return 0; + return result; +} + +} // namespace detail + +inline std::optional<id::IdRegisters> read_id_registers() +{ + // See OpenBSD source: sys/arch/arm64/include/cpu.h + + return id::IdRegisters{ + std::nullopt, // No easy way of getting MIDR_EL1 other than reading /proc/cpu + id::Pfr0Register{detail::read_id_register(8)}, // CPU_ID_AA64PFR0 + id::Pfr1Register{detail::read_id_register(9)}, // CPU_ID_AA64PFR1 + id::Pfr2Register{0}, + id::Zfr0Register{detail::read_id_register(11)}, // CPU_ID_AA64ZFR0 + id::Smfr0Register{detail::read_id_register(10)}, // CPU_ID_AA64SMFR0 + id::Isar0Register{detail::read_id_register(2)}, // CPU_ID_AA64ISAR0 + id::Isar1Register{detail::read_id_register(3)}, // CPU_ID_AA64ISAR1 + id::Isar2Register{detail::read_id_register(4)}, // 
CPU_ID_AA64ISAR2 + id::Isar3Register{0}, + id::Mmfr0Register{detail::read_id_register(5)}, // CPU_ID_AA64MMFR0 + id::Mmfr1Register{detail::read_id_register(6)}, // CPU_ID_AA64MMFR1 + id::Mmfr2Register{detail::read_id_register(7)}, // CPU_ID_AA64MMFR2 + id::Mmfr3Register{0}, + id::Mmfr4Register{0}, + }; +} + +inline CpuFeatures detect_features() +{ + return detect_features_via_id_registers(*read_id_registers()); +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/feature_detection_w32.hpp b/include/oaknut/feature_detection/feature_detection_w32.hpp new file mode 100644 index 00000000..366a2600 --- /dev/null +++ b/include/oaknut/feature_detection/feature_detection_w32.hpp @@ -0,0 +1,99 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#include <windows.h> + +#include <cstddef> +#include <cstdint> +#include <optional> + +#include <processthreadsapi.h> + +#include "oaknut/feature_detection/cpu_feature.hpp" +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut { + +namespace detail { + +inline std::optional<std::uint64_t> read_registry_hklm(const std::string& subkey, const std::string& name) +{ + std::uint64_t value; + DWORD value_len = sizeof(value); + if (::RegGetValueA(HKEY_LOCAL_MACHINE, subkey.c_str(), name.c_str(), RRF_RT_REG_QWORD, nullptr, &value, &value_len) == ERROR_SUCCESS) { + return value; + } + return std::nullopt; +} + +inline std::uint64_t read_id_register(std::size_t core_index, const std::string& name) +{ + return read_registry_hklm("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\" + std::to_string(core_index), "CP " + name).value_or(0); +} + +} // namespace detail + +// Ref: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + +inline CpuFeatures detect_features_via_IsProcessorFeaturePresent() +{ 
+ CpuFeatures result; + + if (::IsProcessorFeaturePresent(30)) // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::AES, CpuFeature::PMULL, CpuFeature::SHA1, CpuFeature::SHA256}; + if (::IsProcessorFeaturePresent(31)) // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::CRC32}; + if (::IsProcessorFeaturePresent(34)) // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::LSE}; + if (::IsProcessorFeaturePresent(43)) // PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::DotProd}; + if (::IsProcessorFeaturePresent(44)) // PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::JSCVT}; + if (::IsProcessorFeaturePresent(45)) // PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE + result |= CpuFeatures{CpuFeature::LRCPC}; + + return result; +} + +inline CpuFeatures detect_features() +{ + CpuFeatures result{CpuFeature::FP, CpuFeature::ASIMD}; + result |= detect_features_via_IsProcessorFeaturePresent(); + return result; +} + +inline std::size_t get_core_count() +{ + ::SYSTEM_INFO sys_info; + ::GetSystemInfo(&sys_info); + return sys_info.dwNumberOfProcessors; +} + +inline std::optional<id::IdRegisters> read_id_registers(std::size_t core_index) +{ + return id::IdRegisters{ + detail::read_id_register(core_index, "4000"), + id::Pfr0Register{detail::read_id_register(core_index, "4020")}, + id::Pfr1Register{detail::read_id_register(core_index, "4021")}, + id::Pfr2Register{detail::read_id_register(core_index, "4022")}, + id::Zfr0Register{detail::read_id_register(core_index, "4024")}, + id::Smfr0Register{detail::read_id_register(core_index, "4025")}, + id::Isar0Register{detail::read_id_register(core_index, "4030")}, + id::Isar1Register{detail::read_id_register(core_index, "4031")}, + id::Isar2Register{detail::read_id_register(core_index, "4032")}, + id::Isar3Register{detail::read_id_register(core_index, "4033")}, + id::Mmfr0Register{detail::read_id_register(core_index, 
"4038")}, + id::Mmfr1Register{detail::read_id_register(core_index, "4039")}, + id::Mmfr2Register{detail::read_id_register(core_index, "403A")}, + id::Mmfr3Register{detail::read_id_register(core_index, "403B")}, + id::Mmfr4Register{detail::read_id_register(core_index, "403C")}, + }; +} + +} // namespace oaknut diff --git a/include/oaknut/feature_detection/id_registers.hpp b/include/oaknut/feature_detection/id_registers.hpp new file mode 100644 index 00000000..fa779618 --- /dev/null +++ b/include/oaknut/feature_detection/id_registers.hpp @@ -0,0 +1,318 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <optional> + +namespace oaknut::id { + +namespace detail { + +template<std::size_t lsb> +constexpr unsigned extract_bit(std::uint64_t value) +{ + return (value >> lsb) & 1; +} + +template<std::size_t lsb> +constexpr unsigned extract_field(std::uint64_t value) +{ + return (value >> lsb) & 0xf; +} + +template<std::size_t lsb> +constexpr signed extract_signed_field(std::uint64_t value) +{ + return static_cast<signed>(static_cast<std::int64_t>(value << (60 - lsb)) >> 60); +} + +} // namespace detail + +struct Pfr0Register { + std::uint64_t value; + + constexpr signed FP() const { return detail::extract_signed_field<16>(value); } + constexpr signed AdvSIMD() const { return detail::extract_signed_field<20>(value); } + constexpr unsigned GIC() const { return detail::extract_field<24>(value); } + constexpr unsigned RAS() const { return detail::extract_field<28>(value); } + constexpr unsigned SVE() const { return detail::extract_field<32>(value); } + constexpr unsigned SEL2() const { return detail::extract_field<36>(value); } + constexpr unsigned MPAM() const { return detail::extract_field<40>(value); } + constexpr unsigned AMU() const { return detail::extract_field<44>(value); } + constexpr unsigned DIT() const { return 
detail::extract_field<48>(value); } + constexpr unsigned RME() const { return detail::extract_field<52>(value); } + constexpr unsigned CSV2() const { return detail::extract_field<56>(value); } + constexpr unsigned CSV3() const { return detail::extract_field<60>(value); } +}; + +struct Pfr1Register { + std::uint64_t value; + + constexpr unsigned BT() const { return detail::extract_field<0>(value); } + constexpr unsigned SSBS() const { return detail::extract_field<4>(value); } + constexpr unsigned MTE() const { return detail::extract_field<8>(value); } + constexpr unsigned RAS_frac() const { return detail::extract_field<12>(value); } + constexpr unsigned MPAM_frac() const { return detail::extract_field<16>(value); } + // [20:23] - reserved + constexpr unsigned SME() const { return detail::extract_field<24>(value); } + constexpr unsigned RNDR_trap() const { return detail::extract_field<28>(value); } + constexpr unsigned CSV2_frac() const { return detail::extract_field<32>(value); } + constexpr unsigned NMI() const { return detail::extract_field<36>(value); } + constexpr unsigned MTE_frac() const { return detail::extract_field<40>(value); } + constexpr unsigned GCS() const { return detail::extract_field<44>(value); } + constexpr unsigned THE() const { return detail::extract_field<48>(value); } + constexpr unsigned MTEX() const { return detail::extract_field<52>(value); } + constexpr unsigned DF2() const { return detail::extract_field<56>(value); } + constexpr unsigned PFAR() const { return detail::extract_field<60>(value); } +}; + +struct Pfr2Register { + std::uint64_t value; + + constexpr unsigned MTEPERM() const { return detail::extract_field<0>(value); } + constexpr unsigned MTESTOREONLY() const { return detail::extract_field<4>(value); } + constexpr unsigned MTEFAR() const { return detail::extract_field<8>(value); } + // [12:31] reserved + constexpr unsigned FPMR() const { return detail::extract_field<32>(value); } + // [36:63] reserved +}; + +struct Zfr0Register { 
+ std::uint64_t value; + + constexpr unsigned SVEver() const { return detail::extract_field<0>(value); } + constexpr unsigned AES() const { return detail::extract_field<4>(value); } + // [8:15] reserved + constexpr unsigned BitPerm() const { return detail::extract_field<16>(value); } + constexpr unsigned BF16() const { return detail::extract_field<20>(value); } + constexpr unsigned B16B16() const { return detail::extract_field<24>(value); } + // [28:31] reserved + constexpr unsigned SHA3() const { return detail::extract_field<32>(value); } + // [36:39] reserved + constexpr unsigned SM4() const { return detail::extract_field<40>(value); } + constexpr unsigned I8MM() const { return detail::extract_field<44>(value); } + // [48:51] reserved + constexpr unsigned F32MM() const { return detail::extract_field<52>(value); } + constexpr unsigned F64MM() const { return detail::extract_field<56>(value); } + // [60:63] reserved +}; + +struct Smfr0Register { + std::uint64_t value; + + // [0:27] reserved + constexpr unsigned SF8DP2() const { return detail::extract_bit<28>(value); } + constexpr unsigned SF8DP4() const { return detail::extract_bit<29>(value); } + constexpr unsigned SF8FMA() const { return detail::extract_bit<30>(value); } + // [31] reserved + constexpr unsigned F32F32() const { return detail::extract_bit<32>(value); } + constexpr unsigned BI32I32() const { return detail::extract_bit<33>(value); } + constexpr unsigned B16F32() const { return detail::extract_bit<34>(value); } + constexpr unsigned F16F32() const { return detail::extract_bit<35>(value); } + constexpr unsigned I8I32() const { return detail::extract_field<36>(value); } + constexpr unsigned F8F32() const { return detail::extract_bit<40>(value); } + constexpr unsigned F8F16() const { return detail::extract_bit<41>(value); } + constexpr unsigned F16F16() const { return detail::extract_bit<42>(value); } + constexpr unsigned B16B16() const { return detail::extract_bit<43>(value); } + constexpr unsigned 
I16I32() const { return detail::extract_field<44>(value); } + constexpr unsigned F64F64() const { return detail::extract_bit<48>(value); } + // [49:51] reserved + constexpr unsigned I16I64() const { return detail::extract_field<52>(value); } + constexpr unsigned SMEver() const { return detail::extract_field<56>(value); } + constexpr unsigned LUTv2() const { return detail::extract_bit<60>(value); } + // [61:62] reserved + constexpr unsigned FA64() const { return detail::extract_bit<63>(value); } +}; + +struct Isar0Register { + std::uint64_t value; + + // [0:3] reserved + constexpr unsigned AES() const { return detail::extract_field<4>(value); } + constexpr unsigned SHA1() const { return detail::extract_field<8>(value); } + constexpr unsigned SHA2() const { return detail::extract_field<12>(value); } + constexpr unsigned CRC32() const { return detail::extract_field<16>(value); } + constexpr unsigned Atomic() const { return detail::extract_field<20>(value); } + constexpr unsigned TME() const { return detail::extract_field<24>(value); } + constexpr unsigned RDM() const { return detail::extract_field<28>(value); } + constexpr unsigned SHA3() const { return detail::extract_field<32>(value); } + constexpr unsigned SM3() const { return detail::extract_field<36>(value); } + constexpr unsigned SM4() const { return detail::extract_field<40>(value); } + constexpr unsigned DP() const { return detail::extract_field<44>(value); } + constexpr unsigned FHM() const { return detail::extract_field<48>(value); } + constexpr unsigned TS() const { return detail::extract_field<52>(value); } + constexpr unsigned TLB() const { return detail::extract_field<56>(value); } + constexpr unsigned RNDR() const { return detail::extract_field<60>(value); } +}; + +struct Isar1Register { + std::uint64_t value; + + constexpr unsigned DPB() const { return detail::extract_field<0>(value); } + constexpr unsigned APA() const { return detail::extract_field<4>(value); } + constexpr unsigned API() const { 
return detail::extract_field<8>(value); } + constexpr unsigned JSCVT() const { return detail::extract_field<12>(value); } + constexpr unsigned FCMA() const { return detail::extract_field<16>(value); } + constexpr unsigned LRCPC() const { return detail::extract_field<20>(value); } + constexpr unsigned GPA() const { return detail::extract_field<24>(value); } + constexpr unsigned GPI() const { return detail::extract_field<28>(value); } + constexpr unsigned FRINTTS() const { return detail::extract_field<32>(value); } + constexpr unsigned SB() const { return detail::extract_field<36>(value); } + constexpr unsigned SPECRES() const { return detail::extract_field<40>(value); } + constexpr unsigned BF16() const { return detail::extract_field<44>(value); } + constexpr unsigned DGH() const { return detail::extract_field<48>(value); } + constexpr unsigned I8MM() const { return detail::extract_field<52>(value); } + constexpr unsigned XS() const { return detail::extract_field<56>(value); } + constexpr unsigned LS64() const { return detail::extract_field<60>(value); } +}; + +struct Isar2Register { + std::uint64_t value; + + constexpr unsigned WFxT() const { return detail::extract_field<0>(value); } + constexpr unsigned RPRES() const { return detail::extract_field<4>(value); } + constexpr unsigned GPA3() const { return detail::extract_field<8>(value); } + constexpr unsigned APA3() const { return detail::extract_field<12>(value); } + constexpr unsigned MOPS() const { return detail::extract_field<16>(value); } + constexpr unsigned BC() const { return detail::extract_field<20>(value); } + constexpr unsigned PAC_frac() const { return detail::extract_field<24>(value); } + constexpr unsigned CLRBHB() const { return detail::extract_field<28>(value); } + constexpr unsigned SYSREG_128() const { return detail::extract_field<32>(value); } + constexpr unsigned SYSINSTR_128() const { return detail::extract_field<36>(value); } + constexpr unsigned PRFMSLC() const { return 
detail::extract_field<40>(value); } + // [44:47] reserved + constexpr unsigned RPRFM() const { return detail::extract_field<48>(value); } + constexpr unsigned CSSC() const { return detail::extract_field<52>(value); } + constexpr unsigned LUT() const { return detail::extract_field<56>(value); } + constexpr unsigned ATS1A() const { return detail::extract_field<60>(value); } +}; + +struct Isar3Register { + std::uint64_t value; + + constexpr unsigned CPA() const { return detail::extract_field<0>(value); } + constexpr unsigned FAMINMAX() const { return detail::extract_field<4>(value); } + constexpr unsigned TLBIW() const { return detail::extract_field<8>(value); } + // [12:63] reserved +}; + +struct Mmfr0Register { + std::uint64_t value; + + constexpr unsigned PARange() const { return detail::extract_field<0>(value); } + constexpr unsigned ASIDBits() const { return detail::extract_field<4>(value); } + constexpr unsigned BigEnd() const { return detail::extract_field<8>(value); } + constexpr unsigned SNSMem() const { return detail::extract_field<12>(value); } + constexpr unsigned BigEndEL0() const { return detail::extract_field<16>(value); } + constexpr unsigned TGran16() const { return detail::extract_field<20>(value); } + constexpr unsigned TGran64() const { return detail::extract_field<24>(value); } + constexpr unsigned TGran4() const { return detail::extract_field<28>(value); } + constexpr unsigned TGran16_2() const { return detail::extract_field<32>(value); } + constexpr unsigned TGran64_2() const { return detail::extract_field<36>(value); } + constexpr unsigned TGran4_2() const { return detail::extract_field<40>(value); } + constexpr unsigned ExS() const { return detail::extract_field<44>(value); } + // [48:55] reserved + constexpr unsigned FGT() const { return detail::extract_field<56>(value); } + constexpr unsigned ECV() const { return detail::extract_field<60>(value); } +}; + +struct Mmfr1Register { + std::uint64_t value; + + constexpr unsigned HAFDBS() const { 
return detail::extract_field<0>(value); } + constexpr unsigned VMIDBits() const { return detail::extract_field<4>(value); } + constexpr unsigned VH() const { return detail::extract_field<8>(value); } + constexpr unsigned HPDS() const { return detail::extract_field<12>(value); } + constexpr unsigned LO() const { return detail::extract_field<16>(value); } + constexpr unsigned PAN() const { return detail::extract_field<20>(value); } + constexpr unsigned SpecSEI() const { return detail::extract_field<24>(value); } + constexpr unsigned XNX() const { return detail::extract_field<28>(value); } + constexpr unsigned TWED() const { return detail::extract_field<32>(value); } + constexpr unsigned ETS() const { return detail::extract_field<36>(value); } + constexpr unsigned HCX() const { return detail::extract_field<40>(value); } + constexpr unsigned AFP() const { return detail::extract_field<44>(value); } + constexpr unsigned nTLBPA() const { return detail::extract_field<48>(value); } + constexpr unsigned TIDCP1() const { return detail::extract_field<52>(value); } + constexpr unsigned CMOW() const { return detail::extract_field<56>(value); } + constexpr unsigned ECBHB() const { return detail::extract_field<60>(value); } +}; + +struct Mmfr2Register { + std::uint64_t value; + + constexpr unsigned CnP() const { return detail::extract_field<0>(value); } + constexpr unsigned UAO() const { return detail::extract_field<4>(value); } + constexpr unsigned LSM() const { return detail::extract_field<8>(value); } + constexpr unsigned IESB() const { return detail::extract_field<12>(value); } + constexpr unsigned VARange() const { return detail::extract_field<16>(value); } + constexpr unsigned CCIDX() const { return detail::extract_field<20>(value); } + constexpr unsigned NV() const { return detail::extract_field<24>(value); } + constexpr unsigned ST() const { return detail::extract_field<28>(value); } + constexpr unsigned AT() const { return detail::extract_field<32>(value); } + constexpr 
unsigned IDS() const { return detail::extract_field<36>(value); } + constexpr unsigned FWB() const { return detail::extract_field<40>(value); } + // [44:47] reserved + constexpr unsigned TTL() const { return detail::extract_field<48>(value); } + constexpr unsigned BBM() const { return detail::extract_field<52>(value); } + constexpr unsigned EVT() const { return detail::extract_field<56>(value); } + constexpr unsigned E0PD() const { return detail::extract_field<60>(value); } +}; + +struct Mmfr3Register { + std::uint64_t value; + + constexpr unsigned TCRX() const { return detail::extract_field<0>(value); } + constexpr unsigned SCTLRX() const { return detail::extract_field<4>(value); } + constexpr unsigned S1PIE() const { return detail::extract_field<8>(value); } + constexpr unsigned S2PIE() const { return detail::extract_field<12>(value); } + constexpr unsigned S1POE() const { return detail::extract_field<16>(value); } + constexpr unsigned S2POE() const { return detail::extract_field<20>(value); } + constexpr unsigned AIE() const { return detail::extract_field<24>(value); } + constexpr unsigned MEC() const { return detail::extract_field<28>(value); } + constexpr unsigned D128() const { return detail::extract_field<32>(value); } + constexpr unsigned D128_2() const { return detail::extract_field<36>(value); } + constexpr unsigned SNERR() const { return detail::extract_field<40>(value); } + constexpr unsigned ANERR() const { return detail::extract_field<44>(value); } + // [48:51] reserved + constexpr unsigned SDERR() const { return detail::extract_field<52>(value); } + constexpr unsigned ADERR() const { return detail::extract_field<56>(value); } + constexpr unsigned Spec_FPACC() const { return detail::extract_field<60>(value); } +}; + +struct Mmfr4Register { + std::uint64_t value; + + // [0:3] reserved + constexpr unsigned EIESB() const { return detail::extract_field<4>(value); } + constexpr unsigned ASID2() const { return detail::extract_field<8>(value); } + constexpr 
unsigned HACDBS() const { return detail::extract_field<12>(value); } + constexpr unsigned FGWTE3() const { return detail::extract_field<16>(value); } + constexpr unsigned NV_frac() const { return detail::extract_field<20>(value); } + constexpr unsigned E2H0() const { return detail::extract_field<24>(value); } + // [28:35] reserved + constexpr unsigned E3DSE() const { return detail::extract_field<36>(value); } + // [40:63] reserved +}; + +struct IdRegisters { + std::optional<std::uint64_t> midr; + Pfr0Register pfr0; + Pfr1Register pfr1; + Pfr2Register pfr2; + Zfr0Register zfr0; + Smfr0Register smfr0; + Isar0Register isar0; + Isar1Register isar1; + Isar2Register isar2; + Isar3Register isar3; + Mmfr0Register mmfr0; + Mmfr1Register mmfr1; + Mmfr2Register mmfr2; + Mmfr3Register mmfr3; + Mmfr4Register mmfr4; +}; + +} // namespace oaknut::id diff --git a/include/oaknut/feature_detection/read_id_registers_directly.hpp b/include/oaknut/feature_detection/read_id_registers_directly.hpp new file mode 100644 index 00000000..04db5188 --- /dev/null +++ b/include/oaknut/feature_detection/read_id_registers_directly.hpp @@ -0,0 +1,52 @@ +#include <cstdint> + +#include "oaknut/feature_detection/id_registers.hpp" + +namespace oaknut::id { + +inline IdRegisters read_id_registers_directly() +{ + std::uint64_t midr, pfr0, pfr1, pfr2, isar0, isar1, isar2, isar3, mmfr0, mmfr1, mmfr2, mmfr3, mmfr4, zfr0, smfr0; + +#define OAKNUT_READ_REGISTER(reg, var) \ + __asm__("mrs %0, " #reg \ + : "=r"(var)) + + OAKNUT_READ_REGISTER(s3_0_c0_c0_0, midr); + OAKNUT_READ_REGISTER(s3_0_c0_c4_0, pfr0); + OAKNUT_READ_REGISTER(s3_0_c0_c4_1, pfr1); + OAKNUT_READ_REGISTER(s3_0_c0_c4_2, pfr2); + OAKNUT_READ_REGISTER(s3_0_c0_c4_4, zfr0); + OAKNUT_READ_REGISTER(s3_0_c0_c4_5, smfr0); + OAKNUT_READ_REGISTER(s3_0_c0_c6_0, isar0); + OAKNUT_READ_REGISTER(s3_0_c0_c6_1, isar1); + OAKNUT_READ_REGISTER(s3_0_c0_c6_2, isar2); + OAKNUT_READ_REGISTER(s3_0_c0_c6_3, isar3); + OAKNUT_READ_REGISTER(s3_0_c0_c7_0, mmfr0); + 
OAKNUT_READ_REGISTER(s3_0_c0_c7_1, mmfr1); + OAKNUT_READ_REGISTER(s3_0_c0_c7_2, mmfr2); + OAKNUT_READ_REGISTER(s3_0_c0_c7_3, mmfr3); + OAKNUT_READ_REGISTER(s3_0_c0_c7_4, mmfr4); + +#undef OAKNUT_READ_REGISTER + + return IdRegisters{ + midr, + Pfr0Register{pfr0}, + Pfr1Register{pfr1}, + Pfr2Register{pfr2}, + Zfr0Register{zfr0}, + Smfr0Register{smfr0}, + Isar0Register{isar0}, + Isar1Register{isar1}, + Isar2Register{isar2}, + Isar3Register{isar3}, + Mmfr0Register{mmfr0}, + Mmfr1Register{mmfr1}, + Mmfr2Register{mmfr2}, + Mmfr3Register{mmfr3}, + Mmfr4Register{mmfr4}, + }; +} + +} // namespace oaknut::id diff --git a/include/oaknut/impl/arm64_encode_helpers.inc.hpp b/include/oaknut/impl/arm64_encode_helpers.inc.hpp index 3081d943..fb636b78 100644 --- a/include/oaknut/impl/arm64_encode_helpers.inc.hpp +++ b/include/oaknut/impl/arm64_encode_helpers.inc.hpp @@ -8,7 +8,7 @@ static constexpr std::uint32_t pdep(std::uint32_t val) std::uint32_t res = 0; for (std::uint32_t bb = 1; mask; bb += bb) { if (val & bb) - res |= mask & -mask; + res |= mask & (~mask + 1); mask &= mask - 1; } return res; @@ -107,6 +107,61 @@ std::uint32_t encode(List<T, N> v) return encode<splat>(v.m_base); } +template<std::uint32_t splat, std::size_t size, std::size_t align> +std::uint32_t encode(AddrOffset<size, align> v) +{ + static_assert(std::popcount(splat) == size - align); + + const auto encode_fn = [](std::ptrdiff_t current_offset, std::ptrdiff_t target_offset) { + const std::ptrdiff_t diff = target_offset - current_offset; + return pdep<splat>(AddrOffset<size, align>::encode(diff)); + }; + + return std::visit(detail::overloaded{ + [&](std::uint32_t encoding) -> std::uint32_t { + return pdep<splat>(encoding); + }, + [&](Label* label) -> std::uint32_t { + if (label->m_offset) { + return encode_fn(Policy::offset(), *label->m_offset); + } + + label->m_wbs.emplace_back(Label::Writeback{Policy::offset(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)}); + return 0u; + }, + [&](const void* 
p) -> std::uint32_t { + const std::ptrdiff_t diff = reinterpret_cast<std::uintptr_t>(p) - Policy::template xptr<std::uintptr_t>(); + return pdep<splat>(AddrOffset<size, align>::encode(diff)); + }, + }, + v.m_payload); +} + +template<std::uint32_t splat, std::size_t size, std::size_t shift_amount> +std::uint32_t encode(PageOffset<size, shift_amount> v) +{ + static_assert(std::popcount(splat) == size); + + const auto encode_fn = [](std::ptrdiff_t current_offset, std::ptrdiff_t target_offset) { + return pdep<splat>(PageOffset<size, shift_amount>::encode(static_cast<std::uintptr_t>(current_offset), static_cast<std::uintptr_t>(target_offset))); + }; + + return std::visit(detail::overloaded{ + [&](Label* label) -> std::uint32_t { + if (label->m_offset) { + return encode_fn(Policy::offset(), *label->m_offset); + } + + label->m_wbs.emplace_back(Label::Writeback{Policy::offset(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)}); + return 0u; + }, + [&](const void* p) -> std::uint32_t { + return pdep<splat>(PageOffset<size, shift_amount>::encode(Policy::template xptr<std::uintptr_t>(), reinterpret_cast<std::ptrdiff_t>(p))); + }, + }, + v.m_payload); +} + #undef OAKNUT_STD_ENCODE void addsubext_lsl_correction(AddSubExt& ext, XRegSp) diff --git a/include/oaknut/impl/cpu_feature.inc.hpp b/include/oaknut/impl/cpu_feature.inc.hpp new file mode 100644 index 00000000..1f7cd879 --- /dev/null +++ b/include/oaknut/impl/cpu_feature.inc.hpp @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +OAKNUT_CPU_FEATURE(FP) +OAKNUT_CPU_FEATURE(ASIMD) +OAKNUT_CPU_FEATURE(AES) +OAKNUT_CPU_FEATURE(PMULL) +OAKNUT_CPU_FEATURE(SHA1) +OAKNUT_CPU_FEATURE(SHA256) +OAKNUT_CPU_FEATURE(CRC32) +OAKNUT_CPU_FEATURE(LSE) +OAKNUT_CPU_FEATURE(FP16Conv) +OAKNUT_CPU_FEATURE(FP16) +OAKNUT_CPU_FEATURE(RDM) +OAKNUT_CPU_FEATURE(JSCVT) +OAKNUT_CPU_FEATURE(FCMA) +OAKNUT_CPU_FEATURE(LRCPC) +OAKNUT_CPU_FEATURE(DPB) +OAKNUT_CPU_FEATURE(SHA3) 
+OAKNUT_CPU_FEATURE(SM3) +OAKNUT_CPU_FEATURE(SM4) +OAKNUT_CPU_FEATURE(DotProd) +OAKNUT_CPU_FEATURE(SHA512) +OAKNUT_CPU_FEATURE(SVE) +OAKNUT_CPU_FEATURE(FHM) +OAKNUT_CPU_FEATURE(DIT) +OAKNUT_CPU_FEATURE(LSE2) +OAKNUT_CPU_FEATURE(LRCPC2) +OAKNUT_CPU_FEATURE(FlagM) +OAKNUT_CPU_FEATURE(SSBS) +OAKNUT_CPU_FEATURE(SB) +OAKNUT_CPU_FEATURE(PACA) +OAKNUT_CPU_FEATURE(PACG) +OAKNUT_CPU_FEATURE(DPB2) +OAKNUT_CPU_FEATURE(SVE2) +OAKNUT_CPU_FEATURE(SVE_AES) +OAKNUT_CPU_FEATURE(SVE_PMULL128) +OAKNUT_CPU_FEATURE(SVE_BITPERM) +OAKNUT_CPU_FEATURE(SVE_SHA3) +OAKNUT_CPU_FEATURE(SVE_SM4) +OAKNUT_CPU_FEATURE(FlagM2) +OAKNUT_CPU_FEATURE(FRINTTS) +OAKNUT_CPU_FEATURE(SVE_I8MM) +OAKNUT_CPU_FEATURE(SVE_F32MM) +OAKNUT_CPU_FEATURE(SVE_F64MM) +OAKNUT_CPU_FEATURE(SVE_BF16) +OAKNUT_CPU_FEATURE(I8MM) +OAKNUT_CPU_FEATURE(BF16) +OAKNUT_CPU_FEATURE(DGH) +OAKNUT_CPU_FEATURE(RNG) +OAKNUT_CPU_FEATURE(BTI) +OAKNUT_CPU_FEATURE(MTE) +OAKNUT_CPU_FEATURE(ECV) +OAKNUT_CPU_FEATURE(AFP) +OAKNUT_CPU_FEATURE(RPRES) +OAKNUT_CPU_FEATURE(MTE3) +OAKNUT_CPU_FEATURE(SME) +OAKNUT_CPU_FEATURE(SME_I16I64) +OAKNUT_CPU_FEATURE(SME_F64F64) +OAKNUT_CPU_FEATURE(SME_I8I32) +OAKNUT_CPU_FEATURE(SME_F16F32) +OAKNUT_CPU_FEATURE(SME_B16F32) +OAKNUT_CPU_FEATURE(SME_F32F32) +OAKNUT_CPU_FEATURE(SME_FA64) +OAKNUT_CPU_FEATURE(WFxT) +OAKNUT_CPU_FEATURE(EBF16) +OAKNUT_CPU_FEATURE(SVE_EBF16) +OAKNUT_CPU_FEATURE(CSSC) +OAKNUT_CPU_FEATURE(RPRFM) +OAKNUT_CPU_FEATURE(SVE2p1) +OAKNUT_CPU_FEATURE(SME2) +OAKNUT_CPU_FEATURE(SME2p1) +OAKNUT_CPU_FEATURE(SME_I16I32) +OAKNUT_CPU_FEATURE(SME_BI32I32) +OAKNUT_CPU_FEATURE(SME_B16B16) +OAKNUT_CPU_FEATURE(SME_F16F16) +OAKNUT_CPU_FEATURE(MOPS) +OAKNUT_CPU_FEATURE(HBC) diff --git a/include/oaknut/impl/enum.hpp b/include/oaknut/impl/enum.hpp index 89dc9356..68448b47 100644 --- a/include/oaknut/impl/enum.hpp +++ b/include/oaknut/impl/enum.hpp @@ -85,15 +85,67 @@ enum class PstateField { }; enum class SystemReg { + AMCFGR_EL0 = 0b11'011'1101'0010'001, + AMCGCR_EL0 = 0b11'011'1101'0010'010, + AMCNTENCLR0_EL0 = 
0b11'011'1101'0010'100, + AMCNTENCLR1_EL0 = 0b11'011'1101'0011'000, + AMCNTENSET0_EL0 = 0b11'011'1101'0010'101, + AMCNTENSET1_EL0 = 0b11'011'1101'0011'001, + AMCR_EL0 = 0b11'011'1101'0010'000, + AMEVCNTR0_n_EL0 = 0b11'011'1101'0100'000, // n = 0-3 + AMEVCNTR1_n_EL0 = 0b11'011'1101'1100'000, // n = 0-15 + AMEVTYPER0_n_EL0 = 0b11'011'1101'0110'000, // n = 0-3 + AMEVTYPER1_n_EL0 = 0b11'011'1101'1110'000, // n = 0-15 + AMUSERENR_EL0 = 0b11'011'1101'0010'011, CNTFRQ_EL0 = 0b11'011'1110'0000'000, + CNTP_CTL_EL0 = 0b11'011'1110'0010'001, + CNTP_CVAL_EL0 = 0b11'011'1110'0010'010, + CNTP_TVAL_EL0 = 0b11'011'1110'0010'000, CNTPCT_EL0 = 0b11'011'1110'0000'001, + CNTV_CTL_EL0 = 0b11'011'1110'0011'001, + CNTV_CVAL_EL0 = 0b11'011'1110'0011'010, + CNTV_TVAL_EL0 = 0b11'011'1110'0011'000, + CNTVCT_EL0 = 0b11'011'1110'0000'010, CTR_EL0 = 0b11'011'0000'0000'001, + CurrentEL = 0b11'000'0100'0010'010, + DAIF = 0b11'011'0100'0010'001, + DBGDTR_EL0 = 0b10'011'0000'0100'000, + DBGDTRRX_EL0 = 0b10'011'0000'0101'000, + DBGDTRTX_EL0 = 0b10'011'0000'0101'000, DCZID_EL0 = 0b11'011'0000'0000'111, + DIT = 0b11'011'0100'0010'101, + DLR_EL0 = 0b11'011'0100'0101'001, + DSPSR_EL0 = 0b11'011'0100'0101'000, FPCR = 0b11'011'0100'0100'000, FPSR = 0b11'011'0100'0100'001, + MDCCSR_EL0 = 0b10'011'0000'0001'000, NZCV = 0b11'011'0100'0010'000, + PAN = 0b11'000'0100'0010'011, + PMCCFILTR_EL0 = 0b11'011'1110'1111'111, + PMCCNTR_EL0 = 0b11'011'1001'1101'000, + PMCEID0_EL0 = 0b11'011'1001'1100'110, + PMCEID1_EL0 = 0b11'011'1001'1100'111, + PMCNTENCLR_EL0 = 0b11'011'1001'1100'010, + PMCNTENSET_EL0 = 0b11'011'1001'1100'001, + PMCR_EL0 = 0b11'011'1001'1100'000, + PMEVCNTR_n_EL0 = 0b11'011'1110'1000'000, // n = 0-30 + PMEVTYPER_n_EL0 = 0b11'011'1110'1100'000, // n = 0-30 + PMOVSCLR_EL0 = 0b11'011'1001'1100'011, + PMOVSSET_EL0 = 0b11'011'1001'1110'011, + PMSELR_EL0 = 0b11'011'1001'1100'101, + PMSWINC_EL0 = 0b11'011'1001'1100'100, + PMUSERENR_EL0 = 0b11'011'1001'1110'000, + PMXEVCNTR_EL0 = 0b11'011'1001'1101'010, + 
PMXEVTYPER_EL0 = 0b11'011'1001'1101'001, + SP_EL0 = 0b11'000'0100'0001'000, + SPSel = 0b11'000'0100'0010'000, + SPSR_abt = 0b11'100'0100'0011'001, + SPSR_fiq = 0b11'100'0100'0011'011, + SPSR_irq = 0b11'100'0100'0011'000, + SPSR_und = 0b11'100'0100'0011'010, TPIDR_EL0 = 0b11'011'1101'0000'010, TPIDRRO_EL0 = 0b11'011'1101'0000'011, + UAO = 0b11'000'0100'0010'100, }; enum class AtOp { @@ -199,7 +251,7 @@ enum class TlbiOp { VALE1 = 0b000'0111'101, VAALE1 = 0b000'0111'111, IPAS2E1IS = 0b100'0000'001, - RIPAS2E1IS = 0b100'0000'010, // ARMv8.4-TLBI + RIPAS2E1IS = 0b100'0000'010, // ARMv8.4-TLBI IPAS2LE1IS = 0b100'0000'101, RIPAS2LE1IS = 0b100'0000'110, // ARMv8.4-TLBI ALLE2OS = 0b100'0001'000, // ARMv8.4-TLBI @@ -214,11 +266,11 @@ enum class TlbiOp { ALLE1IS = 0b100'0011'100, VALE2IS = 0b100'0011'101, VMALLS12E1IS = 0b100'0011'110, - IPAS2E1OS = 0b100'0100'000, // ARMv8.4-TLBI + IPAS2E1OS = 0b100'0100'000, // ARMv8.4-TLBI IPAS2E1 = 0b100'0100'001, - RIPAS2E1 = 0b100'0100'010, // ARMv8.4-TLBI - RIPAS2E1OS = 0b100'0100'011, // ARMv8.4-TLBI - IPAS2LE1OS = 0b100'0100'100, // ARMv8.4-TLBI + RIPAS2E1 = 0b100'0100'010, // ARMv8.4-TLBI + RIPAS2E1OS = 0b100'0100'011, // ARMv8.4-TLBI + IPAS2LE1OS = 0b100'0100'100, // ARMv8.4-TLBI IPAS2LE1 = 0b100'0100'101, RIPAS2LE1 = 0b100'0100'110, // ARMv8.4-TLBI RIPAS2LE1OS = 0b100'0100'111, // ARMv8.4-TLBI diff --git a/include/oaknut/impl/imm.hpp b/include/oaknut/impl/imm.hpp index cc90832c..7cde26fe 100644 --- a/include/oaknut/impl/imm.hpp +++ b/include/oaknut/impl/imm.hpp @@ -60,9 +60,9 @@ public: constexpr /* implicit */ AddSubImm(std::uint64_t value_) { if ((value_ & 0xFFF) == value_) { - m_encoded = value_; + m_encoded = static_cast<std::uint32_t>(value_); } else if ((value_ & 0xFFF000) == value_) { - m_encoded = (value_ >> 12) | (1 << 12); + m_encoded = static_cast<std::uint32_t>((value_ >> 12) | (1 << 12)); } else { throw OaknutException{ExceptionType::InvalidAddSubImm}; } @@ -126,18 +126,18 @@ constexpr std::optional<std::uint32_t> 
encode_bit_imm(std::uint64_t value) if (value == 0 || (~value) == 0) return std::nullopt; - const std::size_t rotation = std::countr_zero(value & (value + 1)); + const int rotation = std::countr_zero(value & (value + 1)); const std::uint64_t rot_value = std::rotr(value, rotation); - const std::size_t esize = std::countr_zero(rot_value & (rot_value + 1)); - const std::size_t ones = std::countr_one(rot_value); + const int esize = std::countr_zero(rot_value & (rot_value + 1)); + const int ones = std::countr_one(rot_value); if (std::rotr(value, esize) != value) return std::nullopt; - const std::uint32_t S = ((-esize) << 1) | (ones - 1); - const std::uint32_t R = (esize - rotation) & (esize - 1); - const std::uint32_t N = (~S >> 6) & 1; + const int S = ((-esize) << 1) | (ones - 1); + const int R = (esize - rotation) & (esize - 1); + const int N = (~S >> 6) & 1; return static_cast<std::uint32_t>((S & 0b111111) | (R << 6) | (N << 12)); } diff --git a/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp b/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp index 4f5ca8f0..09e8665f 100644 --- a/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp +++ b/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp @@ -167,13 +167,13 @@ void BFI(WReg wd, WReg wn, Imm<5> lsb, Imm<5> width) { if (width.value() == 0 || width.value() > (32 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"0011001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (-lsb.value()) & 31, width.value() - 1); + emit<"0011001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (~lsb.value() + 1) & 31, width.value() - 1); } void BFI(XReg xd, XReg xn, Imm<6> lsb, Imm<6> width) { if (width.value() == 0 || width.value() > (64 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"1011001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (-lsb.value()) & 63, width.value() - 1); + emit<"1011001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, 
(~lsb.value() + 1) & 63, width.value() - 1); } void BFM(WReg wd, WReg wn, Imm<5> immr, Imm<5> imms) { @@ -1231,13 +1231,13 @@ void SBFIZ(WReg wd, WReg wn, Imm<5> lsb, Imm<5> width) { if (width.value() == 0 || width.value() > (32 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"0001001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (-lsb.value()) & 31, width.value() - 1); + emit<"0001001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (~lsb.value() + 1) & 31, width.value() - 1); } void SBFIZ(XReg xd, XReg xn, Imm<6> lsb, Imm<6> width) { if (width.value() == 0 || width.value() > (64 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"1001001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (-lsb.value()) & 63, width.value() - 1); + emit<"1001001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (~lsb.value() + 1) & 63, width.value() - 1); } void SBFM(WReg wd, WReg wn, Imm<5> immr, Imm<5> imms) { @@ -1627,13 +1627,13 @@ void UBFIZ(WReg wd, WReg wn, Imm<5> lsb, Imm<5> width) { if (width.value() == 0 || width.value() > (32 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"0101001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (-lsb.value()) & 31, width.value() - 1); + emit<"0101001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (~lsb.value() + 1) & 31, width.value() - 1); } void UBFIZ(XReg xd, XReg xn, Imm<6> lsb, Imm<6> width) { if (width.value() == 0 || width.value() > (64 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"1101001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (-lsb.value()) & 63, width.value() - 1); + emit<"1101001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (~lsb.value() + 1) & 63, width.value() - 1); } void UBFM(WReg wd, WReg wn, Imm<5> immr, Imm<5> imms) { diff --git a/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp b/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp 
index a5bc5b82..0dffd0e3 100644 --- a/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp +++ b/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp @@ -5,13 +5,13 @@ void BFC(WReg wd, Imm<5> lsb, Imm<5> width) { if (width.value() == 0 || width.value() > (32 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"0011001100rrrrrrssssss11111ddddd", "d", "r", "s">(wd, (-lsb.value()) & 31, width.value() - 1); + emit<"0011001100rrrrrrssssss11111ddddd", "d", "r", "s">(wd, (~lsb.value() + 1) & 31, width.value() - 1); } void BFC(XReg xd, Imm<6> lsb, Imm<6> width) { if (width.value() == 0 || width.value() > (64 - lsb.value())) throw OaknutException{ExceptionType::InvalidBitWidth}; - emit<"1011001101rrrrrrssssss11111ddddd", "d", "r", "s">(xd, (-lsb.value()) & 63, width.value() - 1); + emit<"1011001101rrrrrrssssss11111ddddd", "d", "r", "s">(xd, (~lsb.value() + 1) & 63, width.value() - 1); } void ESB() { diff --git a/include/oaknut/impl/oaknut_exception.inc.hpp b/include/oaknut/impl/oaknut_exception.inc.hpp index 07402362..fc2738f0 100644 --- a/include/oaknut/impl/oaknut_exception.inc.hpp +++ b/include/oaknut/impl/oaknut_exception.inc.hpp @@ -29,6 +29,7 @@ OAKNUT_EXCEPTION(ImmOutOfRange, "outsized Imm value") OAKNUT_EXCEPTION(InvalidAddSubExt, "invalid AddSubExt choice for rm size") OAKNUT_EXCEPTION(InvalidIndexExt, "invalid IndexExt choice for rm size") OAKNUT_EXCEPTION(BitPositionOutOfRange, "bit position exceeds size of rt") +OAKNUT_EXCEPTION(RequiresAbsoluteAddressesContext, "absolute addresses required") // mnemonics_*.inc.hpp OAKNUT_EXCEPTION(InvalidCombination, "InvalidCombination") diff --git a/include/oaknut/impl/offset.hpp b/include/oaknut/impl/offset.hpp index 47859c78..a70941ff 100644 --- a/include/oaknut/impl/offset.hpp +++ b/include/oaknut/impl/offset.hpp @@ -45,7 +45,7 @@ struct AddrOffset { : m_payload(&label) {} - AddrOffset(void* ptr) + AddrOffset(const void* ptr) : m_payload(ptr) {} @@ -63,7 +63,7 @@ struct AddrOffset { private: 
template<typename Policy> friend class BasicCodeGenerator; - std::variant<std::uint32_t, Label*, void*> m_payload; + std::variant<std::uint32_t, Label*, const void*> m_payload; }; template<std::size_t bitsize, std::size_t shift_amount> @@ -78,13 +78,19 @@ struct PageOffset { static std::uint32_t encode(std::uintptr_t current_addr, std::uintptr_t target) { - std::uint64_t diff = (static_cast<std::uint64_t>(target) >> shift_amount) - (static_cast<std::uint64_t>(current_addr) >> shift_amount); + std::uint64_t diff = static_cast<std::uint64_t>((static_cast<std::int64_t>(target) >> shift_amount) - (static_cast<std::int64_t>(current_addr) >> shift_amount)); if (detail::sign_extend<bitsize>(diff) != diff) throw OaknutException{ExceptionType::OffsetOutOfRange}; diff &= detail::mask_from_size(bitsize); return static_cast<std::uint32_t>(((diff & 3) << (bitsize - 2)) | (diff >> 2)); } + static bool valid(std::uintptr_t current_addr, std::uintptr_t target) + { + std::uint64_t diff = static_cast<std::uint64_t>((static_cast<std::int64_t>(target) >> shift_amount) - (static_cast<std::int64_t>(current_addr) >> shift_amount)); + return detail::sign_extend<bitsize>(diff) == diff; + } + private: template<typename Policy> friend class BasicCodeGenerator; diff --git a/include/oaknut/impl/overloaded.hpp b/include/oaknut/impl/overloaded.hpp new file mode 100644 index 00000000..b15b8392 --- /dev/null +++ b/include/oaknut/impl/overloaded.hpp @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs> +// SPDX-License-Identifier: MIT + +#pragma once + +namespace oaknut::detail { + +template<class... Ts> +struct overloaded : Ts... { + using Ts::operator()...; +}; + +template<class... Ts> +overloaded(Ts...) 
-> overloaded<Ts...>; + +} // namespace oaknut::detail diff --git a/include/oaknut/impl/reg.hpp b/include/oaknut/impl/reg.hpp index eab02d84..649e67b3 100644 --- a/include/oaknut/impl/reg.hpp +++ b/include/oaknut/impl/reg.hpp @@ -52,8 +52,8 @@ struct DElem; struct Reg { constexpr explicit Reg(bool is_vector_, unsigned bitsize_, int index_) - : m_index(index_) - , m_bitsize(bitsize_) + : m_index(static_cast<std::int8_t>(index_)) + , m_bitsize(static_cast<std::uint8_t>(bitsize_)) , m_is_vector(is_vector_) { assert(index_ >= -1 && index_ <= 31); @@ -65,8 +65,8 @@ struct Reg { constexpr bool is_vector() const { return m_is_vector; } private: - int m_index : 8; - unsigned m_bitsize : 8; + std::int8_t m_index; + std::uint8_t m_bitsize; bool m_is_vector; }; @@ -190,7 +190,7 @@ struct VReg : public Reg { struct VRegArranged : public Reg { protected: constexpr explicit VRegArranged(unsigned bitsize_, int index_, unsigned esize_) - : Reg(true, bitsize_, index_), m_esize(esize_) + : Reg(true, bitsize_, index_), m_esize(static_cast<std::uint8_t>(esize_)) { assert(esize_ != 0 && (esize_ & (esize_ - 1)) == 0 && "esize must be a power of two"); assert(esize_ <= bitsize_); @@ -200,7 +200,7 @@ protected: friend class BasicCodeGenerator; private: - int m_esize : 8; + std::uint8_t m_esize; }; struct VReg_2H : public VRegArranged { diff --git a/include/oaknut/impl/string_literal.hpp b/include/oaknut/impl/string_literal.hpp index e09dfa65..412203e9 100644 --- a/include/oaknut/impl/string_literal.hpp +++ b/include/oaknut/impl/string_literal.hpp @@ -21,4 +21,22 @@ struct StringLiteral { char value[N]; }; +namespace detail { + +template<StringLiteral<33> haystack, StringLiteral needles> +consteval std::uint32_t find() +{ + std::uint32_t result = 0; + for (std::size_t i = 0; i < 32; i++) { + for (std::size_t a = 0; a < needles.strlen; a++) { + if (haystack.value[i] == needles.value[a]) { + result |= 1 << (31 - i); + } + } + } + return result; +} + +} // namespace detail + } // namespace 
oaknut diff --git a/include/oaknut/oaknut.hpp b/include/oaknut/oaknut.hpp index b67f7fdd..aa80f81b 100644 --- a/include/oaknut/oaknut.hpp +++ b/include/oaknut/oaknut.hpp @@ -17,84 +17,69 @@ #include "oaknut/impl/list.hpp" #include "oaknut/impl/multi_typed_name.hpp" #include "oaknut/impl/offset.hpp" +#include "oaknut/impl/overloaded.hpp" #include "oaknut/impl/reg.hpp" #include "oaknut/impl/string_literal.hpp" #include "oaknut/oaknut_exception.hpp" namespace oaknut { -namespace detail { - -template<StringLiteral bs, StringLiteral barg> -constexpr std::uint32_t get_bits() -{ - std::uint32_t result = 0; - for (std::size_t i = 0; i < 32; i++) { - for (std::size_t a = 0; a < barg.strlen; a++) { - if (bs.value[i] == barg.value[a]) { - result |= 1 << (31 - i); - } - } - } - return result; -} - -template<class... Ts> -struct overloaded : Ts... { - using Ts::operator()...; -}; - -template<class... Ts> -overloaded(Ts...) -> overloaded<Ts...>; - -} // namespace detail - struct Label { public: Label() = default; + bool is_bound() const + { + return m_offset.has_value(); + } + + std::ptrdiff_t offset() const + { + return m_offset.value(); + } + private: template<typename Policy> friend class BasicCodeGenerator; - explicit Label(std::uintptr_t addr) - : m_addr(addr) + explicit Label(std::ptrdiff_t offset) + : m_offset(offset) {} - using EmitFunctionType = std::uint32_t (*)(std::uintptr_t wb_addr, std::uintptr_t resolved_addr); + using EmitFunctionType = std::uint32_t (*)(std::ptrdiff_t wb_offset, std::ptrdiff_t resolved_offset); struct Writeback { - std::uintptr_t m_wb_addr; + std::ptrdiff_t m_wb_offset; std::uint32_t m_mask; EmitFunctionType m_fn; }; - std::optional<std::uintptr_t> m_addr; + std::optional<std::ptrdiff_t> m_offset; std::vector<Writeback> m_wbs; }; template<typename Policy> class BasicCodeGenerator : public Policy { public: - BasicCodeGenerator(typename Policy::constructor_argument_type arg) - : Policy(arg) + BasicCodeGenerator(typename 
Policy::constructor_argument_type arg, std::uint32_t* xmem) + : Policy(arg, xmem) {} - Label l() + Label l() const { - return Label{Policy::current_address()}; + return Label{Policy::offset()}; } - void l(Label& label) + void l(Label& label) const { - if (label.m_addr) + if (label.is_bound()) throw OaknutException{ExceptionType::LabelRedefinition}; - const auto target_addr = Policy::current_address(); - label.m_addr = target_addr; + const auto target_offset = Policy::offset(); + label.m_offset = target_offset; for (auto& wb : label.m_wbs) { - const std::uint32_t value = wb.m_fn(wb.m_wb_addr, target_addr); - Policy::set_at_address(wb.m_wb_addr, value, wb.m_mask); + const std::uint32_t value = wb.m_fn(wb.m_wb_offset, target_offset); + Policy::set_at_offset(wb.m_wb_offset, value, wb.m_mask); } label.m_wbs.clear(); } @@ -123,8 +108,8 @@ public: return; if (MovImm16::is_valid(imm)) return MOVZ(wd, imm); - if (MovImm16::is_valid(~imm)) - return MOVN(wd, ~imm); + if (MovImm16::is_valid(static_cast<std::uint32_t>(~imm))) + return MOVN(wd, static_cast<std::uint32_t>(~imm)); if (detail::encode_bit_imm(imm)) return ORR(wd, WzrReg{}, imm); @@ -173,10 +158,10 @@ public: // Convenience function for moving pointers to registers void MOVP2R(XReg xd, const void* addr) { - int64_t diff = reinterpret_cast<uint64_t>(addr) - Policy::current_address(); + const int64_t diff = reinterpret_cast<std::uint64_t>(addr) - Policy::template xptr<std::uintptr_t>(); if (diff >= -0xF'FFFF && diff <= 0xF'FFFF) { ADR(xd, addr); - } else if (diff >= -int64_t{0xFFFF'FFFF} && diff <= int64_t{0xFFFF'FFFF}) { + } else if (PageOffset<21, 12>::valid(Policy::template xptr<std::uintptr_t>(), reinterpret_cast<std::uintptr_t>(addr))) { ADRL(xd, addr); } else { MOV(xd, reinterpret_cast<uint64_t>(addr)); @@ -188,7 +173,7 @@ public: if (alignment < 4 || (alignment & (alignment - 1)) != 0) throw OaknutException{ExceptionType::InvalidAlignment}; - while (Policy::template ptr<std::uintptr_t>() & (alignment - 1)) { + 
while (Policy::offset() & (alignment - 1)) { NOP(); } } @@ -210,85 +195,55 @@ private: template<StringLiteral bs, StringLiteral... bargs, typename... Ts> void emit(Ts... args) { - std::uint32_t encoding = detail::get_bits<bs, "1">(); - encoding |= (0 | ... | encode<detail::get_bits<bs, bargs>()>(std::forward<Ts>(args))); + constexpr std::uint32_t base = detail::find<bs, "1">(); + std::uint32_t encoding = (base | ... | encode<detail::find<bs, bargs>()>(std::forward<Ts>(args))); Policy::append(encoding); } +}; - template<std::uint32_t splat, std::size_t size, std::size_t align> - std::uint32_t encode(AddrOffset<size, align> v) +struct PointerCodeGeneratorPolicy { +public: + std::ptrdiff_t offset() const { - static_assert(std::popcount(splat) == size - align); - - const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) { - const std::ptrdiff_t diff = target - current_addr; - return pdep<splat>(AddrOffset<size, align>::encode(diff)); - }; - - return std::visit(detail::overloaded{ - [&](std::uint32_t encoding) { - return pdep<splat>(encoding); - }, - [&](Label* label) { - if (label->m_addr) { - return encode_fn(Policy::current_address(), *label->m_addr); - } - - label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)}); - return 0u; - }, - [&](void* p) { - return encode_fn(Policy::current_address(), reinterpret_cast<std::uintptr_t>(p)); - }, - }, - v.m_payload); + return (m_ptr - m_wmem) * sizeof(std::uint32_t); } - template<std::uint32_t splat, std::size_t size, std::size_t shift_amount> - std::uint32_t encode(PageOffset<size, shift_amount> v) + void set_offset(std::ptrdiff_t offset) { - static_assert(std::popcount(splat) == size); - - const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) { - return pdep<splat>(PageOffset<size, shift_amount>::encode(current_addr, target)); - }; - - return std::visit(detail::overloaded{ - [&](Label* label) { - if 
(label->m_addr) { - return encode_fn(Policy::current_address(), *label->m_addr); - } - - label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)}); - return 0u; - }, - [&](const void* p) { - return encode_fn(Policy::current_address(), reinterpret_cast<std::uintptr_t>(p)); - }, - }, - v.m_payload); + if ((offset % sizeof(std::uint32_t)) != 0) + throw OaknutException{ExceptionType::InvalidAlignment}; + m_ptr = m_wmem + offset / sizeof(std::uint32_t); } -}; -struct PointerCodeGeneratorPolicy { -public: template<typename T> - T ptr() + T wptr() const { static_assert(std::is_pointer_v<T> || std::is_same_v<T, std::uintptr_t> || std::is_same_v<T, std::intptr_t>); return reinterpret_cast<T>(m_ptr); } - void set_ptr(std::uint32_t* ptr_) + template<typename T> + T xptr() const + { + static_assert(std::is_pointer_v<T> || std::is_same_v<T, std::uintptr_t> || std::is_same_v<T, std::intptr_t>); + return reinterpret_cast<T>(m_xmem + (m_ptr - m_wmem)); + } + + void set_wptr(std::uint32_t* p) { - m_ptr = ptr_; + m_ptr = p; + } + + void set_xptr(std::uint32_t* p) + { + m_ptr = m_wmem + (p - m_xmem); } protected: using constructor_argument_type = std::uint32_t*; - PointerCodeGeneratorPolicy(std::uint32_t* ptr_) - : m_ptr(ptr_) + PointerCodeGeneratorPolicy(std::uint32_t* wmem, std::uint32_t* xmem) + : m_ptr(wmem), m_wmem(wmem), m_xmem(xmem) {} void append(std::uint32_t instruction) @@ -296,22 +251,57 @@ protected: *m_ptr++ = instruction; } - std::uintptr_t current_address() + void set_at_offset(std::ptrdiff_t offset, std::uint32_t value, std::uint32_t mask) const { - return reinterpret_cast<std::uintptr_t>(m_ptr); + std::uint32_t* p = m_wmem + offset / sizeof(std::uint32_t); + *p = (*p & mask) | value; } - void set_at_address(std::uintptr_t addr, std::uint32_t value, std::uint32_t mask) +private: + std::uint32_t* m_ptr; + std::uint32_t* const m_wmem; + std::uint32_t* const m_xmem; +}; + +struct 
VectorCodeGeneratorPolicy { +public: + std::ptrdiff_t offset() const { - std::uint32_t* p = reinterpret_cast<std::uint32_t*>(addr); - *p = (*p & mask) | value; + return m_vec.size() * sizeof(std::uint32_t); + } + + template<typename T> + T xptr() const + { + static_assert(std::is_pointer_v<T> || std::is_same_v<T, std::uintptr_t> || std::is_same_v<T, std::intptr_t>); + return reinterpret_cast<T>(m_xmem + m_vec.size()); + } + +protected: + using constructor_argument_type = std::vector<std::uint32_t>&; + + VectorCodeGeneratorPolicy(std::vector<std::uint32_t>& vec, std::uint32_t* xmem) + : m_vec(vec), m_xmem(xmem) + {} + + void append(std::uint32_t instruction) + { + m_vec.push_back(instruction); + } + + void set_at_offset(std::ptrdiff_t offset, std::uint32_t value, std::uint32_t mask) const + { + std::uint32_t& p = m_vec[offset / sizeof(std::uint32_t)]; + p = (p & mask) | value; } private: - std::uint32_t* m_ptr; + std::vector<std::uint32_t>& m_vec; + std::uint32_t* const m_xmem; }; using CodeGenerator = BasicCodeGenerator<PointerCodeGeneratorPolicy>; +using VectorCodeGenerator = BasicCodeGenerator<VectorCodeGeneratorPolicy>; namespace util { |