aboutsummaryrefslogtreecommitdiffhomepage
path: root/include/oaknut
diff options
context:
space:
mode:
Diffstat (limited to 'include/oaknut')
-rw-r--r--include/oaknut/code_block.hpp20
-rw-r--r--include/oaknut/dual_code_block.hpp165
-rw-r--r--include/oaknut/feature_detection/cpu_feature.hpp107
-rw-r--r--include/oaknut/feature_detection/feature_detection.hpp35
-rw-r--r--include/oaknut/feature_detection/feature_detection_apple.hpp112
-rw-r--r--include/oaknut/feature_detection/feature_detection_freebsd.hpp62
-rw-r--r--include/oaknut/feature_detection/feature_detection_generic.hpp23
-rw-r--r--include/oaknut/feature_detection/feature_detection_hwcaps.hpp120
-rw-r--r--include/oaknut/feature_detection/feature_detection_idregs.hpp167
-rw-r--r--include/oaknut/feature_detection/feature_detection_linux.hpp45
-rw-r--r--include/oaknut/feature_detection/feature_detection_netbsd.hpp81
-rw-r--r--include/oaknut/feature_detection/feature_detection_openbsd.hpp63
-rw-r--r--include/oaknut/feature_detection/feature_detection_w32.hpp99
-rw-r--r--include/oaknut/feature_detection/id_registers.hpp318
-rw-r--r--include/oaknut/feature_detection/read_id_registers_directly.hpp52
-rw-r--r--include/oaknut/impl/arm64_encode_helpers.inc.hpp57
-rw-r--r--include/oaknut/impl/cpu_feature.inc.hpp78
-rw-r--r--include/oaknut/impl/enum.hpp62
-rw-r--r--include/oaknut/impl/imm.hpp16
-rw-r--r--include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp12
-rw-r--r--include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp4
-rw-r--r--include/oaknut/impl/oaknut_exception.inc.hpp1
-rw-r--r--include/oaknut/impl/offset.hpp12
-rw-r--r--include/oaknut/impl/overloaded.hpp16
-rw-r--r--include/oaknut/impl/reg.hpp12
-rw-r--r--include/oaknut/impl/string_literal.hpp18
-rw-r--r--include/oaknut/oaknut.hpp210
27 files changed, 1816 insertions, 151 deletions
diff --git a/include/oaknut/code_block.hpp b/include/oaknut/code_block.hpp
index 1c29ad09..bfa87d96 100644
--- a/include/oaknut/code_block.hpp
+++ b/include/oaknut/code_block.hpp
@@ -36,6 +36,10 @@ public:
# else
m_memory = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0);
# endif
+#elif defined(__NetBSD__)
+ m_memory = (std::uint32_t*)mmap(nullptr, size, PROT_MPROTECT(PROT_READ | PROT_WRITE | PROT_EXEC), MAP_ANON | MAP_PRIVATE, -1, 0);
+#elif defined(__OpenBSD__)
+ m_memory = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
#else
m_memory = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
#endif
@@ -68,23 +72,19 @@ public:
+/// Makes the code block executable again after writing.
+/// Apple non-iPhone targets call pthread_jit_write_protect_np(1); Apple iPhone
+/// targets, NetBSD and OpenBSD remap the block as read+execute with mprotect.
+/// No branch is compiled for other platforms, so this is a no-op there.
void protect()
{
-#if defined(__APPLE__)
-# if TARGET_OS_IPHONE
- mprotect(m_memory, m_size, PROT_READ | PROT_EXEC);
-# else
+#if defined(__APPLE__) && !TARGET_OS_IPHONE
pthread_jit_write_protect_np(1);
-# endif
+#elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__)
+ mprotect(m_memory, m_size, PROT_READ | PROT_EXEC);
#endif
}
+/// Makes the code block writable so new instructions can be emitted.
+/// Apple non-iPhone targets call pthread_jit_write_protect_np(0); Apple iPhone
+/// targets, NetBSD and OpenBSD remap the block as read+write with mprotect.
+/// No branch is compiled for other platforms, so this is a no-op there.
void unprotect()
{
-#if defined(__APPLE__)
-# if TARGET_OS_IPHONE
- mprotect(m_memory, m_size, PROT_READ | PROT_WRITE);
-# else
+#if defined(__APPLE__) && !TARGET_OS_IPHONE
pthread_jit_write_protect_np(0);
-# endif
+#elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__)
+ mprotect(m_memory, m_size, PROT_READ | PROT_WRITE);
#endif
}
diff --git a/include/oaknut/dual_code_block.hpp b/include/oaknut/dual_code_block.hpp
new file mode 100644
index 00000000..eb6e19d9
--- /dev/null
+++ b/include/oaknut/dual_code_block.hpp
@@ -0,0 +1,165 @@
+// SPDX-FileCopyrightText: Copyright (c) 2024 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <new>
+
+#if defined(_WIN32)
+# define NOMINMAX
+# include <windows.h>
+#elif defined(__APPLE__)
+# include <mach/mach.h>
+# include <mach/vm_map.h>
+
+# include <TargetConditionals.h>
+# include <libkern/OSCacheControl.h>
+# include <pthread.h>
+# include <sys/mman.h>
+# include <unistd.h>
+#else
+# if !defined(_GNU_SOURCE)
+# define _GNU_SOURCE
+# endif
+# include <sys/mman.h>
+# include <sys/types.h>
+# include <unistd.h>
+#endif
+
+namespace oaknut {
+
+/// A block of memory for generated code that is exposed through two pointers:
+/// wptr() for writing instructions and xptr() for executing them.
+///
+/// - Windows: a single read/write/execute allocation; both pointers are equal.
+/// - Apple: an anonymous RW mapping plus a vm_remap()'d mirror made R-X.
+/// - Other POSIX: one shared-memory object mapped twice (RW and R-X).
+///
+/// The object is neither copyable nor movable. Everything acquired by the
+/// constructor is released by the destructor, and also released before the
+/// constructor throws after a partial allocation.
+class DualCodeBlock {
+public:
+    /// Allocates `size` bytes and maps them as described on the class.
+    /// @param size Size of the block in bytes.
+    /// @throws std::bad_alloc if any allocation or mapping step fails.
+    explicit DualCodeBlock(std::size_t size)
+        : m_size(size)
+    {
+#if defined(_WIN32)
+        m_wmem = m_xmem = (std::uint32_t*)VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+        if (m_wmem == nullptr)
+            throw std::bad_alloc{};
+#elif defined(__APPLE__)
+        m_wmem = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+        if (m_wmem == MAP_FAILED) {
+            m_wmem = nullptr;
+            throw std::bad_alloc{};
+        }
+
+        vm_prot_t cur_prot, max_prot;
+        kern_return_t ret = vm_remap(mach_task_self(), (vm_address_t*)&m_xmem, size, 0, VM_FLAGS_ANYWHERE | VM_FLAGS_RANDOM_ADDR, mach_task_self(), (mach_vm_address_t)m_wmem, false, &cur_prot, &max_prot, VM_INHERIT_NONE);
+        if (ret != KERN_SUCCESS) {
+            // The mirror was not created; only the writable mapping must be undone.
+            m_xmem = nullptr;
+            release();
+            throw std::bad_alloc{};
+        }
+
+        // Without R-X permissions the mirror cannot be executed, so treat a
+        // failing mprotect as an allocation failure instead of ignoring it.
+        if (mprotect(m_xmem, size, PROT_READ | PROT_EXEC) != 0) {
+            release();
+            throw std::bad_alloc{};
+        }
+#else
+#    if defined(__OpenBSD__)
+        char tmpl[] = "oaknut_dual_code_block.XXXXXXXXXX";
+        fd = shm_mkstemp(tmpl);
+        if (fd < 0)
+            throw std::bad_alloc{};
+        // The name is only needed to create the object; the fd keeps it alive.
+        shm_unlink(tmpl);
+#    else
+        fd = memfd_create("oaknut_dual_code_block", 0);
+        if (fd < 0)
+            throw std::bad_alloc{};
+#    endif
+
+        int ret = ftruncate(fd, size);
+        if (ret != 0) {
+            release();
+            throw std::bad_alloc{};
+        }
+
+        m_wmem = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+        m_xmem = (std::uint32_t*)mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
+
+        if (m_wmem == MAP_FAILED || m_xmem == MAP_FAILED) {
+            // release() skips whichever mapping failed and unmaps the other.
+            release();
+            throw std::bad_alloc{};
+        }
+#endif
+    }
+
+    ~DualCodeBlock()
+    {
+        release();
+    }
+
+    DualCodeBlock(const DualCodeBlock&) = delete;
+    DualCodeBlock& operator=(const DualCodeBlock&) = delete;
+    DualCodeBlock(DualCodeBlock&&) = delete;
+    DualCodeBlock& operator=(DualCodeBlock&&) = delete;
+
+    /// Pointer to executable mirror of memory (permissions: R-X)
+    std::uint32_t* xptr() const
+    {
+        return m_xmem;
+    }
+
+    /// Pointer to writeable mirror of memory (permissions: RW-)
+    std::uint32_t* wptr() const
+    {
+        return m_wmem;
+    }
+
+    /// Invalidate should be used with executable memory pointers.
+    /// @param mem  Start of the range to invalidate.
+    /// @param size Length of the range in bytes.
+    void invalidate(std::uint32_t* mem, std::size_t size)
+    {
+#if defined(__APPLE__)
+        sys_icache_invalidate(mem, size);
+#elif defined(_WIN32)
+        FlushInstructionCache(GetCurrentProcess(), mem, size);
+#else
+        // Smallest cache line sizes observed so far across calls; each call
+        // lowers them to what CTR_EL0 reports on the executing core.
+        static std::size_t icache_line_size = 0x10000, dcache_line_size = 0x10000;
+
+        std::uint64_t ctr;
+        __asm__ volatile("mrs %0, ctr_el0"
+                         : "=r"(ctr));
+
+        const std::size_t isize = icache_line_size = std::min<std::size_t>(icache_line_size, 4 << ((ctr >> 0) & 0xf));
+        const std::size_t dsize = dcache_line_size = std::min<std::size_t>(dcache_line_size, 4 << ((ctr >> 16) & 0xf));
+
+        const std::uintptr_t end = (std::uintptr_t)mem + size;
+
+        // Clean the data cache lines covering the range...
+        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(dsize - 1); addr < end; addr += dsize) {
+            __asm__ volatile("dc cvau, %0"
+                             :
+                             : "r"(addr)
+                             : "memory");
+        }
+        __asm__ volatile("dsb ish\n"
+                         :
+                         :
+                         : "memory");
+
+        // ...then invalidate the instruction cache lines covering it.
+        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(isize - 1); addr < end; addr += isize) {
+            __asm__ volatile("ic ivau, %0"
+                             :
+                             : "r"(addr)
+                             : "memory");
+        }
+        __asm__ volatile("dsb ish\nisb\n"
+                         :
+                         :
+                         : "memory");
+#endif
+    }
+
+    /// Invalidates the whole block through its executable mirror.
+    void invalidate_all()
+    {
+        invalidate(m_xmem, m_size);
+    }
+
+protected:
+    /// Frees whatever the constructor managed to acquire. Pointers that are
+    /// still nullptr (or MAP_FAILED on the POSIX path) and a negative fd are
+    /// skipped, so this is safe to call after a partially failed construction.
+    void release() noexcept
+    {
+#if defined(_WIN32)
+        if (m_xmem != nullptr)
+            VirtualFree((void*)m_xmem, 0, MEM_RELEASE);
+#elif defined(__APPLE__)
+        if (m_xmem != nullptr)
+            vm_deallocate(mach_task_self(), (vm_address_t)m_xmem, m_size);
+        if (m_wmem != nullptr)
+            munmap(m_wmem, m_size);
+#else
+        if (m_wmem != nullptr && m_wmem != MAP_FAILED)
+            munmap(m_wmem, m_size);
+        if (m_xmem != nullptr && m_xmem != MAP_FAILED)
+            munmap(m_xmem, m_size);
+        if (fd >= 0)
+            close(fd);
+#endif
+    }
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+    int fd = -1;
+#endif
+    std::uint32_t* m_xmem = nullptr;
+    std::uint32_t* m_wmem = nullptr;
+    std::size_t m_size = 0;
+};
+
+} // namespace oaknut
diff --git a/include/oaknut/feature_detection/cpu_feature.hpp b/include/oaknut/feature_detection/cpu_feature.hpp
new file mode 100644
index 00000000..9f70c5b8
--- /dev/null
+++ b/include/oaknut/feature_detection/cpu_feature.hpp
@@ -0,0 +1,107 @@
+// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <bitset>
+#include <cstddef>
+#include <initializer_list>
+
+#if defined(__cpp_lib_constexpr_bitset) && __cpp_lib_constexpr_bitset >= 202207L
+# define OAKNUT_CPU_FEATURES_CONSTEXPR constexpr
+#else
+# define OAKNUT_CPU_FEATURES_CONSTEXPR
+#endif
+
+namespace oaknut {
+
+// NOTE: This file contains code that can be compiled on non-arm64 systems.
+// For run-time CPU feature detection, include feature_detection.hpp
+
+// One enumerator per OAKNUT_CPU_FEATURE(name) entry in cpu_feature.inc.hpp.
+// The macro is defined just for the include and removed again afterwards.
+enum class CpuFeature {
+#define OAKNUT_CPU_FEATURE(name) name,
+#include "oaknut/impl/cpu_feature.inc.hpp"
+#undef OAKNUT_CPU_FEATURE
+};
+
+// Number of CpuFeature enumerators: the same list is included again with each
+// entry expanding to "+1", so the initializer reads 0 +1 +1 ... ;
+constexpr std::size_t cpu_feature_count = 0
+#define OAKNUT_CPU_FEATURE(name) +1
+#include "oaknut/impl/cpu_feature.inc.hpp"
+#undef OAKNUT_CPU_FEATURE
+ ;
+
+/// A set of CpuFeature values stored as one bit per enumerator.
+class CpuFeatures final {
+public:
+    /// Creates an empty set.
+    constexpr CpuFeatures() = default;
+
+    /// Creates a set containing exactly the listed features.
+    OAKNUT_CPU_FEATURES_CONSTEXPR explicit CpuFeatures(std::initializer_list<CpuFeature> features)
+    {
+        for (const CpuFeature feature : features)
+            m_bitset.set(static_cast<std::size_t>(feature));
+    }
+
+    /// Returns whether `feature` is in the set. Values whose index is not
+    /// below cpu_feature_count are reported as absent.
+    constexpr bool has(CpuFeature feature) const
+    {
+        const std::size_t index = static_cast<std::size_t>(feature);
+        return index < cpu_feature_count && m_bitset[index];
+    }
+
+    /// Keeps only the features also present in `other`.
+    OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator&=(const CpuFeatures& other) noexcept
+    {
+        m_bitset &= other.m_bitset;
+        return *this;
+    }
+
+    /// Adds every feature present in `other`.
+    OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator|=(const CpuFeatures& other) noexcept
+    {
+        m_bitset |= other.m_bitset;
+        return *this;
+    }
+
+    /// Toggles every feature present in `other`.
+    OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator^=(const CpuFeatures& other) noexcept
+    {
+        m_bitset ^= other.m_bitset;
+        return *this;
+    }
+
+    /// Returns the complement of this set.
+    OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator~() const noexcept
+    {
+        CpuFeatures inverted{*this};
+        inverted.m_bitset.flip();
+        return inverted;
+    }
+
+private:
+    using bitset = std::bitset<cpu_feature_count>;
+
+    // The binary operators are free functions that combine the bitsets directly.
+    friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept;
+    friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept;
+    friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept;
+
+    bitset m_bitset;
+};
+
+/// Intersection: the features present in both `a` and `b`.
+OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept
+{
+    CpuFeatures combined{a};
+    combined.m_bitset &= b.m_bitset;
+    return combined;
+}
+
+/// Union: the features present in `a`, in `b`, or in both.
+OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept
+{
+    CpuFeatures combined{a};
+    combined.m_bitset |= b.m_bitset;
+    return combined;
+}
+
+/// Symmetric difference: the features present in exactly one of `a` and `b`.
+OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept
+{
+    CpuFeatures combined{a};
+    combined.m_bitset ^= b.m_bitset;
+    return combined;
+}
+
+} // namespace oaknut
diff --git a/include/oaknut/feature_detection/feature_detection.hpp b/include/oaknut/feature_detection/feature_detection.hpp
new file mode 100644
index 00000000..1961864d
--- /dev/null
+++ b/include/oaknut/feature_detection/feature_detection.hpp
@@ -0,0 +1,35 @@
+// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+// Selects the feature detection backend for the target operating system.
+// Each branch defines:
+//   OAKNUT_CPU_FEATURE_DETECTION         - 1 when an OS-specific backend is used,
+//                                          0 for the generic fallback.
+//   OAKNUT_SUPPORTS_READING_ID_REGISTERS - 0, 1 or 2.
+// NOTE(review): the Apple and generic backends (value 0) return std::nullopt
+// from read_id_registers(), the Linux and FreeBSD backends (value 1) take no
+// argument, and the NetBSD backend (value 2) takes a core index; presumably 2
+// means "per-core" - confirm against the project documentation.
+#if defined(__APPLE__)
+# define OAKNUT_CPU_FEATURE_DETECTION 1
+# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 0
+# include "oaknut/feature_detection/feature_detection_apple.hpp"
+#elif defined(__FreeBSD__)
+# define OAKNUT_CPU_FEATURE_DETECTION 1
+# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1
+# include "oaknut/feature_detection/feature_detection_freebsd.hpp"
+#elif defined(__linux__)
+# define OAKNUT_CPU_FEATURE_DETECTION 1
+# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1
+# include "oaknut/feature_detection/feature_detection_linux.hpp"
+#elif defined(__NetBSD__)
+# define OAKNUT_CPU_FEATURE_DETECTION 1
+# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 2
+# include "oaknut/feature_detection/feature_detection_netbsd.hpp"
+#elif defined(__OpenBSD__)
+# define OAKNUT_CPU_FEATURE_DETECTION 1
+# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1
+# include "oaknut/feature_detection/feature_detection_openbsd.hpp"
+#elif defined(_WIN32)
+# define OAKNUT_CPU_FEATURE_DETECTION 1
+# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 2
+# include "oaknut/feature_detection/feature_detection_w32.hpp"
+#else
+# define OAKNUT_CPU_FEATURE_DETECTION 0
+# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 0
+# warning "Unsupported operating system for CPU feature detection"
+# include "oaknut/feature_detection/feature_detection_generic.hpp"
+#endif
diff --git a/include/oaknut/feature_detection/feature_detection_apple.hpp b/include/oaknut/feature_detection/feature_detection_apple.hpp
new file mode 100644
index 00000000..4c17825a
--- /dev/null
+++ b/include/oaknut/feature_detection/feature_detection_apple.hpp
@@ -0,0 +1,112 @@
+// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <cstddef>
+#include <optional>
+
+#include <sys/sysctl.h>
+
+#include "oaknut/feature_detection/cpu_feature.hpp"
+#include "oaknut/feature_detection/id_registers.hpp"
+
+namespace oaknut {
+
+// Ref: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
+
+namespace detail {
+
+/// Reads the integer sysctl `sysctl_name` and reports whether it is non-zero.
+/// Returns false when sysctlbyname() fails.
+inline bool detect_feature(const char* const sysctl_name)
+{
+    int value = 0;
+    std::size_t value_size = sizeof(value);
+    const bool lookup_ok = ::sysctlbyname(sysctl_name, &value, &value_size, nullptr, 0) == 0;
+    return lookup_ok && value != 0;
+}
+
+} // namespace detail
+
+/// Builds the feature set by probing the hw.optional.* sysctls listed below.
+inline CpuFeatures detect_features_via_sysctlbyname()
+{
+    // One row per feature: the sysctl queried first and, where there is one,
+    // an alternate name that is only queried when the first reports false.
+    struct Probe {
+        CpuFeature feature;
+        const char* name;
+        const char* alternate_name;
+    };
+
+    constexpr Probe probes[] = {
+        {CpuFeature::ASIMD, "hw.optional.AdvSIMD", "hw.optional.neon"},
+        {CpuFeature::FP, "hw.optional.floatingpoint", nullptr},
+        {CpuFeature::FP16Conv, "hw.optional.AdvSIMD_HPFPCvt", "hw.optional.neon_hpfp"},
+        {CpuFeature::BF16, "hw.optional.arm.FEAT_BF16", nullptr},
+        {CpuFeature::DotProd, "hw.optional.arm.FEAT_DotProd", nullptr},
+        {CpuFeature::FCMA, "hw.optional.arm.FEAT_FCMA", "hw.optional.armv8_3_compnum"},
+        {CpuFeature::FHM, "hw.optional.arm.FEAT_FHM", "hw.optional.armv8_2_fhm"},
+        {CpuFeature::FP16, "hw.optional.arm.FEAT_FP16", "hw.optional.neon_fp16"},
+        {CpuFeature::FRINTTS, "hw.optional.arm.FEAT_FRINTTS", nullptr},
+        {CpuFeature::I8MM, "hw.optional.arm.FEAT_I8MM", nullptr},
+        {CpuFeature::JSCVT, "hw.optional.arm.FEAT_JSCVT", nullptr},
+        {CpuFeature::RDM, "hw.optional.arm.FEAT_RDM", nullptr},
+        {CpuFeature::FlagM, "hw.optional.arm.FEAT_FlagM", nullptr},
+        {CpuFeature::FlagM2, "hw.optional.arm.FEAT_FlagM2", nullptr},
+        {CpuFeature::CRC32, "hw.optional.armv8_crc32", nullptr},
+        {CpuFeature::LRCPC, "hw.optional.arm.FEAT_LRCPC", nullptr},
+        {CpuFeature::LRCPC2, "hw.optional.arm.FEAT_LRCPC2", nullptr},
+        {CpuFeature::LSE, "hw.optional.arm.FEAT_LSE", "hw.optional.armv8_1_atomics"},
+        {CpuFeature::LSE2, "hw.optional.arm.FEAT_LSE2", nullptr},
+        {CpuFeature::AES, "hw.optional.arm.FEAT_AES", nullptr},
+        {CpuFeature::PMULL, "hw.optional.arm.FEAT_PMULL", nullptr},
+        {CpuFeature::SHA1, "hw.optional.arm.FEAT_SHA1", nullptr},
+        {CpuFeature::SHA256, "hw.optional.arm.FEAT_SHA256", nullptr},
+        {CpuFeature::SHA512, "hw.optional.arm.FEAT_SHA512", "hw.optional.armv8_2_sha512"},
+        {CpuFeature::SHA3, "hw.optional.arm.FEAT_SHA3", "hw.optional.armv8_2_sha3"},
+        {CpuFeature::BTI, "hw.optional.arm.FEAT_BTI", nullptr},
+        {CpuFeature::DPB, "hw.optional.arm.FEAT_DPB", nullptr},
+        {CpuFeature::DPB2, "hw.optional.arm.FEAT_DPB2", nullptr},
+        {CpuFeature::ECV, "hw.optional.arm.FEAT_ECV", nullptr},
+        {CpuFeature::SB, "hw.optional.arm.FEAT_SB", nullptr},
+        {CpuFeature::SSBS, "hw.optional.arm.FEAT_SSBS", nullptr},
+    };
+
+    CpuFeatures detected;
+    for (const Probe& probe : probes) {
+        const bool present = detail::detect_feature(probe.name)
+                          || (probe.alternate_name != nullptr && detail::detect_feature(probe.alternate_name));
+        if (present)
+            detected |= CpuFeatures{probe.feature};
+    }
+    return detected;
+}
+
+/// Detects CPU features on Apple platforms; this backend only uses sysctlbyname.
+inline CpuFeatures detect_features()
+{
+    const CpuFeatures detected = detect_features_via_sysctlbyname();
+    return detected;
+}
+
+/// This backend never provides ID register values: always returns an empty optional.
+inline std::optional<id::IdRegisters> read_id_registers()
+{
+    return std::optional<id::IdRegisters>{};
+}
+
+} // namespace oaknut
diff --git a/include/oaknut/feature_detection/feature_detection_freebsd.hpp b/include/oaknut/feature_detection/feature_detection_freebsd.hpp
new file mode 100644
index 00000000..efb3c669
--- /dev/null
+++ b/include/oaknut/feature_detection/feature_detection_freebsd.hpp
@@ -0,0 +1,62 @@
+// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <cstdint>
+#include <optional>
+
+#include <sys/auxv.h>
+#include <sys/param.h>
+
+#include "oaknut/feature_detection/cpu_feature.hpp"
+#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
+#include "oaknut/feature_detection/id_registers.hpp"
+#include "oaknut/feature_detection/read_id_registers_directly.hpp"
+
+#ifndef AT_HWCAP
+# define AT_HWCAP 16
+#endif
+#ifndef AT_HWCAP2
+# define AT_HWCAP2 26
+#endif
+
+#if __FreeBSD_version < 1300114
+# error "Incompatible ABI change (incorrect HWCAP definitions on earlier FreeBSD versions)"
+#endif
+
+namespace oaknut {
+
+namespace detail {
+
+/// Fetches auxiliary vector entry `aux` through elf_aux_info().
+/// Returns 0 when the call reports an error.
+inline unsigned long getauxval(int aux)
+{
+    unsigned long value = 0;
+    const int error = ::elf_aux_info(aux, &value, static_cast<int>(sizeof value));
+    return error == 0 ? value : 0;
+}
+
+} // namespace detail
+
+/// Reads AT_HWCAP and AT_HWCAP2 from the auxiliary vector and decodes them.
+inline CpuFeatures detect_features_via_hwcap()
+{
+    return detect_features_via_hwcap(detail::getauxval(AT_HWCAP), detail::getauxval(AT_HWCAP2));
+}
+
+/// Reads the ID registers directly, without consulting HWCAP_CPUID first.
+inline std::optional<id::IdRegisters> read_id_registers()
+{
+    // HWCAP_CPUID is falsely not set on many FreeBSD kernel versions,
+    // so we don't bother checking it.
+    return id::read_id_registers_directly();
+}
+
+/// Detects CPU features on FreeBSD; this backend only uses the HWCAP words.
+inline CpuFeatures detect_features()
+{
+    const CpuFeatures detected = detect_features_via_hwcap();
+    return detected;
+}
+
+} // namespace oaknut
diff --git a/include/oaknut/feature_detection/feature_detection_generic.hpp b/include/oaknut/feature_detection/feature_detection_generic.hpp
new file mode 100644
index 00000000..405a9b6a
--- /dev/null
+++ b/include/oaknut/feature_detection/feature_detection_generic.hpp
@@ -0,0 +1,23 @@
+// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <optional>
+
+#include "oaknut/feature_detection/cpu_feature.hpp"
+#include "oaknut/feature_detection/id_registers.hpp"
+
+namespace oaknut {
+
+/// Fallback for unsupported operating systems: reports only FP and ASIMD.
+inline CpuFeatures detect_features()
+{
+    CpuFeatures baseline{CpuFeature::FP, CpuFeature::ASIMD};
+    return baseline;
+}
+
+/// The fallback backend cannot read ID registers: always returns an empty optional.
+inline std::optional<id::IdRegisters> read_id_registers()
+{
+    return std::optional<id::IdRegisters>{};
+}
+
+} // namespace oaknut
diff --git a/include/oaknut/feature_detection/feature_detection_hwcaps.hpp b/include/oaknut/feature_detection/feature_detection_hwcaps.hpp
new file mode 100644
index 00000000..09855258
--- /dev/null
+++ b/include/oaknut/feature_detection/feature_detection_hwcaps.hpp
@@ -0,0 +1,120 @@
+// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <cstddef>
+
+#include "oaknut/feature_detection/cpu_feature.hpp"
+
+namespace oaknut {
+
+namespace detail {
+
+/// Returns true when every bit position listed in `bits` is set in `value`.
+/// An empty list yields true.
+template<std::size_t... bits>
+constexpr bool bit_test(unsigned long value)
+{
+    // Combine all requested positions into one mask and require the whole mask.
+    constexpr unsigned long mask = (0ul | ... | (1ul << bits));
+    return (value & mask) == mask;
+}
+
+} // namespace detail
+
+/// Decodes the two HWCAP words into a CpuFeatures set.
+/// @param hwcap  Value tested by the OAKNUT_DETECT_CAP entries below.
+/// @param hwcap2 Value tested by the OAKNUT_DETECT_CAP2 entries below.
+/// @return The features whose listed bit positions are all set.
+inline CpuFeatures detect_features_via_hwcap(unsigned long hwcap, unsigned long hwcap2)
+{
+ CpuFeatures result;
+
+// OAKNUT_DETECT_CAP(FEAT, bits...): add CpuFeature::FEAT when every listed bit
+// is set in hwcap. OAKNUT_DETECT_CAP2 does the same against hwcap2. Both macros
+// are local to this function and undefined again before it returns.
+#define OAKNUT_DETECT_CAP(FEAT, ...) \
+ if (detail::bit_test<__VA_ARGS__>(hwcap)) { \
+ result |= CpuFeatures{CpuFeature::FEAT}; \
+ }
+#define OAKNUT_DETECT_CAP2(FEAT, ...) \
+ if (detail::bit_test<__VA_ARGS__>(hwcap2)) { \
+ result |= CpuFeatures{CpuFeature::FEAT}; \
+ }
+
+ OAKNUT_DETECT_CAP(FP, 0) // HWCAP_FP
+ OAKNUT_DETECT_CAP(ASIMD, 1) // HWCAP_ASIMD
+ // HWCAP_EVTSTRM (2)
+ OAKNUT_DETECT_CAP(AES, 3) // HWCAP_AES
+ OAKNUT_DETECT_CAP(PMULL, 4) // HWCAP_PMULL
+ OAKNUT_DETECT_CAP(SHA1, 5) // HWCAP_SHA1
+ OAKNUT_DETECT_CAP(SHA256, 6) // HWCAP_SHA2
+ OAKNUT_DETECT_CAP(CRC32, 7) // HWCAP_CRC32
+ OAKNUT_DETECT_CAP(LSE, 8) // HWCAP_ATOMICS
+ OAKNUT_DETECT_CAP(FP16Conv, 9, 10) // HWCAP_FPHP && HWCAP_ASIMDHP
+ OAKNUT_DETECT_CAP(FP16, 9, 10) // HWCAP_FPHP && HWCAP_ASIMDHP
+ // HWCAP_CPUID (11)
+ OAKNUT_DETECT_CAP(RDM, 12) // HWCAP_ASIMDRDM
+ OAKNUT_DETECT_CAP(JSCVT, 13) // HWCAP_JSCVT
+ OAKNUT_DETECT_CAP(FCMA, 14) // HWCAP_FCMA
+ OAKNUT_DETECT_CAP(LRCPC, 15) // HWCAP_LRCPC
+ OAKNUT_DETECT_CAP(DPB, 16) // HWCAP_DCPOP
+ OAKNUT_DETECT_CAP(SHA3, 17) // HWCAP_SHA3
+ OAKNUT_DETECT_CAP(SM3, 18) // HWCAP_SM3
+ OAKNUT_DETECT_CAP(SM4, 19) // HWCAP_SM4
+ OAKNUT_DETECT_CAP(DotProd, 20) // HWCAP_ASIMDDP
+ OAKNUT_DETECT_CAP(SHA512, 21) // HWCAP_SHA512
+ OAKNUT_DETECT_CAP(SVE, 22) // HWCAP_SVE
+ OAKNUT_DETECT_CAP(FHM, 23) // HWCAP_ASIMDFHM
+ OAKNUT_DETECT_CAP(DIT, 24) // HWCAP_DIT
+ OAKNUT_DETECT_CAP(LSE2, 25) // HWCAP_USCAT
+ OAKNUT_DETECT_CAP(LRCPC2, 26) // HWCAP_ILRCPC
+ OAKNUT_DETECT_CAP(FlagM, 27) // HWCAP_FLAGM
+ OAKNUT_DETECT_CAP(SSBS, 28) // HWCAP_SSBS
+ OAKNUT_DETECT_CAP(SB, 29) // HWCAP_SB
+ OAKNUT_DETECT_CAP(PACA, 30) // HWCAP_PACA
+ OAKNUT_DETECT_CAP(PACG, 31) // HWCAP_PACG
+
+ OAKNUT_DETECT_CAP2(DPB2, 0) // HWCAP2_DCPODP
+ OAKNUT_DETECT_CAP2(SVE2, 1) // HWCAP2_SVE2
+ OAKNUT_DETECT_CAP2(SVE_AES, 2) // HWCAP2_SVEAES
+ OAKNUT_DETECT_CAP2(SVE_PMULL128, 3) // HWCAP2_SVEPMULL
+ OAKNUT_DETECT_CAP2(SVE_BITPERM, 4) // HWCAP2_SVEBITPERM
+ OAKNUT_DETECT_CAP2(SVE_SHA3, 5) // HWCAP2_SVESHA3
+ OAKNUT_DETECT_CAP2(SVE_SM4, 6) // HWCAP2_SVESM4
+ OAKNUT_DETECT_CAP2(FlagM2, 7) // HWCAP2_FLAGM2
+ OAKNUT_DETECT_CAP2(FRINTTS, 8) // HWCAP2_FRINT
+ OAKNUT_DETECT_CAP2(SVE_I8MM, 9) // HWCAP2_SVEI8MM
+ OAKNUT_DETECT_CAP2(SVE_F32MM, 10) // HWCAP2_SVEF32MM
+ OAKNUT_DETECT_CAP2(SVE_F64MM, 11) // HWCAP2_SVEF64MM
+ OAKNUT_DETECT_CAP2(SVE_BF16, 12) // HWCAP2_SVEBF16
+ OAKNUT_DETECT_CAP2(I8MM, 13) // HWCAP2_I8MM
+ OAKNUT_DETECT_CAP2(BF16, 14) // HWCAP2_BF16
+ OAKNUT_DETECT_CAP2(DGH, 15) // HWCAP2_DGH
+ OAKNUT_DETECT_CAP2(RNG, 16) // HWCAP2_RNG
+ OAKNUT_DETECT_CAP2(BTI, 17) // HWCAP2_BTI
+ OAKNUT_DETECT_CAP2(MTE, 18) // HWCAP2_MTE
+ OAKNUT_DETECT_CAP2(ECV, 19) // HWCAP2_ECV
+ OAKNUT_DETECT_CAP2(AFP, 20) // HWCAP2_AFP
+ OAKNUT_DETECT_CAP2(RPRES, 21) // HWCAP2_RPRES
+ OAKNUT_DETECT_CAP2(MTE3, 22) // HWCAP2_MTE3
+ OAKNUT_DETECT_CAP2(SME, 23) // HWCAP2_SME
+ OAKNUT_DETECT_CAP2(SME_I16I64, 24) // HWCAP2_SME_I16I64
+ OAKNUT_DETECT_CAP2(SME_F64F64, 25) // HWCAP2_SME_F64F64
+ OAKNUT_DETECT_CAP2(SME_I8I32, 26) // HWCAP2_SME_I8I32
+ OAKNUT_DETECT_CAP2(SME_F16F32, 27) // HWCAP2_SME_F16F32
+ OAKNUT_DETECT_CAP2(SME_B16F32, 28) // HWCAP2_SME_B16F32
+ OAKNUT_DETECT_CAP2(SME_F32F32, 29) // HWCAP2_SME_F32F32
+ OAKNUT_DETECT_CAP2(SME_FA64, 30) // HWCAP2_SME_FA64
+ OAKNUT_DETECT_CAP2(WFxT, 31) // HWCAP2_WFXT
+ OAKNUT_DETECT_CAP2(EBF16, 32) // HWCAP2_EBF16
+ OAKNUT_DETECT_CAP2(SVE_EBF16, 33) // HWCAP2_SVE_EBF16
+ OAKNUT_DETECT_CAP2(CSSC, 34) // HWCAP2_CSSC
+ OAKNUT_DETECT_CAP2(RPRFM, 35) // HWCAP2_RPRFM
+ OAKNUT_DETECT_CAP2(SVE2p1, 36) // HWCAP2_SVE2P1
+ OAKNUT_DETECT_CAP2(SME2, 37) // HWCAP2_SME2
+ OAKNUT_DETECT_CAP2(SME2p1, 38) // HWCAP2_SME2P1
+ OAKNUT_DETECT_CAP2(SME_I16I32, 39) // HWCAP2_SME_I16I32
+ OAKNUT_DETECT_CAP2(SME_BI32I32, 40) // HWCAP2_SME_BI32I32
+ OAKNUT_DETECT_CAP2(SME_B16B16, 41) // HWCAP2_SME_B16B16
+ OAKNUT_DETECT_CAP2(SME_F16F16, 42) // HWCAP2_SME_F16F16
+ OAKNUT_DETECT_CAP2(MOPS, 43) // HWCAP2_MOPS
+ OAKNUT_DETECT_CAP2(HBC, 44) // HWCAP2_HBC
+
+#undef OAKNUT_DETECT_CAP
+#undef OAKNUT_DETECT_CAP2
+
+ return result;
+}
+
+} // namespace oaknut
diff --git a/include/oaknut/feature_detection/feature_detection_idregs.hpp b/include/oaknut/feature_detection/feature_detection_idregs.hpp
new file mode 100644
index 00000000..c26e7a92
--- /dev/null
+++ b/include/oaknut/feature_detection/feature_detection_idregs.hpp
@@ -0,0 +1,167 @@
+// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "oaknut/feature_detection/cpu_feature.hpp"
+#include "oaknut/feature_detection/id_registers.hpp"
+
+namespace oaknut {
+
+/// Derives the CpuFeatures set from the fields of the given ID registers.
+///
+/// Declared `inline` because this definition lives in a header: without it,
+/// including the header from more than one translation unit defines the
+/// function several times and the program fails to link.
+///
+/// @param regs ID register values to decode.
+/// @return The features whose field comparisons below hold.
+inline CpuFeatures detect_features_via_id_registers(id::IdRegisters regs)
+{
+    CpuFeatures result;
+
+    // NOTE(review): the ">= 0" checks only filter anything if FP() and AdvSIMD()
+    // return a signed value - confirm against id_registers.hpp.
+    if (regs.pfr0.FP() >= 0)
+        result |= CpuFeatures{CpuFeature::FP};
+    if (regs.pfr0.AdvSIMD() >= 0)
+        result |= CpuFeatures{CpuFeature::ASIMD};
+    if (regs.isar0.AES() >= 1)
+        result |= CpuFeatures{CpuFeature::AES};
+    if (regs.isar0.AES() >= 2)
+        result |= CpuFeatures{CpuFeature::PMULL};
+    if (regs.isar0.SHA1() >= 1)
+        result |= CpuFeatures{CpuFeature::SHA1};
+    if (regs.isar0.SHA2() >= 1)
+        result |= CpuFeatures{CpuFeature::SHA256};
+    if (regs.isar0.CRC32() >= 1)
+        result |= CpuFeatures{CpuFeature::CRC32};
+    if (regs.isar0.Atomic() >= 2)
+        result |= CpuFeatures{CpuFeature::LSE};
+    if (regs.pfr0.FP() >= 1 && regs.pfr0.AdvSIMD() >= 1)
+        result |= CpuFeatures{CpuFeature::FP16Conv, CpuFeature::FP16};
+    if (regs.isar0.RDM() >= 1)
+        result |= CpuFeatures{CpuFeature::RDM};
+    if (regs.isar1.JSCVT() >= 1)
+        result |= CpuFeatures{CpuFeature::JSCVT};
+    if (regs.isar1.FCMA() >= 1)
+        result |= CpuFeatures{CpuFeature::FCMA};
+    if (regs.isar1.LRCPC() >= 1)
+        result |= CpuFeatures{CpuFeature::LRCPC};
+    if (regs.isar1.DPB() >= 1)
+        result |= CpuFeatures{CpuFeature::DPB};
+    if (regs.isar0.SHA3() >= 1)
+        result |= CpuFeatures{CpuFeature::SHA3};
+    if (regs.isar0.SM3() >= 1)
+        result |= CpuFeatures{CpuFeature::SM3};
+    if (regs.isar0.SM4() >= 1)
+        result |= CpuFeatures{CpuFeature::SM4};
+    if (regs.isar0.DP() >= 1)
+        result |= CpuFeatures{CpuFeature::DotProd};
+    if (regs.isar0.SHA2() >= 2)
+        result |= CpuFeatures{CpuFeature::SHA512};
+    if (regs.pfr0.SVE() >= 1)
+        result |= CpuFeatures{CpuFeature::SVE};
+    if (regs.isar0.FHM() >= 1)
+        result |= CpuFeatures{CpuFeature::FHM};
+    if (regs.pfr0.DIT() >= 1)
+        result |= CpuFeatures{CpuFeature::DIT};
+    if (regs.mmfr2.AT() >= 1)
+        result |= CpuFeatures{CpuFeature::LSE2};
+    if (regs.isar1.LRCPC() >= 2)
+        result |= CpuFeatures{CpuFeature::LRCPC2};
+    if (regs.isar0.TS() >= 1)
+        result |= CpuFeatures{CpuFeature::FlagM};
+    if (regs.pfr1.SSBS() >= 2)
+        result |= CpuFeatures{CpuFeature::SSBS};
+    if (regs.isar1.SB() >= 1)
+        result |= CpuFeatures{CpuFeature::SB};
+    if (regs.isar1.APA() >= 1 || regs.isar1.API() >= 1)
+        result |= CpuFeatures{CpuFeature::PACA};
+    if (regs.isar1.GPA() >= 1 || regs.isar1.GPI() >= 1)
+        result |= CpuFeatures{CpuFeature::PACG};
+    if (regs.isar1.DPB() >= 2)
+        result |= CpuFeatures{CpuFeature::DPB2};
+    if (regs.zfr0.SVEver() >= 1)
+        result |= CpuFeatures{CpuFeature::SVE2};
+    if (regs.zfr0.AES() >= 1)
+        result |= CpuFeatures{CpuFeature::SVE_AES};
+    if (regs.zfr0.AES() >= 2)
+        result |= CpuFeatures{CpuFeature::SVE_PMULL128};
+    if (regs.zfr0.BitPerm() >= 1)
+        result |= CpuFeatures{CpuFeature::SVE_BITPERM};
+    if (regs.zfr0.SHA3() >= 1)
+        result |= CpuFeatures{CpuFeature::SVE_SHA3};
+    if (regs.zfr0.SM4() >= 1)
+        result |= CpuFeatures{CpuFeature::SVE_SM4};
+    if (regs.isar0.TS() >= 2)
+        result |= CpuFeatures{CpuFeature::FlagM2};
+    if (regs.isar1.FRINTTS() >= 1)
+        result |= CpuFeatures{CpuFeature::FRINTTS};
+    if (regs.zfr0.I8MM() >= 1)
+        result |= CpuFeatures{CpuFeature::SVE_I8MM};
+    if (regs.zfr0.F32MM() >= 1)
+        result |= CpuFeatures{CpuFeature::SVE_F32MM};
+    if (regs.zfr0.F64MM() >= 1)
+        result |= CpuFeatures{CpuFeature::SVE_F64MM};
+    if (regs.zfr0.BF16() >= 1)
+        result |= CpuFeatures{CpuFeature::SVE_BF16};
+    if (regs.isar1.I8MM() >= 1)
+        result |= CpuFeatures{CpuFeature::I8MM};
+    if (regs.isar1.BF16() >= 1)
+        result |= CpuFeatures{CpuFeature::BF16};
+    if (regs.isar1.DGH() >= 1)
+        result |= CpuFeatures{CpuFeature::DGH};
+    if (regs.isar0.RNDR() >= 1)
+        result |= CpuFeatures{CpuFeature::RNG};
+    if (regs.pfr1.BT() >= 1)
+        result |= CpuFeatures{CpuFeature::BTI};
+    if (regs.pfr1.MTE() >= 2)
+        result |= CpuFeatures{CpuFeature::MTE};
+    if (regs.mmfr0.ECV() >= 1)
+        result |= CpuFeatures{CpuFeature::ECV};
+    if (regs.mmfr1.AFP() >= 1)
+        result |= CpuFeatures{CpuFeature::AFP};
+    if (regs.isar2.RPRES() >= 1)
+        result |= CpuFeatures{CpuFeature::RPRES};
+    if (regs.pfr1.MTE() >= 3)
+        result |= CpuFeatures{CpuFeature::MTE3};
+    if (regs.pfr1.SME() >= 1)
+        result |= CpuFeatures{CpuFeature::SME};
+    // The SMFR0 fields below are matched against one exact encoding each
+    // rather than with ">=".
+    if (regs.smfr0.I16I64() == 0b1111)
+        result |= CpuFeatures{CpuFeature::SME_I16I64};
+    if (regs.smfr0.F64F64() == 0b1)
+        result |= CpuFeatures{CpuFeature::SME_F64F64};
+    if (regs.smfr0.I8I32() == 0b1111)
+        result |= CpuFeatures{CpuFeature::SME_I8I32};
+    if (regs.smfr0.F16F32() == 0b1)
+        result |= CpuFeatures{CpuFeature::SME_F16F32};
+    if (regs.smfr0.B16F32() == 0b1)
+        result |= CpuFeatures{CpuFeature::SME_B16F32};
+    if (regs.smfr0.F32F32() == 0b1)
+        result |= CpuFeatures{CpuFeature::SME_F32F32};
+    if (regs.smfr0.FA64() == 0b1)
+        result |= CpuFeatures{CpuFeature::SME_FA64};
+    if (regs.isar2.WFxT() >= 2)
+        result |= CpuFeatures{CpuFeature::WFxT};
+    if (regs.isar1.BF16() >= 2)
+        result |= CpuFeatures{CpuFeature::EBF16};
+    if (regs.zfr0.BF16() >= 2)
+        result |= CpuFeatures{CpuFeature::SVE_EBF16};
+    if (regs.isar2.CSSC() >= 1)
+        result |= CpuFeatures{CpuFeature::CSSC};
+    if (regs.isar2.RPRFM() >= 1)
+        result |= CpuFeatures{CpuFeature::RPRFM};
+    if (regs.zfr0.SVEver() >= 2)
+        result |= CpuFeatures{CpuFeature::SVE2p1};
+    if (regs.smfr0.SMEver() >= 1)
+        result |= CpuFeatures{CpuFeature::SME2};
+    if (regs.smfr0.SMEver() >= 2)
+        result |= CpuFeatures{CpuFeature::SME2p1};
+    if (regs.smfr0.I16I32() == 0b0101)
+        result |= CpuFeatures{CpuFeature::SME_I16I32};
+    if (regs.smfr0.BI32I32() == 0b1)
+        result |= CpuFeatures{CpuFeature::SME_BI32I32};
+    if (regs.smfr0.B16B16() == 0b1)
+        result |= CpuFeatures{CpuFeature::SME_B16B16};
+    if (regs.smfr0.F16F16() == 0b1)
+        result |= CpuFeatures{CpuFeature::SME_F16F16};
+    if (regs.isar2.MOPS() >= 1)
+        result |= CpuFeatures{CpuFeature::MOPS};
+    if (regs.isar2.BC() >= 1)
+        result |= CpuFeatures{CpuFeature::HBC};
+
+    return result;
+}
+
+} // namespace oaknut
diff --git a/include/oaknut/feature_detection/feature_detection_linux.hpp b/include/oaknut/feature_detection/feature_detection_linux.hpp
new file mode 100644
index 00000000..6310eaca
--- /dev/null
+++ b/include/oaknut/feature_detection/feature_detection_linux.hpp
@@ -0,0 +1,45 @@
+// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <optional>
+
+#include <sys/auxv.h>
+
+#include "oaknut/feature_detection/cpu_feature.hpp"
+#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
+#include "oaknut/feature_detection/id_registers.hpp"
+#include "oaknut/feature_detection/read_id_registers_directly.hpp"
+
+#ifndef AT_HWCAP
+# define AT_HWCAP 16
+#endif
+#ifndef AT_HWCAP2
+# define AT_HWCAP2 26
+#endif
+
+namespace oaknut {
+
+/// Detects CPU features from the AT_HWCAP and AT_HWCAP2 entries of the
+/// process's auxiliary vector.
+///
+/// @return The feature set decoded from the two hwcap words.
+inline CpuFeatures detect_features_via_hwcap()
+{
+    const unsigned long primary_caps = ::getauxval(AT_HWCAP);
+    const unsigned long secondary_caps = ::getauxval(AT_HWCAP2);
+
+    // The bit-by-bit decoding is done by the two-argument overload.
+    return detect_features_via_hwcap(primary_caps, secondary_caps);
+}
+
+/// Feature detection entry point for this platform.
+///
+/// Forwards to detect_features_via_hwcap(), i.e. the result is derived from
+/// the AT_HWCAP / AT_HWCAP2 auxiliary vector entries.
+inline CpuFeatures detect_features()
+{
+ return detect_features_via_hwcap();
+}
+
+/// Reads the ID registers with id::read_id_registers_directly(), but only
+/// when the CPUID hwcap bit (bit 11 of AT_HWCAP) is set.
+///
+/// @return The register values, or std::nullopt when the bit is clear.
+inline std::optional<id::IdRegisters> read_id_registers()
+{
+    constexpr unsigned long cpuid_bit = 1ul << 11;
+
+    const bool cpuid_available = (::getauxval(AT_HWCAP) & cpuid_bit) != 0;
+    if (!cpuid_available)
+        return std::nullopt;
+
+    return id::read_id_registers_directly();
+}
+
+} // namespace oaknut
diff --git a/include/oaknut/feature_detection/feature_detection_netbsd.hpp b/include/oaknut/feature_detection/feature_detection_netbsd.hpp
new file mode 100644
index 00000000..cdb1deb1
--- /dev/null
+++ b/include/oaknut/feature_detection/feature_detection_netbsd.hpp
@@ -0,0 +1,81 @@
+// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <optional>
+#include <string>
+
+#include <aarch64/armreg.h>
+#include <sys/param.h>
+#include <sys/sysctl.h>
+
+#include "oaknut/feature_detection/cpu_feature.hpp"
+#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
+#include "oaknut/feature_detection/feature_detection_idregs.hpp"
+#include "oaknut/feature_detection/id_registers.hpp"
+
+namespace oaknut {
+
+/// Fetches the ID registers of one core from the
+/// "machdep.cpu<core_index>.cpu_id" sysctl node.
+///
+/// @param core_index Index of the core to query.
+/// @return The registers on success, or std::nullopt if sysctlbyname fails.
+///         PFR2, SMFR0, ISAR2, ISAR3, MMFR3 and MMFR4 are not taken from the
+///         sysctl structure here and are always reported as zero.
+inline std::optional<id::IdRegisters> read_id_registers(std::size_t core_index)
+{
+    std::string node_name = "machdep.cpu";
+    node_name += std::to_string(core_index);
+    node_name += ".cpu_id";
+
+    // Named cpu_id rather than id so it does not shadow namespace id.
+    aarch64_sysctl_cpu_id cpu_id;
+    std::size_t cpu_id_size = sizeof cpu_id;
+
+    const int status = sysctlbyname(node_name.c_str(), &cpu_id, &cpu_id_size, nullptr, 0);
+    if (status < 0)
+        return std::nullopt;
+
+    return id::IdRegisters{
+        cpu_id.ac_midr,
+        id::Pfr0Register{cpu_id.ac_aa64pfr0},
+        id::Pfr1Register{cpu_id.ac_aa64pfr1},
+        id::Pfr2Register{0},
+        id::Zfr0Register{cpu_id.ac_aa64zfr0},
+        id::Smfr0Register{0},
+        id::Isar0Register{cpu_id.ac_aa64isar0},
+        id::Isar1Register{cpu_id.ac_aa64isar1},
+        id::Isar2Register{0},
+        id::Isar3Register{0},
+        id::Mmfr0Register{cpu_id.ac_aa64mmfr0},
+        id::Mmfr1Register{cpu_id.ac_aa64mmfr1},
+        id::Mmfr2Register{cpu_id.ac_aa64mmfr2},
+        id::Mmfr3Register{0},
+        id::Mmfr4Register{0},
+    };
+}
+
+/// Queries the {CTL_HW, HW_NCPU} sysctl for the number of CPUs.
+///
+/// @return The reported count, or 0 if the sysctl call fails.
+inline std::size_t get_core_count()
+{
+    const std::array<int, 2> query{CTL_HW, HW_NCPU};
+
+    int cpu_count = 0;
+    size_t cpu_count_size = sizeof(cpu_count);
+    const bool failed = sysctl(query.data(), query.size(), &cpu_count, &cpu_count_size, nullptr, 0) < 0;
+
+    return failed ? 0 : cpu_count;
+}
+
+/// Computes the set of features common to every core whose ID registers can
+/// be read.
+///
+/// @return The intersection of the per-core feature sets, or an empty
+///         CpuFeatures if no core could be queried.
+inline CpuFeatures detect_features()
+{
+    std::optional<CpuFeatures> common;
+
+    const std::size_t total_cores = get_core_count();
+    for (std::size_t i = 0; i < total_cores; i++) {
+        const std::optional<id::IdRegisters> regs = read_id_registers(i);
+        if (!regs)
+            continue;  // cores that cannot be queried do not affect the result
+
+        const CpuFeatures core_features = detect_features_via_id_registers(*regs);
+        if (common)
+            common = *common & core_features;
+        else
+            common = core_features;  // first readable core seeds the intersection
+    }
+
+    return common.value_or(CpuFeatures{});
+}
+
+} // namespace oaknut
diff --git a/include/oaknut/feature_detection/feature_detection_openbsd.hpp b/include/oaknut/feature_detection/feature_detection_openbsd.hpp
new file mode 100644
index 00000000..8514a2bf
--- /dev/null
+++ b/include/oaknut/feature_detection/feature_detection_openbsd.hpp
@@ -0,0 +1,63 @@
+// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <optional>
+
+#include <sys/sysctl.h>
+#include <sys/types.h>
+
+#include "oaknut/feature_detection/cpu_feature.hpp"
+#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
+#include "oaknut/feature_detection/feature_detection_idregs.hpp"
+#include "oaknut/feature_detection/id_registers.hpp"
+
+namespace oaknut {
+
+namespace detail {
+
+/// Reads one 64-bit value from the CTL_MACHDEP sysctl tree.
+///
+/// @param index Second-level MIB identifier to query.
+/// @return The value read, or 0 if sysctl reports an error.
+inline std::uint64_t read_id_register(int index)
+{
+    std::array<int, 2> mib{CTL_MACHDEP, index};
+
+    uint64_t reg_value = 0;
+    size_t reg_size = sizeof(reg_value);
+    const int status = sysctl(mib.data(), mib.size(), &reg_value, &reg_size, nullptr, 0);
+
+    if (status < 0)
+        return 0;
+    return reg_value;
+}
+
+} // namespace detail
+
+/// Assembles an IdRegisters snapshot from the machdep sysctl values.
+///
+/// MIDR is left empty, PFR2/ISAR3/MMFR3/MMFR4 are reported as zero, and any
+/// register whose sysctl query fails also reads as zero.
+///
+/// @return Always an engaged optional.
+inline std::optional<id::IdRegisters> read_id_registers()
+{
+    // See OpenBSD source: sys/arch/arm64/include/cpu.h
+    const auto machdep = [](int mib_index) -> std::uint64_t {
+        return detail::read_id_register(mib_index);
+    };
+
+    return id::IdRegisters{
+        std::nullopt,  // No easy way of getting MIDR_EL1 other than reading /proc/cpu
+        id::Pfr0Register{machdep(8)},    // CPU_ID_AA64PFR0
+        id::Pfr1Register{machdep(9)},    // CPU_ID_AA64PFR1
+        id::Pfr2Register{0},
+        id::Zfr0Register{machdep(11)},   // CPU_ID_AA64ZFR0
+        id::Smfr0Register{machdep(10)},  // CPU_ID_AA64SMFR0
+        id::Isar0Register{machdep(2)},   // CPU_ID_AA64ISAR0
+        id::Isar1Register{machdep(3)},   // CPU_ID_AA64ISAR1
+        id::Isar2Register{machdep(4)},   // CPU_ID_AA64ISAR2
+        id::Isar3Register{0},
+        id::Mmfr0Register{machdep(5)},   // CPU_ID_AA64MMFR0
+        id::Mmfr1Register{machdep(6)},   // CPU_ID_AA64MMFR1
+        id::Mmfr2Register{machdep(7)},   // CPU_ID_AA64MMFR2
+        id::Mmfr3Register{0},
+        id::Mmfr4Register{0},
+    };
+}
+
+/// Feature detection entry point for this platform: decodes the ID registers
+/// returned by read_id_registers() with detect_features_via_id_registers().
+inline CpuFeatures detect_features()
+{
+ // The optional is dereferenced without a check; read_id_registers() in this
+ // header always returns an engaged value.
+ return detect_features_via_id_registers(*read_id_registers());
+}
+
+} // namespace oaknut
diff --git a/include/oaknut/feature_detection/feature_detection_w32.hpp b/include/oaknut/feature_detection/feature_detection_w32.hpp
new file mode 100644
index 00000000..366a2600
--- /dev/null
+++ b/include/oaknut/feature_detection/feature_detection_w32.hpp
@@ -0,0 +1,99 @@
+// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+#endif
+
+#include <windows.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <optional>
+#include <string>
+
+#include <processthreadsapi.h>
+
+#include "oaknut/feature_detection/cpu_feature.hpp"
+#include "oaknut/feature_detection/id_registers.hpp"
+
+namespace oaknut {
+
+namespace detail {
+
+/// Reads a REG_QWORD value from below HKEY_LOCAL_MACHINE.
+///
+/// @param subkey Registry key path relative to HKEY_LOCAL_MACHINE.
+/// @param name   Name of the value inside that key.
+/// @return The 64-bit value, or std::nullopt if RegGetValueA does not return
+///         ERROR_SUCCESS.
+inline std::optional<std::uint64_t> read_registry_hklm(const std::string& subkey, const std::string& name)
+{
+    std::uint64_t qword;
+    DWORD qword_size = sizeof(qword);
+
+    const auto status = ::RegGetValueA(HKEY_LOCAL_MACHINE, subkey.c_str(), name.c_str(), RRF_RT_REG_QWORD, nullptr, &qword, &qword_size);
+    if (status != ERROR_SUCCESS)
+        return std::nullopt;
+
+    return qword;
+}
+
+/// Reads the registry value "CP <name>" of one core from below
+/// HARDWARE\DESCRIPTION\System\CentralProcessor\<core_index>.
+///
+/// @param core_index Index of the core whose key is queried.
+/// @param name       Register identifier appended to the "CP " prefix.
+/// @return The value, or 0 if it cannot be read.
+inline std::uint64_t read_id_register(std::size_t core_index, const std::string& name)
+{
+    const std::string core_key = "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\" + std::to_string(core_index);
+    const std::string value_name = "CP " + name;
+
+    const std::optional<std::uint64_t> stored = read_registry_hklm(core_key, value_name);
+    return stored.value_or(0);
+}
+
+} // namespace detail
+
+// Ref: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
+
+/// Builds a feature set from the answers of IsProcessorFeaturePresent.
+///
+/// The crypto query is a single flag, so it enables AES, PMULL, SHA1 and
+/// SHA256 together.
+///
+/// @return The union of all features whose query returned non-zero.
+inline CpuFeatures detect_features_via_IsProcessorFeaturePresent()
+{
+    // Numeric feature identifiers are spelled out next to their SDK names.
+    constexpr DWORD crypto_id = 30;  // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE
+    constexpr DWORD crc32_id = 31;   // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE
+    constexpr DWORD atomic_id = 34;  // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
+    constexpr DWORD dp_id = 43;      // PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
+    constexpr DWORD jscvt_id = 44;   // PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE
+    constexpr DWORD lrcpc_id = 45;   // PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE
+
+    CpuFeatures detected;
+
+    if (::IsProcessorFeaturePresent(crypto_id)) {
+        detected |= CpuFeatures{CpuFeature::AES, CpuFeature::PMULL, CpuFeature::SHA1, CpuFeature::SHA256};
+    }
+    if (::IsProcessorFeaturePresent(crc32_id)) {
+        detected |= CpuFeatures{CpuFeature::CRC32};
+    }
+    if (::IsProcessorFeaturePresent(atomic_id)) {
+        detected |= CpuFeatures{CpuFeature::LSE};
+    }
+    if (::IsProcessorFeaturePresent(dp_id)) {
+        detected |= CpuFeatures{CpuFeature::DotProd};
+    }
+    if (::IsProcessorFeaturePresent(jscvt_id)) {
+        detected |= CpuFeatures{CpuFeature::JSCVT};
+    }
+    if (::IsProcessorFeaturePresent(lrcpc_id)) {
+        detected |= CpuFeatures{CpuFeature::LRCPC};
+    }
+
+    return detected;
+}
+
+/// Feature detection entry point for this platform.
+///
+/// FP and ASIMD are always included in the result; everything else comes from
+/// detect_features_via_IsProcessorFeaturePresent().
+inline CpuFeatures detect_features()
+{
+ CpuFeatures result{CpuFeature::FP, CpuFeature::ASIMD};
+ result |= detect_features_via_IsProcessorFeaturePresent();
+ return result;
+}
+
+/// Returns the processor count reported by GetSystemInfo
+/// (SYSTEM_INFO::dwNumberOfProcessors).
+inline std::size_t get_core_count()
+{
+    ::SYSTEM_INFO info;
+    ::GetSystemInfo(&info);
+
+    return static_cast<std::size_t>(info.dwNumberOfProcessors);
+}
+
+/// Assembles an IdRegisters snapshot for one core from the "CP xxxx" registry
+/// values read by detail::read_id_register.
+///
+/// @param core_index Index of the core to query.
+/// @return Always an engaged optional; any value that cannot be read is
+///         reported as zero.
+inline std::optional<id::IdRegisters> read_id_registers(std::size_t core_index)
+{
+    const auto cp = [core_index](const char* name) -> std::uint64_t {
+        return detail::read_id_register(core_index, name);
+    };
+
+    return id::IdRegisters{
+        cp("4000"),
+        id::Pfr0Register{cp("4020")},
+        id::Pfr1Register{cp("4021")},
+        id::Pfr2Register{cp("4022")},
+        id::Zfr0Register{cp("4024")},
+        id::Smfr0Register{cp("4025")},
+        id::Isar0Register{cp("4030")},
+        id::Isar1Register{cp("4031")},
+        id::Isar2Register{cp("4032")},
+        id::Isar3Register{cp("4033")},
+        id::Mmfr0Register{cp("4038")},
+        id::Mmfr1Register{cp("4039")},
+        id::Mmfr2Register{cp("403A")},
+        id::Mmfr3Register{cp("403B")},
+        id::Mmfr4Register{cp("403C")},
+    };
+}
+
+} // namespace oaknut
diff --git a/include/oaknut/feature_detection/id_registers.hpp b/include/oaknut/feature_detection/id_registers.hpp
new file mode 100644
index 00000000..fa779618
--- /dev/null
+++ b/include/oaknut/feature_detection/id_registers.hpp
@@ -0,0 +1,318 @@
+// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <optional>
+
+namespace oaknut::id {
+
+namespace detail {
+
+/// Returns bit number `lsb` of `value` as 0 or 1.
+template<std::size_t lsb>
+constexpr unsigned extract_bit(std::uint64_t value)
+{
+    const std::uint64_t shifted = value >> lsb;
+    return static_cast<unsigned>(shifted & 1u);
+}
+
+/// Returns the unsigned 4-bit field of `value` whose lowest bit is `lsb`.
+template<std::size_t lsb>
+constexpr unsigned extract_field(std::uint64_t value)
+{
+    constexpr std::uint64_t nibble_mask = 0xf;
+    return static_cast<unsigned>((value >> lsb) & nibble_mask);
+}
+
+/// Returns the 4-bit field of `value` whose lowest bit is `lsb`, interpreted
+/// as a two's-complement number in the range [-8, 7].
+template<std::size_t lsb>
+constexpr signed extract_signed_field(std::uint64_t value)
+{
+    const unsigned raw = static_cast<unsigned>((value >> lsb) & 0xf);
+
+    // Flipping the sign bit and subtracting its weight sign-extends the nibble.
+    return static_cast<signed>(raw ^ 0x8u) - 8;
+}
+
+} // namespace detail
+
+// Decoded view of a raw 64-bit PFR0 ID register value (the AA64PFR0 value on
+// the platforms handled in this library). Each accessor returns the 4-bit
+// field starting at the given least-significant bit. FP and AdvSIMD are read
+// as signed fields; all others are unsigned.
+struct Pfr0Register {
+ std::uint64_t value;
+
+ constexpr signed FP() const { return detail::extract_signed_field<16>(value); }
+ constexpr signed AdvSIMD() const { return detail::extract_signed_field<20>(value); }
+ constexpr unsigned GIC() const { return detail::extract_field<24>(value); }
+ constexpr unsigned RAS() const { return detail::extract_field<28>(value); }
+ constexpr unsigned SVE() const { return detail::extract_field<32>(value); }
+ constexpr unsigned SEL2() const { return detail::extract_field<36>(value); }
+ constexpr unsigned MPAM() const { return detail::extract_field<40>(value); }
+ constexpr unsigned AMU() const { return detail::extract_field<44>(value); }
+ constexpr unsigned DIT() const { return detail::extract_field<48>(value); }
+ constexpr unsigned RME() const { return detail::extract_field<52>(value); }
+ constexpr unsigned CSV2() const { return detail::extract_field<56>(value); }
+ constexpr unsigned CSV3() const { return detail::extract_field<60>(value); }
+};
+
+// Decoded view of a raw 64-bit PFR1 ID register value. Each accessor returns
+// the unsigned 4-bit field starting at the given least-significant bit.
+struct Pfr1Register {
+ std::uint64_t value;
+
+ constexpr unsigned BT() const { return detail::extract_field<0>(value); }
+ constexpr unsigned SSBS() const { return detail::extract_field<4>(value); }
+ constexpr unsigned MTE() const { return detail::extract_field<8>(value); }
+ constexpr unsigned RAS_frac() const { return detail::extract_field<12>(value); }
+ constexpr unsigned MPAM_frac() const { return detail::extract_field<16>(value); }
+ // [20:23] - reserved
+ constexpr unsigned SME() const { return detail::extract_field<24>(value); }
+ constexpr unsigned RNDR_trap() const { return detail::extract_field<28>(value); }
+ constexpr unsigned CSV2_frac() const { return detail::extract_field<32>(value); }
+ constexpr unsigned NMI() const { return detail::extract_field<36>(value); }
+ constexpr unsigned MTE_frac() const { return detail::extract_field<40>(value); }
+ constexpr unsigned GCS() const { return detail::extract_field<44>(value); }
+ constexpr unsigned THE() const { return detail::extract_field<48>(value); }
+ constexpr unsigned MTEX() const { return detail::extract_field<52>(value); }
+ constexpr unsigned DF2() const { return detail::extract_field<56>(value); }
+ constexpr unsigned PFAR() const { return detail::extract_field<60>(value); }
+};
+
+// Decoded view of a raw 64-bit PFR2 ID register value. Each accessor returns
+// the unsigned 4-bit field starting at the given least-significant bit.
+struct Pfr2Register {
+ std::uint64_t value;
+
+ constexpr unsigned MTEPERM() const { return detail::extract_field<0>(value); }
+ constexpr unsigned MTESTOREONLY() const { return detail::extract_field<4>(value); }
+ constexpr unsigned MTEFAR() const { return detail::extract_field<8>(value); }
+ // [12:31] reserved
+ constexpr unsigned FPMR() const { return detail::extract_field<32>(value); }
+ // [36:63] reserved
+};
+
+// Decoded view of a raw 64-bit ZFR0 ID register value. Each accessor returns
+// the unsigned 4-bit field starting at the given least-significant bit.
+struct Zfr0Register {
+ std::uint64_t value;
+
+ constexpr unsigned SVEver() const { return detail::extract_field<0>(value); }
+ constexpr unsigned AES() const { return detail::extract_field<4>(value); }
+ // [8:15] reserved
+ constexpr unsigned BitPerm() const { return detail::extract_field<16>(value); }
+ constexpr unsigned BF16() const { return detail::extract_field<20>(value); }
+ constexpr unsigned B16B16() const { return detail::extract_field<24>(value); }
+ // [28:31] reserved
+ constexpr unsigned SHA3() const { return detail::extract_field<32>(value); }
+ // [36:39] reserved
+ constexpr unsigned SM4() const { return detail::extract_field<40>(value); }
+ constexpr unsigned I8MM() const { return detail::extract_field<44>(value); }
+ // [48:51] reserved
+ constexpr unsigned F32MM() const { return detail::extract_field<52>(value); }
+ constexpr unsigned F64MM() const { return detail::extract_field<56>(value); }
+ // [60:63] reserved
+};
+
+// Decoded view of a raw 64-bit SMFR0 ID register value. Unlike the other
+// register views, this one mixes single-bit flags (read with extract_bit,
+// result 0 or 1) with 4-bit fields (read with extract_field).
+struct Smfr0Register {
+ std::uint64_t value;
+
+ // [0:27] reserved
+ constexpr unsigned SF8DP2() const { return detail::extract_bit<28>(value); }
+ constexpr unsigned SF8DP4() const { return detail::extract_bit<29>(value); }
+ constexpr unsigned SF8FMA() const { return detail::extract_bit<30>(value); }
+ // [31] reserved
+ constexpr unsigned F32F32() const { return detail::extract_bit<32>(value); }
+ constexpr unsigned BI32I32() const { return detail::extract_bit<33>(value); }
+ constexpr unsigned B16F32() const { return detail::extract_bit<34>(value); }
+ constexpr unsigned F16F32() const { return detail::extract_bit<35>(value); }
+ constexpr unsigned I8I32() const { return detail::extract_field<36>(value); }
+ constexpr unsigned F8F32() const { return detail::extract_bit<40>(value); }
+ constexpr unsigned F8F16() const { return detail::extract_bit<41>(value); }
+ constexpr unsigned F16F16() const { return detail::extract_bit<42>(value); }
+ constexpr unsigned B16B16() const { return detail::extract_bit<43>(value); }
+ constexpr unsigned I16I32() const { return detail::extract_field<44>(value); }
+ constexpr unsigned F64F64() const { return detail::extract_bit<48>(value); }
+ // [49:51] reserved
+ constexpr unsigned I16I64() const { return detail::extract_field<52>(value); }
+ constexpr unsigned SMEver() const { return detail::extract_field<56>(value); }
+ constexpr unsigned LUTv2() const { return detail::extract_bit<60>(value); }
+ // [61:62] reserved
+ constexpr unsigned FA64() const { return detail::extract_bit<63>(value); }
+};
+
+// Decoded view of a raw 64-bit ISAR0 ID register value. Each accessor returns
+// the unsigned 4-bit field starting at the given least-significant bit.
+struct Isar0Register {
+ std::uint64_t value;
+
+ // [0:3] reserved
+ constexpr unsigned AES() const { return detail::extract_field<4>(value); }
+ constexpr unsigned SHA1() const { return detail::extract_field<8>(value); }
+ constexpr unsigned SHA2() const { return detail::extract_field<12>(value); }
+ constexpr unsigned CRC32() const { return detail::extract_field<16>(value); }
+ constexpr unsigned Atomic() const { return detail::extract_field<20>(value); }
+ constexpr unsigned TME() const { return detail::extract_field<24>(value); }
+ constexpr unsigned RDM() const { return detail::extract_field<28>(value); }
+ constexpr unsigned SHA3() const { return detail::extract_field<32>(value); }
+ constexpr unsigned SM3() const { return detail::extract_field<36>(value); }
+ constexpr unsigned SM4() const { return detail::extract_field<40>(value); }
+ constexpr unsigned DP() const { return detail::extract_field<44>(value); }
+ constexpr unsigned FHM() const { return detail::extract_field<48>(value); }
+ constexpr unsigned TS() const { return detail::extract_field<52>(value); }
+ constexpr unsigned TLB() const { return detail::extract_field<56>(value); }
+ constexpr unsigned RNDR() const { return detail::extract_field<60>(value); }
+};
+
+// Decoded view of a raw 64-bit ISAR1 ID register value. Each accessor returns
+// the unsigned 4-bit field starting at the given least-significant bit.
+struct Isar1Register {
+ std::uint64_t value;
+
+ constexpr unsigned DPB() const { return detail::extract_field<0>(value); }
+ constexpr unsigned APA() const { return detail::extract_field<4>(value); }
+ constexpr unsigned API() const { return detail::extract_field<8>(value); }
+ constexpr unsigned JSCVT() const { return detail::extract_field<12>(value); }
+ constexpr unsigned FCMA() const { return detail::extract_field<16>(value); }
+ constexpr unsigned LRCPC() const { return detail::extract_field<20>(value); }
+ constexpr unsigned GPA() const { return detail::extract_field<24>(value); }
+ constexpr unsigned GPI() const { return detail::extract_field<28>(value); }
+ constexpr unsigned FRINTTS() const { return detail::extract_field<32>(value); }
+ constexpr unsigned SB() const { return detail::extract_field<36>(value); }
+ constexpr unsigned SPECRES() const { return detail::extract_field<40>(value); }
+ constexpr unsigned BF16() const { return detail::extract_field<44>(value); }
+ constexpr unsigned DGH() const { return detail::extract_field<48>(value); }
+ constexpr unsigned I8MM() const { return detail::extract_field<52>(value); }
+ constexpr unsigned XS() const { return detail::extract_field<56>(value); }
+ constexpr unsigned LS64() const { return detail::extract_field<60>(value); }
+};
+
+// Decoded view of a raw 64-bit ISAR2 ID register value. Each accessor returns
+// the unsigned 4-bit field starting at the given least-significant bit.
+struct Isar2Register {
+ std::uint64_t value;
+
+ constexpr unsigned WFxT() const { return detail::extract_field<0>(value); }
+ constexpr unsigned RPRES() const { return detail::extract_field<4>(value); }
+ constexpr unsigned GPA3() const { return detail::extract_field<8>(value); }
+ constexpr unsigned APA3() const { return detail::extract_field<12>(value); }
+ constexpr unsigned MOPS() const { return detail::extract_field<16>(value); }
+ constexpr unsigned BC() const { return detail::extract_field<20>(value); }
+ constexpr unsigned PAC_frac() const { return detail::extract_field<24>(value); }
+ constexpr unsigned CLRBHB() const { return detail::extract_field<28>(value); }
+ constexpr unsigned SYSREG_128() const { return detail::extract_field<32>(value); }
+ constexpr unsigned SYSINSTR_128() const { return detail::extract_field<36>(value); }
+ constexpr unsigned PRFMSLC() const { return detail::extract_field<40>(value); }
+ // [44:47] reserved
+ constexpr unsigned RPRFM() const { return detail::extract_field<48>(value); }
+ constexpr unsigned CSSC() const { return detail::extract_field<52>(value); }
+ constexpr unsigned LUT() const { return detail::extract_field<56>(value); }
+ constexpr unsigned ATS1A() const { return detail::extract_field<60>(value); }
+};
+
+// Decoded view of a raw 64-bit ISAR3 ID register value. Each accessor returns
+// the unsigned 4-bit field starting at the given least-significant bit.
+struct Isar3Register {
+ std::uint64_t value;
+
+ constexpr unsigned CPA() const { return detail::extract_field<0>(value); }
+ constexpr unsigned FAMINMAX() const { return detail::extract_field<4>(value); }
+ constexpr unsigned TLBIW() const { return detail::extract_field<8>(value); }
+ // [12:63] reserved
+};
+
+// Decoded view of a raw 64-bit MMFR0 ID register value. Each accessor returns
+// the unsigned 4-bit field starting at the given least-significant bit.
+struct Mmfr0Register {
+ std::uint64_t value;
+
+ constexpr unsigned PARange() const { return detail::extract_field<0>(value); }
+ constexpr unsigned ASIDBits() const { return detail::extract_field<4>(value); }
+ constexpr unsigned BigEnd() const { return detail::extract_field<8>(value); }
+ constexpr unsigned SNSMem() const { return detail::extract_field<12>(value); }
+ constexpr unsigned BigEndEL0() const { return detail::extract_field<16>(value); }
+ constexpr unsigned TGran16() const { return detail::extract_field<20>(value); }
+ constexpr unsigned TGran64() const { return detail::extract_field<24>(value); }
+ constexpr unsigned TGran4() const { return detail::extract_field<28>(value); }
+ constexpr unsigned TGran16_2() const { return detail::extract_field<32>(value); }
+ constexpr unsigned TGran64_2() const { return detail::extract_field<36>(value); }
+ constexpr unsigned TGran4_2() const { return detail::extract_field<40>(value); }
+ constexpr unsigned ExS() const { return detail::extract_field<44>(value); }
+ // [48:55] reserved
+ constexpr unsigned FGT() const { return detail::extract_field<56>(value); }
+ constexpr unsigned ECV() const { return detail::extract_field<60>(value); }
+};
+
+// Decoded view of a raw 64-bit MMFR1 ID register value. Each accessor returns
+// the unsigned 4-bit field starting at the given least-significant bit.
+struct Mmfr1Register {
+ std::uint64_t value;
+
+ constexpr unsigned HAFDBS() const { return detail::extract_field<0>(value); }
+ constexpr unsigned VMIDBits() const { return detail::extract_field<4>(value); }
+ constexpr unsigned VH() const { return detail::extract_field<8>(value); }
+ constexpr unsigned HPDS() const { return detail::extract_field<12>(value); }
+ constexpr unsigned LO() const { return detail::extract_field<16>(value); }
+ constexpr unsigned PAN() const { return detail::extract_field<20>(value); }
+ constexpr unsigned SpecSEI() const { return detail::extract_field<24>(value); }
+ constexpr unsigned XNX() const { return detail::extract_field<28>(value); }
+ constexpr unsigned TWED() const { return detail::extract_field<32>(value); }
+ constexpr unsigned ETS() const { return detail::extract_field<36>(value); }
+ constexpr unsigned HCX() const { return detail::extract_field<40>(value); }
+ constexpr unsigned AFP() const { return detail::extract_field<44>(value); }
+ constexpr unsigned nTLBPA() const { return detail::extract_field<48>(value); }
+ constexpr unsigned TIDCP1() const { return detail::extract_field<52>(value); }
+ constexpr unsigned CMOW() const { return detail::extract_field<56>(value); }
+ constexpr unsigned ECBHB() const { return detail::extract_field<60>(value); }
+};
+
+// Decoded view of a raw 64-bit MMFR2 ID register value. Each accessor returns
+// the unsigned 4-bit field starting at the given least-significant bit.
+struct Mmfr2Register {
+ std::uint64_t value;
+
+ constexpr unsigned CnP() const { return detail::extract_field<0>(value); }
+ constexpr unsigned UAO() const { return detail::extract_field<4>(value); }
+ constexpr unsigned LSM() const { return detail::extract_field<8>(value); }
+ constexpr unsigned IESB() const { return detail::extract_field<12>(value); }
+ constexpr unsigned VARange() const { return detail::extract_field<16>(value); }
+ constexpr unsigned CCIDX() const { return detail::extract_field<20>(value); }
+ constexpr unsigned NV() const { return detail::extract_field<24>(value); }
+ constexpr unsigned ST() const { return detail::extract_field<28>(value); }
+ constexpr unsigned AT() const { return detail::extract_field<32>(value); }
+ constexpr unsigned IDS() const { return detail::extract_field<36>(value); }
+ constexpr unsigned FWB() const { return detail::extract_field<40>(value); }
+ // [44:47] reserved
+ constexpr unsigned TTL() const { return detail::extract_field<48>(value); }
+ constexpr unsigned BBM() const { return detail::extract_field<52>(value); }
+ constexpr unsigned EVT() const { return detail::extract_field<56>(value); }
+ constexpr unsigned E0PD() const { return detail::extract_field<60>(value); }
+};
+
+// Decoded view of a raw 64-bit MMFR3 ID register value. Each accessor returns
+// the unsigned 4-bit field starting at the given least-significant bit.
+struct Mmfr3Register {
+ std::uint64_t value;
+
+ constexpr unsigned TCRX() const { return detail::extract_field<0>(value); }
+ constexpr unsigned SCTLRX() const { return detail::extract_field<4>(value); }
+ constexpr unsigned S1PIE() const { return detail::extract_field<8>(value); }
+ constexpr unsigned S2PIE() const { return detail::extract_field<12>(value); }
+ constexpr unsigned S1POE() const { return detail::extract_field<16>(value); }
+ constexpr unsigned S2POE() const { return detail::extract_field<20>(value); }
+ constexpr unsigned AIE() const { return detail::extract_field<24>(value); }
+ constexpr unsigned MEC() const { return detail::extract_field<28>(value); }
+ constexpr unsigned D128() const { return detail::extract_field<32>(value); }
+ constexpr unsigned D128_2() const { return detail::extract_field<36>(value); }
+ constexpr unsigned SNERR() const { return detail::extract_field<40>(value); }
+ constexpr unsigned ANERR() const { return detail::extract_field<44>(value); }
+ // [48:51] reserved
+ constexpr unsigned SDERR() const { return detail::extract_field<52>(value); }
+ constexpr unsigned ADERR() const { return detail::extract_field<56>(value); }
+ constexpr unsigned Spec_FPACC() const { return detail::extract_field<60>(value); }
+};
+
+// Decoded view of a raw 64-bit MMFR4 ID register value. Each accessor returns
+// the unsigned 4-bit field starting at the given least-significant bit.
+struct Mmfr4Register {
+ std::uint64_t value;
+
+ // [0:3] reserved
+ constexpr unsigned EIESB() const { return detail::extract_field<4>(value); }
+ constexpr unsigned ASID2() const { return detail::extract_field<8>(value); }
+ constexpr unsigned HACDBS() const { return detail::extract_field<12>(value); }
+ constexpr unsigned FGWTE3() const { return detail::extract_field<16>(value); }
+ constexpr unsigned NV_frac() const { return detail::extract_field<20>(value); }
+ constexpr unsigned E2H0() const { return detail::extract_field<24>(value); }
+ // [28:35] reserved
+ constexpr unsigned E3DSE() const { return detail::extract_field<36>(value); }
+ // [40:63] reserved
+};
+
+// Snapshot of the ID registers used for feature detection.
+//
+// This is an aggregate and is brace-initialized positionally by the platform
+// backends, so the member order below is part of its interface.
+struct IdRegisters {
+ // Optional because not every backend supplies a MIDR value.
+ std::optional<std::uint64_t> midr;
+ Pfr0Register pfr0;
+ Pfr1Register pfr1;
+ Pfr2Register pfr2;
+ Zfr0Register zfr0;
+ Smfr0Register smfr0;
+ Isar0Register isar0;
+ Isar1Register isar1;
+ Isar2Register isar2;
+ Isar3Register isar3;
+ Mmfr0Register mmfr0;
+ Mmfr1Register mmfr1;
+ Mmfr2Register mmfr2;
+ Mmfr3Register mmfr3;
+ Mmfr4Register mmfr4;
+};
+
+} // namespace oaknut::id
diff --git a/include/oaknut/feature_detection/read_id_registers_directly.hpp b/include/oaknut/feature_detection/read_id_registers_directly.hpp
new file mode 100644
index 00000000..04db5188
--- /dev/null
+++ b/include/oaknut/feature_detection/read_id_registers_directly.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <cstdint>
+
+#include "oaknut/feature_detection/id_registers.hpp"
+
+namespace oaknut::id {
+
+/// Reads every ID register represented in IdRegisters with MRS instructions
+/// executed on the calling core.
+///
+/// The registers are addressed by their raw s<op0>_<op1>_c<CRn>_c<CRm>_<op2>
+/// encoding rather than by name.
+///
+/// NOTE(review): whether these MRS instructions are permitted from the
+/// caller's context is not checked here; callers are expected to establish
+/// that first.
+///
+/// @return An IdRegisters value with all members populated, MIDR included.
+inline IdRegisters read_id_registers_directly()
+{
+    std::uint64_t midr, pfr0, pfr1, pfr2, isar0, isar1, isar2, isar3, mmfr0, mmfr1, mmfr2, mmfr3, mmfr4, zfr0, smfr0;
+
+#define OAKNUT_READ_REGISTER(reg, var) \
+    __asm__("mrs %0, " #reg            \
+            : "=r"(var))
+
+    OAKNUT_READ_REGISTER(s3_0_c0_c0_0, midr);
+    OAKNUT_READ_REGISTER(s3_0_c0_c4_0, pfr0);
+    OAKNUT_READ_REGISTER(s3_0_c0_c4_1, pfr1);
+    OAKNUT_READ_REGISTER(s3_0_c0_c4_2, pfr2);
+    OAKNUT_READ_REGISTER(s3_0_c0_c4_4, zfr0);
+    OAKNUT_READ_REGISTER(s3_0_c0_c4_5, smfr0);
+    OAKNUT_READ_REGISTER(s3_0_c0_c6_0, isar0);
+    OAKNUT_READ_REGISTER(s3_0_c0_c6_1, isar1);
+    OAKNUT_READ_REGISTER(s3_0_c0_c6_2, isar2);
+    OAKNUT_READ_REGISTER(s3_0_c0_c6_3, isar3);
+    OAKNUT_READ_REGISTER(s3_0_c0_c7_0, mmfr0);
+    OAKNUT_READ_REGISTER(s3_0_c0_c7_1, mmfr1);
+    OAKNUT_READ_REGISTER(s3_0_c0_c7_2, mmfr2);
+    OAKNUT_READ_REGISTER(s3_0_c0_c7_3, mmfr3);
+    OAKNUT_READ_REGISTER(s3_0_c0_c7_4, mmfr4);
+
+    // Must name the macro defined above, otherwise it leaks into includers.
+#undef OAKNUT_READ_REGISTER
+
+    return IdRegisters{
+        midr,
+        Pfr0Register{pfr0},
+        Pfr1Register{pfr1},
+        Pfr2Register{pfr2},
+        Zfr0Register{zfr0},
+        Smfr0Register{smfr0},
+        Isar0Register{isar0},
+        Isar1Register{isar1},
+        Isar2Register{isar2},
+        Isar3Register{isar3},
+        Mmfr0Register{mmfr0},
+        Mmfr1Register{mmfr1},
+        Mmfr2Register{mmfr2},
+        Mmfr3Register{mmfr3},
+        Mmfr4Register{mmfr4},
+    };
+}
+
+}  // namespace oaknut::id
diff --git a/include/oaknut/impl/arm64_encode_helpers.inc.hpp b/include/oaknut/impl/arm64_encode_helpers.inc.hpp
index 3081d943..fb636b78 100644
--- a/include/oaknut/impl/arm64_encode_helpers.inc.hpp
+++ b/include/oaknut/impl/arm64_encode_helpers.inc.hpp
@@ -8,7 +8,7 @@ static constexpr std::uint32_t pdep(std::uint32_t val)
std::uint32_t res = 0;
for (std::uint32_t bb = 1; mask; bb += bb) {
if (val & bb)
- res |= mask & -mask;
+ res |= mask & (~mask + 1);
mask &= mask - 1;
}
return res;
@@ -107,6 +107,61 @@ std::uint32_t encode(List<T, N> v)
return encode<splat>(v.m_base);
}
+// Encodes an AddrOffset operand into the instruction bits selected by `splat`.
+//
+// The payload is a variant and is handled per alternative:
+//  - std::uint32_t: an already-encoded value, deposited into `splat` as is.
+//  - Label*:        if the label's offset is known, the distance from the
+//                   current offset is encoded immediately; otherwise a
+//                   writeback is recorded on the label and 0 is returned for now.
+//  - const void*:   the distance from the current emission pointer to `p` is
+//                   encoded.
+template<std::uint32_t splat, std::size_t size, std::size_t align>
+std::uint32_t encode(AddrOffset<size, align> v)
+{
+ // splat must provide exactly one bit position per encoded offset bit.
+ static_assert(std::popcount(splat) == size - align);
+
+ // Captureless so that it can be converted to Label::EmitFunctionType below.
+ const auto encode_fn = [](std::ptrdiff_t current_offset, std::ptrdiff_t target_offset) {
+ const std::ptrdiff_t diff = target_offset - current_offset;
+ return pdep<splat>(AddrOffset<size, align>::encode(diff));
+ };
+
+ return std::visit(detail::overloaded{
+ [&](std::uint32_t encoding) -> std::uint32_t {
+ return pdep<splat>(encoding);
+ },
+ [&](Label* label) -> std::uint32_t {
+ if (label->m_offset) {
+ return encode_fn(Policy::offset(), *label->m_offset);
+ }
+
+ // Offset not known yet: record where we are, the mask ~splat and the encoder on the label.
+ label->m_wbs.emplace_back(Label::Writeback{Policy::offset(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
+ return 0u;
+ },
+ [&](const void* p) -> std::uint32_t {
+ const std::ptrdiff_t diff = reinterpret_cast<std::uintptr_t>(p) - Policy::template xptr<std::uintptr_t>();
+ return pdep<splat>(AddrOffset<size, align>::encode(diff));
+ },
+ },
+ v.m_payload);
+}
+
+// Encodes a PageOffset operand into the instruction bits selected by `splat`.
+//
+// The payload is a variant and is handled per alternative:
+//  - Label*:      if the label's offset is known, PageOffset::encode is applied
+//                 to the current and target offsets immediately; otherwise a
+//                 writeback is recorded on the label and 0 is returned for now.
+//  - const void*: PageOffset::encode is applied to the current emission pointer
+//                 and `p`.
+template<std::uint32_t splat, std::size_t size, std::size_t shift_amount>
+std::uint32_t encode(PageOffset<size, shift_amount> v)
+{
+ // splat must provide exactly one bit position per encoded bit.
+ static_assert(std::popcount(splat) == size);
+
+ // Captureless so that it can be converted to Label::EmitFunctionType below.
+ const auto encode_fn = [](std::ptrdiff_t current_offset, std::ptrdiff_t target_offset) {
+ return pdep<splat>(PageOffset<size, shift_amount>::encode(static_cast<std::uintptr_t>(current_offset), static_cast<std::uintptr_t>(target_offset)));
+ };
+
+ return std::visit(detail::overloaded{
+ [&](Label* label) -> std::uint32_t {
+ if (label->m_offset) {
+ return encode_fn(Policy::offset(), *label->m_offset);
+ }
+
+ // Offset not known yet: record where we are, the mask ~splat and the encoder on the label.
+ label->m_wbs.emplace_back(Label::Writeback{Policy::offset(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
+ return 0u;
+ },
+ [&](const void* p) -> std::uint32_t {
+ // NOTE(review): the target is cast to std::ptrdiff_t here but to std::uintptr_t in encode_fn above - confirm against PageOffset::encode's parameter types.
+ return pdep<splat>(PageOffset<size, shift_amount>::encode(Policy::template xptr<std::uintptr_t>(), reinterpret_cast<std::ptrdiff_t>(p)));
+ },
+ },
+ v.m_payload);
+}
+
#undef OAKNUT_STD_ENCODE
void addsubext_lsl_correction(AddSubExt& ext, XRegSp)
diff --git a/include/oaknut/impl/cpu_feature.inc.hpp b/include/oaknut/impl/cpu_feature.inc.hpp
new file mode 100644
index 00000000..1f7cd879
--- /dev/null
+++ b/include/oaknut/impl/cpu_feature.inc.hpp
@@ -0,0 +1,78 @@
+// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+OAKNUT_CPU_FEATURE(FP)
+OAKNUT_CPU_FEATURE(ASIMD)
+OAKNUT_CPU_FEATURE(AES)
+OAKNUT_CPU_FEATURE(PMULL)
+OAKNUT_CPU_FEATURE(SHA1)
+OAKNUT_CPU_FEATURE(SHA256)
+OAKNUT_CPU_FEATURE(CRC32)
+OAKNUT_CPU_FEATURE(LSE)
+OAKNUT_CPU_FEATURE(FP16Conv)
+OAKNUT_CPU_FEATURE(FP16)
+OAKNUT_CPU_FEATURE(RDM)
+OAKNUT_CPU_FEATURE(JSCVT)
+OAKNUT_CPU_FEATURE(FCMA)
+OAKNUT_CPU_FEATURE(LRCPC)
+OAKNUT_CPU_FEATURE(DPB)
+OAKNUT_CPU_FEATURE(SHA3)
+OAKNUT_CPU_FEATURE(SM3)
+OAKNUT_CPU_FEATURE(SM4)
+OAKNUT_CPU_FEATURE(DotProd)
+OAKNUT_CPU_FEATURE(SHA512)
+OAKNUT_CPU_FEATURE(SVE)
+OAKNUT_CPU_FEATURE(FHM)
+OAKNUT_CPU_FEATURE(DIT)
+OAKNUT_CPU_FEATURE(LSE2)
+OAKNUT_CPU_FEATURE(LRCPC2)
+OAKNUT_CPU_FEATURE(FlagM)
+OAKNUT_CPU_FEATURE(SSBS)
+OAKNUT_CPU_FEATURE(SB)
+OAKNUT_CPU_FEATURE(PACA)
+OAKNUT_CPU_FEATURE(PACG)
+OAKNUT_CPU_FEATURE(DPB2)
+OAKNUT_CPU_FEATURE(SVE2)
+OAKNUT_CPU_FEATURE(SVE_AES)
+OAKNUT_CPU_FEATURE(SVE_PMULL128)
+OAKNUT_CPU_FEATURE(SVE_BITPERM)
+OAKNUT_CPU_FEATURE(SVE_SHA3)
+OAKNUT_CPU_FEATURE(SVE_SM4)
+OAKNUT_CPU_FEATURE(FlagM2)
+OAKNUT_CPU_FEATURE(FRINTTS)
+OAKNUT_CPU_FEATURE(SVE_I8MM)
+OAKNUT_CPU_FEATURE(SVE_F32MM)
+OAKNUT_CPU_FEATURE(SVE_F64MM)
+OAKNUT_CPU_FEATURE(SVE_BF16)
+OAKNUT_CPU_FEATURE(I8MM)
+OAKNUT_CPU_FEATURE(BF16)
+OAKNUT_CPU_FEATURE(DGH)
+OAKNUT_CPU_FEATURE(RNG)
+OAKNUT_CPU_FEATURE(BTI)
+OAKNUT_CPU_FEATURE(MTE)
+OAKNUT_CPU_FEATURE(ECV)
+OAKNUT_CPU_FEATURE(AFP)
+OAKNUT_CPU_FEATURE(RPRES)
+OAKNUT_CPU_FEATURE(MTE3)
+OAKNUT_CPU_FEATURE(SME)
+OAKNUT_CPU_FEATURE(SME_I16I64)
+OAKNUT_CPU_FEATURE(SME_F64F64)
+OAKNUT_CPU_FEATURE(SME_I8I32)
+OAKNUT_CPU_FEATURE(SME_F16F32)
+OAKNUT_CPU_FEATURE(SME_B16F32)
+OAKNUT_CPU_FEATURE(SME_F32F32)
+OAKNUT_CPU_FEATURE(SME_FA64)
+OAKNUT_CPU_FEATURE(WFxT)
+OAKNUT_CPU_FEATURE(EBF16)
+OAKNUT_CPU_FEATURE(SVE_EBF16)
+OAKNUT_CPU_FEATURE(CSSC)
+OAKNUT_CPU_FEATURE(RPRFM)
+OAKNUT_CPU_FEATURE(SVE2p1)
+OAKNUT_CPU_FEATURE(SME2)
+OAKNUT_CPU_FEATURE(SME2p1)
+OAKNUT_CPU_FEATURE(SME_I16I32)
+OAKNUT_CPU_FEATURE(SME_BI32I32)
+OAKNUT_CPU_FEATURE(SME_B16B16)
+OAKNUT_CPU_FEATURE(SME_F16F16)
+OAKNUT_CPU_FEATURE(MOPS)
+OAKNUT_CPU_FEATURE(HBC)
diff --git a/include/oaknut/impl/enum.hpp b/include/oaknut/impl/enum.hpp
index 89dc9356..68448b47 100644
--- a/include/oaknut/impl/enum.hpp
+++ b/include/oaknut/impl/enum.hpp
@@ -85,15 +85,67 @@ enum class PstateField {
};
enum class SystemReg {
+ AMCFGR_EL0 = 0b11'011'1101'0010'001,
+ AMCGCR_EL0 = 0b11'011'1101'0010'010,
+ AMCNTENCLR0_EL0 = 0b11'011'1101'0010'100,
+ AMCNTENCLR1_EL0 = 0b11'011'1101'0011'000,
+ AMCNTENSET0_EL0 = 0b11'011'1101'0010'101,
+ AMCNTENSET1_EL0 = 0b11'011'1101'0011'001,
+ AMCR_EL0 = 0b11'011'1101'0010'000,
+ AMEVCNTR0_n_EL0 = 0b11'011'1101'0100'000, // n = 0-3
+ AMEVCNTR1_n_EL0 = 0b11'011'1101'1100'000, // n = 0-15
+ AMEVTYPER0_n_EL0 = 0b11'011'1101'0110'000, // n = 0-3
+ AMEVTYPER1_n_EL0 = 0b11'011'1101'1110'000, // n = 0-15
+ AMUSERENR_EL0 = 0b11'011'1101'0010'011,
CNTFRQ_EL0 = 0b11'011'1110'0000'000,
+ CNTP_CTL_EL0 = 0b11'011'1110'0010'001,
+ CNTP_CVAL_EL0 = 0b11'011'1110'0010'010,
+ CNTP_TVAL_EL0 = 0b11'011'1110'0010'000,
CNTPCT_EL0 = 0b11'011'1110'0000'001,
+ CNTV_CTL_EL0 = 0b11'011'1110'0011'001,
+ CNTV_CVAL_EL0 = 0b11'011'1110'0011'010,
+ CNTV_TVAL_EL0 = 0b11'011'1110'0011'000,
+ CNTVCT_EL0 = 0b11'011'1110'0000'010,
CTR_EL0 = 0b11'011'0000'0000'001,
+ CurrentEL = 0b11'000'0100'0010'010,
+ DAIF = 0b11'011'0100'0010'001,
+ DBGDTR_EL0 = 0b10'011'0000'0100'000,
+ DBGDTRRX_EL0 = 0b10'011'0000'0101'000,
+ DBGDTRTX_EL0 = 0b10'011'0000'0101'000,
DCZID_EL0 = 0b11'011'0000'0000'111,
+ DIT = 0b11'011'0100'0010'101,
+ DLR_EL0 = 0b11'011'0100'0101'001,
+ DSPSR_EL0 = 0b11'011'0100'0101'000,
FPCR = 0b11'011'0100'0100'000,
FPSR = 0b11'011'0100'0100'001,
+ MDCCSR_EL0 = 0b10'011'0000'0001'000,
NZCV = 0b11'011'0100'0010'000,
+ PAN = 0b11'000'0100'0010'011,
+ PMCCFILTR_EL0 = 0b11'011'1110'1111'111,
+ PMCCNTR_EL0 = 0b11'011'1001'1101'000,
+ PMCEID0_EL0 = 0b11'011'1001'1100'110,
+ PMCEID1_EL0 = 0b11'011'1001'1100'111,
+ PMCNTENCLR_EL0 = 0b11'011'1001'1100'010,
+ PMCNTENSET_EL0 = 0b11'011'1001'1100'001,
+ PMCR_EL0 = 0b11'011'1001'1100'000,
+ PMEVCNTR_n_EL0 = 0b11'011'1110'1000'000, // n = 0-30
+ PMEVTYPER_n_EL0 = 0b11'011'1110'1100'000, // n = 0-30
+ PMOVSCLR_EL0 = 0b11'011'1001'1100'011,
+ PMOVSSET_EL0 = 0b11'011'1001'1110'011,
+ PMSELR_EL0 = 0b11'011'1001'1100'101,
+ PMSWINC_EL0 = 0b11'011'1001'1100'100,
+ PMUSERENR_EL0 = 0b11'011'1001'1110'000,
+ PMXEVCNTR_EL0 = 0b11'011'1001'1101'010,
+ PMXEVTYPER_EL0 = 0b11'011'1001'1101'001,
+ SP_EL0 = 0b11'000'0100'0001'000,
+ SPSel = 0b11'000'0100'0010'000,
+ SPSR_abt = 0b11'100'0100'0011'001,
+ SPSR_fiq = 0b11'100'0100'0011'011,
+ SPSR_irq = 0b11'100'0100'0011'000,
+ SPSR_und = 0b11'100'0100'0011'010,
TPIDR_EL0 = 0b11'011'1101'0000'010,
TPIDRRO_EL0 = 0b11'011'1101'0000'011,
+ UAO = 0b11'000'0100'0010'100,
};
enum class AtOp {
@@ -199,7 +251,7 @@ enum class TlbiOp {
VALE1 = 0b000'0111'101,
VAALE1 = 0b000'0111'111,
IPAS2E1IS = 0b100'0000'001,
- RIPAS2E1IS = 0b100'0000'010, // ARMv8.4-TLBI
+ RIPAS2E1IS = 0b100'0000'010, // ARMv8.4-TLBI
IPAS2LE1IS = 0b100'0000'101,
RIPAS2LE1IS = 0b100'0000'110, // ARMv8.4-TLBI
ALLE2OS = 0b100'0001'000, // ARMv8.4-TLBI
@@ -214,11 +266,11 @@ enum class TlbiOp {
ALLE1IS = 0b100'0011'100,
VALE2IS = 0b100'0011'101,
VMALLS12E1IS = 0b100'0011'110,
- IPAS2E1OS = 0b100'0100'000, // ARMv8.4-TLBI
+ IPAS2E1OS = 0b100'0100'000, // ARMv8.4-TLBI
IPAS2E1 = 0b100'0100'001,
- RIPAS2E1 = 0b100'0100'010, // ARMv8.4-TLBI
- RIPAS2E1OS = 0b100'0100'011, // ARMv8.4-TLBI
- IPAS2LE1OS = 0b100'0100'100, // ARMv8.4-TLBI
+ RIPAS2E1 = 0b100'0100'010, // ARMv8.4-TLBI
+ RIPAS2E1OS = 0b100'0100'011, // ARMv8.4-TLBI
+ IPAS2LE1OS = 0b100'0100'100, // ARMv8.4-TLBI
IPAS2LE1 = 0b100'0100'101,
RIPAS2LE1 = 0b100'0100'110, // ARMv8.4-TLBI
RIPAS2LE1OS = 0b100'0100'111, // ARMv8.4-TLBI
diff --git a/include/oaknut/impl/imm.hpp b/include/oaknut/impl/imm.hpp
index cc90832c..7cde26fe 100644
--- a/include/oaknut/impl/imm.hpp
+++ b/include/oaknut/impl/imm.hpp
@@ -60,9 +60,9 @@ public:
constexpr /* implicit */ AddSubImm(std::uint64_t value_)
{
if ((value_ & 0xFFF) == value_) {
- m_encoded = value_;
+ m_encoded = static_cast<std::uint32_t>(value_);
} else if ((value_ & 0xFFF000) == value_) {
- m_encoded = (value_ >> 12) | (1 << 12);
+ m_encoded = static_cast<std::uint32_t>((value_ >> 12) | (1 << 12));
} else {
throw OaknutException{ExceptionType::InvalidAddSubImm};
}
@@ -126,18 +126,18 @@ constexpr std::optional<std::uint32_t> encode_bit_imm(std::uint64_t value)
if (value == 0 || (~value) == 0)
return std::nullopt;
- const std::size_t rotation = std::countr_zero(value & (value + 1));
+ const int rotation = std::countr_zero(value & (value + 1));
const std::uint64_t rot_value = std::rotr(value, rotation);
- const std::size_t esize = std::countr_zero(rot_value & (rot_value + 1));
- const std::size_t ones = std::countr_one(rot_value);
+ const int esize = std::countr_zero(rot_value & (rot_value + 1));
+ const int ones = std::countr_one(rot_value);
if (std::rotr(value, esize) != value)
return std::nullopt;
- const std::uint32_t S = ((-esize) << 1) | (ones - 1);
- const std::uint32_t R = (esize - rotation) & (esize - 1);
- const std::uint32_t N = (~S >> 6) & 1;
+ const int S = ((-esize) << 1) | (ones - 1);
+ const int R = (esize - rotation) & (esize - 1);
+ const int N = (~S >> 6) & 1;
return static_cast<std::uint32_t>((S & 0b111111) | (R << 6) | (N << 12));
}
diff --git a/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp b/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp
index 4f5ca8f0..09e8665f 100644
--- a/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp
+++ b/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp
@@ -167,13 +167,13 @@ void BFI(WReg wd, WReg wn, Imm<5> lsb, Imm<5> width)
{
if (width.value() == 0 || width.value() > (32 - lsb.value()))
throw OaknutException{ExceptionType::InvalidBitWidth};
- emit<"0011001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (-lsb.value()) & 31, width.value() - 1);
+ emit<"0011001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (~lsb.value() + 1) & 31, width.value() - 1);
}
void BFI(XReg xd, XReg xn, Imm<6> lsb, Imm<6> width)
{
if (width.value() == 0 || width.value() > (64 - lsb.value()))
throw OaknutException{ExceptionType::InvalidBitWidth};
- emit<"1011001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (-lsb.value()) & 63, width.value() - 1);
+ emit<"1011001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (~lsb.value() + 1) & 63, width.value() - 1);
}
void BFM(WReg wd, WReg wn, Imm<5> immr, Imm<5> imms)
{
@@ -1231,13 +1231,13 @@ void SBFIZ(WReg wd, WReg wn, Imm<5> lsb, Imm<5> width)
{
if (width.value() == 0 || width.value() > (32 - lsb.value()))
throw OaknutException{ExceptionType::InvalidBitWidth};
- emit<"0001001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (-lsb.value()) & 31, width.value() - 1);
+ emit<"0001001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (~lsb.value() + 1) & 31, width.value() - 1);
}
void SBFIZ(XReg xd, XReg xn, Imm<6> lsb, Imm<6> width)
{
if (width.value() == 0 || width.value() > (64 - lsb.value()))
throw OaknutException{ExceptionType::InvalidBitWidth};
- emit<"1001001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (-lsb.value()) & 63, width.value() - 1);
+ emit<"1001001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (~lsb.value() + 1) & 63, width.value() - 1);
}
void SBFM(WReg wd, WReg wn, Imm<5> immr, Imm<5> imms)
{
@@ -1627,13 +1627,13 @@ void UBFIZ(WReg wd, WReg wn, Imm<5> lsb, Imm<5> width)
{
if (width.value() == 0 || width.value() > (32 - lsb.value()))
throw OaknutException{ExceptionType::InvalidBitWidth};
- emit<"0101001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (-lsb.value()) & 31, width.value() - 1);
+ emit<"0101001100rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(wd, wn, (~lsb.value() + 1) & 31, width.value() - 1);
}
void UBFIZ(XReg xd, XReg xn, Imm<6> lsb, Imm<6> width)
{
if (width.value() == 0 || width.value() > (64 - lsb.value()))
throw OaknutException{ExceptionType::InvalidBitWidth};
- emit<"1101001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (-lsb.value()) & 63, width.value() - 1);
+ emit<"1101001101rrrrrrssssssnnnnnddddd", "d", "n", "r", "s">(xd, xn, (~lsb.value() + 1) & 63, width.value() - 1);
}
void UBFM(WReg wd, WReg wn, Imm<5> immr, Imm<5> imms)
{
diff --git a/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp b/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp
index a5bc5b82..0dffd0e3 100644
--- a/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp
+++ b/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp
@@ -5,13 +5,13 @@ void BFC(WReg wd, Imm<5> lsb, Imm<5> width)
{
if (width.value() == 0 || width.value() > (32 - lsb.value()))
throw OaknutException{ExceptionType::InvalidBitWidth};
- emit<"0011001100rrrrrrssssss11111ddddd", "d", "r", "s">(wd, (-lsb.value()) & 31, width.value() - 1);
+ emit<"0011001100rrrrrrssssss11111ddddd", "d", "r", "s">(wd, (~lsb.value() + 1) & 31, width.value() - 1);
}
void BFC(XReg xd, Imm<6> lsb, Imm<6> width)
{
if (width.value() == 0 || width.value() > (64 - lsb.value()))
throw OaknutException{ExceptionType::InvalidBitWidth};
- emit<"1011001101rrrrrrssssss11111ddddd", "d", "r", "s">(xd, (-lsb.value()) & 63, width.value() - 1);
+ emit<"1011001101rrrrrrssssss11111ddddd", "d", "r", "s">(xd, (~lsb.value() + 1) & 63, width.value() - 1);
}
void ESB()
{
diff --git a/include/oaknut/impl/oaknut_exception.inc.hpp b/include/oaknut/impl/oaknut_exception.inc.hpp
index 07402362..fc2738f0 100644
--- a/include/oaknut/impl/oaknut_exception.inc.hpp
+++ b/include/oaknut/impl/oaknut_exception.inc.hpp
@@ -29,6 +29,7 @@ OAKNUT_EXCEPTION(ImmOutOfRange, "outsized Imm value")
OAKNUT_EXCEPTION(InvalidAddSubExt, "invalid AddSubExt choice for rm size")
OAKNUT_EXCEPTION(InvalidIndexExt, "invalid IndexExt choice for rm size")
OAKNUT_EXCEPTION(BitPositionOutOfRange, "bit position exceeds size of rt")
+OAKNUT_EXCEPTION(RequiresAbsoluteAddressesContext, "absolute addresses required")
// mnemonics_*.inc.hpp
OAKNUT_EXCEPTION(InvalidCombination, "InvalidCombination")
diff --git a/include/oaknut/impl/offset.hpp b/include/oaknut/impl/offset.hpp
index 47859c78..a70941ff 100644
--- a/include/oaknut/impl/offset.hpp
+++ b/include/oaknut/impl/offset.hpp
@@ -45,7 +45,7 @@ struct AddrOffset {
: m_payload(&label)
{}
- AddrOffset(void* ptr)
+ AddrOffset(const void* ptr)
: m_payload(ptr)
{}
@@ -63,7 +63,7 @@ struct AddrOffset {
private:
template<typename Policy>
friend class BasicCodeGenerator;
- std::variant<std::uint32_t, Label*, void*> m_payload;
+ std::variant<std::uint32_t, Label*, const void*> m_payload;
};
template<std::size_t bitsize, std::size_t shift_amount>
@@ -78,13 +78,19 @@ struct PageOffset {
static std::uint32_t encode(std::uintptr_t current_addr, std::uintptr_t target)
{
- std::uint64_t diff = (static_cast<std::uint64_t>(target) >> shift_amount) - (static_cast<std::uint64_t>(current_addr) >> shift_amount);
+ std::uint64_t diff = static_cast<std::uint64_t>((static_cast<std::int64_t>(target) >> shift_amount) - (static_cast<std::int64_t>(current_addr) >> shift_amount));
if (detail::sign_extend<bitsize>(diff) != diff)
throw OaknutException{ExceptionType::OffsetOutOfRange};
diff &= detail::mask_from_size(bitsize);
return static_cast<std::uint32_t>(((diff & 3) << (bitsize - 2)) | (diff >> 2));
}
+ static bool valid(std::uintptr_t current_addr, std::uintptr_t target)
+ {
+ std::uint64_t diff = static_cast<std::uint64_t>((static_cast<std::int64_t>(target) >> shift_amount) - (static_cast<std::int64_t>(current_addr) >> shift_amount));
+ return detail::sign_extend<bitsize>(diff) == diff;
+ }
+
private:
template<typename Policy>
friend class BasicCodeGenerator;
diff --git a/include/oaknut/impl/overloaded.hpp b/include/oaknut/impl/overloaded.hpp
new file mode 100644
index 00000000..b15b8392
--- /dev/null
+++ b/include/oaknut/impl/overloaded.hpp
@@ -0,0 +1,16 @@
+// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+namespace oaknut::detail {
+
+template<class... Ts>
+struct overloaded : Ts... {
+ using Ts::operator()...;
+};
+
+template<class... Ts>
+overloaded(Ts...) -> overloaded<Ts...>;
+
+} // namespace oaknut::detail
diff --git a/include/oaknut/impl/reg.hpp b/include/oaknut/impl/reg.hpp
index eab02d84..649e67b3 100644
--- a/include/oaknut/impl/reg.hpp
+++ b/include/oaknut/impl/reg.hpp
@@ -52,8 +52,8 @@ struct DElem;
struct Reg {
constexpr explicit Reg(bool is_vector_, unsigned bitsize_, int index_)
- : m_index(index_)
- , m_bitsize(bitsize_)
+ : m_index(static_cast<std::int8_t>(index_))
+ , m_bitsize(static_cast<std::uint8_t>(bitsize_))
, m_is_vector(is_vector_)
{
assert(index_ >= -1 && index_ <= 31);
@@ -65,8 +65,8 @@ struct Reg {
constexpr bool is_vector() const { return m_is_vector; }
private:
- int m_index : 8;
- unsigned m_bitsize : 8;
+ std::int8_t m_index;
+ std::uint8_t m_bitsize;
bool m_is_vector;
};
@@ -190,7 +190,7 @@ struct VReg : public Reg {
struct VRegArranged : public Reg {
protected:
constexpr explicit VRegArranged(unsigned bitsize_, int index_, unsigned esize_)
- : Reg(true, bitsize_, index_), m_esize(esize_)
+ : Reg(true, bitsize_, index_), m_esize(static_cast<std::uint8_t>(esize_))
{
assert(esize_ != 0 && (esize_ & (esize_ - 1)) == 0 && "esize must be a power of two");
assert(esize_ <= bitsize_);
@@ -200,7 +200,7 @@ protected:
friend class BasicCodeGenerator;
private:
- int m_esize : 8;
+ std::uint8_t m_esize;
};
struct VReg_2H : public VRegArranged {
diff --git a/include/oaknut/impl/string_literal.hpp b/include/oaknut/impl/string_literal.hpp
index e09dfa65..412203e9 100644
--- a/include/oaknut/impl/string_literal.hpp
+++ b/include/oaknut/impl/string_literal.hpp
@@ -21,4 +21,22 @@ struct StringLiteral {
char value[N];
};
+namespace detail {
+
+template<StringLiteral<33> haystack, StringLiteral needles>
+consteval std::uint32_t find()
+{
+ std::uint32_t result = 0;
+ for (std::size_t i = 0; i < 32; i++) {
+ for (std::size_t a = 0; a < needles.strlen; a++) {
+ if (haystack.value[i] == needles.value[a]) {
+ result |= 1 << (31 - i);
+ }
+ }
+ }
+ return result;
+}
+
+} // namespace detail
+
} // namespace oaknut
diff --git a/include/oaknut/oaknut.hpp b/include/oaknut/oaknut.hpp
index b67f7fdd..aa80f81b 100644
--- a/include/oaknut/oaknut.hpp
+++ b/include/oaknut/oaknut.hpp
@@ -17,84 +17,69 @@
#include "oaknut/impl/list.hpp"
#include "oaknut/impl/multi_typed_name.hpp"
#include "oaknut/impl/offset.hpp"
+#include "oaknut/impl/overloaded.hpp"
#include "oaknut/impl/reg.hpp"
#include "oaknut/impl/string_literal.hpp"
#include "oaknut/oaknut_exception.hpp"
namespace oaknut {
-namespace detail {
-
-template<StringLiteral bs, StringLiteral barg>
-constexpr std::uint32_t get_bits()
-{
- std::uint32_t result = 0;
- for (std::size_t i = 0; i < 32; i++) {
- for (std::size_t a = 0; a < barg.strlen; a++) {
- if (bs.value[i] == barg.value[a]) {
- result |= 1 << (31 - i);
- }
- }
- }
- return result;
-}
-
-template<class... Ts>
-struct overloaded : Ts... {
- using Ts::operator()...;
-};
-
-template<class... Ts>
-overloaded(Ts...) -> overloaded<Ts...>;
-
-} // namespace detail
-
struct Label {
public:
Label() = default;
+ bool is_bound() const
+ {
+ return m_offset.has_value();
+ }
+
+ std::ptrdiff_t offset() const
+ {
+ return m_offset.value();
+ }
+
private:
template<typename Policy>
friend class BasicCodeGenerator;
- explicit Label(std::uintptr_t addr)
- : m_addr(addr)
+ explicit Label(std::ptrdiff_t offset)
+ : m_offset(offset)
{}
- using EmitFunctionType = std::uint32_t (*)(std::uintptr_t wb_addr, std::uintptr_t resolved_addr);
+ using EmitFunctionType = std::uint32_t (*)(std::ptrdiff_t wb_offset, std::ptrdiff_t resolved_offset);
struct Writeback {
- std::uintptr_t m_wb_addr;
+ std::ptrdiff_t m_wb_offset;
std::uint32_t m_mask;
EmitFunctionType m_fn;
};
- std::optional<std::uintptr_t> m_addr;
+ std::optional<std::ptrdiff_t> m_offset;
std::vector<Writeback> m_wbs;
};
template<typename Policy>
class BasicCodeGenerator : public Policy {
public:
- BasicCodeGenerator(typename Policy::constructor_argument_type arg)
- : Policy(arg)
+ BasicCodeGenerator(typename Policy::constructor_argument_type arg, std::uint32_t* xmem)
+ : Policy(arg, xmem)
{}
- Label l()
+ Label l() const
{
- return Label{Policy::current_address()};
+ return Label{Policy::offset()};
}
- void l(Label& label)
+ void l(Label& label) const
{
- if (label.m_addr)
+ if (label.is_bound())
throw OaknutException{ExceptionType::LabelRedefinition};
- const auto target_addr = Policy::current_address();
- label.m_addr = target_addr;
+ const auto target_offset = Policy::offset();
+ label.m_offset = target_offset;
for (auto& wb : label.m_wbs) {
- const std::uint32_t value = wb.m_fn(wb.m_wb_addr, target_addr);
- Policy::set_at_address(wb.m_wb_addr, value, wb.m_mask);
+ const std::uint32_t value = wb.m_fn(wb.m_wb_offset, target_offset);
+ Policy::set_at_offset(wb.m_wb_offset, value, wb.m_mask);
}
label.m_wbs.clear();
}
@@ -123,8 +108,8 @@ public:
return;
if (MovImm16::is_valid(imm))
return MOVZ(wd, imm);
- if (MovImm16::is_valid(~imm))
- return MOVN(wd, ~imm);
+ if (MovImm16::is_valid(static_cast<std::uint32_t>(~imm)))
+ return MOVN(wd, static_cast<std::uint32_t>(~imm));
if (detail::encode_bit_imm(imm))
return ORR(wd, WzrReg{}, imm);
@@ -173,10 +158,10 @@ public:
// Convenience function for moving pointers to registers
void MOVP2R(XReg xd, const void* addr)
{
- int64_t diff = reinterpret_cast<uint64_t>(addr) - Policy::current_address();
+ const int64_t diff = reinterpret_cast<std::uint64_t>(addr) - Policy::template xptr<std::uintptr_t>();
if (diff >= -0xF'FFFF && diff <= 0xF'FFFF) {
ADR(xd, addr);
- } else if (diff >= -int64_t{0xFFFF'FFFF} && diff <= int64_t{0xFFFF'FFFF}) {
+ } else if (PageOffset<21, 12>::valid(Policy::template xptr<std::uintptr_t>(), reinterpret_cast<std::uintptr_t>(addr))) {
ADRL(xd, addr);
} else {
MOV(xd, reinterpret_cast<uint64_t>(addr));
@@ -188,7 +173,7 @@ public:
if (alignment < 4 || (alignment & (alignment - 1)) != 0)
throw OaknutException{ExceptionType::InvalidAlignment};
- while (Policy::template ptr<std::uintptr_t>() & (alignment - 1)) {
+ while (Policy::offset() & (alignment - 1)) {
NOP();
}
}
@@ -210,85 +195,55 @@ private:
template<StringLiteral bs, StringLiteral... bargs, typename... Ts>
void emit(Ts... args)
{
- std::uint32_t encoding = detail::get_bits<bs, "1">();
- encoding |= (0 | ... | encode<detail::get_bits<bs, bargs>()>(std::forward<Ts>(args)));
+ constexpr std::uint32_t base = detail::find<bs, "1">();
+ std::uint32_t encoding = (base | ... | encode<detail::find<bs, bargs>()>(std::forward<Ts>(args)));
Policy::append(encoding);
}
+};
- template<std::uint32_t splat, std::size_t size, std::size_t align>
- std::uint32_t encode(AddrOffset<size, align> v)
+struct PointerCodeGeneratorPolicy {
+public:
+ std::ptrdiff_t offset() const
{
- static_assert(std::popcount(splat) == size - align);
-
- const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) {
- const std::ptrdiff_t diff = target - current_addr;
- return pdep<splat>(AddrOffset<size, align>::encode(diff));
- };
-
- return std::visit(detail::overloaded{
- [&](std::uint32_t encoding) {
- return pdep<splat>(encoding);
- },
- [&](Label* label) {
- if (label->m_addr) {
- return encode_fn(Policy::current_address(), *label->m_addr);
- }
-
- label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
- return 0u;
- },
- [&](void* p) {
- return encode_fn(Policy::current_address(), reinterpret_cast<std::uintptr_t>(p));
- },
- },
- v.m_payload);
+ return (m_ptr - m_wmem) * sizeof(std::uint32_t);
}
- template<std::uint32_t splat, std::size_t size, std::size_t shift_amount>
- std::uint32_t encode(PageOffset<size, shift_amount> v)
+ void set_offset(std::ptrdiff_t offset)
{
- static_assert(std::popcount(splat) == size);
-
- const auto encode_fn = [](std::uintptr_t current_addr, std::uintptr_t target) {
- return pdep<splat>(PageOffset<size, shift_amount>::encode(current_addr, target));
- };
-
- return std::visit(detail::overloaded{
- [&](Label* label) {
- if (label->m_addr) {
- return encode_fn(Policy::current_address(), *label->m_addr);
- }
-
- label->m_wbs.emplace_back(Label::Writeback{Policy::current_address(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
- return 0u;
- },
- [&](const void* p) {
- return encode_fn(Policy::current_address(), reinterpret_cast<std::uintptr_t>(p));
- },
- },
- v.m_payload);
+ if ((offset % sizeof(std::uint32_t)) != 0)
+ throw OaknutException{ExceptionType::InvalidAlignment};
+ m_ptr = m_wmem + offset / sizeof(std::uint32_t);
}
-};
-struct PointerCodeGeneratorPolicy {
-public:
template<typename T>
- T ptr()
+ T wptr() const
{
static_assert(std::is_pointer_v<T> || std::is_same_v<T, std::uintptr_t> || std::is_same_v<T, std::intptr_t>);
return reinterpret_cast<T>(m_ptr);
}
- void set_ptr(std::uint32_t* ptr_)
+ template<typename T>
+ T xptr() const
+ {
+ static_assert(std::is_pointer_v<T> || std::is_same_v<T, std::uintptr_t> || std::is_same_v<T, std::intptr_t>);
+ return reinterpret_cast<T>(m_xmem + (m_ptr - m_wmem));
+ }
+
+ void set_wptr(std::uint32_t* p)
{
- m_ptr = ptr_;
+ m_ptr = p;
+ }
+
+ void set_xptr(std::uint32_t* p)
+ {
+ m_ptr = m_wmem + (p - m_xmem);
}
protected:
using constructor_argument_type = std::uint32_t*;
- PointerCodeGeneratorPolicy(std::uint32_t* ptr_)
- : m_ptr(ptr_)
+ PointerCodeGeneratorPolicy(std::uint32_t* wmem, std::uint32_t* xmem)
+ : m_ptr(wmem), m_wmem(wmem), m_xmem(xmem)
{}
void append(std::uint32_t instruction)
@@ -296,22 +251,57 @@ protected:
*m_ptr++ = instruction;
}
- std::uintptr_t current_address()
+ void set_at_offset(std::ptrdiff_t offset, std::uint32_t value, std::uint32_t mask) const
{
- return reinterpret_cast<std::uintptr_t>(m_ptr);
+ std::uint32_t* p = m_wmem + offset / sizeof(std::uint32_t);
+ *p = (*p & mask) | value;
}
- void set_at_address(std::uintptr_t addr, std::uint32_t value, std::uint32_t mask)
+private:
+ std::uint32_t* m_ptr;
+ std::uint32_t* const m_wmem;
+ std::uint32_t* const m_xmem;
+};
+
+struct VectorCodeGeneratorPolicy {
+public:
+ std::ptrdiff_t offset() const
{
- std::uint32_t* p = reinterpret_cast<std::uint32_t*>(addr);
- *p = (*p & mask) | value;
+ return m_vec.size() * sizeof(std::uint32_t);
+ }
+
+ template<typename T>
+ T xptr() const
+ {
+ static_assert(std::is_pointer_v<T> || std::is_same_v<T, std::uintptr_t> || std::is_same_v<T, std::intptr_t>);
+ return reinterpret_cast<T>(m_xmem + m_vec.size());
+ }
+
+protected:
+ using constructor_argument_type = std::vector<std::uint32_t>&;
+
+ VectorCodeGeneratorPolicy(std::vector<std::uint32_t>& vec, std::uint32_t* xmem)
+ : m_vec(vec), m_xmem(xmem)
+ {}
+
+ void append(std::uint32_t instruction)
+ {
+ m_vec.push_back(instruction);
+ }
+
+ void set_at_offset(std::ptrdiff_t offset, std::uint32_t value, std::uint32_t mask) const
+ {
+ std::uint32_t& p = m_vec[offset / sizeof(std::uint32_t)];
+ p = (p & mask) | value;
}
private:
- std::uint32_t* m_ptr;
+ std::vector<std::uint32_t>& m_vec;
+ std::uint32_t* const m_xmem;
};
using CodeGenerator = BasicCodeGenerator<PointerCodeGeneratorPolicy>;
+using VectorCodeGenerator = BasicCodeGenerator<VectorCodeGeneratorPolicy>;
namespace util {