From 7ac67a89e9ac08d743242627cacefda518cefd68 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Tue, 10 Dec 2024 21:48:10 +0100 Subject: Enable Geekbench 5 (#304) --- ptx/lib/zluda_ptx_impl.cpp | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'ptx/lib/zluda_ptx_impl.cpp') diff --git a/ptx/lib/zluda_ptx_impl.cpp b/ptx/lib/zluda_ptx_impl.cpp index f86a7fd..7af9729 100644 --- a/ptx/lib/zluda_ptx_impl.cpp +++ b/ptx/lib/zluda_ptx_impl.cpp @@ -1,8 +1,10 @@ -// Every time this file changes it must te rebuilt, you need `rocm-llvm-dev` and `llvm-17`: -// /opt/rocm/llvm/bin/clang -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -nogpulib -O3 -mno-wavefrontsize64 -o zluda_ptx_impl.bc -emit-llvm -c --offload-device-only --offload-arch=gfx1010 && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc -o - | sed '/@llvm.used/d' | sed '/wchar_size/d' | sed '/llvm.module.flags/d' | sed 's/define hidden/define linkonce_odr/g' | sed 's/\"target-cpu\"=\"gfx1010\"//g' | sed -E 's/\"target-features\"=\"[^\"]+\"//g' | sed 's/ nneg / /g' | sed 's/ disjoint / /g' | llvm-as-17 - -o zluda_ptx_impl.bc && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc +// Every time this file changes it must te rebuilt, you need `rocm-llvm-dev` and `llvm-17` +// `fdenormal-fp-math=dynamic` is required to make functions eligible for inlining +// /opt/rocm/llvm/bin/clang -Xclang -fdenormal-fp-math=dynamic -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -nogpulib -O3 -mno-wavefrontsize64 -o zluda_ptx_impl.bc -emit-llvm -c --offload-device-only --offload-arch=gfx1010 && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc -o - | sed '/@llvm.used/d' | sed '/wchar_size/d' | sed '/llvm.module.flags/d' | sed 's/define hidden/define linkonce_odr/g' | sed 's/\"target-cpu\"=\"gfx1010\"//g' | sed -E 's/\"target-features\"=\"[^\"]+\"//g' | sed 's/ nneg / /g' | sed 's/ disjoint / /g' | llvm-as-17 - -o zluda_ptx_impl.bc && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc #include #include +#include #define FUNC(NAME) __device__ __attribute__((retain)) __zluda_ptx_impl_##NAME @@ -37,7 +39,7 @@ extern "C" return (uint32_t)__ockl_get_num_groups(member); } - uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device)); + uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __device__; uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos_32, uint32_t len_32) { uint32_t pos = pos_32 & 0xFFU; @@ -65,7 +67,7 @@ extern "C" return (base >> pos) & ((1UL << len) - 1UL); } - int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __attribute__((device)); + int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __device__; int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos_32, uint32_t len_32) { uint32_t pos = pos_32 & 0xFFU; @@ -120,7 +122,7 @@ extern "C" return (base << (64U - pos - len)) >> (64U - len); } - uint32_t __ockl_bfm_u32(uint32_t count, uint32_t offset) __attribute__((device)); + uint32_t __ockl_bfm_u32(uint32_t count, uint32_t offset) __device__; uint32_t FUNC(bfi_b32)(uint32_t insert, uint32_t base, uint32_t pos_32, uint32_t len_32) { uint32_t pos = pos_32 & 0xFFU; @@ -148,4 +150,20 @@ extern "C" mask = ((1UL << len) - 1UL) << (pos); return (~mask & base) | (mask & (insert << pos)); } + + void FUNC(bar_sync)(uint32_t) + { + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup"); + __builtin_amdgcn_s_barrier(); + } + + void FUNC(__assertfail)(uint64_t message, + uint64_t file, + uint32_t line, + uint64_t function, + uint64_t char_size) + { + (void)char_size; + __assert_fail((const char *)message, (const char *)file, line, (const char *)function); + } } -- cgit v1.2.3