From 468cffb0231f7ca5bb3252f35db8a3d4e24df6ab Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Wed, 3 Apr 2024 01:46:10 +0200 Subject: Implement dark api alloc chain --- zluda/src/impl/dark_api.rs | 71 ++++++++++++++++++++++++++++++++++++++++++---- zluda/src/impl/mod.rs | 8 +++--- zluda/tests/dark_api.rs | 8 +++--- zluda_dark_api/src/lib.rs | 6 ++-- zluda_dump/src/dark_api.rs | 19 +++++++++++++ 5 files changed, 97 insertions(+), 15 deletions(-) diff --git a/zluda/src/impl/dark_api.rs b/zluda/src/impl/dark_api.rs index e7eb4b1..8fe4cf3 100644 --- a/zluda/src/impl/dark_api.rs +++ b/zluda/src/impl/dark_api.rs @@ -1,8 +1,8 @@ -use super::module; use super::{ context::{self, LocalStorageValue}, device, FromCuda, IntoCuda, LiveCheck, }; +use super::{module, GLOBAL_STATE}; use crate::r#impl::{dark_api, stream}; use cuda_types::*; use hip_common::zluda_ext::CudaResult; @@ -192,14 +192,75 @@ impl CudaDarkApi for CudaDarkApiZluda { destructor: Option, value: usize, ) -> CUresult { - super::unimplemented() + unsafe fn heap_alloc_impl( + destructor: Option, + value: usize, + ) -> Result<*mut zluda_dark_api::HeapAllocRecord, CUresult> { + let state = GLOBAL_STATE.get()?; + let entry = Box::into_raw(Box::new(zluda_dark_api::HeapAllocRecord { + destructor, + value, + prev_alloc: ptr::null_mut(), + next_alloc: ptr::null_mut(), + })); + { + let mut dark_api_heap = state + .dark_api_alloc + .lock() + .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; + let prev_entry = mem::replace(&mut *dark_api_heap, entry); + if prev_entry != ptr::null_mut() { + (&mut *prev_entry).prev_alloc = entry; + (&mut *entry).next_alloc = prev_entry; + } + } + Ok(entry) + } + match heap_alloc_impl(destructor, value) { + Ok(result) => { + *alloc_ptr = result; + CUresult::CUDA_SUCCESS + } + Err(err) => err, + } } unsafe extern "system" fn heap_free( - alloc_ptr: *mut zluda_dark_api::HeapAllocRecord, - value: *mut usize, + alloc_handle: *mut zluda_dark_api::HeapAllocRecord, + value_ptr: *mut usize, ) -> CUresult { - super::unimplemented() + unsafe fn heap_free_impl( + alloc_handle: *mut zluda_dark_api::HeapAllocRecord, + ) -> Result { + let state = GLOBAL_STATE.get()?; + { + let mut dark_api_heap = state + .dark_api_alloc + .lock() + .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; + if alloc_handle == *dark_api_heap { + *dark_api_heap = (&*alloc_handle).next_alloc; + } + if (&*alloc_handle).next_alloc != ptr::null_mut() { + (&mut *(&mut *alloc_handle).next_alloc).prev_alloc = + (&*alloc_handle).prev_alloc; + } + if (&*alloc_handle).prev_alloc != ptr::null_mut() { + (&mut *(&mut *alloc_handle).prev_alloc).next_alloc = + (&*alloc_handle).next_alloc; + } + } + let value = (&*alloc_handle).value; + drop(Box::from_raw(alloc_handle)); + Ok(value) + } + match heap_free_impl(alloc_handle) { + Ok(value) => { + *value_ptr = value; + CUresult::CUDA_SUCCESS + } + Err(err) => err, + } } unsafe extern "system" fn device_get_attribute_ex( diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs index 34566af..42fbd53 100644 --- a/zluda/src/impl/mod.rs +++ b/zluda/src/impl/mod.rs @@ -9,7 +9,7 @@ use std::{ fs, mem::{self, ManuallyDrop, MaybeUninit}, ptr::{self, NonNull}, - sync::{atomic::AtomicI32, Once}, + sync::{atomic::AtomicI32, Mutex, Once}, }; use self::cache::KernelCache; @@ -78,11 +78,11 @@ static GLOBAL_STATE: Lazy = Lazy::INIT; pub(crate) struct GlobalState { pub(crate) devices: Vec, - _dark_api_heap: *mut c_void, pub(crate) kernel_cache: Option, pub(crate) comgr: Comgr, pub(crate) comgr_version: String, pub(crate) zero_buffers: bool, + pub(crate) dark_api_alloc: Mutex<*mut zluda_dark_api::HeapAllocRecord>, } assert_impl_one!(GlobalState: Sync); @@ -148,7 +148,7 @@ impl LiveCheck { outer_ptr as *mut Self } - pub unsafe fn as_ref_unchecked(&self) -> & T { + pub unsafe fn as_ref_unchecked(&self) -> &T { &self.data } @@ -462,7 +462,7 @@ pub(crate) fn init(flags: u32) -> Result<(), CUresult> { GLOBAL_STATE.init(|| GlobalState { devices, kernel_cache, - _dark_api_heap: global_heap, + dark_api_alloc: Mutex::new(ptr::null_mut()), comgr, comgr_version, zero_buffers, diff --git a/zluda/tests/dark_api.rs b/zluda/tests/dark_api.rs index 3e5b3e0..192167a 100644 --- a/zluda/tests/dark_api.rs +++ b/zluda/tests/dark_api.rs @@ -33,14 +33,14 @@ unsafe fn heap_alloc_chain(cuda: T) { assert_eq!((&*record1).destructor, None); assert_eq!((&*record1).value, 1); assert_eq!((&*record1).prev_alloc, record2); - assert_eq!((&*record1).next_alloc, ptr::null()); + assert_eq!((&*record1).next_alloc, ptr::null_mut()); assert_eq!((&*record2).destructor, None); assert_eq!((&*record2).value, 2); assert_eq!((&*record2).prev_alloc, record3); assert_eq!((&*record2).next_alloc, record1); assert_eq!((&*record3).destructor, None); assert_eq!((&*record3).value, 3); - assert_eq!((&*record3).prev_alloc, ptr::null()); + assert_eq!((&*record3).prev_alloc, ptr::null_mut()); assert_eq!((&*record3).next_alloc, record2); } @@ -82,10 +82,10 @@ unsafe fn heap_free(cuda: T) { ); assert_eq!((&*record1).value, 11); assert_eq!((&*record1).prev_alloc, record3); - assert_eq!((&*record1).next_alloc, ptr::null()); + assert_eq!((&*record1).next_alloc, ptr::null_mut()); assert_eq!((&*record3).destructor, None); assert_eq!((&*record3).value, 13); - assert_eq!((&*record3).prev_alloc, ptr::null()); + assert_eq!((&*record3).prev_alloc, ptr::null_mut()); assert_eq!((&*record3).next_alloc, record1); } diff --git a/zluda_dark_api/src/lib.rs b/zluda_dark_api/src/lib.rs index 31e0fea..0642f25 100644 --- a/zluda_dark_api/src/lib.rs +++ b/zluda_dark_api/src/lib.rs @@ -285,6 +285,7 @@ dark_api_table!( [0x19, 0x5B, 0xCB, 0xF4, 0xD6, 0x7D, 0x02, 0x4A, 0xAC, 0xC5, 0x1D, 0x29, 0xCE, 0xA6, 0x31, 0xAE] => HEAP_ACCESS [3] { 0 => SIZE_OF, + #[dump] 1 => heap_alloc( alloc_ptr: *mut *mut HeapAllocRecord, // destructor is called only on CUDA exit, on Windows @@ -292,6 +293,7 @@ dark_api_table!( destructor: Option, value: usize ) -> CUresult, + #[dump] 2 => heap_free(halloc: *mut HeapAllocRecord, value: *mut usize) -> CUresult }, // This fn table is used by OptiX @@ -493,8 +495,8 @@ pub struct HeapAllocRecord { pub value: usize, // The two fields below are mainatined by the driver, // they form a a doubly-linked list - pub prev_alloc: *const HeapAllocRecord, - pub next_alloc: *const HeapAllocRecord, + pub prev_alloc: *mut HeapAllocRecord, + pub next_alloc: *mut HeapAllocRecord, } #[derive(Clone, Copy)] diff --git a/zluda_dump/src/dark_api.rs b/zluda_dump/src/dark_api.rs index a619bca..7c9d0b8 100644 --- a/zluda_dump/src/dark_api.rs +++ b/zluda_dump/src/dark_api.rs @@ -912,6 +912,25 @@ impl CudaDarkApiDump for CudaDarkApiDumpFns { fn_logger.result = Some(original_result); original_result } + + unsafe fn heap_alloc_impl( + guid: &[u8; 16], + idx: usize, + alloc_ptr: *mut *mut zluda_dark_api::HeapAllocRecord, + destructor: Option, + value: usize, + ) -> CUresult { + todo!() + } + + unsafe fn heap_free_impl( + guid: &[u8; 16], + idx: usize, + halloc: *mut zluda_dark_api::HeapAllocRecord, + value: *mut usize, + ) -> CUresult { + todo!() + } } unsafe fn deref_not_null(ptr: *mut *mut T) -> *mut T { -- cgit v1.2.3