diff options
author | Andrzej Janik <[email protected]> | 2024-04-03 01:46:10 +0200 |
---|---|---|
committer | Andrzej Janik <[email protected]> | 2024-04-03 01:46:10 +0200 |
commit | 468cffb0231f7ca5bb3252f35db8a3d4e24df6ab (patch) | |
tree | db533157d2f35ed3b21bf7e00d4655dab43a382f | |
parent | 01cc959b453dec2b1d7a9e5ecad465669137989c (diff) | |
download | ZLUDA-468cffb0231f7ca5bb3252f35db8a3d4e24df6ab.tar.gz ZLUDA-468cffb0231f7ca5bb3252f35db8a3d4e24df6ab.zip |
Implement dark api alloc chainmalloc_private
-rw-r--r-- | zluda/src/impl/dark_api.rs | 71 | ||||
-rw-r--r-- | zluda/src/impl/mod.rs | 8 | ||||
-rw-r--r-- | zluda/tests/dark_api.rs | 8 | ||||
-rw-r--r-- | zluda_dark_api/src/lib.rs | 6 | ||||
-rw-r--r-- | zluda_dump/src/dark_api.rs | 19 |
5 files changed, 97 insertions, 15 deletions
diff --git a/zluda/src/impl/dark_api.rs b/zluda/src/impl/dark_api.rs index e7eb4b1..8fe4cf3 100644 --- a/zluda/src/impl/dark_api.rs +++ b/zluda/src/impl/dark_api.rs @@ -1,8 +1,8 @@ -use super::module; use super::{ context::{self, LocalStorageValue}, device, FromCuda, IntoCuda, LiveCheck, }; +use super::{module, GLOBAL_STATE}; use crate::r#impl::{dark_api, stream}; use cuda_types::*; use hip_common::zluda_ext::CudaResult; @@ -192,14 +192,75 @@ impl CudaDarkApi for CudaDarkApiZluda { destructor: Option<unsafe extern "system" fn(u32, usize)>, value: usize, ) -> CUresult { - super::unimplemented() + unsafe fn heap_alloc_impl( + destructor: Option<unsafe extern "system" fn(u32, usize)>, + value: usize, + ) -> Result<*mut zluda_dark_api::HeapAllocRecord, CUresult> { + let state = GLOBAL_STATE.get()?; + let entry = Box::into_raw(Box::new(zluda_dark_api::HeapAllocRecord { + destructor, + value, + prev_alloc: ptr::null_mut(), + next_alloc: ptr::null_mut(), + })); + { + let mut dark_api_heap = state + .dark_api_alloc + .lock() + .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; + let prev_entry = mem::replace(&mut *dark_api_heap, entry); + if prev_entry != ptr::null_mut() { + (&mut *prev_entry).prev_alloc = entry; + (&mut *entry).next_alloc = prev_entry; + } + } + Ok(entry) + } + match heap_alloc_impl(destructor, value) { + Ok(result) => { + *alloc_ptr = result; + CUresult::CUDA_SUCCESS + } + Err(err) => err, + } } unsafe extern "system" fn heap_free( - alloc_ptr: *mut zluda_dark_api::HeapAllocRecord, - value: *mut usize, + alloc_handle: *mut zluda_dark_api::HeapAllocRecord, + value_ptr: *mut usize, ) -> CUresult { - super::unimplemented() + unsafe fn heap_free_impl( + alloc_handle: *mut zluda_dark_api::HeapAllocRecord, + ) -> Result<usize, CUresult> { + let state = GLOBAL_STATE.get()?; + { + let mut dark_api_heap = state + .dark_api_alloc + .lock() + .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; + if alloc_handle == *dark_api_heap { + *dark_api_heap = (&*alloc_handle).next_alloc; + } + if (&*alloc_handle).next_alloc != ptr::null_mut() { + (&mut *(&mut *alloc_handle).next_alloc).prev_alloc = + (&*alloc_handle).prev_alloc; + } + if (&*alloc_handle).prev_alloc != ptr::null_mut() { + (&mut *(&mut *alloc_handle).prev_alloc).next_alloc = + (&*alloc_handle).next_alloc; + } + } + let value = (&*alloc_handle).value; + drop(Box::from_raw(alloc_handle)); + Ok(value) + } + match heap_free_impl(alloc_handle) { + Ok(value) => { + *value_ptr = value; + CUresult::CUDA_SUCCESS + } + Err(err) => err, + } } unsafe extern "system" fn device_get_attribute_ex( diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs index 34566af..42fbd53 100644 --- a/zluda/src/impl/mod.rs +++ b/zluda/src/impl/mod.rs @@ -9,7 +9,7 @@ use std::{ fs, mem::{self, ManuallyDrop, MaybeUninit}, ptr::{self, NonNull}, - sync::{atomic::AtomicI32, Once}, + sync::{atomic::AtomicI32, Mutex, Once}, }; use self::cache::KernelCache; @@ -78,11 +78,11 @@ static GLOBAL_STATE: Lazy<GlobalState> = Lazy::INIT; pub(crate) struct GlobalState { pub(crate) devices: Vec<device::Device>, - _dark_api_heap: *mut c_void, pub(crate) kernel_cache: Option<KernelCache>, pub(crate) comgr: Comgr, pub(crate) comgr_version: String, pub(crate) zero_buffers: bool, + pub(crate) dark_api_alloc: Mutex<*mut zluda_dark_api::HeapAllocRecord>, } assert_impl_one!(GlobalState: Sync); @@ -148,7 +148,7 @@ impl<T: ZludaObject> LiveCheck<T> { outer_ptr as *mut Self } - pub unsafe fn as_ref_unchecked(&self) -> & T { + pub unsafe fn as_ref_unchecked(&self) -> &T { &self.data } @@ -462,7 +462,7 @@ pub(crate) fn init(flags: u32) -> Result<(), CUresult> { GLOBAL_STATE.init(|| GlobalState { devices, kernel_cache, - _dark_api_heap: global_heap, + dark_api_alloc: Mutex::new(ptr::null_mut()), comgr, comgr_version, zero_buffers, diff --git a/zluda/tests/dark_api.rs b/zluda/tests/dark_api.rs index 3e5b3e0..192167a 100644 --- a/zluda/tests/dark_api.rs +++ b/zluda/tests/dark_api.rs @@ -33,14 +33,14 @@ unsafe fn heap_alloc_chain<T: CudaDriverFns>(cuda: T) { assert_eq!((&*record1).destructor, None);
assert_eq!((&*record1).value, 1);
assert_eq!((&*record1).prev_alloc, record2);
- assert_eq!((&*record1).next_alloc, ptr::null());
+ assert_eq!((&*record1).next_alloc, ptr::null_mut());
assert_eq!((&*record2).destructor, None);
assert_eq!((&*record2).value, 2);
assert_eq!((&*record2).prev_alloc, record3);
assert_eq!((&*record2).next_alloc, record1);
assert_eq!((&*record3).destructor, None);
assert_eq!((&*record3).value, 3);
- assert_eq!((&*record3).prev_alloc, ptr::null());
+ assert_eq!((&*record3).prev_alloc, ptr::null_mut());
assert_eq!((&*record3).next_alloc, record2);
}
@@ -82,10 +82,10 @@ unsafe fn heap_free<T: CudaDriverFns>(cuda: T) { );
assert_eq!((&*record1).value, 11);
assert_eq!((&*record1).prev_alloc, record3);
- assert_eq!((&*record1).next_alloc, ptr::null());
+ assert_eq!((&*record1).next_alloc, ptr::null_mut());
assert_eq!((&*record3).destructor, None);
assert_eq!((&*record3).value, 13);
- assert_eq!((&*record3).prev_alloc, ptr::null());
+ assert_eq!((&*record3).prev_alloc, ptr::null_mut());
assert_eq!((&*record3).next_alloc, record1);
}
diff --git a/zluda_dark_api/src/lib.rs b/zluda_dark_api/src/lib.rs index 31e0fea..0642f25 100644 --- a/zluda_dark_api/src/lib.rs +++ b/zluda_dark_api/src/lib.rs @@ -285,6 +285,7 @@ dark_api_table!( [0x19, 0x5B, 0xCB, 0xF4, 0xD6, 0x7D, 0x02, 0x4A, 0xAC, 0xC5, 0x1D, 0x29, 0xCE, 0xA6, 0x31, 0xAE] => HEAP_ACCESS [3] { 0 => SIZE_OF, + #[dump] 1 => heap_alloc( alloc_ptr: *mut *mut HeapAllocRecord, // destructor is called only on CUDA exit, on Windows @@ -292,6 +293,7 @@ dark_api_table!( destructor: Option<unsafe extern "system" fn(u32, usize)>, value: usize ) -> CUresult, + #[dump] 2 => heap_free(halloc: *mut HeapAllocRecord, value: *mut usize) -> CUresult }, // This fn table is used by OptiX @@ -493,8 +495,8 @@ pub struct HeapAllocRecord { pub value: usize, // The two fields below are mainatined by the driver, // they form a a doubly-linked list - pub prev_alloc: *const HeapAllocRecord, - pub next_alloc: *const HeapAllocRecord, + pub prev_alloc: *mut HeapAllocRecord, + pub next_alloc: *mut HeapAllocRecord, } #[derive(Clone, Copy)] diff --git a/zluda_dump/src/dark_api.rs b/zluda_dump/src/dark_api.rs index a619bca..7c9d0b8 100644 --- a/zluda_dump/src/dark_api.rs +++ b/zluda_dump/src/dark_api.rs @@ -912,6 +912,25 @@ impl CudaDarkApiDump for CudaDarkApiDumpFns { fn_logger.result = Some(original_result);
original_result
}
+
+ unsafe fn heap_alloc_impl(
+ guid: &[u8; 16],
+ idx: usize,
+ alloc_ptr: *mut *mut zluda_dark_api::HeapAllocRecord,
+ destructor: Option<unsafe extern "system" fn(u32, usize)>,
+ value: usize,
+ ) -> CUresult {
+ todo!()
+ }
+
+ unsafe fn heap_free_impl(
+ guid: &[u8; 16],
+ idx: usize,
+ halloc: *mut zluda_dark_api::HeapAllocRecord,
+ value: *mut usize,
+ ) -> CUresult {
+ todo!()
+ }
}
unsafe fn deref_not_null<T>(ptr: *mut *mut T) -> *mut T {
|