diff options
author | Andrzej Janik <[email protected]> | 2024-04-03 16:48:07 +0200 |
---|---|---|
committer | Andrzej Janik <[email protected]> | 2024-04-03 16:48:07 +0200 |
commit | 865cab71d4b6b116addf05bb22ccf476eb7ae06a (patch) | |
tree | 0fecb9bd0a830c595774ba27f254087496af4934 | |
parent | e1d3ed3246fb192328d9341aacefc88e59785318 (diff) | |
parent | 468cffb0231f7ca5bb3252f35db8a3d4e24df6ab (diff) | |
download | ZLUDA-865cab71d4b6b116addf05bb22ccf476eb7ae06a.tar.gz ZLUDA-865cab71d4b6b116addf05bb22ccf476eb7ae06a.zip |
Merge remote-tracking branch 'public/malloc_private' into compat
-rw-r--r-- | zluda/src/impl/dark_api.rs | 77 | ||||
-rw-r--r-- | zluda/src/impl/mod.rs | 6 | ||||
-rw-r--r-- | zluda/tests/dark_api.rs | 92 | ||||
-rw-r--r-- | zluda_dark_api/src/lib.rs | 22 | ||||
-rw-r--r-- | zluda_dump/src/dark_api.rs | 19 |
5 files changed, 197 insertions, 19 deletions
diff --git a/zluda/src/impl/dark_api.rs b/zluda/src/impl/dark_api.rs index c3b596c..8fe4cf3 100644 --- a/zluda/src/impl/dark_api.rs +++ b/zluda/src/impl/dark_api.rs @@ -1,8 +1,8 @@ -use super::module; use super::{ context::{self, LocalStorageValue}, device, FromCuda, IntoCuda, LiveCheck, }; +use super::{module, GLOBAL_STATE}; use crate::r#impl::{dark_api, stream}; use cuda_types::*; use hip_common::zluda_ext::CudaResult; @@ -188,18 +188,79 @@ impl CudaDarkApi for CudaDarkApiZluda { } unsafe extern "system" fn heap_alloc( - _halloc_ptr: *mut *mut zluda_dark_api::HeapAllocRecord, - _param1: usize, - _param2: usize, + alloc_ptr: *mut *mut zluda_dark_api::HeapAllocRecord, + destructor: Option<unsafe extern "system" fn(u32, usize)>, + value: usize, ) -> CUresult { - super::unimplemented() + unsafe fn heap_alloc_impl( + destructor: Option<unsafe extern "system" fn(u32, usize)>, + value: usize, + ) -> Result<*mut zluda_dark_api::HeapAllocRecord, CUresult> { + let state = GLOBAL_STATE.get()?; + let entry = Box::into_raw(Box::new(zluda_dark_api::HeapAllocRecord { + destructor, + value, + prev_alloc: ptr::null_mut(), + next_alloc: ptr::null_mut(), + })); + { + let mut dark_api_heap = state + .dark_api_alloc + .lock() + .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; + let prev_entry = mem::replace(&mut *dark_api_heap, entry); + if prev_entry != ptr::null_mut() { + (&mut *prev_entry).prev_alloc = entry; + (&mut *entry).next_alloc = prev_entry; + } + } + Ok(entry) + } + match heap_alloc_impl(destructor, value) { + Ok(result) => { + *alloc_ptr = result; + CUresult::CUDA_SUCCESS + } + Err(err) => err, + } } unsafe extern "system" fn heap_free( - _halloc: *mut zluda_dark_api::HeapAllocRecord, - _param2: *mut usize, + alloc_handle: *mut zluda_dark_api::HeapAllocRecord, + value_ptr: *mut usize, ) -> CUresult { - super::unimplemented() + unsafe fn heap_free_impl( + alloc_handle: *mut zluda_dark_api::HeapAllocRecord, + ) -> Result<usize, CUresult> { + let state = GLOBAL_STATE.get()?; + 
{ + let mut dark_api_heap = state + .dark_api_alloc + .lock() + .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; + if alloc_handle == *dark_api_heap { + *dark_api_heap = (&*alloc_handle).next_alloc; + } + if (&*alloc_handle).next_alloc != ptr::null_mut() { + (&mut *(&mut *alloc_handle).next_alloc).prev_alloc = + (&*alloc_handle).prev_alloc; + } + if (&*alloc_handle).prev_alloc != ptr::null_mut() { + (&mut *(&mut *alloc_handle).prev_alloc).next_alloc = + (&*alloc_handle).next_alloc; + } + } + let value = (&*alloc_handle).value; + drop(Box::from_raw(alloc_handle)); + Ok(value) + } + match heap_free_impl(alloc_handle) { + Ok(value) => { + *value_ptr = value; + CUresult::CUDA_SUCCESS + } + Err(err) => err, + } } unsafe extern "system" fn device_get_attribute_ex( diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs index 1bce5e1..f19ef0e 100644 --- a/zluda/src/impl/mod.rs +++ b/zluda/src/impl/mod.rs @@ -9,7 +9,7 @@ use std::{ fs, mem::{self, ManuallyDrop, MaybeUninit}, ptr::{self, NonNull}, - sync::{atomic::AtomicI32, Once}, + sync::{atomic::AtomicI32, Mutex, Once}, }; use self::cache::KernelCache; @@ -79,11 +79,11 @@ static GLOBAL_STATE: Lazy<GlobalState> = Lazy::INIT; pub(crate) struct GlobalState { pub(crate) devices: Vec<device::Device>, - _dark_api_heap: *mut c_void, pub(crate) kernel_cache: Option<KernelCache>, pub(crate) comgr: Comgr, pub(crate) comgr_version: String, pub(crate) zero_buffers: bool, + pub(crate) dark_api_alloc: Mutex<*mut zluda_dark_api::HeapAllocRecord>, } assert_impl_one!(GlobalState: Sync); @@ -463,7 +463,7 @@ pub(crate) fn init(flags: u32) -> Result<(), CUresult> { GLOBAL_STATE.init(|| GlobalState { devices, kernel_cache, - _dark_api_heap: global_heap, + dark_api_alloc: Mutex::new(ptr::null_mut()), comgr, comgr_version, zero_buffers, diff --git a/zluda/tests/dark_api.rs b/zluda/tests/dark_api.rs new file mode 100644 index 0000000..192167a --- /dev/null +++ b/zluda/tests/dark_api.rs @@ -0,0 +1,92 @@ +use crate::common::CudaDriverFns;
+use cuda_types::*;
+use std::{mem, ptr};
+mod common;
+
// NOTE(review): `cuda_driver_test!` is not defined in this view; presumably it
// comes from the `common` module and registers the generic function below as
// a test - confirm.
+cuda_driver_test!(heap_alloc_chain);
+
// Allocates three heap records through the `HeapAccess` export table and
// checks how they are linked: each record's `prev_alloc` must point at the
// record allocated right after it, and its `next_alloc` at the record
// allocated right before it. The newest record has a null `prev_alloc` and
// the oldest record has a null `next_alloc`.
+unsafe fn heap_alloc_chain<T: CudaDriverFns>(cuda: T) {
+ assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
+ let mut export_table = mem::zeroed();
+ let guid = zluda_dark_api::HeapAccess::GUID;
+ assert_eq!(
+ cuda.cuGetExportTable(&mut export_table, &guid),
+ CUresult::CUDA_SUCCESS
+ );
// The first `usize` read from the table must equal the byte size of three
// `usize`-sized slots.
+ assert_eq!(3 * mem::size_of::<usize>(), *export_table.cast::<usize>());
+ let heap_access = zluda_dark_api::HeapAccess::new(export_table);
// Allocate three records in order, with values 1, 2, 3 and no destructor.
+ let mut record1 = ptr::null_mut();
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_alloc(&mut record1, None, 1)
+ );
+ let mut record2 = ptr::null_mut();
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_alloc(&mut record2, None, 2)
+ );
+ let mut record3 = ptr::null_mut();
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_alloc(&mut record3, None, 3)
+ );
// Oldest record: followed by record2, nothing older than it.
+ assert_eq!((&*record1).destructor, None);
+ assert_eq!((&*record1).value, 1);
+ assert_eq!((&*record1).prev_alloc, record2);
+ assert_eq!((&*record1).next_alloc, ptr::null_mut());
// Middle record: sits between record3 (newer) and record1 (older).
+ assert_eq!((&*record2).destructor, None);
+ assert_eq!((&*record2).value, 2);
+ assert_eq!((&*record2).prev_alloc, record3);
+ assert_eq!((&*record2).next_alloc, record1);
// Newest record: nothing newer than it, followed by record2.
+ assert_eq!((&*record3).destructor, None);
+ assert_eq!((&*record3).value, 3);
+ assert_eq!((&*record3).prev_alloc, ptr::null_mut());
+ assert_eq!((&*record3).next_alloc, record2);
+}
+
// NOTE(review): `cuda_driver_test!` is not defined in this view; presumably it
// comes from the `common` module and registers the generic function below as
// a test - confirm.
+cuda_driver_test!(heap_free);
+
// Allocates three heap records, frees the middle one and checks that
// `heap_free` writes the freed record's value to the output pointer and that
// the two remaining records end up linked directly to each other with their
// own `destructor` and `value` fields unchanged.
+unsafe fn heap_free<T: CudaDriverFns>(cuda: T) {
+ assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
+ let mut export_table = mem::zeroed();
+ let guid = zluda_dark_api::HeapAccess::GUID;
+ assert_eq!(
+ cuda.cuGetExportTable(&mut export_table, &guid),
+ CUresult::CUDA_SUCCESS
+ );
+ let heap_access = zluda_dark_api::HeapAccess::new(export_table);
// record1 and record2 carry the `shutdown` callback as destructor,
// record3 carries none.
+ let mut record1 = ptr::null_mut();
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_alloc(&mut record1, Some(shutdown), 11)
+ );
+ let mut record2 = ptr::null_mut();
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_alloc(&mut record2, Some(shutdown), 12)
+ );
+ let mut record3 = ptr::null_mut();
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_alloc(&mut record3, None, 13)
+ );
// Free the middle record; its stored value (12) must be written to `value`.
+ let mut value = 0usize;
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_free(record2, &mut value),
+ );
+ assert_eq!(value, 12);
// The destructor is compared by address: the `Option` of a function pointer
// is transmuted to `usize` and checked against `shutdown as usize`.
+ assert_eq!(
+ mem::transmute::<_, usize>((&*record1).destructor),
+ shutdown as usize
+ );
// record1 must now be linked straight to record3, skipping the freed record.
+ assert_eq!((&*record1).value, 11);
+ assert_eq!((&*record1).prev_alloc, record3);
+ assert_eq!((&*record1).next_alloc, ptr::null_mut());
+ assert_eq!((&*record3).destructor, None);
+ assert_eq!((&*record3).value, 13);
+ assert_eq!((&*record3).prev_alloc, ptr::null_mut());
+ assert_eq!((&*record3).next_alloc, record1);
+}
+
// Callback with an empty body that ignores both arguments; the `heap_free`
// test passes it as the `destructor` argument of `heap_alloc`.
+unsafe extern "system" fn shutdown(_unknown: u32, _value: usize) {}
diff --git a/zluda_dark_api/src/lib.rs b/zluda_dark_api/src/lib.rs index 15c6091..3d0766e 100644 --- a/zluda_dark_api/src/lib.rs +++ b/zluda_dark_api/src/lib.rs @@ -285,12 +285,16 @@ dark_api_table!( [0x19, 0x5B, 0xCB, 0xF4, 0xD6, 0x7D, 0x02, 0x4A, 0xAC, 0xC5, 0x1D, 0x29, 0xCE, 0xA6, 0x31, 0xAE] => HEAP_ACCESS [3] { 0 => SIZE_OF, + #[dump] 1 => heap_alloc( - halloc_ptr: *mut *mut HeapAllocRecord, - param1: usize, - param2: usize + alloc_ptr: *mut *mut HeapAllocRecord, + // destructor is called only on CUDA exit, on Windows + // that is DLL unload from DllMain + destructor: Option<unsafe extern "system" fn(u32, usize)>, + value: usize ) -> CUresult, - 2 => heap_free(halloc: *mut HeapAllocRecord, param2: *mut usize) -> CUresult + #[dump] + 2 => heap_free(halloc: *mut HeapAllocRecord, value: *mut usize) -> CUresult }, // This fn table is used by OptiX [0xB1u8, 0x05, 0x41, 0xE1, 0xF7, 0xC7, 0xC7, 0x4A, 0x9F, 0x64, 0xF2, 0x23, 0xBE, 0x99, 0xF1, 0xE2] @@ -487,10 +491,12 @@ pub enum ContextStateManager {} #[repr(C)] pub struct HeapAllocRecord { - param1: usize, - param2: usize, - _unknown: usize, - global_heap: *mut c_void, + pub destructor: Option<unsafe extern "system" fn(u32, usize)>, + pub value: usize, + // The two fields below are maintained by the driver, + // they form a doubly-linked list + pub prev_alloc: *mut HeapAllocRecord, + pub next_alloc: *mut HeapAllocRecord, } #[derive(Clone, Copy)] diff --git a/zluda_dump/src/dark_api.rs b/zluda_dump/src/dark_api.rs index 72e4198..1bdceb9 100644 --- a/zluda_dump/src/dark_api.rs +++ b/zluda_dump/src/dark_api.rs @@ -941,6 +941,25 @@ impl CudaDarkApiDump for CudaDarkApiDumpFns { fn_logger.result = Some(original_result);
original_result
}
+
// Dump-layer entry for `heap_alloc`. Currently a stub: the body is `todo!()`,
// which panics if this function is ever reached, and none of the parameters
// are used.
// NOTE(review): presumably this should forward the call to the underlying
// driver's `heap_alloc` and record the result, like the function right above
// it does with `fn_logger` - confirm against the rest of this impl before
// relying on the `#[dump]` attribute for this table entry.
+ unsafe fn heap_alloc_impl(
+ guid: &[u8; 16],
+ idx: usize,
+ alloc_ptr: *mut *mut zluda_dark_api::HeapAllocRecord,
+ destructor: Option<unsafe extern "system" fn(u32, usize)>,
+ value: usize,
+ ) -> CUresult {
+ todo!()
+ }
+
// Dump-layer entry for `heap_free`. Currently a stub: the body is `todo!()`,
// which panics if this function is ever reached, and none of the parameters
// are used.
// NOTE(review): presumably this should forward the call to the underlying
// driver's `heap_free` and record the result - confirm against the rest of
// this impl.
+ unsafe fn heap_free_impl(
+ guid: &[u8; 16],
+ idx: usize,
+ halloc: *mut zluda_dark_api::HeapAllocRecord,
+ value: *mut usize,
+ ) -> CUresult {
+ todo!()
+ }
}
unsafe fn deref_not_null<T>(ptr: *mut *mut T) -> *mut T {
|