aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAndrzej Janik <[email protected]>2024-04-03 16:48:07 +0200
committerAndrzej Janik <[email protected]>2024-04-03 16:48:07 +0200
commit865cab71d4b6b116addf05bb22ccf476eb7ae06a (patch)
tree0fecb9bd0a830c595774ba27f254087496af4934
parente1d3ed3246fb192328d9341aacefc88e59785318 (diff)
parent468cffb0231f7ca5bb3252f35db8a3d4e24df6ab (diff)
downloadZLUDA-865cab71d4b6b116addf05bb22ccf476eb7ae06a.tar.gz
ZLUDA-865cab71d4b6b116addf05bb22ccf476eb7ae06a.zip
Merge remote-tracking branch 'public/malloc_private' into compat
-rw-r--r--zluda/src/impl/dark_api.rs77
-rw-r--r--zluda/src/impl/mod.rs6
-rw-r--r--zluda/tests/dark_api.rs92
-rw-r--r--zluda_dark_api/src/lib.rs22
-rw-r--r--zluda_dump/src/dark_api.rs19
5 files changed, 197 insertions, 19 deletions
diff --git a/zluda/src/impl/dark_api.rs b/zluda/src/impl/dark_api.rs
index c3b596c..8fe4cf3 100644
--- a/zluda/src/impl/dark_api.rs
+++ b/zluda/src/impl/dark_api.rs
@@ -1,8 +1,8 @@
-use super::module;
use super::{
context::{self, LocalStorageValue},
device, FromCuda, IntoCuda, LiveCheck,
};
+use super::{module, GLOBAL_STATE};
use crate::r#impl::{dark_api, stream};
use cuda_types::*;
use hip_common::zluda_ext::CudaResult;
@@ -188,18 +188,79 @@ impl CudaDarkApi for CudaDarkApiZluda {
}
unsafe extern "system" fn heap_alloc(
- _halloc_ptr: *mut *mut zluda_dark_api::HeapAllocRecord,
- _param1: usize,
- _param2: usize,
+ alloc_ptr: *mut *mut zluda_dark_api::HeapAllocRecord,
+ destructor: Option<unsafe extern "system" fn(u32, usize)>,
+ value: usize,
) -> CUresult {
- super::unimplemented()
+ unsafe fn heap_alloc_impl(
+ destructor: Option<unsafe extern "system" fn(u32, usize)>,
+ value: usize,
+ ) -> Result<*mut zluda_dark_api::HeapAllocRecord, CUresult> {
+ let state = GLOBAL_STATE.get()?;
+ let entry = Box::into_raw(Box::new(zluda_dark_api::HeapAllocRecord {
+ destructor,
+ value,
+ prev_alloc: ptr::null_mut(),
+ next_alloc: ptr::null_mut(),
+ }));
+ {
+ let mut dark_api_heap = state
+ .dark_api_alloc
+ .lock()
+ .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?;
+ let prev_entry = mem::replace(&mut *dark_api_heap, entry);
+ if prev_entry != ptr::null_mut() {
+ (&mut *prev_entry).prev_alloc = entry;
+ (&mut *entry).next_alloc = prev_entry;
+ }
+ }
+ Ok(entry)
+ }
+ match heap_alloc_impl(destructor, value) {
+ Ok(result) => {
+ *alloc_ptr = result;
+ CUresult::CUDA_SUCCESS
+ }
+ Err(err) => err,
+ }
}
unsafe extern "system" fn heap_free(
- _halloc: *mut zluda_dark_api::HeapAllocRecord,
- _param2: *mut usize,
+ alloc_handle: *mut zluda_dark_api::HeapAllocRecord,
+ value_ptr: *mut usize,
) -> CUresult {
- super::unimplemented()
+ unsafe fn heap_free_impl(
+ alloc_handle: *mut zluda_dark_api::HeapAllocRecord,
+ ) -> Result<usize, CUresult> {
+ let state = GLOBAL_STATE.get()?;
+ {
+ let mut dark_api_heap = state
+ .dark_api_alloc
+ .lock()
+ .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?;
+ if alloc_handle == *dark_api_heap {
+ *dark_api_heap = (&*alloc_handle).next_alloc;
+ }
+ if (&*alloc_handle).next_alloc != ptr::null_mut() {
+ (&mut *(&mut *alloc_handle).next_alloc).prev_alloc =
+ (&*alloc_handle).prev_alloc;
+ }
+ if (&*alloc_handle).prev_alloc != ptr::null_mut() {
+ (&mut *(&mut *alloc_handle).prev_alloc).next_alloc =
+ (&*alloc_handle).next_alloc;
+ }
+ }
+ let value = (&*alloc_handle).value;
+ drop(Box::from_raw(alloc_handle));
+ Ok(value)
+ }
+ match heap_free_impl(alloc_handle) {
+ Ok(value) => {
+ *value_ptr = value;
+ CUresult::CUDA_SUCCESS
+ }
+ Err(err) => err,
+ }
}
unsafe extern "system" fn device_get_attribute_ex(
diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs
index 1bce5e1..f19ef0e 100644
--- a/zluda/src/impl/mod.rs
+++ b/zluda/src/impl/mod.rs
@@ -9,7 +9,7 @@ use std::{
fs,
mem::{self, ManuallyDrop, MaybeUninit},
ptr::{self, NonNull},
- sync::{atomic::AtomicI32, Once},
+ sync::{atomic::AtomicI32, Mutex, Once},
};
use self::cache::KernelCache;
@@ -79,11 +79,11 @@ static GLOBAL_STATE: Lazy<GlobalState> = Lazy::INIT;
pub(crate) struct GlobalState {
pub(crate) devices: Vec<device::Device>,
- _dark_api_heap: *mut c_void,
pub(crate) kernel_cache: Option<KernelCache>,
pub(crate) comgr: Comgr,
pub(crate) comgr_version: String,
pub(crate) zero_buffers: bool,
+ pub(crate) dark_api_alloc: Mutex<*mut zluda_dark_api::HeapAllocRecord>,
}
assert_impl_one!(GlobalState: Sync);
@@ -463,7 +463,7 @@ pub(crate) fn init(flags: u32) -> Result<(), CUresult> {
GLOBAL_STATE.init(|| GlobalState {
devices,
kernel_cache,
- _dark_api_heap: global_heap,
+ dark_api_alloc: Mutex::new(ptr::null_mut()),
comgr,
comgr_version,
zero_buffers,
diff --git a/zluda/tests/dark_api.rs b/zluda/tests/dark_api.rs
new file mode 100644
index 0000000..192167a
--- /dev/null
+++ b/zluda/tests/dark_api.rs
@@ -0,0 +1,92 @@
+use crate::common::CudaDriverFns;
+use cuda_types::*;
+use std::{mem, ptr};
+mod common;
+
+cuda_driver_test!(heap_alloc_chain);
+
+unsafe fn heap_alloc_chain<T: CudaDriverFns>(cuda: T) {
+ assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
+ let mut export_table = mem::zeroed();
+ let guid = zluda_dark_api::HeapAccess::GUID;
+ assert_eq!(
+ cuda.cuGetExportTable(&mut export_table, &guid),
+ CUresult::CUDA_SUCCESS
+ );
+ assert_eq!(3 * mem::size_of::<usize>(), *export_table.cast::<usize>());
+ let heap_access = zluda_dark_api::HeapAccess::new(export_table);
+ let mut record1 = ptr::null_mut();
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_alloc(&mut record1, None, 1)
+ );
+ let mut record2 = ptr::null_mut();
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_alloc(&mut record2, None, 2)
+ );
+ let mut record3 = ptr::null_mut();
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_alloc(&mut record3, None, 3)
+ );
+ assert_eq!((&*record1).destructor, None);
+ assert_eq!((&*record1).value, 1);
+ assert_eq!((&*record1).prev_alloc, record2);
+ assert_eq!((&*record1).next_alloc, ptr::null_mut());
+ assert_eq!((&*record2).destructor, None);
+ assert_eq!((&*record2).value, 2);
+ assert_eq!((&*record2).prev_alloc, record3);
+ assert_eq!((&*record2).next_alloc, record1);
+ assert_eq!((&*record3).destructor, None);
+ assert_eq!((&*record3).value, 3);
+ assert_eq!((&*record3).prev_alloc, ptr::null_mut());
+ assert_eq!((&*record3).next_alloc, record2);
+}
+
+cuda_driver_test!(heap_free);
+
+unsafe fn heap_free<T: CudaDriverFns>(cuda: T) {
+ assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
+ let mut export_table = mem::zeroed();
+ let guid = zluda_dark_api::HeapAccess::GUID;
+ assert_eq!(
+ cuda.cuGetExportTable(&mut export_table, &guid),
+ CUresult::CUDA_SUCCESS
+ );
+ let heap_access = zluda_dark_api::HeapAccess::new(export_table);
+ let mut record1 = ptr::null_mut();
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_alloc(&mut record1, Some(shutdown), 11)
+ );
+ let mut record2 = ptr::null_mut();
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_alloc(&mut record2, Some(shutdown), 12)
+ );
+ let mut record3 = ptr::null_mut();
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_alloc(&mut record3, None, 13)
+ );
+ let mut value = 0usize;
+ assert_eq!(
+ CUresult::CUDA_SUCCESS,
+ heap_access.heap_free(record2, &mut value),
+ );
+ assert_eq!(value, 12);
+ assert_eq!(
+ mem::transmute::<_, usize>((&*record1).destructor),
+ shutdown as usize
+ );
+ assert_eq!((&*record1).value, 11);
+ assert_eq!((&*record1).prev_alloc, record3);
+ assert_eq!((&*record1).next_alloc, ptr::null_mut());
+ assert_eq!((&*record3).destructor, None);
+ assert_eq!((&*record3).value, 13);
+ assert_eq!((&*record3).prev_alloc, ptr::null_mut());
+ assert_eq!((&*record3).next_alloc, record1);
+}
+
+unsafe extern "system" fn shutdown(_unknown: u32, _value: usize) {}
diff --git a/zluda_dark_api/src/lib.rs b/zluda_dark_api/src/lib.rs
index 15c6091..3d0766e 100644
--- a/zluda_dark_api/src/lib.rs
+++ b/zluda_dark_api/src/lib.rs
@@ -285,12 +285,16 @@ dark_api_table!(
[0x19, 0x5B, 0xCB, 0xF4, 0xD6, 0x7D, 0x02, 0x4A, 0xAC, 0xC5, 0x1D, 0x29, 0xCE, 0xA6, 0x31, 0xAE]
=> HEAP_ACCESS [3] {
0 => SIZE_OF,
+ #[dump]
1 => heap_alloc(
- halloc_ptr: *mut *mut HeapAllocRecord,
- param1: usize,
- param2: usize
+ alloc_ptr: *mut *mut HeapAllocRecord,
+ // destructor is called only on CUDA exit; on Windows
+ // that means during DLL unload, from DllMain
+ destructor: Option<unsafe extern "system" fn(u32, usize)>,
+ value: usize
) -> CUresult,
- 2 => heap_free(halloc: *mut HeapAllocRecord, param2: *mut usize) -> CUresult
+ #[dump]
+ 2 => heap_free(halloc: *mut HeapAllocRecord, value: *mut usize) -> CUresult
},
// This fn table is used by OptiX
[0xB1u8, 0x05, 0x41, 0xE1, 0xF7, 0xC7, 0xC7, 0x4A, 0x9F, 0x64, 0xF2, 0x23, 0xBE, 0x99, 0xF1, 0xE2]
@@ -487,10 +491,12 @@ pub enum ContextStateManager {}
#[repr(C)]
pub struct HeapAllocRecord {
- param1: usize,
- param2: usize,
- _unknown: usize,
- global_heap: *mut c_void,
+ pub destructor: Option<unsafe extern "system" fn(u32, usize)>,
+ pub value: usize,
+ // The two fields below are maintained by the driver,
+ // they form a doubly-linked list
+ pub prev_alloc: *mut HeapAllocRecord,
+ pub next_alloc: *mut HeapAllocRecord,
}
#[derive(Clone, Copy)]
diff --git a/zluda_dump/src/dark_api.rs b/zluda_dump/src/dark_api.rs
index 72e4198..1bdceb9 100644
--- a/zluda_dump/src/dark_api.rs
+++ b/zluda_dump/src/dark_api.rs
@@ -941,6 +941,25 @@ impl CudaDarkApiDump for CudaDarkApiDumpFns {
fn_logger.result = Some(original_result);
original_result
}
+
+ unsafe fn heap_alloc_impl(
+ guid: &[u8; 16],
+ idx: usize,
+ alloc_ptr: *mut *mut zluda_dark_api::HeapAllocRecord,
+ destructor: Option<unsafe extern "system" fn(u32, usize)>,
+ value: usize,
+ ) -> CUresult {
+ todo!()
+ }
+
+ unsafe fn heap_free_impl(
+ guid: &[u8; 16],
+ idx: usize,
+ halloc: *mut zluda_dark_api::HeapAllocRecord,
+ value: *mut usize,
+ ) -> CUresult {
+ todo!()
+ }
}
unsafe fn deref_not_null<T>(ptr: *mut *mut T) -> *mut T {