aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAndrzej Janik <[email protected]>2024-04-03 01:46:10 +0200
committerAndrzej Janik <[email protected]>2024-04-03 01:46:10 +0200
commit468cffb0231f7ca5bb3252f35db8a3d4e24df6ab (patch)
treedb533157d2f35ed3b21bf7e00d4655dab43a382f
parent01cc959b453dec2b1d7a9e5ecad465669137989c (diff)
downloadZLUDA-468cffb0231f7ca5bb3252f35db8a3d4e24df6ab.tar.gz
ZLUDA-468cffb0231f7ca5bb3252f35db8a3d4e24df6ab.zip
Implement dark api alloc chain malloc_private
-rw-r--r-- zluda/src/impl/dark_api.rs 71
-rw-r--r-- zluda/src/impl/mod.rs 8
-rw-r--r-- zluda/tests/dark_api.rs 8
-rw-r--r-- zluda_dark_api/src/lib.rs 6
-rw-r--r-- zluda_dump/src/dark_api.rs 19
5 files changed, 97 insertions, 15 deletions
diff --git a/zluda/src/impl/dark_api.rs b/zluda/src/impl/dark_api.rs
index e7eb4b1..8fe4cf3 100644
--- a/zluda/src/impl/dark_api.rs
+++ b/zluda/src/impl/dark_api.rs
@@ -1,8 +1,8 @@
-use super::module;
use super::{
context::{self, LocalStorageValue},
device, FromCuda, IntoCuda, LiveCheck,
};
+use super::{module, GLOBAL_STATE};
use crate::r#impl::{dark_api, stream};
use cuda_types::*;
use hip_common::zluda_ext::CudaResult;
@@ -192,14 +192,75 @@ impl CudaDarkApi for CudaDarkApiZluda {
destructor: Option<unsafe extern "system" fn(u32, usize)>,
value: usize,
) -> CUresult {
- super::unimplemented()
+ unsafe fn heap_alloc_impl(
+ destructor: Option<unsafe extern "system" fn(u32, usize)>,
+ value: usize,
+ ) -> Result<*mut zluda_dark_api::HeapAllocRecord, CUresult> {
+ let state = GLOBAL_STATE.get()?;
+ let entry = Box::into_raw(Box::new(zluda_dark_api::HeapAllocRecord {
+ destructor,
+ value,
+ prev_alloc: ptr::null_mut(),
+ next_alloc: ptr::null_mut(),
+ }));
+ {
+ let mut dark_api_heap = state
+ .dark_api_alloc
+ .lock()
+ .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?;
+ let prev_entry = mem::replace(&mut *dark_api_heap, entry);
+ if prev_entry != ptr::null_mut() {
+ (&mut *prev_entry).prev_alloc = entry;
+ (&mut *entry).next_alloc = prev_entry;
+ }
+ }
+ Ok(entry)
+ }
+ match heap_alloc_impl(destructor, value) {
+ Ok(result) => {
+ *alloc_ptr = result;
+ CUresult::CUDA_SUCCESS
+ }
+ Err(err) => err,
+ }
}
unsafe extern "system" fn heap_free(
- alloc_ptr: *mut zluda_dark_api::HeapAllocRecord,
- value: *mut usize,
+ alloc_handle: *mut zluda_dark_api::HeapAllocRecord,
+ value_ptr: *mut usize,
) -> CUresult {
- super::unimplemented()
+ unsafe fn heap_free_impl(
+ alloc_handle: *mut zluda_dark_api::HeapAllocRecord,
+ ) -> Result<usize, CUresult> {
+ let state = GLOBAL_STATE.get()?;
+ {
+ let mut dark_api_heap = state
+ .dark_api_alloc
+ .lock()
+ .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?;
+ if alloc_handle == *dark_api_heap {
+ *dark_api_heap = (&*alloc_handle).next_alloc;
+ }
+ if (&*alloc_handle).next_alloc != ptr::null_mut() {
+ (&mut *(&mut *alloc_handle).next_alloc).prev_alloc =
+ (&*alloc_handle).prev_alloc;
+ }
+ if (&*alloc_handle).prev_alloc != ptr::null_mut() {
+ (&mut *(&mut *alloc_handle).prev_alloc).next_alloc =
+ (&*alloc_handle).next_alloc;
+ }
+ }
+ let value = (&*alloc_handle).value;
+ drop(Box::from_raw(alloc_handle));
+ Ok(value)
+ }
+ match heap_free_impl(alloc_handle) {
+ Ok(value) => {
+ *value_ptr = value;
+ CUresult::CUDA_SUCCESS
+ }
+ Err(err) => err,
+ }
}
unsafe extern "system" fn device_get_attribute_ex(
diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs
index 34566af..42fbd53 100644
--- a/zluda/src/impl/mod.rs
+++ b/zluda/src/impl/mod.rs
@@ -9,7 +9,7 @@ use std::{
fs,
mem::{self, ManuallyDrop, MaybeUninit},
ptr::{self, NonNull},
- sync::{atomic::AtomicI32, Once},
+ sync::{atomic::AtomicI32, Mutex, Once},
};
use self::cache::KernelCache;
@@ -78,11 +78,11 @@ static GLOBAL_STATE: Lazy<GlobalState> = Lazy::INIT;
pub(crate) struct GlobalState {
pub(crate) devices: Vec<device::Device>,
- _dark_api_heap: *mut c_void,
pub(crate) kernel_cache: Option<KernelCache>,
pub(crate) comgr: Comgr,
pub(crate) comgr_version: String,
pub(crate) zero_buffers: bool,
+ pub(crate) dark_api_alloc: Mutex<*mut zluda_dark_api::HeapAllocRecord>,
}
assert_impl_one!(GlobalState: Sync);
@@ -148,7 +148,7 @@ impl<T: ZludaObject> LiveCheck<T> {
outer_ptr as *mut Self
}
- pub unsafe fn as_ref_unchecked(&self) -> & T {
+ pub unsafe fn as_ref_unchecked(&self) -> &T {
&self.data
}
@@ -462,7 +462,7 @@ pub(crate) fn init(flags: u32) -> Result<(), CUresult> {
GLOBAL_STATE.init(|| GlobalState {
devices,
kernel_cache,
- _dark_api_heap: global_heap,
+ dark_api_alloc: Mutex::new(ptr::null_mut()),
comgr,
comgr_version,
zero_buffers,
diff --git a/zluda/tests/dark_api.rs b/zluda/tests/dark_api.rs
index 3e5b3e0..192167a 100644
--- a/zluda/tests/dark_api.rs
+++ b/zluda/tests/dark_api.rs
@@ -33,14 +33,14 @@ unsafe fn heap_alloc_chain<T: CudaDriverFns>(cuda: T) {
assert_eq!((&*record1).destructor, None);
assert_eq!((&*record1).value, 1);
assert_eq!((&*record1).prev_alloc, record2);
- assert_eq!((&*record1).next_alloc, ptr::null());
+ assert_eq!((&*record1).next_alloc, ptr::null_mut());
assert_eq!((&*record2).destructor, None);
assert_eq!((&*record2).value, 2);
assert_eq!((&*record2).prev_alloc, record3);
assert_eq!((&*record2).next_alloc, record1);
assert_eq!((&*record3).destructor, None);
assert_eq!((&*record3).value, 3);
- assert_eq!((&*record3).prev_alloc, ptr::null());
+ assert_eq!((&*record3).prev_alloc, ptr::null_mut());
assert_eq!((&*record3).next_alloc, record2);
}
@@ -82,10 +82,10 @@ unsafe fn heap_free<T: CudaDriverFns>(cuda: T) {
);
assert_eq!((&*record1).value, 11);
assert_eq!((&*record1).prev_alloc, record3);
- assert_eq!((&*record1).next_alloc, ptr::null());
+ assert_eq!((&*record1).next_alloc, ptr::null_mut());
assert_eq!((&*record3).destructor, None);
assert_eq!((&*record3).value, 13);
- assert_eq!((&*record3).prev_alloc, ptr::null());
+ assert_eq!((&*record3).prev_alloc, ptr::null_mut());
assert_eq!((&*record3).next_alloc, record1);
}
diff --git a/zluda_dark_api/src/lib.rs b/zluda_dark_api/src/lib.rs
index 31e0fea..0642f25 100644
--- a/zluda_dark_api/src/lib.rs
+++ b/zluda_dark_api/src/lib.rs
@@ -285,6 +285,7 @@ dark_api_table!(
[0x19, 0x5B, 0xCB, 0xF4, 0xD6, 0x7D, 0x02, 0x4A, 0xAC, 0xC5, 0x1D, 0x29, 0xCE, 0xA6, 0x31, 0xAE]
=> HEAP_ACCESS [3] {
0 => SIZE_OF,
+ #[dump]
1 => heap_alloc(
alloc_ptr: *mut *mut HeapAllocRecord,
// destructor is called only on CUDA exit, on Windows
@@ -292,6 +293,7 @@ dark_api_table!(
destructor: Option<unsafe extern "system" fn(u32, usize)>,
value: usize
) -> CUresult,
+ #[dump]
2 => heap_free(halloc: *mut HeapAllocRecord, value: *mut usize) -> CUresult
},
// This fn table is used by OptiX
@@ -493,8 +495,8 @@ pub struct HeapAllocRecord {
pub value: usize,
// The two fields below are maintained by the driver,
// they form a doubly-linked list
- pub prev_alloc: *const HeapAllocRecord,
- pub next_alloc: *const HeapAllocRecord,
+ pub prev_alloc: *mut HeapAllocRecord,
+ pub next_alloc: *mut HeapAllocRecord,
}
#[derive(Clone, Copy)]
diff --git a/zluda_dump/src/dark_api.rs b/zluda_dump/src/dark_api.rs
index a619bca..7c9d0b8 100644
--- a/zluda_dump/src/dark_api.rs
+++ b/zluda_dump/src/dark_api.rs
@@ -912,6 +912,25 @@ impl CudaDarkApiDump for CudaDarkApiDumpFns {
fn_logger.result = Some(original_result);
original_result
}
+
+ unsafe fn heap_alloc_impl(
+ guid: &[u8; 16],
+ idx: usize,
+ alloc_ptr: *mut *mut zluda_dark_api::HeapAllocRecord,
+ destructor: Option<unsafe extern "system" fn(u32, usize)>,
+ value: usize,
+ ) -> CUresult {
+ todo!()
+ }
+
+ unsafe fn heap_free_impl(
+ guid: &[u8; 16],
+ idx: usize,
+ halloc: *mut zluda_dark_api::HeapAllocRecord,
+ value: *mut usize,
+ ) -> CUresult {
+ todo!()
+ }
}
unsafe fn deref_not_null<T>(ptr: *mut *mut T) -> *mut T {