about | summary | refs | log | tree | commit | diff | homepage
path: root/zluda
diff options
context:
space:
mode:
author: Andrzej Janik <[email protected]> 2021-05-17 01:25:38 +0200
committer: Andrzej Janik <[email protected]> 2021-05-17 01:25:38 +0200
commit: 89e72e4e95858e329276b1feb080a847306e02d2 (patch)
tree: 763012d4cc6b6892596a71e240bb0bdea033f6c1 /zluda
parent: dca4c5bd21d816bb72c9a2772dd444a04717630a (diff)
download: ZLUDA-89e72e4e95858e329276b1feb080a847306e02d2.tar.gz
ZLUDA-89e72e4e95858e329276b1feb080a847306e02d2.zip
Handle even more export table functions
Diffstat (limited to 'zluda')
-rw-r--r-- zluda/Cargo.toml 3
-rw-r--r-- zluda/src/impl/export_table.rs 113
-rw-r--r-- zluda/src/impl/mod.rs 12
3 files changed, 127 insertions, 1 deletions
diff --git a/zluda/Cargo.toml b/zluda/Cargo.toml
index 6e0d077..c7d8cc9 100644
--- a/zluda/Cargo.toml
+++ b/zluda/Cargo.toml
@@ -15,6 +15,9 @@ lazy_static = "1.4"
num_enum = "0.4"
lz4-sys = "1.9"
+[target.'cfg(windows)'.dependencies]
+winapi = { version = "0.3", features = ["heapapi", "std"] }
+
[dev-dependencies]
cuda-driver-sys = "0.3.0"
paste = "1.0"
\ No newline at end of file
diff --git a/zluda/src/impl/export_table.rs b/zluda/src/impl/export_table.rs
index e5b17ca..bfae799 100644
--- a/zluda/src/impl/export_table.rs
+++ b/zluda/src/impl/export_table.rs
@@ -1,3 +1,5 @@
+use winapi::um::heapapi::{HeapAlloc, HeapFree};
+
use crate::cuda::CUresult;
use crate::{
cuda::{CUcontext, CUdevice, CUmodule, CUuuid},
@@ -34,6 +36,14 @@ pub fn get(table: *mut *const std::os::raw::c_void, id: *const CUuuid) -> CUresu
unsafe { *table = CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE.as_ptr() as *const _ };
CUresult::CUDA_SUCCESS
}
+ CTX_CREATE_BYPASS_GUID => {
+ unsafe { *table = CTX_CREATE_BYPASS_VTABLE.as_ptr() as *const _ };
+ CUresult::CUDA_SUCCESS
+ }
+ HEAP_ACCESS_GUID => {
+ unsafe { *table = HEAP_ACCESS_VTABLE.as_ptr() as *const _ };
+ CUresult::CUDA_SUCCESS
+ }
_ => CUresult::CUDA_ERROR_NOT_SUPPORTED,
}
}
@@ -412,3 +422,106 @@ fn lock_context<T>(
})?
}
}
+
+/// Export-table identifier that `get` answers with `CTX_CREATE_BYPASS_VTABLE`.
+#[rustfmt::skip]
+const CTX_CREATE_BYPASS_GUID: CUuuid = CUuuid {
+    bytes: [
+        0x0C, 0xA5, 0x0B, 0x8C, 0x10, 0x04, 0x92, 0x9A,
+        0x89, 0xA7, 0xD0, 0xDF, 0x10, 0xE7, 0x72, 0x86,
+    ],
+};
+
+/// Slot count of the table below: one size header plus one function pointer.
+const CTX_CREATE_BYPASS_LENGTH: usize = 2;
+/// Table handed out by `get` for `CTX_CREATE_BYPASS_GUID`.
+///
+/// Slot 0 carries the byte size of the whole table; slot 1 points at
+/// `ctx_create_v2_bypass`.
+static CTX_CREATE_BYPASS_VTABLE: [VTableEntry; CTX_CREATE_BYPASS_LENGTH] = [
+    VTableEntry {
+        // An array of N entries occupies exactly N * size_of::<entry>() bytes.
+        length: CTX_CREATE_BYPASS_LENGTH * mem::size_of::<VTableEntry>(),
+    },
+    VTableEntry {
+        ptr: ctx_create_v2_bypass as *const (),
+    },
+];
+
+/// Export-table entry that forwards straight to `context::create_v2`.
+///
+/// How this differs from `cuCtxCreate_v2` is not known; PhysX has been seen
+/// calling the two interchangeably, so both are routed to the same
+/// implementation.
+extern "system" fn ctx_create_v2_bypass(
+    pctx: *mut CUcontext,
+    flags: ::std::os::raw::c_uint,
+    dev: CUdevice,
+) -> CUresult {
+    // Translate the CUDA-facing handles into the crate's own types, in
+    // argument order, before delegating.
+    let ctx_out = pctx.decuda();
+    let device = dev.decuda();
+    let outcome = context::create_v2(ctx_out, flags, device);
+    outcome.encuda()
+}
+
+/// Export-table identifier that `get` answers with `HEAP_ACCESS_VTABLE`.
+#[rustfmt::skip]
+const HEAP_ACCESS_GUID: CUuuid = CUuuid {
+    bytes: [
+        0x19, 0x5B, 0xCB, 0xF4, 0xD6, 0x7D, 0x02, 0x4A,
+        0xAC, 0xC5, 0x1D, 0x29, 0xCE, 0xA6, 0x31, 0xAE,
+    ],
+};
+
+/// Record that `heap_alloc` allocates, fills in and returns to its caller,
+/// and that `heap_free` later releases.
+///
+/// NOTE(review): the layout is presumably reverse-engineered from the real
+/// driver; keep field order and widths as they are unless that is confirmed.
+#[repr(C)]
+struct HeapAllocRecord {
+    /// First opaque argument passed to `heap_alloc`, stored verbatim.
+    arg1: usize,
+    /// Second opaque argument passed to `heap_alloc`; `heap_free` copies it
+    /// into its optional out-pointer.
+    arg2: usize,
+    /// Meaning unknown; `heap_alloc` always stores zero here.
+    _unknown: usize,
+    /// Heap handle the record was allocated from (`GlobalState::global_heap`).
+    global_heap: *mut c_void,
+}
+
+/// Slot count of the table below: one size header plus two function pointers.
+const HEAP_ACCESS_LENGTH: usize = 3;
+/// Table handed out by `get` for `HEAP_ACCESS_GUID`.
+///
+/// Slot 0 carries the byte size of the whole table, slot 1 points at
+/// `heap_alloc` and slot 2 at `heap_free`.
+static HEAP_ACCESS_VTABLE: [VTableEntry; HEAP_ACCESS_LENGTH] = [
+    VTableEntry {
+        // An array of N entries occupies exactly N * size_of::<entry>() bytes.
+        length: HEAP_ACCESS_LENGTH * mem::size_of::<VTableEntry>(),
+    },
+    VTableEntry {
+        ptr: heap_alloc as *const (),
+    },
+    VTableEntry {
+        ptr: heap_free as *const (),
+    },
+];
+
+/// Allocates a `HeapAllocRecord` from `GlobalState::global_heap`, fills it
+/// in, and stores its address through `halloc_ptr`.
+///
+/// Returns `CUDA_ERROR_INVALID_VALUE` when `halloc_ptr` is null,
+/// `CUDA_ERROR_OUT_OF_MEMORY` when `HeapAlloc` yields null, whatever error
+/// `GlobalState::lock` reports, and `CUDA_SUCCESS` otherwise. `*halloc_ptr`
+/// is written only on success.
+///
+/// # Safety
+/// A non-null `halloc_ptr` must be valid for writing one pointer.
+///
+/// NOTE(review): `HeapAlloc` comes from `winapi`, which this commit adds to
+/// Cargo.toml only under `cfg(windows)`; nothing visible here gates this
+/// function on Windows.
+// TODO: reverse and implement for Linux
+unsafe extern "system" fn heap_alloc(
+    halloc_ptr: *mut *const HeapAllocRecord,
+    arg1: usize,
+    arg2: usize,
+) -> CUresult {
+    if halloc_ptr.is_null() {
+        return CUresult::CUDA_ERROR_INVALID_VALUE;
+    }
+    let allocation = GlobalState::lock(|state| {
+        let heap = state.global_heap;
+        let record =
+            HeapAlloc(heap, 0, mem::size_of::<HeapAllocRecord>()) as *mut HeapAllocRecord;
+        if record.is_null() {
+            return Err(CUresult::CUDA_ERROR_OUT_OF_MEMORY);
+        }
+        // The block is fresh and uninitialised, so populate every field in
+        // one store.
+        record.write(HeapAllocRecord {
+            arg1,
+            arg2,
+            _unknown: 0,
+            global_heap: heap,
+        });
+        Ok(record)
+    });
+    match allocation {
+        // Either taking the lock failed or the allocation itself did.
+        Err(status) | Ok(Err(status)) => status,
+        Ok(Ok(record)) => {
+            *halloc_ptr = record;
+            CUresult::CUDA_SUCCESS
+        }
+    }
+}
+
+/// Releases a record previously produced by `heap_alloc`.
+///
+/// When `arg1` is non-null, the record's `arg2` value is copied into it
+/// before the record is freed. Returns `CUDA_ERROR_INVALID_VALUE` for a null
+/// `halloc`; otherwise the result of `GlobalState::lock` converted through
+/// `encuda`.
+///
+/// # Safety
+/// `halloc` must point at a live `HeapAllocRecord`, and a non-null `arg1`
+/// must be valid for writing one `usize`.
+///
+/// NOTE(review): the BOOL returned by `HeapFree` is discarded, and the heap
+/// handle is taken from `GlobalState` rather than from the record's own
+/// `global_heap` field — confirm both are intended.
+// TODO: reverse and implement for Linux
+unsafe extern "system" fn heap_free(halloc: *mut HeapAllocRecord, arg1: *mut usize) -> CUresult {
+    if halloc.is_null() {
+        return CUresult::CUDA_ERROR_INVALID_VALUE;
+    }
+    if !arg1.is_null() {
+        // Read the value out before the record's memory is returned to the heap.
+        arg1.write((*halloc).arg2);
+    }
+    let released = GlobalState::lock(|state| {
+        HeapFree(state.global_heap, 0, halloc as *mut _);
+    });
+    released.encuda()
+}
diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs
index 67b3e2b..48e9a24 100644
--- a/zluda/src/impl/mod.rs
+++ b/zluda/src/impl/mod.rs
@@ -1,3 +1,5 @@
+use winapi::um::{heapapi::HeapCreate, winnt::HEAP_NO_SERIALIZE};
+
use crate::{
cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUmod_st, CUresult, CUstream_st},
r#impl::device::Device,
@@ -203,6 +205,7 @@ lazy_static! {
struct GlobalState {
devices: Vec<Device>,
+ global_heap: *mut c_void,
}
unsafe impl Send for GlobalState {}
@@ -301,7 +304,14 @@ pub fn init() -> Result<(), CUresult> {
None => return Err(CUresult::CUDA_ERROR_UNKNOWN),
Some(driver) => device::init(&driver)?,
};
- *global_state = Some(GlobalState { devices });
+ let global_heap = unsafe { HeapCreate(HEAP_NO_SERIALIZE, 0, 0) };
+ if global_heap == ptr::null_mut() {
+ return Err(CUresult::CUDA_ERROR_OUT_OF_MEMORY);
+ }
+ *global_state = Some(GlobalState {
+ devices,
+ global_heap,
+ });
drop(global_state);
Ok(())
}