From 89e72e4e95858e329276b1feb080a847306e02d2 Mon Sep 17 00:00:00 2001
From: Andrzej Janik
Date: Mon, 17 May 2021 01:25:38 +0200
Subject: Handle even more export table functions

---
 zluda/Cargo.toml               |   3 ++
 zluda/src/impl/export_table.rs | 113 +++++++++++++++++++++++++++++++++++++++++
 zluda/src/impl/mod.rs          |  12 ++++-
 3 files changed, 127 insertions(+), 1 deletion(-)

(limited to 'zluda')

diff --git a/zluda/Cargo.toml b/zluda/Cargo.toml
index 6e0d077..c7d8cc9 100644
--- a/zluda/Cargo.toml
+++ b/zluda/Cargo.toml
@@ -15,6 +15,9 @@ lazy_static = "1.4"
 num_enum = "0.4"
 lz4-sys = "1.9"
 
+[target.'cfg(windows)'.dependencies]
+winapi = { version = "0.3", features = ["heapapi", "std"] }
+
 [dev-dependencies]
 cuda-driver-sys = "0.3.0"
 paste = "1.0"
\ No newline at end of file
diff --git a/zluda/src/impl/export_table.rs b/zluda/src/impl/export_table.rs
index e5b17ca..bfae799 100644
--- a/zluda/src/impl/export_table.rs
+++ b/zluda/src/impl/export_table.rs
@@ -1,3 +1,5 @@
+use winapi::um::heapapi::{HeapAlloc, HeapFree};
+
 use crate::cuda::CUresult;
 use crate::{
     cuda::{CUcontext, CUdevice, CUmodule, CUuuid},
@@ -34,6 +36,14 @@ pub fn get(table: *mut *const std::os::raw::c_void, id: *const CUuuid) -> CUresu
             unsafe { *table = CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE.as_ptr() as *const _ };
             CUresult::CUDA_SUCCESS
         }
+        CTX_CREATE_BYPASS_GUID => {
+            unsafe { *table = CTX_CREATE_BYPASS_VTABLE.as_ptr() as *const _ };
+            CUresult::CUDA_SUCCESS
+        }
+        HEAP_ACCESS_GUID => {
+            unsafe { *table = HEAP_ACCESS_VTABLE.as_ptr() as *const _ };
+            CUresult::CUDA_SUCCESS
+        }
         _ => CUresult::CUDA_ERROR_NOT_SUPPORTED,
     }
 }
@@ -412,3 +422,106 @@ fn lock_context(
         })?
     }
 }
+
+const CTX_CREATE_BYPASS_GUID: CUuuid = CUuuid {
+    bytes: [
+        0x0C, 0xA5, 0x0B, 0x8C, 0x10, 0x04, 0x92, 0x9A, 0x89, 0xA7, 0xD0, 0xDF, 0x10, 0xE7, 0x72,
+        0x86,
+    ],
+};
+
+const CTX_CREATE_BYPASS_LENGTH: usize = 2;
+static CTX_CREATE_BYPASS_VTABLE: [VTableEntry; CTX_CREATE_BYPASS_LENGTH] = [
+    VTableEntry {
+        length: mem::size_of::<[VTableEntry; CTX_CREATE_BYPASS_LENGTH]>(),
+    },
+    VTableEntry {
+        ptr: ctx_create_v2_bypass as *const (),
+    },
+];
+
+// I have no idea what is the difference between this function and
+// cuCtxCreate_v2, but PhysX uses both interchangeably
+extern "system" fn ctx_create_v2_bypass(
+    pctx: *mut CUcontext,
+    flags: ::std::os::raw::c_uint,
+    dev: CUdevice,
+) -> CUresult {
+    context::create_v2(pctx.decuda(), flags, dev.decuda()).encuda()
+}
+
+const HEAP_ACCESS_GUID: CUuuid = CUuuid {
+    bytes: [
+        0x19, 0x5B, 0xCB, 0xF4, 0xD6, 0x7D, 0x02, 0x4A, 0xAC, 0xC5, 0x1D, 0x29, 0xCE, 0xA6, 0x31,
+        0xAE,
+    ],
+};
+
+#[repr(C)]
+struct HeapAllocRecord {
+    arg1: usize,
+    arg2: usize,
+    _unknown: usize,
+    global_heap: *mut c_void,
+}
+
+const HEAP_ACCESS_LENGTH: usize = 3;
+static HEAP_ACCESS_VTABLE: [VTableEntry; HEAP_ACCESS_LENGTH] = [
+    VTableEntry {
+        length: mem::size_of::<[VTableEntry; HEAP_ACCESS_LENGTH]>(),
+    },
+    VTableEntry {
+        ptr: heap_alloc as *const (),
+    },
+    VTableEntry {
+        ptr: heap_free as *const (),
+    },
+];
+
+// TODO: reverse and implement for Linux
+unsafe extern "system" fn heap_alloc(
+    halloc_ptr: *mut *const HeapAllocRecord,
+    arg1: usize,
+    arg2: usize,
+) -> CUresult {
+    if halloc_ptr == ptr::null_mut() {
+        return CUresult::CUDA_ERROR_INVALID_VALUE;
+    }
+    let halloc = GlobalState::lock(|global_state| {
+        let halloc = HeapAlloc(
+            global_state.global_heap,
+            0,
+            mem::size_of::<HeapAllocRecord>(),
+        ) as *mut HeapAllocRecord;
+        if halloc == ptr::null_mut() {
+            return Err(CUresult::CUDA_ERROR_OUT_OF_MEMORY);
+        }
+        (*halloc).arg1 = arg1;
+        (*halloc).arg2 = arg2;
+        (*halloc)._unknown = 0;
+        (*halloc).global_heap = global_state.global_heap;
+        Ok(halloc)
+    });
+    match halloc {
+        Ok(Ok(halloc)) => {
+            *halloc_ptr = halloc;
+            CUresult::CUDA_SUCCESS
+        }
+        Err(err) | Ok(Err(err)) => err,
+    }
+}
+
+// TODO: reverse and implement for Linux
+unsafe extern "system" fn heap_free(halloc: *mut HeapAllocRecord, arg1: *mut usize) -> CUresult {
+    if halloc == ptr::null_mut() {
+        return CUresult::CUDA_ERROR_INVALID_VALUE;
+    }
+    if arg1 != ptr::null_mut() {
+        *arg1 = (*halloc).arg2;
+    }
+    GlobalState::lock(|global_state| {
+        HeapFree(global_state.global_heap, 0, halloc as *mut _);
+        ()
+    })
+    .encuda()
+}
diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs
index 67b3e2b..48e9a24 100644
--- a/zluda/src/impl/mod.rs
+++ b/zluda/src/impl/mod.rs
@@ -1,3 +1,5 @@
+use winapi::um::{heapapi::HeapCreate, winnt::HEAP_NO_SERIALIZE};
+
 use crate::{
     cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUmod_st, CUresult, CUstream_st},
     r#impl::device::Device,
@@ -203,6 +205,7 @@ lazy_static! {
 
 struct GlobalState {
     devices: Vec<Device>,
+    global_heap: *mut c_void,
 }
 
 unsafe impl Send for GlobalState {}
@@ -301,7 +304,14 @@ pub fn init() -> Result<(), CUresult> {
         None => return Err(CUresult::CUDA_ERROR_UNKNOWN),
         Some(driver) => device::init(&driver)?,
     };
-    *global_state = Some(GlobalState { devices });
+    let global_heap = unsafe { HeapCreate(HEAP_NO_SERIALIZE, 0, 0) };
+    if global_heap == ptr::null_mut() {
+        return Err(CUresult::CUDA_ERROR_OUT_OF_MEMORY);
+    }
+    *global_state = Some(GlobalState {
+        devices,
+        global_heap,
+    });
     drop(global_state);
     Ok(())
 }
-- 
cgit v1.2.3