use crate::{ cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUmod_st, CUresult, CUstream_st}, r#impl::device::Device, }; use std::{ ffi::c_void, mem::{self, ManuallyDrop}, os::raw::c_int, ptr, sync::Mutex, sync::TryLockError, }; #[cfg(test)] #[macro_use] pub mod test; pub mod context; pub mod device; pub mod export_table; pub mod function; pub mod memory; pub mod module; pub mod stream; #[cfg(debug_assertions)] pub fn unimplemented() -> CUresult { unimplemented!() } #[cfg(not(debug_assertions))] pub fn unimplemented() -> CUresult { CUresult::CUDA_ERROR_NOT_SUPPORTED } pub trait HasLivenessCookie: Sized { const COOKIE: usize; const LIVENESS_FAIL: CUresult; fn try_drop(&mut self) -> Result<(), CUresult>; } // This struct is a best-effort check if wrapped value has been dropped, // while it's inherently safe, its use coming from FFI is very unsafe #[repr(C)] pub struct LiveCheck { cookie: usize, data: ManuallyDrop, } impl LiveCheck { pub fn new(data: T) -> Self { LiveCheck { cookie: T::COOKIE, data: ManuallyDrop::new(data), } } fn destroy_impl(this: *mut Self) -> Result<(), CUresult> { let mut ctx_box = ManuallyDrop::new(unsafe { Box::from_raw(this) }); ctx_box.try_drop()?; unsafe { ManuallyDrop::drop(&mut ctx_box) }; Ok(()) } unsafe fn ptr_from_inner(this: *mut T) -> *mut Self { let outer_ptr = (this as *mut u8).sub(mem::size_of::()); outer_ptr as *mut Self } pub unsafe fn as_ref_unchecked(&self) -> &T { &self.data } pub fn as_option_mut(&mut self) -> Option<&mut T> { if self.cookie == T::COOKIE { Some(&mut self.data) } else { None } } pub fn as_result(&self) -> Result<&T, CUresult> { if self.cookie == T::COOKIE { Ok(&self.data) } else { Err(T::LIVENESS_FAIL) } } pub fn as_result_mut(&mut self) -> Result<&mut T, CUresult> { if self.cookie == T::COOKIE { Ok(&mut self.data) } else { Err(T::LIVENESS_FAIL) } } #[must_use] pub fn try_drop(&mut self) -> Result<(), CUresult> { if self.cookie == T::COOKIE { self.cookie = 0; self.data.try_drop()?; unsafe { ManuallyDrop::drop(&mut self.data) }; return Ok(()); } Err(T::LIVENESS_FAIL) } } impl Drop for LiveCheck { fn drop(&mut self) { self.cookie = 0; } } pub trait CudaRepr: Sized { type Impl: Sized; } impl CudaRepr for *mut T { type Impl = *mut T::Impl; } pub trait Decuda { fn decuda(self: Self) -> To; } impl Decuda<*mut T::Impl> for *mut T { fn decuda(self: Self) -> *mut T::Impl { self as *mut _ } } impl From for CUresult { fn from(result: l0::sys::ze_result_t) -> Self { match result { l0::sys::ze_result_t::ZE_RESULT_SUCCESS => CUresult::CUDA_SUCCESS, l0_sys::ze_result_t::ZE_RESULT_ERROR_UNINITIALIZED => { CUresult::CUDA_ERROR_NOT_INITIALIZED } l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ENUMERATION | l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT | l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION | l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION => { CUresult::CUDA_ERROR_INVALID_VALUE } l0_sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY => { CUresult::CUDA_ERROR_OUT_OF_MEMORY } l0_sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE => { CUresult::CUDA_ERROR_NOT_SUPPORTED } _ => CUresult::CUDA_ERROR_UNKNOWN, } } } impl From> for CUresult { fn from(_: TryLockError) -> Self { CUresult::CUDA_ERROR_ILLEGAL_STATE } } pub trait Encuda { type To: Sized; fn encuda(self: Self) -> Self::To; } impl Encuda for CUresult { type To = CUresult; fn encuda(self: Self) -> Self::To { self } } impl Encuda for l0::sys::ze_result_t { type To = CUresult; fn encuda(self: Self) -> Self::To { self.into() } } impl Encuda for () { type To = CUresult; fn encuda(self: Self) -> Self::To { CUresult::CUDA_SUCCESS } } impl, T2: Encuda> Encuda for Result { type To = CUresult; fn encuda(self: Self) -> Self::To { match self { Ok(e) => e.encuda(), Err(e) => e.encuda(), } } } lazy_static! { static ref GLOBAL_STATE: Mutex> = Mutex::new(None); } struct GlobalState { devices: Vec, } unsafe impl Send for GlobalState {} impl GlobalState { fn lock(f: impl FnOnce(&mut GlobalState) -> T) -> Result { let mut mutex = GLOBAL_STATE .lock() .unwrap_or_else(|poison| poison.into_inner()); let global_state = mutex.as_mut().ok_or(CUresult::CUDA_ERROR_ILLEGAL_STATE)?; Ok(f(global_state)) } fn lock_device( device::Index(dev_idx): device::Index, f: impl FnOnce(&'static mut device::Device) -> T, ) -> Result { if dev_idx < 0 { return Err(CUresult::CUDA_ERROR_INVALID_DEVICE); } Self::lock(|global_state| { if dev_idx >= global_state.devices.len() as c_int { Err(CUresult::CUDA_ERROR_INVALID_DEVICE) } else { Ok(f(unsafe { transmute_lifetime_mut(&mut global_state.devices[dev_idx as usize]) })) } })? } fn lock_current_context R, R>( f: F, ) -> Result { Self::lock_current_context_unchecked(|ctx| Ok(f(ctx.as_result_mut()?)))? } fn lock_current_context_unchecked R, R>( f: F, ) -> Result { context::CONTEXT_STACK.with(|stack| { stack .borrow_mut() .last_mut() .ok_or(CUresult::CUDA_ERROR_INVALID_CONTEXT) .map(|ctx| GlobalState::lock(|_| f(unsafe { &mut **ctx })))? }) } fn lock_stream( stream: *mut stream::Stream, f: impl FnOnce(&mut stream::StreamData) -> T, ) -> Result { if stream == ptr::null_mut() || stream == stream::CU_STREAM_LEGACY || stream == stream::CU_STREAM_PER_THREAD { Self::lock_current_context(|ctx| Ok(f(&mut ctx.default_stream)))? } else { Self::lock(|_| { let stream = unsafe { &mut *stream }.as_result_mut()?; Ok(f(stream)) })? } } fn lock_function( func: *mut function::Function, f: impl FnOnce(&mut function::FunctionData) -> T, ) -> Result { if func == ptr::null_mut() { return Err(CUresult::CUDA_ERROR_INVALID_HANDLE); } Self::lock(|_| { let func = unsafe { &mut *func }.as_result_mut()?; Ok(f(func)) })? } } // TODO: implement fn is_intel_gpu_driver(_: &l0::Driver) -> bool { true } pub fn init() -> Result<(), CUresult> { let mut global_state = GLOBAL_STATE .lock() .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; if global_state.is_some() { return Ok(()); } l0::init()?; let drivers = l0::Driver::get()?; let devices = match drivers.into_iter().find(is_intel_gpu_driver) { None => return Err(CUresult::CUDA_ERROR_UNKNOWN), Some(driver) => device::init(&driver)?, }; *global_state = Some(GlobalState { devices }); drop(global_state); Ok(()) } macro_rules! stringify_curesult { ($x:ident => [ $($variant:ident),+ ]) => { match $x { $( CUresult::$variant => Some(concat!(stringify!($variant), "\0")), )+ _ => None } } } pub(crate) fn get_error_string(error: CUresult, str: *mut *const i8) -> CUresult { if str == ptr::null_mut() { return CUresult::CUDA_ERROR_INVALID_VALUE; } let text = stringify_curesult!( error => [ CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE, CUDA_ERROR_OUT_OF_MEMORY, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_PROFILER_DISABLED, CUDA_ERROR_PROFILER_NOT_INITIALIZED, CUDA_ERROR_PROFILER_ALREADY_STARTED, CUDA_ERROR_PROFILER_ALREADY_STOPPED, CUDA_ERROR_NO_DEVICE, CUDA_ERROR_INVALID_DEVICE, CUDA_ERROR_INVALID_IMAGE, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_CONTEXT_ALREADY_CURRENT, CUDA_ERROR_MAP_FAILED, CUDA_ERROR_UNMAP_FAILED, CUDA_ERROR_ARRAY_IS_MAPPED, CUDA_ERROR_ALREADY_MAPPED, CUDA_ERROR_NO_BINARY_FOR_GPU, CUDA_ERROR_ALREADY_ACQUIRED, CUDA_ERROR_NOT_MAPPED, CUDA_ERROR_NOT_MAPPED_AS_ARRAY, CUDA_ERROR_NOT_MAPPED_AS_POINTER, CUDA_ERROR_ECC_UNCORRECTABLE, CUDA_ERROR_UNSUPPORTED_LIMIT, CUDA_ERROR_CONTEXT_ALREADY_IN_USE, CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, CUDA_ERROR_INVALID_PTX, CUDA_ERROR_INVALID_GRAPHICS_CONTEXT, CUDA_ERROR_NVLINK_UNCORRECTABLE, CUDA_ERROR_JIT_COMPILER_NOT_FOUND, CUDA_ERROR_INVALID_SOURCE, CUDA_ERROR_FILE_NOT_FOUND, CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, CUDA_ERROR_OPERATING_SYSTEM, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_ILLEGAL_STATE, CUDA_ERROR_NOT_FOUND, CUDA_ERROR_NOT_READY, CUDA_ERROR_ILLEGAL_ADDRESS, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE, CUDA_ERROR_CONTEXT_IS_DESTROYED, CUDA_ERROR_ASSERT, CUDA_ERROR_TOO_MANY_PEERS, CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED, CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED, CUDA_ERROR_HARDWARE_STACK_ERROR, CUDA_ERROR_ILLEGAL_INSTRUCTION, CUDA_ERROR_MISALIGNED_ADDRESS, CUDA_ERROR_INVALID_ADDRESS_SPACE, CUDA_ERROR_INVALID_PC, CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE, CUDA_ERROR_NOT_PERMITTED, CUDA_ERROR_NOT_SUPPORTED, CUDA_ERROR_SYSTEM_NOT_READY, CUDA_ERROR_SYSTEM_DRIVER_MISMATCH, CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE, CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED, CUDA_ERROR_STREAM_CAPTURE_INVALIDATED, CUDA_ERROR_STREAM_CAPTURE_MERGE, CUDA_ERROR_STREAM_CAPTURE_UNMATCHED, CUDA_ERROR_STREAM_CAPTURE_UNJOINED, CUDA_ERROR_STREAM_CAPTURE_ISOLATION, CUDA_ERROR_STREAM_CAPTURE_IMPLICIT, CUDA_ERROR_CAPTURED_EVENT, CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD, CUDA_ERROR_TIMEOUT, CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE, CUDA_ERROR_UNKNOWN ] ); match text { Some(text) => { unsafe { *str = text.as_ptr() as *const _ }; CUresult::CUDA_SUCCESS } None => CUresult::CUDA_ERROR_INVALID_VALUE, } } unsafe fn transmute_lifetime_mut<'a, 'b, T: ?Sized>(t: &'a mut T) -> &'b mut T { mem::transmute(t) } pub fn driver_get_version() -> c_int { i32::max_value() } impl<'a> CudaRepr for CUctx_st { type Impl = context::Context; } impl<'a> CudaRepr for CUdevice { type Impl = device::Index; } impl Decuda for CUdevice { fn decuda(self) -> device::Index { device::Index(self.0) } } impl<'a> CudaRepr for CUdeviceptr { type Impl = *mut c_void; } impl Decuda<*mut c_void> for CUdeviceptr { fn decuda(self) -> *mut c_void { self.0 as *mut _ } } impl<'a> CudaRepr for CUmod_st { type Impl = module::Module; } impl<'a> CudaRepr for CUfunc_st { type Impl = function::Function; } impl<'a> CudaRepr for CUstream_st { type Impl = stream::Stream; }