diff options
Diffstat (limited to 'zluda/src')
-rw-r--r-- | zluda/src/cuda.rs | 4496 | ||||
-rw-r--r-- | zluda/src/cuda_impl/mod.rs | 1 | ||||
-rw-r--r-- | zluda/src/cuda_impl/rt.rs | 2 | ||||
-rw-r--r-- | zluda/src/impl/context.rs | 359 | ||||
-rw-r--r-- | zluda/src/impl/device.rs | 397 | ||||
-rw-r--r-- | zluda/src/impl/export_table.rs | 372 | ||||
-rw-r--r-- | zluda/src/impl/function.rs | 112 | ||||
-rw-r--r-- | zluda/src/impl/memory.rs | 100 | ||||
-rw-r--r-- | zluda/src/impl/mod.rs | 351 | ||||
-rw-r--r-- | zluda/src/impl/module.rs | 188 | ||||
-rw-r--r-- | zluda/src/impl/stream.rs | 242 | ||||
-rw-r--r-- | zluda/src/impl/test.rs | 157 | ||||
-rw-r--r-- | zluda/src/lib.rs | 16 |
13 files changed, 6793 insertions, 0 deletions
diff --git a/zluda/src/cuda.rs b/zluda/src/cuda.rs new file mode 100644 index 0000000..2dc26f5 --- /dev/null +++ b/zluda/src/cuda.rs @@ -0,0 +1,4496 @@ +use super::r#impl; +use super::r#impl::{Decuda, Encuda}; + +/* automatically generated by rust-bindgen 0.55.1 */ + +pub type __uint32_t = ::std::os::raw::c_uint; +pub type __uint64_t = ::std::os::raw::c_ulong; +pub type cuuint32_t = u32; +pub type cuuint64_t = u64; +#[repr(transparent)] +#[derive(Copy, Clone)] +pub struct CUdeviceptr(pub ::std::os::raw::c_ulonglong); +#[repr(transparent)] +#[derive(Copy, Clone)] +pub struct CUdevice(pub ::std::os::raw::c_int); +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUctx_st { + _unused: [u8; 0], +} +pub type CUcontext = *mut CUctx_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUmod_st { + _unused: [u8; 0], +} +pub type CUmodule = *mut CUmod_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUfunc_st { + _unused: [u8; 0], +} +pub type CUfunction = *mut CUfunc_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUarray_st { + _unused: [u8; 0], +} +pub type CUarray = *mut CUarray_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUmipmappedArray_st { + _unused: [u8; 0], +} +pub type CUmipmappedArray = *mut CUmipmappedArray_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUtexref_st { + _unused: [u8; 0], +} +pub type CUtexref = *mut CUtexref_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUsurfref_st { + _unused: [u8; 0], +} +pub type CUsurfref = *mut CUsurfref_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUevent_st { + _unused: [u8; 0], +} +pub type CUevent = *mut CUevent_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUstream_st { + _unused: [u8; 0], +} +pub type CUstream = *mut CUstream_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUgraphicsResource_st { + _unused: [u8; 0], +} +pub type CUgraphicsResource = *mut CUgraphicsResource_st; +pub type CUtexObject = ::std::os::raw::c_ulonglong; +pub type CUsurfObject = 
::std::os::raw::c_ulonglong; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUextMemory_st { + _unused: [u8; 0], +} +pub type CUexternalMemory = *mut CUextMemory_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUextSemaphore_st { + _unused: [u8; 0], +} +pub type CUexternalSemaphore = *mut CUextSemaphore_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUgraph_st { + _unused: [u8; 0], +} +pub type CUgraph = *mut CUgraph_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUgraphNode_st { + _unused: [u8; 0], +} +pub type CUgraphNode = *mut CUgraphNode_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUgraphExec_st { + _unused: [u8; 0], +} +pub type CUgraphExec = *mut CUgraphExec_st; +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq)] +pub struct CUuuid_st { + pub bytes: [::std::os::raw::c_uchar; 16usize], +} +pub type CUuuid = CUuuid_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUipcEventHandle_st { + pub reserved: [::std::os::raw::c_char; 64usize], +} +pub type CUipcEventHandle = CUipcEventHandle_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUipcMemHandle_st { + pub reserved: [::std::os::raw::c_char; 64usize], +} +pub type CUipcMemHandle = CUipcMemHandle_st; +impl CUstreamBatchMemOpType_enum { + pub const CU_STREAM_MEM_OP_WAIT_VALUE_32: CUstreamBatchMemOpType_enum = + CUstreamBatchMemOpType_enum(1); +} +impl CUstreamBatchMemOpType_enum { + pub const CU_STREAM_MEM_OP_WRITE_VALUE_32: CUstreamBatchMemOpType_enum = + CUstreamBatchMemOpType_enum(2); +} +impl CUstreamBatchMemOpType_enum { + pub const CU_STREAM_MEM_OP_WAIT_VALUE_64: CUstreamBatchMemOpType_enum = + CUstreamBatchMemOpType_enum(4); +} +impl CUstreamBatchMemOpType_enum { + pub const CU_STREAM_MEM_OP_WRITE_VALUE_64: CUstreamBatchMemOpType_enum = + CUstreamBatchMemOpType_enum(5); +} +impl CUstreamBatchMemOpType_enum { + pub const CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES: CUstreamBatchMemOpType_enum = + CUstreamBatchMemOpType_enum(3); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, 
PartialEq, Eq, Hash)] +pub struct CUstreamBatchMemOpType_enum(pub ::std::os::raw::c_uint); +pub use self::CUstreamBatchMemOpType_enum as CUstreamBatchMemOpType; +#[repr(C)] +#[derive(Copy, Clone)] +pub union CUstreamBatchMemOpParams_union { + pub operation: CUstreamBatchMemOpType, + pub waitValue: CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st, + pub writeValue: CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st, + pub flushRemoteWrites: CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st, + pub pad: [cuuint64_t; 6usize], + _bindgen_union_align: [u64; 6usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st { + pub operation: CUstreamBatchMemOpType, + pub address: CUdeviceptr, + pub __bindgen_anon_1: + CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1, + pub flags: ::std::os::raw::c_uint, + pub alias: CUdeviceptr, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1 { + pub value: cuuint32_t, + pub value64: cuuint64_t, + _bindgen_union_align: u64, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st { + pub operation: CUstreamBatchMemOpType, + pub address: CUdeviceptr, + pub __bindgen_anon_1: + CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1, + pub flags: ::std::os::raw::c_uint, + pub alias: CUdeviceptr, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1 { + pub value: cuuint32_t, + pub value64: cuuint64_t, + _bindgen_union_align: u64, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st { + pub operation: CUstreamBatchMemOpType, + pub flags: ::std::os::raw::c_uint, +} +pub type CUstreamBatchMemOpParams = 
CUstreamBatchMemOpParams_union; +impl CUarray_format_enum { + pub const CU_AD_FORMAT_UNSIGNED_INT8: CUarray_format_enum = CUarray_format_enum(1); +} +impl CUarray_format_enum { + pub const CU_AD_FORMAT_UNSIGNED_INT16: CUarray_format_enum = CUarray_format_enum(2); +} +impl CUarray_format_enum { + pub const CU_AD_FORMAT_UNSIGNED_INT32: CUarray_format_enum = CUarray_format_enum(3); +} +impl CUarray_format_enum { + pub const CU_AD_FORMAT_SIGNED_INT8: CUarray_format_enum = CUarray_format_enum(8); +} +impl CUarray_format_enum { + pub const CU_AD_FORMAT_SIGNED_INT16: CUarray_format_enum = CUarray_format_enum(9); +} +impl CUarray_format_enum { + pub const CU_AD_FORMAT_SIGNED_INT32: CUarray_format_enum = CUarray_format_enum(10); +} +impl CUarray_format_enum { + pub const CU_AD_FORMAT_HALF: CUarray_format_enum = CUarray_format_enum(16); +} +impl CUarray_format_enum { + pub const CU_AD_FORMAT_FLOAT: CUarray_format_enum = CUarray_format_enum(32); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUarray_format_enum(pub ::std::os::raw::c_uint); +pub use self::CUarray_format_enum as CUarray_format; +impl CUaddress_mode_enum { + pub const CU_TR_ADDRESS_MODE_WRAP: CUaddress_mode_enum = CUaddress_mode_enum(0); +} +impl CUaddress_mode_enum { + pub const CU_TR_ADDRESS_MODE_CLAMP: CUaddress_mode_enum = CUaddress_mode_enum(1); +} +impl CUaddress_mode_enum { + pub const CU_TR_ADDRESS_MODE_MIRROR: CUaddress_mode_enum = CUaddress_mode_enum(2); +} +impl CUaddress_mode_enum { + pub const CU_TR_ADDRESS_MODE_BORDER: CUaddress_mode_enum = CUaddress_mode_enum(3); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUaddress_mode_enum(pub ::std::os::raw::c_uint); +pub use self::CUaddress_mode_enum as CUaddress_mode; +impl CUfilter_mode_enum { + pub const CU_TR_FILTER_MODE_POINT: CUfilter_mode_enum = CUfilter_mode_enum(0); +} +impl CUfilter_mode_enum { + pub const CU_TR_FILTER_MODE_LINEAR: CUfilter_mode_enum = 
CUfilter_mode_enum(1); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUfilter_mode_enum(pub ::std::os::raw::c_uint); +pub use self::CUfilter_mode_enum as CUfilter_mode; +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: CUdevice_attribute_enum = + CUdevice_attribute_enum(1); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X: CUdevice_attribute_enum = + CUdevice_attribute_enum(2); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y: CUdevice_attribute_enum = + CUdevice_attribute_enum(3); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z: CUdevice_attribute_enum = + CUdevice_attribute_enum(4); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X: CUdevice_attribute_enum = + CUdevice_attribute_enum(5); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y: CUdevice_attribute_enum = + CUdevice_attribute_enum(6); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z: CUdevice_attribute_enum = + CUdevice_attribute_enum(7); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: CUdevice_attribute_enum = + CUdevice_attribute_enum(8); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK: CUdevice_attribute_enum = + CUdevice_attribute_enum(8); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: CUdevice_attribute_enum = + CUdevice_attribute_enum(9); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_WARP_SIZE: CUdevice_attribute_enum = CUdevice_attribute_enum(10); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_PITCH: CUdevice_attribute_enum = CUdevice_attribute_enum(11); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: 
CUdevice_attribute_enum = + CUdevice_attribute_enum(12); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK: CUdevice_attribute_enum = + CUdevice_attribute_enum(12); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_CLOCK_RATE: CUdevice_attribute_enum = CUdevice_attribute_enum(13); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT: CUdevice_attribute_enum = + CUdevice_attribute_enum(14); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: CUdevice_attribute_enum = + CUdevice_attribute_enum(15); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: CUdevice_attribute_enum = + CUdevice_attribute_enum(16); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT: CUdevice_attribute_enum = + CUdevice_attribute_enum(17); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_INTEGRATED: CUdevice_attribute_enum = CUdevice_attribute_enum(18); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: CUdevice_attribute_enum = + CUdevice_attribute_enum(19); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_COMPUTE_MODE: CUdevice_attribute_enum = + CUdevice_attribute_enum(20); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(21); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(22); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT: CUdevice_attribute_enum = + CUdevice_attribute_enum(23); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(24); +} +impl CUdevice_attribute_enum { + pub const 
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT: CUdevice_attribute_enum = + CUdevice_attribute_enum(25); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(26); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(27); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT: CUdevice_attribute_enum = + CUdevice_attribute_enum(28); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS: CUdevice_attribute_enum = + CUdevice_attribute_enum(29); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(27); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT: CUdevice_attribute_enum = + CUdevice_attribute_enum(28); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES: CUdevice_attribute_enum = + CUdevice_attribute_enum(29); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT: CUdevice_attribute_enum = + CUdevice_attribute_enum(30); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: CUdevice_attribute_enum = + CUdevice_attribute_enum(31); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_ECC_ENABLED: CUdevice_attribute_enum = + CUdevice_attribute_enum(32); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_PCI_BUS_ID: CUdevice_attribute_enum = CUdevice_attribute_enum(33); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID: CUdevice_attribute_enum = + CUdevice_attribute_enum(34); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_TCC_DRIVER: CUdevice_attribute_enum = 
CUdevice_attribute_enum(35); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE: CUdevice_attribute_enum = + CUdevice_attribute_enum(36); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(37); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE: CUdevice_attribute_enum = + CUdevice_attribute_enum(38); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR: CUdevice_attribute_enum = + CUdevice_attribute_enum(39); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT: CUdevice_attribute_enum = + CUdevice_attribute_enum(40); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING: CUdevice_attribute_enum = + CUdevice_attribute_enum(41); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(42); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS: CUdevice_attribute_enum = + CUdevice_attribute_enum(43); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER: CUdevice_attribute_enum = + CUdevice_attribute_enum(44); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(45); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT: CUdevice_attribute_enum = + CUdevice_attribute_enum(46); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE: CUdevice_attribute_enum = + CUdevice_attribute_enum(47); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE: CUdevice_attribute_enum = + CUdevice_attribute_enum(48); +} 
+impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE: CUdevice_attribute_enum = + CUdevice_attribute_enum(49); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID: CUdevice_attribute_enum = + CUdevice_attribute_enum(50); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT: CUdevice_attribute_enum = + CUdevice_attribute_enum(51); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(52); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(53); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS: CUdevice_attribute_enum = + CUdevice_attribute_enum(54); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(55); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(56); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT: CUdevice_attribute_enum = + CUdevice_attribute_enum(57); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(58); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT: CUdevice_attribute_enum = + CUdevice_attribute_enum(59); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(60); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(61); +} +impl 
CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS: CUdevice_attribute_enum = + CUdevice_attribute_enum(62); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(63); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT: CUdevice_attribute_enum = + CUdevice_attribute_enum(64); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS: CUdevice_attribute_enum = + CUdevice_attribute_enum(65); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(66); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(67); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS: CUdevice_attribute_enum = + CUdevice_attribute_enum(68); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(69); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(70); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT: CUdevice_attribute_enum = + CUdevice_attribute_enum(71); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH: CUdevice_attribute_enum = + CUdevice_attribute_enum(72); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(73); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT: 
CUdevice_attribute_enum = + CUdevice_attribute_enum(74); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR: CUdevice_attribute_enum = + CUdevice_attribute_enum(75); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR: CUdevice_attribute_enum = + CUdevice_attribute_enum(76); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH: CUdevice_attribute_enum = + CUdevice_attribute_enum(77); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED: CUdevice_attribute_enum = + CUdevice_attribute_enum(78); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED: CUdevice_attribute_enum = + CUdevice_attribute_enum(79); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED: CUdevice_attribute_enum = + CUdevice_attribute_enum(80); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR: CUdevice_attribute_enum = + CUdevice_attribute_enum(81); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR: CUdevice_attribute_enum = + CUdevice_attribute_enum(82); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY: CUdevice_attribute_enum = + CUdevice_attribute_enum(83); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD: CUdevice_attribute_enum = + CUdevice_attribute_enum(84); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID: CUdevice_attribute_enum = + CUdevice_attribute_enum(85); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED: CUdevice_attribute_enum = + CUdevice_attribute_enum(86); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO: 
CUdevice_attribute_enum = + CUdevice_attribute_enum(87); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS: CUdevice_attribute_enum = + CUdevice_attribute_enum(88); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS: CUdevice_attribute_enum = + CUdevice_attribute_enum(89); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED: CUdevice_attribute_enum = + CUdevice_attribute_enum(90); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM: CUdevice_attribute_enum = + CUdevice_attribute_enum(91); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS: CUdevice_attribute_enum = + CUdevice_attribute_enum(92); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS: CUdevice_attribute_enum = + CUdevice_attribute_enum(93); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR: CUdevice_attribute_enum = + CUdevice_attribute_enum(94); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH: CUdevice_attribute_enum = + CUdevice_attribute_enum(95); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH: CUdevice_attribute_enum = + CUdevice_attribute_enum(96); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN: CUdevice_attribute_enum = + CUdevice_attribute_enum(97); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES: CUdevice_attribute_enum = + CUdevice_attribute_enum(98); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED: CUdevice_attribute_enum = + CUdevice_attribute_enum(99); +} +impl CUdevice_attribute_enum { + pub const 
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES: + CUdevice_attribute_enum = CUdevice_attribute_enum(100); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST: CUdevice_attribute_enum = + CUdevice_attribute_enum(101); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED: CUdevice_attribute_enum = + CUdevice_attribute_enum(102); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED: + CUdevice_attribute_enum = CUdevice_attribute_enum(103); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED: CUdevice_attribute_enum = + CUdevice_attribute_enum(104); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED: CUdevice_attribute_enum = + CUdevice_attribute_enum(105); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR: CUdevice_attribute_enum = + CUdevice_attribute_enum(106); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED: CUdevice_attribute_enum = + CUdevice_attribute_enum(107); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE: CUdevice_attribute_enum = + CUdevice_attribute_enum(108); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE: CUdevice_attribute_enum = + CUdevice_attribute_enum(109); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED: CUdevice_attribute_enum = + CUdevice_attribute_enum(110); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK: CUdevice_attribute_enum = + CUdevice_attribute_enum(111); +} +impl CUdevice_attribute_enum { + pub const CU_DEVICE_ATTRIBUTE_MAX: CUdevice_attribute_enum = 
CUdevice_attribute_enum(112); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUdevice_attribute_enum(pub ::std::os::raw::c_uint); +pub use self::CUdevice_attribute_enum as CUdevice_attribute; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUdevprop_st { + pub maxThreadsPerBlock: ::std::os::raw::c_int, + pub maxThreadsDim: [::std::os::raw::c_int; 3usize], + pub maxGridSize: [::std::os::raw::c_int; 3usize], + pub sharedMemPerBlock: ::std::os::raw::c_int, + pub totalConstantMemory: ::std::os::raw::c_int, + pub SIMDWidth: ::std::os::raw::c_int, + pub memPitch: ::std::os::raw::c_int, + pub regsPerBlock: ::std::os::raw::c_int, + pub clockRate: ::std::os::raw::c_int, + pub textureAlign: ::std::os::raw::c_int, +} +pub type CUdevprop = CUdevprop_st; +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_CONTEXT: CUpointer_attribute_enum = CUpointer_attribute_enum(1); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_MEMORY_TYPE: CUpointer_attribute_enum = + CUpointer_attribute_enum(2); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_DEVICE_POINTER: CUpointer_attribute_enum = + CUpointer_attribute_enum(3); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_HOST_POINTER: CUpointer_attribute_enum = + CUpointer_attribute_enum(4); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_P2P_TOKENS: CUpointer_attribute_enum = + CUpointer_attribute_enum(5); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_SYNC_MEMOPS: CUpointer_attribute_enum = + CUpointer_attribute_enum(6); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_BUFFER_ID: CUpointer_attribute_enum = + CUpointer_attribute_enum(7); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_IS_MANAGED: CUpointer_attribute_enum = + CUpointer_attribute_enum(8); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL: 
CUpointer_attribute_enum = + CUpointer_attribute_enum(9); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE: CUpointer_attribute_enum = + CUpointer_attribute_enum(10); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_RANGE_START_ADDR: CUpointer_attribute_enum = + CUpointer_attribute_enum(11); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_RANGE_SIZE: CUpointer_attribute_enum = + CUpointer_attribute_enum(12); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_MAPPED: CUpointer_attribute_enum = CUpointer_attribute_enum(13); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES: CUpointer_attribute_enum = + CUpointer_attribute_enum(14); +} +impl CUpointer_attribute_enum { + pub const CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE: CUpointer_attribute_enum = + CUpointer_attribute_enum(15); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUpointer_attribute_enum(pub ::std::os::raw::c_uint); +pub use self::CUpointer_attribute_enum as CUpointer_attribute; +impl CUfunction_attribute_enum { + pub const CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: CUfunction_attribute_enum = + CUfunction_attribute_enum(0); +} +impl CUfunction_attribute_enum { + pub const CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: CUfunction_attribute_enum = + CUfunction_attribute_enum(1); +} +impl CUfunction_attribute_enum { + pub const CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: CUfunction_attribute_enum = + CUfunction_attribute_enum(2); +} +impl CUfunction_attribute_enum { + pub const CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: CUfunction_attribute_enum = + CUfunction_attribute_enum(3); +} +impl CUfunction_attribute_enum { + pub const CU_FUNC_ATTRIBUTE_NUM_REGS: CUfunction_attribute_enum = CUfunction_attribute_enum(4); +} +impl CUfunction_attribute_enum { + pub const CU_FUNC_ATTRIBUTE_PTX_VERSION: CUfunction_attribute_enum = + 
CUfunction_attribute_enum(5); +} +impl CUfunction_attribute_enum { + pub const CU_FUNC_ATTRIBUTE_BINARY_VERSION: CUfunction_attribute_enum = + CUfunction_attribute_enum(6); +} +impl CUfunction_attribute_enum { + pub const CU_FUNC_ATTRIBUTE_CACHE_MODE_CA: CUfunction_attribute_enum = + CUfunction_attribute_enum(7); +} +impl CUfunction_attribute_enum { + pub const CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: CUfunction_attribute_enum = + CUfunction_attribute_enum(8); +} +impl CUfunction_attribute_enum { + pub const CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: CUfunction_attribute_enum = + CUfunction_attribute_enum(9); +} +impl CUfunction_attribute_enum { + pub const CU_FUNC_ATTRIBUTE_MAX: CUfunction_attribute_enum = CUfunction_attribute_enum(10); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUfunction_attribute_enum(pub ::std::os::raw::c_uint); +pub use self::CUfunction_attribute_enum as CUfunction_attribute; +impl CUfunc_cache_enum { + pub const CU_FUNC_CACHE_PREFER_NONE: CUfunc_cache_enum = CUfunc_cache_enum(0); +} +impl CUfunc_cache_enum { + pub const CU_FUNC_CACHE_PREFER_SHARED: CUfunc_cache_enum = CUfunc_cache_enum(1); +} +impl CUfunc_cache_enum { + pub const CU_FUNC_CACHE_PREFER_L1: CUfunc_cache_enum = CUfunc_cache_enum(2); +} +impl CUfunc_cache_enum { + pub const CU_FUNC_CACHE_PREFER_EQUAL: CUfunc_cache_enum = CUfunc_cache_enum(3); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUfunc_cache_enum(pub ::std::os::raw::c_uint); +pub use self::CUfunc_cache_enum as CUfunc_cache; +impl CUsharedconfig_enum { + pub const CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: CUsharedconfig_enum = CUsharedconfig_enum(0); +} +impl CUsharedconfig_enum { + pub const CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: CUsharedconfig_enum = + CUsharedconfig_enum(1); +} +impl CUsharedconfig_enum { + pub const CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: CUsharedconfig_enum = + CUsharedconfig_enum(2); +} 
+#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUsharedconfig_enum(pub ::std::os::raw::c_uint); +pub use self::CUsharedconfig_enum as CUsharedconfig; +impl CUmemorytype_enum { + pub const CU_MEMORYTYPE_HOST: CUmemorytype_enum = CUmemorytype_enum(1); +} +impl CUmemorytype_enum { + pub const CU_MEMORYTYPE_DEVICE: CUmemorytype_enum = CUmemorytype_enum(2); +} +impl CUmemorytype_enum { + pub const CU_MEMORYTYPE_ARRAY: CUmemorytype_enum = CUmemorytype_enum(3); +} +impl CUmemorytype_enum { + pub const CU_MEMORYTYPE_UNIFIED: CUmemorytype_enum = CUmemorytype_enum(4); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUmemorytype_enum(pub ::std::os::raw::c_uint); +pub use self::CUmemorytype_enum as CUmemorytype; +impl CUmem_advise_enum { + pub const CU_MEM_ADVISE_SET_READ_MOSTLY: CUmem_advise_enum = CUmem_advise_enum(1); +} +impl CUmem_advise_enum { + pub const CU_MEM_ADVISE_UNSET_READ_MOSTLY: CUmem_advise_enum = CUmem_advise_enum(2); +} +impl CUmem_advise_enum { + pub const CU_MEM_ADVISE_SET_PREFERRED_LOCATION: CUmem_advise_enum = CUmem_advise_enum(3); +} +impl CUmem_advise_enum { + pub const CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: CUmem_advise_enum = CUmem_advise_enum(4); +} +impl CUmem_advise_enum { + pub const CU_MEM_ADVISE_SET_ACCESSED_BY: CUmem_advise_enum = CUmem_advise_enum(5); +} +impl CUmem_advise_enum { + pub const CU_MEM_ADVISE_UNSET_ACCESSED_BY: CUmem_advise_enum = CUmem_advise_enum(6); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUmem_advise_enum(pub ::std::os::raw::c_uint); +pub use self::CUmem_advise_enum as CUmem_advise; +impl CUmem_range_attribute_enum { + pub const CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY: CUmem_range_attribute_enum = + CUmem_range_attribute_enum(1); +} +impl CUmem_range_attribute_enum { + pub const CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION: CUmem_range_attribute_enum = + CUmem_range_attribute_enum(2); +} +impl 
CUmem_range_attribute_enum { + pub const CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY: CUmem_range_attribute_enum = + CUmem_range_attribute_enum(3); +} +impl CUmem_range_attribute_enum { + pub const CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION: CUmem_range_attribute_enum = + CUmem_range_attribute_enum(4); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUmem_range_attribute_enum(pub ::std::os::raw::c_uint); +pub use self::CUmem_range_attribute_enum as CUmem_range_attribute; +impl CUjit_option_enum { + pub const CU_JIT_MAX_REGISTERS: CUjit_option_enum = CUjit_option_enum(0); +} +impl CUjit_option_enum { + pub const CU_JIT_THREADS_PER_BLOCK: CUjit_option_enum = CUjit_option_enum(1); +} +impl CUjit_option_enum { + pub const CU_JIT_WALL_TIME: CUjit_option_enum = CUjit_option_enum(2); +} +impl CUjit_option_enum { + pub const CU_JIT_INFO_LOG_BUFFER: CUjit_option_enum = CUjit_option_enum(3); +} +impl CUjit_option_enum { + pub const CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: CUjit_option_enum = CUjit_option_enum(4); +} +impl CUjit_option_enum { + pub const CU_JIT_ERROR_LOG_BUFFER: CUjit_option_enum = CUjit_option_enum(5); +} +impl CUjit_option_enum { + pub const CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: CUjit_option_enum = CUjit_option_enum(6); +} +impl CUjit_option_enum { + pub const CU_JIT_OPTIMIZATION_LEVEL: CUjit_option_enum = CUjit_option_enum(7); +} +impl CUjit_option_enum { + pub const CU_JIT_TARGET_FROM_CUCONTEXT: CUjit_option_enum = CUjit_option_enum(8); +} +impl CUjit_option_enum { + pub const CU_JIT_TARGET: CUjit_option_enum = CUjit_option_enum(9); +} +impl CUjit_option_enum { + pub const CU_JIT_FALLBACK_STRATEGY: CUjit_option_enum = CUjit_option_enum(10); +} +impl CUjit_option_enum { + pub const CU_JIT_GENERATE_DEBUG_INFO: CUjit_option_enum = CUjit_option_enum(11); +} +impl CUjit_option_enum { + pub const CU_JIT_LOG_VERBOSE: CUjit_option_enum = CUjit_option_enum(12); +} +impl CUjit_option_enum { + pub const CU_JIT_GENERATE_LINE_INFO: 
CUjit_option_enum = CUjit_option_enum(13); +} +impl CUjit_option_enum { + pub const CU_JIT_CACHE_MODE: CUjit_option_enum = CUjit_option_enum(14); +} +impl CUjit_option_enum { + pub const CU_JIT_NEW_SM3X_OPT: CUjit_option_enum = CUjit_option_enum(15); +} +impl CUjit_option_enum { + pub const CU_JIT_FAST_COMPILE: CUjit_option_enum = CUjit_option_enum(16); +} +impl CUjit_option_enum { + pub const CU_JIT_GLOBAL_SYMBOL_NAMES: CUjit_option_enum = CUjit_option_enum(17); +} +impl CUjit_option_enum { + pub const CU_JIT_GLOBAL_SYMBOL_ADDRESSES: CUjit_option_enum = CUjit_option_enum(18); +} +impl CUjit_option_enum { + pub const CU_JIT_GLOBAL_SYMBOL_COUNT: CUjit_option_enum = CUjit_option_enum(19); +} +impl CUjit_option_enum { + pub const CU_JIT_NUM_OPTIONS: CUjit_option_enum = CUjit_option_enum(20); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUjit_option_enum(pub ::std::os::raw::c_uint); +pub use self::CUjit_option_enum as CUjit_option; +impl CUjitInputType_enum { + pub const CU_JIT_INPUT_CUBIN: CUjitInputType_enum = CUjitInputType_enum(0); +} +impl CUjitInputType_enum { + pub const CU_JIT_INPUT_PTX: CUjitInputType_enum = CUjitInputType_enum(1); +} +impl CUjitInputType_enum { + pub const CU_JIT_INPUT_FATBINARY: CUjitInputType_enum = CUjitInputType_enum(2); +} +impl CUjitInputType_enum { + pub const CU_JIT_INPUT_OBJECT: CUjitInputType_enum = CUjitInputType_enum(3); +} +impl CUjitInputType_enum { + pub const CU_JIT_INPUT_LIBRARY: CUjitInputType_enum = CUjitInputType_enum(4); +} +impl CUjitInputType_enum { + pub const CU_JIT_NUM_INPUT_TYPES: CUjitInputType_enum = CUjitInputType_enum(5); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUjitInputType_enum(pub ::std::os::raw::c_uint); +pub use self::CUjitInputType_enum as CUjitInputType; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUlinkState_st { + _unused: [u8; 0], +} +pub type CUlinkState = *mut CUlinkState_st; +impl CUlimit_enum { + pub 
const CU_LIMIT_STACK_SIZE: CUlimit_enum = CUlimit_enum(0); +} +impl CUlimit_enum { + pub const CU_LIMIT_PRINTF_FIFO_SIZE: CUlimit_enum = CUlimit_enum(1); +} +impl CUlimit_enum { + pub const CU_LIMIT_MALLOC_HEAP_SIZE: CUlimit_enum = CUlimit_enum(2); +} +impl CUlimit_enum { + pub const CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH: CUlimit_enum = CUlimit_enum(3); +} +impl CUlimit_enum { + pub const CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT: CUlimit_enum = CUlimit_enum(4); +} +impl CUlimit_enum { + pub const CU_LIMIT_MAX_L2_FETCH_GRANULARITY: CUlimit_enum = CUlimit_enum(5); +} +impl CUlimit_enum { + pub const CU_LIMIT_PERSISTING_L2_CACHE_SIZE: CUlimit_enum = CUlimit_enum(6); +} +impl CUlimit_enum { + pub const CU_LIMIT_MAX: CUlimit_enum = CUlimit_enum(7); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUlimit_enum(pub ::std::os::raw::c_uint); +pub use self::CUlimit_enum as CUlimit; +impl CUresourcetype_enum { + pub const CU_RESOURCE_TYPE_ARRAY: CUresourcetype_enum = CUresourcetype_enum(0); +} +impl CUresourcetype_enum { + pub const CU_RESOURCE_TYPE_MIPMAPPED_ARRAY: CUresourcetype_enum = CUresourcetype_enum(1); +} +impl CUresourcetype_enum { + pub const CU_RESOURCE_TYPE_LINEAR: CUresourcetype_enum = CUresourcetype_enum(2); +} +impl CUresourcetype_enum { + pub const CU_RESOURCE_TYPE_PITCH2D: CUresourcetype_enum = CUresourcetype_enum(3); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUresourcetype_enum(pub ::std::os::raw::c_uint); +pub use self::CUresourcetype_enum as CUresourcetype; +pub type CUhostFn = + ::std::option::Option<unsafe extern "C" fn(userData: *mut ::std::os::raw::c_void)>; +impl CUaccessProperty_enum { + pub const CU_ACCESS_PROPERTY_NORMAL: CUaccessProperty_enum = CUaccessProperty_enum(0); +} +impl CUaccessProperty_enum { + pub const CU_ACCESS_PROPERTY_STREAMING: CUaccessProperty_enum = CUaccessProperty_enum(1); +} +impl CUaccessProperty_enum { + pub const 
CU_ACCESS_PROPERTY_PERSISTING: CUaccessProperty_enum = CUaccessProperty_enum(2); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUaccessProperty_enum(pub ::std::os::raw::c_uint); +pub use self::CUaccessProperty_enum as CUaccessProperty; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUaccessPolicyWindow_st { + pub base_ptr: *mut ::std::os::raw::c_void, + pub num_bytes: usize, + pub hitRatio: f32, + pub hitProp: CUaccessProperty, + pub missProp: CUaccessProperty, +} +pub type CUaccessPolicyWindow = CUaccessPolicyWindow_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_KERNEL_NODE_PARAMS_st { + pub func: CUfunction, + pub gridDimX: ::std::os::raw::c_uint, + pub gridDimY: ::std::os::raw::c_uint, + pub gridDimZ: ::std::os::raw::c_uint, + pub blockDimX: ::std::os::raw::c_uint, + pub blockDimY: ::std::os::raw::c_uint, + pub blockDimZ: ::std::os::raw::c_uint, + pub sharedMemBytes: ::std::os::raw::c_uint, + pub kernelParams: *mut *mut ::std::os::raw::c_void, + pub extra: *mut *mut ::std::os::raw::c_void, +} +pub type CUDA_KERNEL_NODE_PARAMS = CUDA_KERNEL_NODE_PARAMS_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_MEMSET_NODE_PARAMS_st { + pub dst: CUdeviceptr, + pub pitch: usize, + pub value: ::std::os::raw::c_uint, + pub elementSize: ::std::os::raw::c_uint, + pub width: usize, + pub height: usize, +} +pub type CUDA_MEMSET_NODE_PARAMS = CUDA_MEMSET_NODE_PARAMS_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_HOST_NODE_PARAMS_st { + pub fn_: CUhostFn, + pub userData: *mut ::std::os::raw::c_void, +} +pub type CUDA_HOST_NODE_PARAMS = CUDA_HOST_NODE_PARAMS_st; +impl CUgraphNodeType_enum { + pub const CU_GRAPH_NODE_TYPE_KERNEL: CUgraphNodeType_enum = CUgraphNodeType_enum(0); +} +impl CUgraphNodeType_enum { + pub const CU_GRAPH_NODE_TYPE_MEMCPY: CUgraphNodeType_enum = CUgraphNodeType_enum(1); +} +impl CUgraphNodeType_enum { + pub const CU_GRAPH_NODE_TYPE_MEMSET: CUgraphNodeType_enum = CUgraphNodeType_enum(2); +} 
+impl CUgraphNodeType_enum { + pub const CU_GRAPH_NODE_TYPE_HOST: CUgraphNodeType_enum = CUgraphNodeType_enum(3); +} +impl CUgraphNodeType_enum { + pub const CU_GRAPH_NODE_TYPE_GRAPH: CUgraphNodeType_enum = CUgraphNodeType_enum(4); +} +impl CUgraphNodeType_enum { + pub const CU_GRAPH_NODE_TYPE_EMPTY: CUgraphNodeType_enum = CUgraphNodeType_enum(5); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUgraphNodeType_enum(pub ::std::os::raw::c_uint); +pub use self::CUgraphNodeType_enum as CUgraphNodeType; +impl CUsynchronizationPolicy_enum { + pub const CU_SYNC_POLICY_AUTO: CUsynchronizationPolicy_enum = CUsynchronizationPolicy_enum(1); +} +impl CUsynchronizationPolicy_enum { + pub const CU_SYNC_POLICY_SPIN: CUsynchronizationPolicy_enum = CUsynchronizationPolicy_enum(2); +} +impl CUsynchronizationPolicy_enum { + pub const CU_SYNC_POLICY_YIELD: CUsynchronizationPolicy_enum = CUsynchronizationPolicy_enum(3); +} +impl CUsynchronizationPolicy_enum { + pub const CU_SYNC_POLICY_BLOCKING_SYNC: CUsynchronizationPolicy_enum = + CUsynchronizationPolicy_enum(4); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUsynchronizationPolicy_enum(pub ::std::os::raw::c_uint); +pub use self::CUsynchronizationPolicy_enum as CUsynchronizationPolicy; +impl CUkernelNodeAttrID_enum { + pub const CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW: CUkernelNodeAttrID_enum = + CUkernelNodeAttrID_enum(1); +} +impl CUkernelNodeAttrID_enum { + pub const CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE: CUkernelNodeAttrID_enum = + CUkernelNodeAttrID_enum(2); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUkernelNodeAttrID_enum(pub ::std::os::raw::c_uint); +pub use self::CUkernelNodeAttrID_enum as CUkernelNodeAttrID; +#[repr(C)] +#[derive(Copy, Clone)] +pub union CUkernelNodeAttrValue_union { + pub accessPolicyWindow: CUaccessPolicyWindow, + pub cooperative: ::std::os::raw::c_int, + 
_bindgen_union_align: [u64; 4usize], +} +pub type CUkernelNodeAttrValue = CUkernelNodeAttrValue_union; +impl CUstreamCaptureStatus_enum { + pub const CU_STREAM_CAPTURE_STATUS_NONE: CUstreamCaptureStatus_enum = + CUstreamCaptureStatus_enum(0); +} +impl CUstreamCaptureStatus_enum { + pub const CU_STREAM_CAPTURE_STATUS_ACTIVE: CUstreamCaptureStatus_enum = + CUstreamCaptureStatus_enum(1); +} +impl CUstreamCaptureStatus_enum { + pub const CU_STREAM_CAPTURE_STATUS_INVALIDATED: CUstreamCaptureStatus_enum = + CUstreamCaptureStatus_enum(2); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUstreamCaptureStatus_enum(pub ::std::os::raw::c_uint); +pub use self::CUstreamCaptureStatus_enum as CUstreamCaptureStatus; +impl CUstreamCaptureMode_enum { + pub const CU_STREAM_CAPTURE_MODE_GLOBAL: CUstreamCaptureMode_enum = CUstreamCaptureMode_enum(0); +} +impl CUstreamCaptureMode_enum { + pub const CU_STREAM_CAPTURE_MODE_THREAD_LOCAL: CUstreamCaptureMode_enum = + CUstreamCaptureMode_enum(1); +} +impl CUstreamCaptureMode_enum { + pub const CU_STREAM_CAPTURE_MODE_RELAXED: CUstreamCaptureMode_enum = + CUstreamCaptureMode_enum(2); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUstreamCaptureMode_enum(pub ::std::os::raw::c_uint); +pub use self::CUstreamCaptureMode_enum as CUstreamCaptureMode; +impl CUstreamAttrID_enum { + pub const CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW: CUstreamAttrID_enum = + CUstreamAttrID_enum(1); +} +impl CUstreamAttrID_enum { + pub const CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY: CUstreamAttrID_enum = + CUstreamAttrID_enum(3); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUstreamAttrID_enum(pub ::std::os::raw::c_uint); +pub use self::CUstreamAttrID_enum as CUstreamAttrID; +#[repr(C)] +#[derive(Copy, Clone)] +pub union CUstreamAttrValue_union { + pub accessPolicyWindow: CUaccessPolicyWindow, + pub syncPolicy: CUsynchronizationPolicy, + 
_bindgen_union_align: [u64; 4usize], +} +pub type CUstreamAttrValue = CUstreamAttrValue_union; +impl cudaError_enum { + pub const CUDA_SUCCESS: cudaError_enum = cudaError_enum(0); +} +impl cudaError_enum { + pub const CUDA_ERROR_INVALID_VALUE: cudaError_enum = cudaError_enum(1); +} +impl cudaError_enum { + pub const CUDA_ERROR_OUT_OF_MEMORY: cudaError_enum = cudaError_enum(2); +} +impl cudaError_enum { + pub const CUDA_ERROR_NOT_INITIALIZED: cudaError_enum = cudaError_enum(3); +} +impl cudaError_enum { + pub const CUDA_ERROR_DEINITIALIZED: cudaError_enum = cudaError_enum(4); +} +impl cudaError_enum { + pub const CUDA_ERROR_PROFILER_DISABLED: cudaError_enum = cudaError_enum(5); +} +impl cudaError_enum { + pub const CUDA_ERROR_PROFILER_NOT_INITIALIZED: cudaError_enum = cudaError_enum(6); +} +impl cudaError_enum { + pub const CUDA_ERROR_PROFILER_ALREADY_STARTED: cudaError_enum = cudaError_enum(7); +} +impl cudaError_enum { + pub const CUDA_ERROR_PROFILER_ALREADY_STOPPED: cudaError_enum = cudaError_enum(8); +} +impl cudaError_enum { + pub const CUDA_ERROR_NO_DEVICE: cudaError_enum = cudaError_enum(100); +} +impl cudaError_enum { + pub const CUDA_ERROR_INVALID_DEVICE: cudaError_enum = cudaError_enum(101); +} +impl cudaError_enum { + pub const CUDA_ERROR_INVALID_IMAGE: cudaError_enum = cudaError_enum(200); +} +impl cudaError_enum { + pub const CUDA_ERROR_INVALID_CONTEXT: cudaError_enum = cudaError_enum(201); +} +impl cudaError_enum { + pub const CUDA_ERROR_CONTEXT_ALREADY_CURRENT: cudaError_enum = cudaError_enum(202); +} +impl cudaError_enum { + pub const CUDA_ERROR_MAP_FAILED: cudaError_enum = cudaError_enum(205); +} +impl cudaError_enum { + pub const CUDA_ERROR_UNMAP_FAILED: cudaError_enum = cudaError_enum(206); +} +impl cudaError_enum { + pub const CUDA_ERROR_ARRAY_IS_MAPPED: cudaError_enum = cudaError_enum(207); +} +impl cudaError_enum { + pub const CUDA_ERROR_ALREADY_MAPPED: cudaError_enum = cudaError_enum(208); +} +impl cudaError_enum { + pub const 
CUDA_ERROR_NO_BINARY_FOR_GPU: cudaError_enum = cudaError_enum(209); +} +impl cudaError_enum { + pub const CUDA_ERROR_ALREADY_ACQUIRED: cudaError_enum = cudaError_enum(210); +} +impl cudaError_enum { + pub const CUDA_ERROR_NOT_MAPPED: cudaError_enum = cudaError_enum(211); +} +impl cudaError_enum { + pub const CUDA_ERROR_NOT_MAPPED_AS_ARRAY: cudaError_enum = cudaError_enum(212); +} +impl cudaError_enum { + pub const CUDA_ERROR_NOT_MAPPED_AS_POINTER: cudaError_enum = cudaError_enum(213); +} +impl cudaError_enum { + pub const CUDA_ERROR_ECC_UNCORRECTABLE: cudaError_enum = cudaError_enum(214); +} +impl cudaError_enum { + pub const CUDA_ERROR_UNSUPPORTED_LIMIT: cudaError_enum = cudaError_enum(215); +} +impl cudaError_enum { + pub const CUDA_ERROR_CONTEXT_ALREADY_IN_USE: cudaError_enum = cudaError_enum(216); +} +impl cudaError_enum { + pub const CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: cudaError_enum = cudaError_enum(217); +} +impl cudaError_enum { + pub const CUDA_ERROR_INVALID_PTX: cudaError_enum = cudaError_enum(218); +} +impl cudaError_enum { + pub const CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: cudaError_enum = cudaError_enum(219); +} +impl cudaError_enum { + pub const CUDA_ERROR_NVLINK_UNCORRECTABLE: cudaError_enum = cudaError_enum(220); +} +impl cudaError_enum { + pub const CUDA_ERROR_JIT_COMPILER_NOT_FOUND: cudaError_enum = cudaError_enum(221); +} +impl cudaError_enum { + pub const CUDA_ERROR_INVALID_SOURCE: cudaError_enum = cudaError_enum(300); +} +impl cudaError_enum { + pub const CUDA_ERROR_FILE_NOT_FOUND: cudaError_enum = cudaError_enum(301); +} +impl cudaError_enum { + pub const CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: cudaError_enum = cudaError_enum(302); +} +impl cudaError_enum { + pub const CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: cudaError_enum = cudaError_enum(303); +} +impl cudaError_enum { + pub const CUDA_ERROR_OPERATING_SYSTEM: cudaError_enum = cudaError_enum(304); +} +impl cudaError_enum { + pub const CUDA_ERROR_INVALID_HANDLE: cudaError_enum = 
cudaError_enum(400); +} +impl cudaError_enum { + pub const CUDA_ERROR_ILLEGAL_STATE: cudaError_enum = cudaError_enum(401); +} +impl cudaError_enum { + pub const CUDA_ERROR_NOT_FOUND: cudaError_enum = cudaError_enum(500); +} +impl cudaError_enum { + pub const CUDA_ERROR_NOT_READY: cudaError_enum = cudaError_enum(600); +} +impl cudaError_enum { + pub const CUDA_ERROR_ILLEGAL_ADDRESS: cudaError_enum = cudaError_enum(700); +} +impl cudaError_enum { + pub const CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: cudaError_enum = cudaError_enum(701); +} +impl cudaError_enum { + pub const CUDA_ERROR_LAUNCH_TIMEOUT: cudaError_enum = cudaError_enum(702); +} +impl cudaError_enum { + pub const CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: cudaError_enum = cudaError_enum(703); +} +impl cudaError_enum { + pub const CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: cudaError_enum = cudaError_enum(704); +} +impl cudaError_enum { + pub const CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: cudaError_enum = cudaError_enum(705); +} +impl cudaError_enum { + pub const CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: cudaError_enum = cudaError_enum(708); +} +impl cudaError_enum { + pub const CUDA_ERROR_CONTEXT_IS_DESTROYED: cudaError_enum = cudaError_enum(709); +} +impl cudaError_enum { + pub const CUDA_ERROR_ASSERT: cudaError_enum = cudaError_enum(710); +} +impl cudaError_enum { + pub const CUDA_ERROR_TOO_MANY_PEERS: cudaError_enum = cudaError_enum(711); +} +impl cudaError_enum { + pub const CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: cudaError_enum = cudaError_enum(712); +} +impl cudaError_enum { + pub const CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: cudaError_enum = cudaError_enum(713); +} +impl cudaError_enum { + pub const CUDA_ERROR_HARDWARE_STACK_ERROR: cudaError_enum = cudaError_enum(714); +} +impl cudaError_enum { + pub const CUDA_ERROR_ILLEGAL_INSTRUCTION: cudaError_enum = cudaError_enum(715); +} +impl cudaError_enum { + pub const CUDA_ERROR_MISALIGNED_ADDRESS: cudaError_enum = cudaError_enum(716); +} +impl cudaError_enum { + pub const 
CUDA_ERROR_INVALID_ADDRESS_SPACE: cudaError_enum = cudaError_enum(717); +} +impl cudaError_enum { + pub const CUDA_ERROR_INVALID_PC: cudaError_enum = cudaError_enum(718); +} +impl cudaError_enum { + pub const CUDA_ERROR_LAUNCH_FAILED: cudaError_enum = cudaError_enum(719); +} +impl cudaError_enum { + pub const CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE: cudaError_enum = cudaError_enum(720); +} +impl cudaError_enum { + pub const CUDA_ERROR_NOT_PERMITTED: cudaError_enum = cudaError_enum(800); +} +impl cudaError_enum { + pub const CUDA_ERROR_NOT_SUPPORTED: cudaError_enum = cudaError_enum(801); +} +impl cudaError_enum { + pub const CUDA_ERROR_SYSTEM_NOT_READY: cudaError_enum = cudaError_enum(802); +} +impl cudaError_enum { + pub const CUDA_ERROR_SYSTEM_DRIVER_MISMATCH: cudaError_enum = cudaError_enum(803); +} +impl cudaError_enum { + pub const CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: cudaError_enum = cudaError_enum(804); +} +impl cudaError_enum { + pub const CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED: cudaError_enum = cudaError_enum(900); +} +impl cudaError_enum { + pub const CUDA_ERROR_STREAM_CAPTURE_INVALIDATED: cudaError_enum = cudaError_enum(901); +} +impl cudaError_enum { + pub const CUDA_ERROR_STREAM_CAPTURE_MERGE: cudaError_enum = cudaError_enum(902); +} +impl cudaError_enum { + pub const CUDA_ERROR_STREAM_CAPTURE_UNMATCHED: cudaError_enum = cudaError_enum(903); +} +impl cudaError_enum { + pub const CUDA_ERROR_STREAM_CAPTURE_UNJOINED: cudaError_enum = cudaError_enum(904); +} +impl cudaError_enum { + pub const CUDA_ERROR_STREAM_CAPTURE_ISOLATION: cudaError_enum = cudaError_enum(905); +} +impl cudaError_enum { + pub const CUDA_ERROR_STREAM_CAPTURE_IMPLICIT: cudaError_enum = cudaError_enum(906); +} +impl cudaError_enum { + pub const CUDA_ERROR_CAPTURED_EVENT: cudaError_enum = cudaError_enum(907); +} +impl cudaError_enum { + pub const CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD: cudaError_enum = cudaError_enum(908); +} +impl cudaError_enum { + pub const CUDA_ERROR_TIMEOUT: 
cudaError_enum = cudaError_enum(909); +} +impl cudaError_enum { + pub const CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE: cudaError_enum = cudaError_enum(910); +} +impl cudaError_enum { + pub const CUDA_ERROR_UNKNOWN: cudaError_enum = cudaError_enum(999); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct cudaError_enum(pub ::std::os::raw::c_uint); +pub use self::cudaError_enum as CUresult; +impl CUdevice_P2PAttribute_enum { + pub const CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK: CUdevice_P2PAttribute_enum = + CUdevice_P2PAttribute_enum(1); +} +impl CUdevice_P2PAttribute_enum { + pub const CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED: CUdevice_P2PAttribute_enum = + CUdevice_P2PAttribute_enum(2); +} +impl CUdevice_P2PAttribute_enum { + pub const CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED: CUdevice_P2PAttribute_enum = + CUdevice_P2PAttribute_enum(3); +} +impl CUdevice_P2PAttribute_enum { + pub const CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED: CUdevice_P2PAttribute_enum = + CUdevice_P2PAttribute_enum(4); +} +impl CUdevice_P2PAttribute_enum { + pub const CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED: CUdevice_P2PAttribute_enum = + CUdevice_P2PAttribute_enum(4); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUdevice_P2PAttribute_enum(pub ::std::os::raw::c_uint); +pub use self::CUdevice_P2PAttribute_enum as CUdevice_P2PAttribute; +pub type CUstreamCallback = ::std::option::Option< + unsafe extern "C" fn( + hStream: CUstream, + status: CUresult, + userData: *mut ::std::os::raw::c_void, + ), +>; +pub type CUoccupancyB2DSize = + ::std::option::Option<unsafe extern "C" fn(blockSize: ::std::os::raw::c_int) -> usize>; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_MEMCPY2D_st { + pub srcXInBytes: usize, + pub srcY: usize, + pub srcMemoryType: CUmemorytype, + pub srcHost: *const ::std::os::raw::c_void, + pub srcDevice: CUdeviceptr, + pub srcArray: CUarray, + pub srcPitch: usize, + pub 
dstXInBytes: usize, + pub dstY: usize, + pub dstMemoryType: CUmemorytype, + pub dstHost: *mut ::std::os::raw::c_void, + pub dstDevice: CUdeviceptr, + pub dstArray: CUarray, + pub dstPitch: usize, + pub WidthInBytes: usize, + pub Height: usize, +} +pub type CUDA_MEMCPY2D = CUDA_MEMCPY2D_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_MEMCPY3D_st { + pub srcXInBytes: usize, + pub srcY: usize, + pub srcZ: usize, + pub srcLOD: usize, + pub srcMemoryType: CUmemorytype, + pub srcHost: *const ::std::os::raw::c_void, + pub srcDevice: CUdeviceptr, + pub srcArray: CUarray, + pub reserved0: *mut ::std::os::raw::c_void, + pub srcPitch: usize, + pub srcHeight: usize, + pub dstXInBytes: usize, + pub dstY: usize, + pub dstZ: usize, + pub dstLOD: usize, + pub dstMemoryType: CUmemorytype, + pub dstHost: *mut ::std::os::raw::c_void, + pub dstDevice: CUdeviceptr, + pub dstArray: CUarray, + pub reserved1: *mut ::std::os::raw::c_void, + pub dstPitch: usize, + pub dstHeight: usize, + pub WidthInBytes: usize, + pub Height: usize, + pub Depth: usize, +} +pub type CUDA_MEMCPY3D = CUDA_MEMCPY3D_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_MEMCPY3D_PEER_st { + pub srcXInBytes: usize, + pub srcY: usize, + pub srcZ: usize, + pub srcLOD: usize, + pub srcMemoryType: CUmemorytype, + pub srcHost: *const ::std::os::raw::c_void, + pub srcDevice: CUdeviceptr, + pub srcArray: CUarray, + pub srcContext: CUcontext, + pub srcPitch: usize, + pub srcHeight: usize, + pub dstXInBytes: usize, + pub dstY: usize, + pub dstZ: usize, + pub dstLOD: usize, + pub dstMemoryType: CUmemorytype, + pub dstHost: *mut ::std::os::raw::c_void, + pub dstDevice: CUdeviceptr, + pub dstArray: CUarray, + pub dstContext: CUcontext, + pub dstPitch: usize, + pub dstHeight: usize, + pub WidthInBytes: usize, + pub Height: usize, + pub Depth: usize, +} +pub type CUDA_MEMCPY3D_PEER = CUDA_MEMCPY3D_PEER_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_ARRAY_DESCRIPTOR_st { + pub Width: usize, + pub Height: 
usize, + pub Format: CUarray_format, + pub NumChannels: ::std::os::raw::c_uint, +} +pub type CUDA_ARRAY_DESCRIPTOR = CUDA_ARRAY_DESCRIPTOR_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_ARRAY3D_DESCRIPTOR_st { + pub Width: usize, + pub Height: usize, + pub Depth: usize, + pub Format: CUarray_format, + pub NumChannels: ::std::os::raw::c_uint, + pub Flags: ::std::os::raw::c_uint, +} +pub type CUDA_ARRAY3D_DESCRIPTOR = CUDA_ARRAY3D_DESCRIPTOR_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_RESOURCE_DESC_st { + pub resType: CUresourcetype, + pub res: CUDA_RESOURCE_DESC_st__bindgen_ty_1, + pub flags: ::std::os::raw::c_uint, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union CUDA_RESOURCE_DESC_st__bindgen_ty_1 { + pub array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1, + pub mipmap: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_2, + pub linear: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3, + pub pitch2D: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4, + pub reserved: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_5, + _bindgen_union_align: [u64; 16usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 { + pub hArray: CUarray, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_2 { + pub hMipmappedArray: CUmipmappedArray, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3 { + pub devPtr: CUdeviceptr, + pub format: CUarray_format, + pub numChannels: ::std::os::raw::c_uint, + pub sizeInBytes: usize, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4 { + pub devPtr: CUdeviceptr, + pub format: CUarray_format, + pub numChannels: ::std::os::raw::c_uint, + pub width: usize, + pub height: usize, + pub pitchInBytes: usize, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_5 { + pub reserved: 
[::std::os::raw::c_int; 32usize], +} +pub type CUDA_RESOURCE_DESC = CUDA_RESOURCE_DESC_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_TEXTURE_DESC_st { + pub addressMode: [CUaddress_mode; 3usize], + pub filterMode: CUfilter_mode, + pub flags: ::std::os::raw::c_uint, + pub maxAnisotropy: ::std::os::raw::c_uint, + pub mipmapFilterMode: CUfilter_mode, + pub mipmapLevelBias: f32, + pub minMipmapLevelClamp: f32, + pub maxMipmapLevelClamp: f32, + pub borderColor: [f32; 4usize], + pub reserved: [::std::os::raw::c_int; 12usize], +} +pub type CUDA_TEXTURE_DESC = CUDA_TEXTURE_DESC_st; +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_NONE: CUresourceViewFormat_enum = CUresourceViewFormat_enum(0); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UINT_1X8: CUresourceViewFormat_enum = CUresourceViewFormat_enum(1); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UINT_2X8: CUresourceViewFormat_enum = CUresourceViewFormat_enum(2); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UINT_4X8: CUresourceViewFormat_enum = CUresourceViewFormat_enum(3); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_SINT_1X8: CUresourceViewFormat_enum = CUresourceViewFormat_enum(4); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_SINT_2X8: CUresourceViewFormat_enum = CUresourceViewFormat_enum(5); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_SINT_4X8: CUresourceViewFormat_enum = CUresourceViewFormat_enum(6); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UINT_1X16: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(7); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UINT_2X16: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(8); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UINT_4X16: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(9); +} +impl CUresourceViewFormat_enum { + pub 
const CU_RES_VIEW_FORMAT_SINT_1X16: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(10); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_SINT_2X16: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(11); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_SINT_4X16: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(12); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UINT_1X32: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(13); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UINT_2X32: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(14); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UINT_4X32: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(15); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_SINT_1X32: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(16); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_SINT_2X32: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(17); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_SINT_4X32: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(18); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_FLOAT_1X16: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(19); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_FLOAT_2X16: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(20); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_FLOAT_4X16: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(21); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_FLOAT_1X32: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(22); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_FLOAT_2X32: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(23); +} +impl CUresourceViewFormat_enum { + pub const 
CU_RES_VIEW_FORMAT_FLOAT_4X32: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(24); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC1: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(25); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC2: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(26); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC3: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(27); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC4: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(28); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_SIGNED_BC4: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(29); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC5: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(30); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_SIGNED_BC5: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(31); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC6H: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(32); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_SIGNED_BC6H: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(33); +} +impl CUresourceViewFormat_enum { + pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC7: CUresourceViewFormat_enum = + CUresourceViewFormat_enum(34); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUresourceViewFormat_enum(pub ::std::os::raw::c_uint); +pub use self::CUresourceViewFormat_enum as CUresourceViewFormat; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_RESOURCE_VIEW_DESC_st { + pub format: CUresourceViewFormat, + pub width: usize, + pub height: usize, + pub depth: usize, + pub firstMipmapLevel: ::std::os::raw::c_uint, + pub lastMipmapLevel: ::std::os::raw::c_uint, + 
pub firstLayer: ::std::os::raw::c_uint, + pub lastLayer: ::std::os::raw::c_uint, + pub reserved: [::std::os::raw::c_uint; 16usize], +} +pub type CUDA_RESOURCE_VIEW_DESC = CUDA_RESOURCE_VIEW_DESC_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_LAUNCH_PARAMS_st { + pub function: CUfunction, + pub gridDimX: ::std::os::raw::c_uint, + pub gridDimY: ::std::os::raw::c_uint, + pub gridDimZ: ::std::os::raw::c_uint, + pub blockDimX: ::std::os::raw::c_uint, + pub blockDimY: ::std::os::raw::c_uint, + pub blockDimZ: ::std::os::raw::c_uint, + pub sharedMemBytes: ::std::os::raw::c_uint, + pub hStream: CUstream, + pub kernelParams: *mut *mut ::std::os::raw::c_void, +} +pub type CUDA_LAUNCH_PARAMS = CUDA_LAUNCH_PARAMS_st; +impl CUexternalMemoryHandleType_enum { + pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: CUexternalMemoryHandleType_enum = + CUexternalMemoryHandleType_enum(1); +} +impl CUexternalMemoryHandleType_enum { + pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32: CUexternalMemoryHandleType_enum = + CUexternalMemoryHandleType_enum(2); +} +impl CUexternalMemoryHandleType_enum { + pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: CUexternalMemoryHandleType_enum = + CUexternalMemoryHandleType_enum(3); +} +impl CUexternalMemoryHandleType_enum { + pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP: CUexternalMemoryHandleType_enum = + CUexternalMemoryHandleType_enum(4); +} +impl CUexternalMemoryHandleType_enum { + pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE: CUexternalMemoryHandleType_enum = + CUexternalMemoryHandleType_enum(5); +} +impl CUexternalMemoryHandleType_enum { + pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE: CUexternalMemoryHandleType_enum = + CUexternalMemoryHandleType_enum(6); +} +impl CUexternalMemoryHandleType_enum { + pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT: CUexternalMemoryHandleType_enum = + CUexternalMemoryHandleType_enum(7); +} +impl CUexternalMemoryHandleType_enum { + pub const 
CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF: CUexternalMemoryHandleType_enum = + CUexternalMemoryHandleType_enum(8); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUexternalMemoryHandleType_enum(pub ::std::os::raw::c_uint); +pub use self::CUexternalMemoryHandleType_enum as CUexternalMemoryHandleType; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st { + pub type_: CUexternalMemoryHandleType, + pub handle: CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1, + pub size: ::std::os::raw::c_ulonglong, + pub flags: ::std::os::raw::c_uint, + pub reserved: [::std::os::raw::c_uint; 16usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1 { + pub fd: ::std::os::raw::c_int, + pub win32: CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1, + pub nvSciBufObject: *const ::std::os::raw::c_void, + _bindgen_union_align: [u64; 2usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1 { + pub handle: *mut ::std::os::raw::c_void, + pub name: *const ::std::os::raw::c_void, +} +pub type CUDA_EXTERNAL_MEMORY_HANDLE_DESC = CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st { + pub offset: ::std::os::raw::c_ulonglong, + pub size: ::std::os::raw::c_ulonglong, + pub flags: ::std::os::raw::c_uint, + pub reserved: [::std::os::raw::c_uint; 16usize], +} +pub type CUDA_EXTERNAL_MEMORY_BUFFER_DESC = CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st { + pub offset: ::std::os::raw::c_ulonglong, + pub arrayDesc: CUDA_ARRAY3D_DESCRIPTOR, + pub numLevels: ::std::os::raw::c_uint, + pub reserved: [::std::os::raw::c_uint; 16usize], +} +pub type CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC = CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st; +impl 
CUexternalSemaphoreHandleType_enum { + pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: CUexternalSemaphoreHandleType_enum = + CUexternalSemaphoreHandleType_enum(1); +} +impl CUexternalSemaphoreHandleType_enum { + pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32: CUexternalSemaphoreHandleType_enum = + CUexternalSemaphoreHandleType_enum(2); +} +impl CUexternalSemaphoreHandleType_enum { + pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT: + CUexternalSemaphoreHandleType_enum = CUexternalSemaphoreHandleType_enum(3); +} +impl CUexternalSemaphoreHandleType_enum { + pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE: CUexternalSemaphoreHandleType_enum = + CUexternalSemaphoreHandleType_enum(4); +} +impl CUexternalSemaphoreHandleType_enum { + pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE: CUexternalSemaphoreHandleType_enum = + CUexternalSemaphoreHandleType_enum(5); +} +impl CUexternalSemaphoreHandleType_enum { + pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC: CUexternalSemaphoreHandleType_enum = + CUexternalSemaphoreHandleType_enum(6); +} +impl CUexternalSemaphoreHandleType_enum { + pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX: + CUexternalSemaphoreHandleType_enum = CUexternalSemaphoreHandleType_enum(7); +} +impl CUexternalSemaphoreHandleType_enum { + pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT: + CUexternalSemaphoreHandleType_enum = CUexternalSemaphoreHandleType_enum(8); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUexternalSemaphoreHandleType_enum(pub ::std::os::raw::c_uint); +pub use self::CUexternalSemaphoreHandleType_enum as CUexternalSemaphoreHandleType; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st { + pub type_: CUexternalSemaphoreHandleType, + pub handle: CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1, + pub flags: ::std::os::raw::c_uint, + pub reserved: [::std::os::raw::c_uint; 16usize], 
+} +#[repr(C)] +#[derive(Copy, Clone)] +pub union CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1 { + pub fd: ::std::os::raw::c_int, + pub win32: CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1, + pub nvSciSyncObj: *const ::std::os::raw::c_void, + _bindgen_union_align: [u64; 2usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1 { + pub handle: *mut ::std::os::raw::c_void, + pub name: *const ::std::os::raw::c_void, +} +pub type CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC = CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st { + pub params: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1, + pub flags: ::std::os::raw::c_uint, + pub reserved: [::std::os::raw::c_uint; 16usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1 { + pub fence: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1, + pub nvSciSync: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2, + pub keyedMutex: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3, + pub reserved: [::std::os::raw::c_uint; 12usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1 { + pub value: ::std::os::raw::c_ulonglong, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2 { + pub fence: *mut ::std::os::raw::c_void, + pub reserved: ::std::os::raw::c_ulonglong, + _bindgen_union_align: u64, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3 { + pub key: ::std::os::raw::c_ulonglong, +} +pub type CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS = CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct 
CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st { + pub params: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1, + pub flags: ::std::os::raw::c_uint, + pub reserved: [::std::os::raw::c_uint; 16usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1 { + pub fence: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1, + pub nvSciSync: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2, + pub keyedMutex: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3, + pub reserved: [::std::os::raw::c_uint; 10usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1 { + pub value: ::std::os::raw::c_ulonglong, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2 { + pub fence: *mut ::std::os::raw::c_void, + pub reserved: ::std::os::raw::c_ulonglong, + _bindgen_union_align: u64, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3 { + pub key: ::std::os::raw::c_ulonglong, + pub timeoutMs: ::std::os::raw::c_uint, +} +pub type CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS = CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st; +pub type CUmemGenericAllocationHandle = ::std::os::raw::c_ulonglong; +impl CUmemAllocationHandleType_enum { + pub const CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR: CUmemAllocationHandleType_enum = + CUmemAllocationHandleType_enum(1); +} +impl CUmemAllocationHandleType_enum { + pub const CU_MEM_HANDLE_TYPE_WIN32: CUmemAllocationHandleType_enum = + CUmemAllocationHandleType_enum(2); +} +impl CUmemAllocationHandleType_enum { + pub const CU_MEM_HANDLE_TYPE_WIN32_KMT: CUmemAllocationHandleType_enum = + CUmemAllocationHandleType_enum(4); +} +impl CUmemAllocationHandleType_enum { + pub const CU_MEM_HANDLE_TYPE_MAX: CUmemAllocationHandleType_enum = + CUmemAllocationHandleType_enum(4294967295); +} 
+#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUmemAllocationHandleType_enum(pub ::std::os::raw::c_uint); +pub use self::CUmemAllocationHandleType_enum as CUmemAllocationHandleType; +impl CUmemAccess_flags_enum { + pub const CU_MEM_ACCESS_FLAGS_PROT_NONE: CUmemAccess_flags_enum = CUmemAccess_flags_enum(0); +} +impl CUmemAccess_flags_enum { + pub const CU_MEM_ACCESS_FLAGS_PROT_READ: CUmemAccess_flags_enum = CUmemAccess_flags_enum(1); +} +impl CUmemAccess_flags_enum { + pub const CU_MEM_ACCESS_FLAGS_PROT_READWRITE: CUmemAccess_flags_enum = + CUmemAccess_flags_enum(3); +} +impl CUmemAccess_flags_enum { + pub const CU_MEM_ACCESS_FLAGS_PROT_MAX: CUmemAccess_flags_enum = + CUmemAccess_flags_enum(4294967295); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUmemAccess_flags_enum(pub ::std::os::raw::c_uint); +pub use self::CUmemAccess_flags_enum as CUmemAccess_flags; +impl CUmemLocationType_enum { + pub const CU_MEM_LOCATION_TYPE_INVALID: CUmemLocationType_enum = CUmemLocationType_enum(0); +} +impl CUmemLocationType_enum { + pub const CU_MEM_LOCATION_TYPE_DEVICE: CUmemLocationType_enum = CUmemLocationType_enum(1); +} +impl CUmemLocationType_enum { + pub const CU_MEM_LOCATION_TYPE_MAX: CUmemLocationType_enum = CUmemLocationType_enum(4294967295); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUmemLocationType_enum(pub ::std::os::raw::c_uint); +pub use self::CUmemLocationType_enum as CUmemLocationType; +impl CUmemAllocationType_enum { + pub const CU_MEM_ALLOCATION_TYPE_INVALID: CUmemAllocationType_enum = + CUmemAllocationType_enum(0); +} +impl CUmemAllocationType_enum { + pub const CU_MEM_ALLOCATION_TYPE_PINNED: CUmemAllocationType_enum = CUmemAllocationType_enum(1); +} +impl CUmemAllocationType_enum { + pub const CU_MEM_ALLOCATION_TYPE_MAX: CUmemAllocationType_enum = + CUmemAllocationType_enum(4294967295); +} +#[repr(transparent)] +#[derive(Debug, Copy, 
Clone, PartialEq, Eq, Hash)] +pub struct CUmemAllocationType_enum(pub ::std::os::raw::c_uint); +pub use self::CUmemAllocationType_enum as CUmemAllocationType; +impl CUmemAllocationGranularity_flags_enum { + pub const CU_MEM_ALLOC_GRANULARITY_MINIMUM: CUmemAllocationGranularity_flags_enum = + CUmemAllocationGranularity_flags_enum(0); +} +impl CUmemAllocationGranularity_flags_enum { + pub const CU_MEM_ALLOC_GRANULARITY_RECOMMENDED: CUmemAllocationGranularity_flags_enum = + CUmemAllocationGranularity_flags_enum(1); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUmemAllocationGranularity_flags_enum(pub ::std::os::raw::c_uint); +pub use self::CUmemAllocationGranularity_flags_enum as CUmemAllocationGranularity_flags; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUmemLocation_st { + pub type_: CUmemLocationType, + pub id: ::std::os::raw::c_int, +} +pub type CUmemLocation = CUmemLocation_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUmemAllocationProp_st { + pub type_: CUmemAllocationType, + pub requestedHandleTypes: CUmemAllocationHandleType, + pub location: CUmemLocation, + pub win32HandleMetaData: *mut ::std::os::raw::c_void, + pub allocFlags: CUmemAllocationProp_st__bindgen_ty_1, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUmemAllocationProp_st__bindgen_ty_1 { + pub compressionType: ::std::os::raw::c_uchar, + pub gpuDirectRDMACapable: ::std::os::raw::c_uchar, + pub reserved: [::std::os::raw::c_uchar; 6usize], +} +pub type CUmemAllocationProp = CUmemAllocationProp_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUmemAccessDesc_st { + pub location: CUmemLocation, + pub flags: CUmemAccess_flags, +} +pub type CUmemAccessDesc = CUmemAccessDesc_st; +impl CUgraphExecUpdateResult_enum { + pub const CU_GRAPH_EXEC_UPDATE_SUCCESS: CUgraphExecUpdateResult_enum = + CUgraphExecUpdateResult_enum(0); +} +impl CUgraphExecUpdateResult_enum { + pub const CU_GRAPH_EXEC_UPDATE_ERROR: CUgraphExecUpdateResult_enum = + 
CUgraphExecUpdateResult_enum(1); +} +impl CUgraphExecUpdateResult_enum { + pub const CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED: CUgraphExecUpdateResult_enum = + CUgraphExecUpdateResult_enum(2); +} +impl CUgraphExecUpdateResult_enum { + pub const CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED: CUgraphExecUpdateResult_enum = + CUgraphExecUpdateResult_enum(3); +} +impl CUgraphExecUpdateResult_enum { + pub const CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED: CUgraphExecUpdateResult_enum = + CUgraphExecUpdateResult_enum(4); +} +impl CUgraphExecUpdateResult_enum { + pub const CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED: CUgraphExecUpdateResult_enum = + CUgraphExecUpdateResult_enum(5); +} +impl CUgraphExecUpdateResult_enum { + pub const CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED: CUgraphExecUpdateResult_enum = + CUgraphExecUpdateResult_enum(6); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct CUgraphExecUpdateResult_enum(pub ::std::os::raw::c_uint); +pub use self::CUgraphExecUpdateResult_enum as CUgraphExecUpdateResult; + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGetErrorString( + error: CUresult, + pStr: *mut *const ::std::os::raw::c_char, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGetErrorName( + error: CUresult, + pStr: *mut *const ::std::os::raw::c_char, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuInit(Flags: ::std::os::raw::c_uint) -> CUresult { + r#impl::init().encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDriverGetVersion(driverVersion: *mut ::std::os::raw::c_int) -> CUresult { + unsafe { *driverVersion = r#impl::driver_get_version() }; + CUresult::CUDA_SUCCESS +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDeviceGet(device: *mut CUdevice, ordinal: ::std::os::raw::c_int) -> CUresult { + r#impl::device::get(device.decuda(), ordinal).encuda() +} + 
+#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDeviceGetCount(count: *mut ::std::os::raw::c_int) -> CUresult { + r#impl::device::get_count(count).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDeviceGetName( + name: *mut ::std::os::raw::c_char, + len: ::std::os::raw::c_int, + dev: CUdevice, +) -> CUresult { + r#impl::device::get_name(name, len, dev.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: CUdevice) -> CUresult { + r#impl::device::get_uuid(uuid, dev.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: CUdevice) -> CUresult { + r#impl::device::total_mem_v2(bytes, dev.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDeviceGetAttribute( + pi: *mut ::std::os::raw::c_int, + attrib: CUdevice_attribute, + dev: CUdevice, +) -> CUresult { + r#impl::device::get_attribute(pi, attrib, dev.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDeviceGetNvSciSyncAttributes( + nvSciSyncAttrList: *mut ::std::os::raw::c_void, + dev: CUdevice, + flags: ::std::os::raw::c_int, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDeviceGetProperties(prop: *mut CUdevprop, dev: CUdevice) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDeviceComputeCapability( + major: *mut ::std::os::raw::c_int, + minor: *mut ::std::os::raw::c_int, + dev: CUdevice, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDevicePrimaryCtxRetain(pctx: *mut CUcontext, dev: CUdevice) -> CUresult { + r#impl::device::primary_ctx_retain(pctx.decuda(), dev.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDevicePrimaryCtxRelease(dev: CUdevice) -> CUresult { + cuDevicePrimaryCtxRelease_v2(dev) +} + 
+#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDevicePrimaryCtxRelease_v2(dev: CUdevice) -> CUresult { + r#impl::device::primary_ctx_release_v2(dev.decuda()) +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDevicePrimaryCtxSetFlags( + dev: CUdevice, + flags: ::std::os::raw::c_uint, +) -> CUresult { + cuDevicePrimaryCtxSetFlags_v2(dev, flags) +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDevicePrimaryCtxSetFlags_v2( + dev: CUdevice, + flags: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDevicePrimaryCtxGetState( + dev: CUdevice, + flags: *mut ::std::os::raw::c_uint, + active: *mut ::std::os::raw::c_int, +) -> CUresult { + r#impl::device::primary_ctx_get_state(dev.decuda(), flags, active).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDevicePrimaryCtxReset(dev: CUdevice) -> CUresult { + cuDevicePrimaryCtxReset_v2(dev) +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDevicePrimaryCtxReset_v2(dev: CUdevice) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxCreate_v2( + pctx: *mut CUcontext, + flags: ::std::os::raw::c_uint, + dev: CUdevice, +) -> CUresult { + r#impl::context::create_v2(pctx.decuda(), flags, dev.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult { + r#impl::context::destroy_v2(ctx.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxPushCurrent_v2(ctx: CUcontext) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxPopCurrent_v2(pctx: *mut CUcontext) -> CUresult { + r#impl::context::pop_current_v2(pctx.decuda()) +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxSetCurrent(ctx: CUcontext) -> CUresult { + r#impl::context::set_current(ctx.decuda()) +} + +#[cfg_attr(not(test), no_mangle)] 
+pub extern "C" fn cuCtxGetCurrent(pctx: *mut CUcontext) -> CUresult { + r#impl::context::get_current(pctx.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxGetDevice(device: *mut CUdevice) -> CUresult { + r#impl::context::get_device(device.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxGetFlags(flags: *mut ::std::os::raw::c_uint) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxSynchronize() -> CUresult { + r#impl::context::synchronize() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxSetLimit(limit: CUlimit, value: usize) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxGetLimit(pvalue: *mut usize, limit: CUlimit) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxGetCacheConfig(pconfig: *mut CUfunc_cache) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxSetCacheConfig(config: CUfunc_cache) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxGetSharedMemConfig(pConfig: *mut CUsharedconfig) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxSetSharedMemConfig(config: CUsharedconfig) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxGetApiVersion( + ctx: CUcontext, + version: *mut ::std::os::raw::c_uint, +) -> CUresult { + r#impl::context::get_api_version(ctx.decuda(), version).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxGetStreamPriorityRange( + leastPriority: *mut ::std::os::raw::c_int, + greatestPriority: *mut ::std::os::raw::c_int, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxResetPersistingL2Cache() -> CUresult { + 
r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxAttach(pctx: *mut CUcontext, flags: ::std::os::raw::c_uint) -> CUresult { + r#impl::context::attach(pctx.decuda(), flags).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuCtxDetach(ctx: CUcontext) -> CUresult { + r#impl::context::detach(ctx.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuModuleLoad( + module: *mut CUmodule, + fname: *const ::std::os::raw::c_char, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuModuleLoadData( + module: *mut CUmodule, + image: *const ::std::os::raw::c_void, +) -> CUresult { + r#impl::module::load_data(module.decuda(), image).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuModuleLoadDataEx( + module: *mut CUmodule, + image: *const ::std::os::raw::c_void, + numOptions: ::std::os::raw::c_uint, + options: *mut CUjit_option, + optionValues: *mut *mut ::std::os::raw::c_void, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuModuleLoadFatBinary( + module: *mut CUmodule, + fatCubin: *const ::std::os::raw::c_void, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuModuleUnload(hmod: CUmodule) -> CUresult { + r#impl::module::unload(hmod.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuModuleGetFunction( + hfunc: *mut CUfunction, + hmod: CUmodule, + name: *const ::std::os::raw::c_char, +) -> CUresult { + r#impl::module::get_function(hfunc.decuda(), hmod.decuda(), name).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuModuleGetGlobal_v2( + dptr: *mut CUdeviceptr, + bytes: *mut usize, + hmod: CUmodule, + name: *const ::std::os::raw::c_char, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuModuleGetTexRef( + pTexRef: *mut 
CUtexref, + hmod: CUmodule, + name: *const ::std::os::raw::c_char, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuModuleGetSurfRef( + pSurfRef: *mut CUsurfref, + hmod: CUmodule, + name: *const ::std::os::raw::c_char, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuLinkCreate_v2( + numOptions: ::std::os::raw::c_uint, + options: *mut CUjit_option, + optionValues: *mut *mut ::std::os::raw::c_void, + stateOut: *mut CUlinkState, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuLinkAddData_v2( + state: CUlinkState, + type_: CUjitInputType, + data: *mut ::std::os::raw::c_void, + size: usize, + name: *const ::std::os::raw::c_char, + numOptions: ::std::os::raw::c_uint, + options: *mut CUjit_option, + optionValues: *mut *mut ::std::os::raw::c_void, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuLinkAddFile_v2( + state: CUlinkState, + type_: CUjitInputType, + path: *const ::std::os::raw::c_char, + numOptions: ::std::os::raw::c_uint, + options: *mut CUjit_option, + optionValues: *mut *mut ::std::os::raw::c_void, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuLinkComplete( + state: CUlinkState, + cubinOut: *mut *mut ::std::os::raw::c_void, + sizeOut: *mut usize, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuLinkDestroy(state: CUlinkState) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult { + r#impl::memory::alloc_v2(dptr.decuda(), bytesize).encuda() +} + +#[cfg_attr(not(test), 
no_mangle)]
pub extern "C" fn cuMemAllocPitch_v2(
    dptr: *mut CUdeviceptr,
    pPitch: *mut usize,
    WidthInBytes: usize,
    Height: usize,
    ElementSizeBytes: ::std::os::raw::c_uint,
) -> CUresult {
    r#impl::unimplemented()
}

/// Forwards to `r#impl::memory::free_v2`: `dptr` is converted with `decuda()`
/// and the result is converted back with `encuda()`.
#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult {
    r#impl::memory::free_v2(dptr.decuda()).encuda()
}

// Entry points below whose body is `r#impl::unimplemented()` are stubs: they
// ignore their arguments and return whatever that helper returns.

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemGetAddressRange_v2(
    pbase: *mut CUdeviceptr,
    psize: *mut usize,
    dptr: CUdeviceptr,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemAllocHost_v2(
    pp: *mut *mut ::std::os::raw::c_void,
    bytesize: usize,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemHostAlloc(
    pp: *mut *mut ::std::os::raw::c_void,
    bytesize: usize,
    Flags: ::std::os::raw::c_uint,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemHostGetDevicePointer_v2(
    pdptr: *mut CUdeviceptr,
    p: *mut ::std::os::raw::c_void,
    Flags: ::std::os::raw::c_uint,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemHostGetFlags(
    pFlags: *mut ::std::os::raw::c_uint,
    p: *mut ::std::os::raw::c_void,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemAllocManaged(
    dptr: *mut CUdeviceptr,
    bytesize: usize,
    flags: ::std::os::raw::c_uint,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceGetByPCIBusId(
    dev: *mut CUdevice,
    pciBusId: *const ::std::os::raw::c_char,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceGetPCIBusId(
    pciBusId: *mut ::std::os::raw::c_char,
    len: ::std::os::raw::c_int,
    dev: CUdevice,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuIpcGetEventHandle(pHandle: *mut CUipcEventHandle, event: CUevent) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuIpcOpenEventHandle(
    phEvent: *mut CUevent,
    handle: CUipcEventHandle,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuIpcGetMemHandle(pHandle: *mut CUipcMemHandle, dptr: CUdeviceptr) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuIpcOpenMemHandle(
    pdptr: *mut CUdeviceptr,
    handle: CUipcMemHandle,
    Flags: ::std::os::raw::c_uint,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuIpcCloseMemHandle(dptr: CUdeviceptr) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemHostRegister_v2(
    p: *mut ::std::os::raw::c_void,
    bytesize: usize,
    Flags: ::std::os::raw::c_uint,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemHostUnregister(p: *mut ::std::os::raw::c_void) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpy(dst: CUdeviceptr, src: CUdeviceptr, ByteCount: usize) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyPeer(
    dstDevice: CUdeviceptr,
    dstContext: CUcontext,
    srcDevice: CUdeviceptr,
    srcContext: CUcontext,
    ByteCount: usize,
) -> CUresult {
    r#impl::unimplemented()
}

/// Forwards to `r#impl::memory::copy_v2` with the destination converted via
/// `decuda()`; the source pointer and byte count are passed through unchanged.
#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyHtoD_v2(
    dstDevice: CUdeviceptr,
    srcHost: *const ::std::os::raw::c_void,
    ByteCount: usize,
) -> CUresult {
    r#impl::memory::copy_v2(dstDevice.decuda(), srcHost, ByteCount).encuda()
}

/// Forwards to `r#impl::memory::copy_v2` with the source converted via
/// `decuda()`; the destination pointer and byte count are passed through unchanged.
#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyDtoH_v2(
    dstHost: *mut ::std::os::raw::c_void,
    srcDevice: CUdeviceptr,
    ByteCount: usize,
) -> CUresult {
    r#impl::memory::copy_v2(dstHost, srcDevice.decuda(), ByteCount).encuda()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyDtoD_v2(
    dstDevice: CUdeviceptr,
    srcDevice: CUdeviceptr,
    ByteCount: usize,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyDtoA_v2(
    dstArray: CUarray,
    dstOffset: usize,
    srcDevice: CUdeviceptr,
    ByteCount: usize,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyAtoD_v2(
    dstDevice: CUdeviceptr,
    srcArray: CUarray,
    srcOffset: usize,
    ByteCount: usize,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyHtoA_v2(
    dstArray: CUarray,
    dstOffset: usize,
    srcHost: *const ::std::os::raw::c_void,
    ByteCount: usize,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyAtoH_v2(
    dstHost: *mut ::std::os::raw::c_void,
    srcArray: CUarray,
    srcOffset: usize,
    ByteCount: usize,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyAtoA_v2(
    dstArray: CUarray,
    dstOffset: usize,
    srcArray: CUarray,
    srcOffset: usize,
    ByteCount: usize,
) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpy2D_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult {
    r#impl::unimplemented()
}

#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult {
    r#impl::unimplemented()
}

+#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemcpy3DPeer(pCopy: *const CUDA_MEMCPY3D_PEER) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemcpyAsync( + dst: CUdeviceptr, + src: CUdeviceptr, + ByteCount: usize, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemcpyPeerAsync( + dstDevice: CUdeviceptr, + dstContext: CUcontext, + srcDevice: CUdeviceptr, + srcContext: CUcontext, + ByteCount: usize, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemcpyHtoDAsync_v2( + dstDevice: CUdeviceptr, + srcHost: *const ::std::os::raw::c_void, + ByteCount: usize, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemcpyDtoHAsync_v2( + dstHost: *mut ::std::os::raw::c_void, + srcDevice: CUdeviceptr, + ByteCount: usize, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemcpyDtoDAsync_v2( + dstDevice: CUdeviceptr, + srcDevice: CUdeviceptr, + ByteCount: usize, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemcpyHtoAAsync_v2( + dstArray: CUarray, + dstOffset: usize, + srcHost: *const ::std::os::raw::c_void, + ByteCount: usize, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemcpyAtoHAsync_v2( + dstHost: *mut ::std::os::raw::c_void, + srcArray: CUarray, + srcOffset: usize, + ByteCount: usize, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemcpy2DAsync_v2(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern 
"C" fn cuMemcpy3DAsync_v2(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemcpy3DPeerAsync( + pCopy: *const CUDA_MEMCPY3D_PEER, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD8_v2( + dstDevice: CUdeviceptr, + uc: ::std::os::raw::c_uchar, + N: usize, +) -> CUresult { + r#impl::memory::set_d8_v2(dstDevice.decuda(), uc, N).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD16_v2( + dstDevice: CUdeviceptr, + us: ::std::os::raw::c_ushort, + N: usize, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD32_v2( + dstDevice: CUdeviceptr, + ui: ::std::os::raw::c_uint, + N: usize, +) -> CUresult { + r#impl::memory::set_d32_v2(dstDevice.decuda(), ui, N).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD2D8_v2( + dstDevice: CUdeviceptr, + dstPitch: usize, + uc: ::std::os::raw::c_uchar, + Width: usize, + Height: usize, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD2D16_v2( + dstDevice: CUdeviceptr, + dstPitch: usize, + us: ::std::os::raw::c_ushort, + Width: usize, + Height: usize, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD2D32_v2( + dstDevice: CUdeviceptr, + dstPitch: usize, + ui: ::std::os::raw::c_uint, + Width: usize, + Height: usize, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD8Async( + dstDevice: CUdeviceptr, + uc: ::std::os::raw::c_uchar, + N: usize, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD16Async( + dstDevice: CUdeviceptr, + us: ::std::os::raw::c_ushort, + N: usize, + hStream: CUstream, +) -> 
CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD32Async( + dstDevice: CUdeviceptr, + ui: ::std::os::raw::c_uint, + N: usize, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD2D8Async( + dstDevice: CUdeviceptr, + dstPitch: usize, + uc: ::std::os::raw::c_uchar, + Width: usize, + Height: usize, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD2D16Async( + dstDevice: CUdeviceptr, + dstPitch: usize, + us: ::std::os::raw::c_ushort, + Width: usize, + Height: usize, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD2D32Async( + dstDevice: CUdeviceptr, + dstPitch: usize, + ui: ::std::os::raw::c_uint, + Width: usize, + Height: usize, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuArrayCreate_v2( + pHandle: *mut CUarray, + pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuArrayGetDescriptor_v2( + pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR, + hArray: CUarray, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuArrayDestroy(hArray: CUarray) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuArray3DCreate_v2( + pHandle: *mut CUarray, + pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuArray3DGetDescriptor_v2( + pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR, + hArray: CUarray, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMipmappedArrayCreate( + 
pHandle: *mut CUmipmappedArray, + pMipmappedArrayDesc: *const CUDA_ARRAY3D_DESCRIPTOR, + numMipmapLevels: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMipmappedArrayGetLevel( + pLevelArray: *mut CUarray, + hMipmappedArray: CUmipmappedArray, + level: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMipmappedArrayDestroy(hMipmappedArray: CUmipmappedArray) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemAddressReserve( + ptr: *mut CUdeviceptr, + size: usize, + alignment: usize, + addr: CUdeviceptr, + flags: ::std::os::raw::c_ulonglong, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemAddressFree(ptr: CUdeviceptr, size: usize) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemCreate( + handle: *mut CUmemGenericAllocationHandle, + size: usize, + prop: *const CUmemAllocationProp, + flags: ::std::os::raw::c_ulonglong, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemRelease(handle: CUmemGenericAllocationHandle) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemMap( + ptr: CUdeviceptr, + size: usize, + offset: usize, + handle: CUmemGenericAllocationHandle, + flags: ::std::os::raw::c_ulonglong, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemUnmap(ptr: CUdeviceptr, size: usize) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemSetAccess( + ptr: CUdeviceptr, + size: usize, + desc: *const CUmemAccessDesc, + count: usize, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn 
cuMemGetAccess( + flags: *mut ::std::os::raw::c_ulonglong, + location: *const CUmemLocation, + ptr: CUdeviceptr, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemExportToShareableHandle( + shareableHandle: *mut ::std::os::raw::c_void, + handle: CUmemGenericAllocationHandle, + handleType: CUmemAllocationHandleType, + flags: ::std::os::raw::c_ulonglong, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemImportFromShareableHandle( + handle: *mut CUmemGenericAllocationHandle, + osHandle: *mut ::std::os::raw::c_void, + shHandleType: CUmemAllocationHandleType, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemGetAllocationGranularity( + granularity: *mut usize, + prop: *const CUmemAllocationProp, + option: CUmemAllocationGranularity_flags, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemGetAllocationPropertiesFromHandle( + prop: *mut CUmemAllocationProp, + handle: CUmemGenericAllocationHandle, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemRetainAllocationHandle( + handle: *mut CUmemGenericAllocationHandle, + addr: *mut ::std::os::raw::c_void, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuPointerGetAttribute( + data: *mut ::std::os::raw::c_void, + attribute: CUpointer_attribute, + ptr: CUdeviceptr, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemPrefetchAsync( + devPtr: CUdeviceptr, + count: usize, + dstDevice: CUdevice, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemAdvise( + devPtr: CUdeviceptr, + count: usize, + advice: CUmem_advise, + device: CUdevice, +) -> CUresult { + 
r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemRangeGetAttribute( + data: *mut ::std::os::raw::c_void, + dataSize: usize, + attribute: CUmem_range_attribute, + devPtr: CUdeviceptr, + count: usize, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemRangeGetAttributes( + data: *mut *mut ::std::os::raw::c_void, + dataSizes: *mut usize, + attributes: *mut CUmem_range_attribute, + numAttributes: usize, + devPtr: CUdeviceptr, + count: usize, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuPointerSetAttribute( + value: *const ::std::os::raw::c_void, + attribute: CUpointer_attribute, + ptr: CUdeviceptr, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuPointerGetAttributes( + numAttributes: ::std::os::raw::c_uint, + attributes: *mut CUpointer_attribute, + data: *mut *mut ::std::os::raw::c_void, + ptr: CUdeviceptr, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamCreate( + phStream: *mut CUstream, + Flags: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::stream::create(phStream.decuda(), Flags).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamCreateWithPriority( + phStream: *mut CUstream, + flags: ::std::os::raw::c_uint, + priority: ::std::os::raw::c_int, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamGetPriority( + hStream: CUstream, + priority: *mut ::std::os::raw::c_int, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamGetFlags( + hStream: CUstream, + flags: *mut ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamGetCtx(hStream: CUstream, pctx: *mut CUcontext) -> CUresult { + 
r#impl::stream::get_ctx(hStream.decuda(), pctx.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamWaitEvent( + hStream: CUstream, + hEvent: CUevent, + Flags: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamAddCallback( + hStream: CUstream, + callback: CUstreamCallback, + userData: *mut ::std::os::raw::c_void, + flags: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamBeginCapture_v2( + hStream: CUstream, + mode: CUstreamCaptureMode, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuThreadExchangeStreamCaptureMode(mode: *mut CUstreamCaptureMode) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamEndCapture(hStream: CUstream, phGraph: *mut CUgraph) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamIsCapturing( + hStream: CUstream, + captureStatus: *mut CUstreamCaptureStatus, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamGetCaptureInfo( + hStream: CUstream, + captureStatus: *mut CUstreamCaptureStatus, + id: *mut cuuint64_t, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamAttachMemAsync( + hStream: CUstream, + dptr: CUdeviceptr, + length: usize, + flags: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamQuery(hStream: CUstream) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamSynchronize(hStream: CUstream) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamDestroy_v2(hStream: CUstream) 
-> CUresult { + r#impl::stream::destroy_v2(hStream.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamCopyAttributes(dst: CUstream, src: CUstream) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamGetAttribute( + hStream: CUstream, + attr: CUstreamAttrID, + value_out: *mut CUstreamAttrValue, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamSetAttribute( + hStream: CUstream, + attr: CUstreamAttrID, + value: *const CUstreamAttrValue, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuEventCreate(phEvent: *mut CUevent, Flags: ::std::os::raw::c_uint) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuEventRecord(hEvent: CUevent, hStream: CUstream) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuEventQuery(hEvent: CUevent) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuEventSynchronize(hEvent: CUevent) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuEventDestroy_v2(hEvent: CUevent) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuEventElapsedTime( + pMilliseconds: *mut f32, + hStart: CUevent, + hEnd: CUevent, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuImportExternalMemory( + extMem_out: *mut CUexternalMemory, + memHandleDesc: *const CUDA_EXTERNAL_MEMORY_HANDLE_DESC, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuExternalMemoryGetMappedBuffer( + devPtr: *mut CUdeviceptr, + extMem: CUexternalMemory, + bufferDesc: *const CUDA_EXTERNAL_MEMORY_BUFFER_DESC, +) -> CUresult { + r#impl::unimplemented() 
+} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuExternalMemoryGetMappedMipmappedArray( + mipmap: *mut CUmipmappedArray, + extMem: CUexternalMemory, + mipmapDesc: *const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDestroyExternalMemory(extMem: CUexternalMemory) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuImportExternalSemaphore( + extSem_out: *mut CUexternalSemaphore, + semHandleDesc: *const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuSignalExternalSemaphoresAsync( + extSemArray: *const CUexternalSemaphore, + paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS, + numExtSems: ::std::os::raw::c_uint, + stream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuWaitExternalSemaphoresAsync( + extSemArray: *const CUexternalSemaphore, + paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS, + numExtSems: ::std::os::raw::c_uint, + stream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuDestroyExternalSemaphore(extSem: CUexternalSemaphore) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamWaitValue32( + stream: CUstream, + addr: CUdeviceptr, + value: cuuint32_t, + flags: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamWaitValue64( + stream: CUstream, + addr: CUdeviceptr, + value: cuuint64_t, + flags: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamWriteValue32( + stream: CUstream, + addr: CUdeviceptr, + value: cuuint32_t, + flags: 
::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamWriteValue64( + stream: CUstream, + addr: CUdeviceptr, + value: cuuint64_t, + flags: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamBatchMemOp( + stream: CUstream, + count: ::std::os::raw::c_uint, + paramArray: *mut CUstreamBatchMemOpParams, + flags: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuFuncGetAttribute( + pi: *mut ::std::os::raw::c_int, + attrib: CUfunction_attribute, + hfunc: CUfunction, +) -> CUresult { + r#impl::function::get_attribute(pi, attrib, hfunc.decuda()).encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuFuncSetAttribute( + hfunc: CUfunction, + attrib: CUfunction_attribute, + value: ::std::os::raw::c_int, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuFuncSetCacheConfig(hfunc: CUfunction, config: CUfunc_cache) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuFuncSetSharedMemConfig(hfunc: CUfunction, config: CUsharedconfig) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuLaunchKernel( + f: CUfunction, + gridDimX: ::std::os::raw::c_uint, + gridDimY: ::std::os::raw::c_uint, + gridDimZ: ::std::os::raw::c_uint, + blockDimX: ::std::os::raw::c_uint, + blockDimY: ::std::os::raw::c_uint, + blockDimZ: ::std::os::raw::c_uint, + sharedMemBytes: ::std::os::raw::c_uint, + hStream: CUstream, + kernelParams: *mut *mut ::std::os::raw::c_void, + extra: *mut *mut ::std::os::raw::c_void, +) -> CUresult { + r#impl::function::launch_kernel( + f.decuda(), + gridDimX, + gridDimY, + gridDimZ, + blockDimX, + blockDimY, + blockDimZ, + sharedMemBytes, + hStream.decuda(), + kernelParams, + extra, 
+ ) + .encuda() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuLaunchCooperativeKernel( + f: CUfunction, + gridDimX: ::std::os::raw::c_uint, + gridDimY: ::std::os::raw::c_uint, + gridDimZ: ::std::os::raw::c_uint, + blockDimX: ::std::os::raw::c_uint, + blockDimY: ::std::os::raw::c_uint, + blockDimZ: ::std::os::raw::c_uint, + sharedMemBytes: ::std::os::raw::c_uint, + hStream: CUstream, + kernelParams: *mut *mut ::std::os::raw::c_void, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuLaunchCooperativeKernelMultiDevice( + launchParamsList: *mut CUDA_LAUNCH_PARAMS, + numDevices: ::std::os::raw::c_uint, + flags: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuLaunchHostFunc( + hStream: CUstream, + fn_: CUhostFn, + userData: *mut ::std::os::raw::c_void, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuFuncSetBlockShape( + hfunc: CUfunction, + x: ::std::os::raw::c_int, + y: ::std::os::raw::c_int, + z: ::std::os::raw::c_int, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuFuncSetSharedSize( + hfunc: CUfunction, + bytes: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuParamSetSize(hfunc: CUfunction, numbytes: ::std::os::raw::c_uint) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuParamSeti( + hfunc: CUfunction, + offset: ::std::os::raw::c_int, + value: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuParamSetf( + hfunc: CUfunction, + offset: ::std::os::raw::c_int, + value: f32, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuParamSetv( + hfunc: CUfunction, 
+ offset: ::std::os::raw::c_int, + ptr: *mut ::std::os::raw::c_void, + numbytes: ::std::os::raw::c_uint, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuLaunch(f: CUfunction) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuLaunchGrid( + f: CUfunction, + grid_width: ::std::os::raw::c_int, + grid_height: ::std::os::raw::c_int, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuLaunchGridAsync( + f: CUfunction, + grid_width: ::std::os::raw::c_int, + grid_height: ::std::os::raw::c_int, + hStream: CUstream, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuParamSetTexRef( + hfunc: CUfunction, + texunit: ::std::os::raw::c_int, + hTexRef: CUtexref, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphCreate(phGraph: *mut CUgraph, flags: ::std::os::raw::c_uint) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphAddKernelNode( + phGraphNode: *mut CUgraphNode, + hGraph: CUgraph, + dependencies: *const CUgraphNode, + numDependencies: usize, + nodeParams: *const CUDA_KERNEL_NODE_PARAMS, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphKernelNodeGetParams( + hNode: CUgraphNode, + nodeParams: *mut CUDA_KERNEL_NODE_PARAMS, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphKernelNodeSetParams( + hNode: CUgraphNode, + nodeParams: *const CUDA_KERNEL_NODE_PARAMS, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphAddMemcpyNode( + phGraphNode: *mut CUgraphNode, + hGraph: CUgraph, + dependencies: *const CUgraphNode, + numDependencies: usize, + copyParams: *const CUDA_MEMCPY3D, + ctx: 
CUcontext, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphMemcpyNodeGetParams( + hNode: CUgraphNode, + nodeParams: *mut CUDA_MEMCPY3D, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphMemcpyNodeSetParams( + hNode: CUgraphNode, + nodeParams: *const CUDA_MEMCPY3D, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphAddMemsetNode( + phGraphNode: *mut CUgraphNode, + hGraph: CUgraph, + dependencies: *const CUgraphNode, + numDependencies: usize, + memsetParams: *const CUDA_MEMSET_NODE_PARAMS, + ctx: CUcontext, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphMemsetNodeGetParams( + hNode: CUgraphNode, + nodeParams: *mut CUDA_MEMSET_NODE_PARAMS, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphMemsetNodeSetParams( + hNode: CUgraphNode, + nodeParams: *const CUDA_MEMSET_NODE_PARAMS, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphAddHostNode( + phGraphNode: *mut CUgraphNode, + hGraph: CUgraph, + dependencies: *const CUgraphNode, + numDependencies: usize, + nodeParams: *const CUDA_HOST_NODE_PARAMS, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphHostNodeGetParams( + hNode: CUgraphNode, + nodeParams: *mut CUDA_HOST_NODE_PARAMS, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphHostNodeSetParams( + hNode: CUgraphNode, + nodeParams: *const CUDA_HOST_NODE_PARAMS, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphAddChildGraphNode( + phGraphNode: *mut CUgraphNode, + hGraph: CUgraph, + dependencies: *const CUgraphNode, + numDependencies: usize, + 
childGraph: CUgraph, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphChildGraphNodeGetGraph( + hNode: CUgraphNode, + phGraph: *mut CUgraph, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphAddEmptyNode( + phGraphNode: *mut CUgraphNode, + hGraph: CUgraph, + dependencies: *const CUgraphNode, + numDependencies: usize, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphClone(phGraphClone: *mut CUgraph, originalGraph: CUgraph) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphNodeFindInClone( + phNode: *mut CUgraphNode, + hOriginalNode: CUgraphNode, + hClonedGraph: CUgraph, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphNodeGetType(hNode: CUgraphNode, type_: *mut CUgraphNodeType) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphGetNodes( + hGraph: CUgraph, + nodes: *mut CUgraphNode, + numNodes: *mut usize, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphGetRootNodes( + hGraph: CUgraph, + rootNodes: *mut CUgraphNode, + numRootNodes: *mut usize, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphGetEdges( + hGraph: CUgraph, + from: *mut CUgraphNode, + to: *mut CUgraphNode, + numEdges: *mut usize, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphNodeGetDependencies( + hNode: CUgraphNode, + dependencies: *mut CUgraphNode, + numDependencies: *mut usize, +) -> CUresult { + r#impl::unimplemented() +} + +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuGraphNodeGetDependentNodes( + hNode: CUgraphNode, + dependentNodes: *mut CUgraphNode, + 
numDependentNodes: *mut usize,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+// CUDA graph entry points (cuGraph*). Every function in this group is a stub:
+// its arguments are ignored and the body only calls `r#impl::unimplemented()`.
+// `#[cfg_attr(not(test), no_mangle)]` exports each symbol under its unmangled
+// name in non-test builds only.
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphAddDependencies(
+    hGraph: CUgraph,
+    from: *const CUgraphNode,
+    to: *const CUgraphNode,
+    numDependencies: usize,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphRemoveDependencies(
+    hGraph: CUgraph,
+    from: *const CUgraphNode,
+    to: *const CUgraphNode,
+    numDependencies: usize,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphDestroyNode(hNode: CUgraphNode) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphInstantiate_v2(
+    phGraphExec: *mut CUgraphExec,
+    hGraph: CUgraph,
+    phErrorNode: *mut CUgraphNode,
+    logBuffer: *mut ::std::os::raw::c_char,
+    bufferSize: usize,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphExecKernelNodeSetParams(
+    hGraphExec: CUgraphExec,
+    hNode: CUgraphNode,
+    nodeParams: *const CUDA_KERNEL_NODE_PARAMS,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphExecMemcpyNodeSetParams(
+    hGraphExec: CUgraphExec,
+    hNode: CUgraphNode,
+    copyParams: *const CUDA_MEMCPY3D,
+    ctx: CUcontext,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphExecMemsetNodeSetParams(
+    hGraphExec: CUgraphExec,
+    hNode: CUgraphNode,
+    memsetParams: *const CUDA_MEMSET_NODE_PARAMS,
+    ctx: CUcontext,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphExecHostNodeSetParams(
+    hGraphExec: CUgraphExec,
+    hNode: CUgraphNode,
+    nodeParams: *const CUDA_HOST_NODE_PARAMS,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphLaunch(hGraphExec: CUgraphExec, hStream: CUstream) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphExecDestroy(hGraphExec: CUgraphExec) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphDestroy(hGraph: CUgraph) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphExecUpdate(
+    hGraphExec: CUgraphExec,
+    hGraph: CUgraph,
+    hErrorNode_out: *mut CUgraphNode,
+    updateResult_out: *mut CUgraphExecUpdateResult,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphKernelNodeCopyAttributes(dst: CUgraphNode, src: CUgraphNode) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphKernelNodeGetAttribute(
+    hNode: CUgraphNode,
+    attr: CUkernelNodeAttrID,
+    value_out: *mut CUkernelNodeAttrValue,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphKernelNodeSetAttribute(
+    hNode: CUgraphNode,
+    attr: CUkernelNodeAttrID,
+    value: *const CUkernelNodeAttrValue,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+// Occupancy entry points (cuOccupancy*). All stubs: no output pointer is
+// written, each body only calls `r#impl::unimplemented()`.
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuOccupancyMaxActiveBlocksPerMultiprocessor(
+    numBlocks: *mut ::std::os::raw::c_int,
+    func: CUfunction,
+    blockSize: ::std::os::raw::c_int,
+    dynamicSMemSize: usize,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
+    numBlocks: *mut ::std::os::raw::c_int,
+    func: CUfunction,
+    blockSize: ::std::os::raw::c_int,
+    dynamicSMemSize: usize,
+    flags: ::std::os::raw::c_uint,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuOccupancyMaxPotentialBlockSize(
+    minGridSize: *mut ::std::os::raw::c_int,
+    blockSize: *mut ::std::os::raw::c_int,
+    func: CUfunction,
+    blockSizeToDynamicSMemSize: CUoccupancyB2DSize,
+    dynamicSMemSize: usize,
+    blockSizeLimit: ::std::os::raw::c_int,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuOccupancyMaxPotentialBlockSizeWithFlags(
+    minGridSize: *mut ::std::os::raw::c_int,
+    blockSize: *mut ::std::os::raw::c_int,
+    func: CUfunction,
+    blockSizeToDynamicSMemSize: CUoccupancyB2DSize,
+    dynamicSMemSize: usize,
+    blockSizeLimit: ::std::os::raw::c_int,
+    flags: ::std::os::raw::c_uint,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuOccupancyAvailableDynamicSMemPerBlock(
+    dynamicSmemSize: *mut usize,
+    func: CUfunction,
+    numBlocks: ::std::os::raw::c_int,
+    blockSize: ::std::os::raw::c_int,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+// Texture reference entry points (cuTexRefSet* / cuTexRefGet*). All stubs:
+// setters store nothing and getters write nothing through their output
+// pointers; each body only calls `r#impl::unimplemented()`.
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefSetArray(
+    hTexRef: CUtexref,
+    hArray: CUarray,
+    Flags: ::std::os::raw::c_uint,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefSetMipmappedArray(
+    hTexRef: CUtexref,
+    hMipmappedArray: CUmipmappedArray,
+    Flags: ::std::os::raw::c_uint,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefSetAddress_v2(
+    ByteOffset: *mut usize,
+    hTexRef: CUtexref,
+    dptr: CUdeviceptr,
+    bytes: usize,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefSetAddress2D_v3(
+    hTexRef: CUtexref,
+    desc: *const CUDA_ARRAY_DESCRIPTOR,
+    dptr: CUdeviceptr,
+    Pitch: usize,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefSetFormat(
+    hTexRef: CUtexref,
+    fmt: CUarray_format,
+    NumPackedComponents: ::std::os::raw::c_int,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefSetAddressMode(
+    hTexRef: CUtexref,
+    dim: ::std::os::raw::c_int,
+    am: CUaddress_mode,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefSetFilterMode(hTexRef: CUtexref, fm: CUfilter_mode) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefSetMipmapFilterMode(hTexRef: CUtexref, fm: CUfilter_mode) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefSetMipmapLevelBias(hTexRef: CUtexref, bias: f32) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefSetMipmapLevelClamp(
+    hTexRef: CUtexref,
+    minMipmapLevelClamp: f32,
+    maxMipmapLevelClamp: f32,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefSetMaxAnisotropy(
+    hTexRef: CUtexref,
+    maxAniso: ::std::os::raw::c_uint,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefSetBorderColor(hTexRef: CUtexref, pBorderColor: *mut f32) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefSetFlags(hTexRef: CUtexref, Flags: ::std::os::raw::c_uint) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefGetAddress_v2(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefGetArray(phArray: *mut CUarray, hTexRef: CUtexref) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefGetMipmappedArray(
+    phMipmappedArray: *mut CUmipmappedArray,
+    hTexRef: CUtexref,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefGetAddressMode(
+    pam: *mut CUaddress_mode,
+    hTexRef: CUtexref,
+    dim: ::std::os::raw::c_int,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+// Texture reference getters and lifetime (cuTexRefGet*, cuTexRefCreate,
+// cuTexRefDestroy). All stubs: nothing is written through the output pointers,
+// each body only calls `r#impl::unimplemented()`.
+// `#[cfg_attr(not(test), no_mangle)]` exports each symbol under its unmangled
+// name in non-test builds only.
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefGetFilterMode(pfm: *mut CUfilter_mode, hTexRef: CUtexref) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefGetFormat(
+    pFormat: *mut CUarray_format,
+    pNumChannels: *mut ::std::os::raw::c_int,
+    hTexRef: CUtexref,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefGetMipmapFilterMode(
+    pfm: *mut CUfilter_mode,
+    hTexRef: CUtexref,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefGetMipmapLevelBias(pbias: *mut f32, hTexRef: CUtexref) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefGetMipmapLevelClamp(
+    pminMipmapLevelClamp: *mut f32,
+    pmaxMipmapLevelClamp: *mut f32,
+    hTexRef: CUtexref,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefGetMaxAnisotropy(
+    pmaxAniso: *mut ::std::os::raw::c_int,
+    hTexRef: CUtexref,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefGetBorderColor(pBorderColor: *mut f32, hTexRef: CUtexref) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefGetFlags(
+    pFlags: *mut ::std::os::raw::c_uint,
+    hTexRef: CUtexref,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefCreate(pTexRef: *mut CUtexref) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexRefDestroy(hTexRef: CUtexref) -> CUresult {
+    r#impl::unimplemented()
+}
+
+// Surface reference entry points (cuSurfRef*). Stubs.
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuSurfRefSetArray(
+    hSurfRef: CUsurfref,
+    hArray: CUarray,
+    Flags: ::std::os::raw::c_uint,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuSurfRefGetArray(phArray: *mut CUarray, hSurfRef: CUsurfref) -> CUresult {
+    r#impl::unimplemented()
+}
+
+// Texture object entry points (cuTexObject*). Stubs.
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexObjectCreate(
+    pTexObject: *mut CUtexObject,
+    pResDesc: *const CUDA_RESOURCE_DESC,
+    pTexDesc: *const CUDA_TEXTURE_DESC,
+    pResViewDesc: *const CUDA_RESOURCE_VIEW_DESC,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexObjectDestroy(texObject: CUtexObject) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexObjectGetResourceDesc(
+    pResDesc: *mut CUDA_RESOURCE_DESC,
+    texObject: CUtexObject,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexObjectGetTextureDesc(
+    pTexDesc: *mut CUDA_TEXTURE_DESC,
+    texObject: CUtexObject,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuTexObjectGetResourceViewDesc(
+    pResViewDesc: *mut CUDA_RESOURCE_VIEW_DESC,
+    texObject: CUtexObject,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+// Surface object entry points (cuSurfObject*). Stubs.
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuSurfObjectCreate(
+    pSurfObject: *mut CUsurfObject,
+    pResDesc: *const CUDA_RESOURCE_DESC,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuSurfObjectDestroy(surfObject: CUsurfObject) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuSurfObjectGetResourceDesc(
+    pResDesc: *mut CUDA_RESOURCE_DESC,
+    surfObject: CUsurfObject,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+// Peer-access entry points. Stubs.
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuDeviceCanAccessPeer(
+    canAccessPeer: *mut ::std::os::raw::c_int,
+    dev: CUdevice,
+    peerDev: CUdevice,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuCtxEnablePeerAccess(
+    peerContext: CUcontext,
+    Flags: ::std::os::raw::c_uint,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuCtxDisablePeerAccess(peerContext: CUcontext) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuDeviceGetP2PAttribute(
+    value: *mut ::std::os::raw::c_int,
+    attrib: CUdevice_P2PAttribute,
+    srcDevice: CUdevice,
+    dstDevice: CUdevice,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+// Graphics-resource entry points (cuGraphics*). Stubs.
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphicsUnregisterResource(resource: CUgraphicsResource) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphicsSubResourceGetMappedArray(
+    pArray: *mut CUarray,
+    resource: CUgraphicsResource,
+    arrayIndex: ::std::os::raw::c_uint,
+    mipLevel: ::std::os::raw::c_uint,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphicsResourceGetMappedMipmappedArray(
+    pMipmappedArray: *mut CUmipmappedArray,
+    resource: CUgraphicsResource,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphicsResourceGetMappedPointer_v2(
+    pDevPtr: *mut CUdeviceptr,
+    pSize: *mut usize,
+    resource: CUgraphicsResource,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphicsResourceSetMapFlags_v2(
+    resource: CUgraphicsResource,
+    flags: ::std::os::raw::c_uint,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphicsMapResources(
+    count: ::std::os::raw::c_uint,
+    resources: *mut CUgraphicsResource,
+    hStream: CUstream,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGraphicsUnmapResources(
+    count: ::std::os::raw::c_uint,
+    resources: *mut CUgraphicsResource,
+    hStream: CUstream,
+) -> CUresult {
+    r#impl::unimplemented()
+}
+
+// The only entry point in this group with a real implementation: both
+// arguments are forwarded unchanged to `r#impl::export_table::get`, whose
+// result is returned as-is. No pointer validation happens at this level.
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuGetExportTable(
+    ppExportTable: *mut *const ::std::os::raw::c_void,
+    pExportTableId: *const CUuuid,
+) -> CUresult {
+    r#impl::export_table::get(ppExportTable, pExportTableId)
+}
+
+// Stub: `hmod` is never written.
+#[cfg_attr(not(test), no_mangle)]
+pub extern "C" fn cuFuncGetModule(hmod: *mut CUmodule, hfunc: CUfunction) -> CUresult {
+    r#impl::unimplemented()
+}
diff --git a/zluda/src/cuda_impl/mod.rs b/zluda/src/cuda_impl/mod.rs
new file mode 100644
index 0000000..63b9049
--- /dev/null
+++ b/zluda/src/cuda_impl/mod.rs
@@ -0,0 +1 @@
+pub mod rt;
\ No newline at end of file diff --git a/zluda/src/cuda_impl/rt.rs b/zluda/src/cuda_impl/rt.rs new file mode 100644 index 0000000..3931bc3 --- /dev/null +++ b/zluda/src/cuda_impl/rt.rs @@ -0,0 +1,2 @@ +pub enum ContextState {} +pub enum ContextStateManager {} diff --git a/zluda/src/impl/context.rs b/zluda/src/impl/context.rs new file mode 100644 index 0000000..873fc47 --- /dev/null +++ b/zluda/src/impl/context.rs @@ -0,0 +1,359 @@ +use super::{device, stream::Stream, stream::StreamData, HasLivenessCookie, LiveCheck}; +use super::{CUresult, GlobalState}; +use crate::{cuda::CUcontext, cuda_impl}; +use l0::sys::ze_result_t; +use std::{cell::RefCell, num::NonZeroU32, os::raw::c_uint, ptr, sync::atomic::AtomicU32}; +use std::{ + collections::HashSet, + mem::{self}, +}; + +thread_local! { + pub static CONTEXT_STACK: RefCell<Vec<*mut Context>> = RefCell::new(Vec::new()); +} + +pub type Context = LiveCheck<ContextData>; + +impl HasLivenessCookie for ContextData { + #[cfg(target_pointer_width = "64")] + const COOKIE: usize = 0x5f0119560b643ffb; + + #[cfg(target_pointer_width = "32")] + const COOKIE: usize = 0x0b643ffb; + + const LIVENESS_FAIL: CUresult = CUresult::CUDA_ERROR_INVALID_CONTEXT; + + fn try_drop(&mut self) -> Result<(), CUresult> { + for stream in self.streams.iter() { + let stream = unsafe { &mut **stream }; + stream.context = ptr::null_mut(); + Stream::destroy_impl(unsafe { Stream::ptr_from_inner(stream) })?; + } + Ok(()) + } +} + +enum ContextRefCount { + Primary, + NonPrimary(NonZeroU32), +} + +impl ContextRefCount { + fn new(is_primary: bool) -> Self { + if is_primary { + ContextRefCount::Primary + } else { + ContextRefCount::NonPrimary(unsafe { NonZeroU32::new_unchecked(1) }) + } + } + + fn incr(&mut self) -> Result<(), CUresult> { + match self { + ContextRefCount::Primary => Ok(()), + ContextRefCount::NonPrimary(c) => { + let (new_count, overflow) = c.get().overflowing_add(1); + if overflow { + Err(CUresult::CUDA_ERROR_INVALID_VALUE) + } else { + *c = 
unsafe { NonZeroU32::new_unchecked(new_count) }; + Ok(()) + } + } + } + } + + #[must_use] + fn decr(&mut self) -> bool { + match self { + ContextRefCount::Primary => false, + ContextRefCount::NonPrimary(c) => { + if c.get() == 1 { + return true; + } + *c = unsafe { NonZeroU32::new_unchecked(c.get() - 1) }; + false + } + } + } +} + +pub struct ContextData { + pub flags: AtomicU32, + // This pointer is null only for a moment when constructing primary context + pub device: *mut device::Device, + ref_count: ContextRefCount, + pub default_stream: StreamData, + pub streams: HashSet<*mut StreamData>, + // All the fields below are here to support internal CUDA driver API + pub cuda_manager: *mut cuda_impl::rt::ContextStateManager, + pub cuda_state: *mut cuda_impl::rt::ContextState, + pub cuda_dtor_cb: Option< + extern "C" fn( + CUcontext, + *mut cuda_impl::rt::ContextStateManager, + *mut cuda_impl::rt::ContextState, + ), + >, +} + +impl ContextData { + pub fn new( + l0_ctx: &mut l0::Context, + l0_dev: &l0::Device, + flags: c_uint, + is_primary: bool, + dev: *mut device::Device, + ) -> Result<Self, CUresult> { + let default_stream = StreamData::new_unitialized(l0_ctx, l0_dev)?; + Ok(ContextData { + flags: AtomicU32::new(flags), + device: dev, + ref_count: ContextRefCount::new(is_primary), + default_stream, + streams: HashSet::new(), + cuda_manager: ptr::null_mut(), + cuda_state: ptr::null_mut(), + cuda_dtor_cb: None, + }) + } +} + +impl Context { + pub fn late_init(&mut self) { + let ctx_data = self.as_option_mut().unwrap(); + ctx_data.default_stream.context = ctx_data as *mut _; + } +} + +pub fn create_v2( + pctx: *mut *mut Context, + flags: u32, + dev_idx: device::Index, +) -> Result<(), CUresult> { + if pctx == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + let mut ctx_box = GlobalState::lock_device(dev_idx, |dev| { + let dev_ptr = dev as *mut _; + let mut ctx_box = Box::new(LiveCheck::new(ContextData::new( + &mut dev.l0_context, + &dev.base, + 
flags, + false, + dev_ptr as *mut _, + )?)); + ctx_box.late_init(); + Ok::<_, CUresult>(ctx_box) + })??; + let ctx_ref = ctx_box.as_mut() as *mut Context; + unsafe { *pctx = ctx_ref }; + mem::forget(ctx_box); + CONTEXT_STACK.with(|stack| stack.borrow_mut().push(ctx_ref)); + Ok(()) +} + +pub fn destroy_v2(ctx: *mut Context) -> Result<(), CUresult> { + if ctx == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + CONTEXT_STACK.with(|stack| { + let mut stack = stack.borrow_mut(); + let should_pop = match stack.last() { + Some(active_ctx) => *active_ctx == (ctx as *mut _), + None => false, + }; + if should_pop { + stack.pop(); + } + }); + GlobalState::lock(|_| Context::destroy_impl(ctx))? +} + +pub fn pop_current_v2(pctx: *mut *mut Context) -> CUresult { + if pctx == ptr::null_mut() { + return CUresult::CUDA_ERROR_INVALID_VALUE; + } + let mut ctx = CONTEXT_STACK.with(|stack| stack.borrow_mut().pop()); + let ctx_ptr = match &mut ctx { + Some(ctx) => *ctx as *mut _, + None => return CUresult::CUDA_ERROR_INVALID_CONTEXT, + }; + unsafe { *pctx = ctx_ptr }; + CUresult::CUDA_SUCCESS +} + +pub fn get_current(pctx: *mut *mut Context) -> l0::Result<()> { + if pctx == ptr::null_mut() { + return Err(ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT); + } + let ctx = CONTEXT_STACK.with(|stack| match stack.borrow().last() { + Some(ctx) => *ctx as *mut _, + None => ptr::null_mut(), + }); + unsafe { *pctx = ctx }; + Ok(()) +} + +pub fn set_current(ctx: *mut Context) -> CUresult { + if ctx == ptr::null_mut() { + CONTEXT_STACK.with(|stack| stack.borrow_mut().pop()); + CUresult::CUDA_SUCCESS + } else { + CONTEXT_STACK.with(|stack| stack.borrow_mut().push(ctx)); + CUresult::CUDA_SUCCESS + } +} + +pub fn get_api_version(ctx: *mut Context, version: *mut u32) -> Result<(), CUresult> { + if ctx == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + GlobalState::lock(|_| { + unsafe { &*ctx }.as_result()?; + Ok::<_, CUresult>(()) + })??; + //TODO: query 
device for properties roughly matching CUDA API version + unsafe { *version = 1100 }; + Ok(()) +} + +pub fn get_device(dev: *mut device::Index) -> Result<(), CUresult> { + let dev_idx = GlobalState::lock_current_context(|ctx| unsafe { &*ctx.device }.index)?; + unsafe { *dev = dev_idx }; + Ok(()) +} + +pub fn attach(pctx: *mut *mut Context, _flags: c_uint) -> Result<(), CUresult> { + if pctx == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + let ctx = GlobalState::lock_current_context_unchecked(|unchecked_ctx| { + let ctx = unchecked_ctx.as_result_mut()?; + ctx.ref_count.incr()?; + Ok::<_, CUresult>(unchecked_ctx as *mut _) + })??; + unsafe { *pctx = ctx }; + Ok(()) +} + +pub fn detach(pctx: *mut Context) -> Result<(), CUresult> { + if pctx == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + GlobalState::lock_current_context_unchecked(|unchecked_ctx| { + let ctx = unchecked_ctx.as_result_mut()?; + if ctx.ref_count.decr() { + Context::destroy_impl(unchecked_ctx)?; + } + Ok::<_, CUresult>(()) + })? 
+} + +pub(crate) fn synchronize() -> CUresult { + // TODO: change the implementation once we do async stream operations + CUresult::CUDA_SUCCESS +} + +#[cfg(test)] +mod test { + use super::super::test::CudaDriverFns; + use super::super::CUresult; + use std::{ffi::c_void, ptr}; + + cuda_driver_test!(destroy_leaves_zombie_context); + + fn destroy_leaves_zombie_context<T: CudaDriverFns>() { + assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); + let mut ctx1 = ptr::null_mut(); + let mut ctx2 = ptr::null_mut(); + let mut ctx3 = ptr::null_mut(); + assert_eq!(T::cuCtxCreate_v2(&mut ctx1, 0, 0), CUresult::CUDA_SUCCESS); + assert_eq!(T::cuCtxCreate_v2(&mut ctx2, 0, 0), CUresult::CUDA_SUCCESS); + assert_eq!(T::cuCtxCreate_v2(&mut ctx3, 0, 0), CUresult::CUDA_SUCCESS); + assert_eq!(T::cuCtxDestroy_v2(ctx2), CUresult::CUDA_SUCCESS); + let mut popped_ctx1 = ptr::null_mut(); + assert_eq!( + T::cuCtxPopCurrent_v2(&mut popped_ctx1), + CUresult::CUDA_SUCCESS + ); + assert_eq!(popped_ctx1, ctx3); + let mut popped_ctx2 = ptr::null_mut(); + assert_eq!( + T::cuCtxPopCurrent_v2(&mut popped_ctx2), + CUresult::CUDA_SUCCESS + ); + assert_eq!(popped_ctx2, ctx2); + let mut popped_ctx3 = ptr::null_mut(); + assert_eq!( + T::cuCtxPopCurrent_v2(&mut popped_ctx3), + CUresult::CUDA_SUCCESS + ); + assert_eq!(popped_ctx3, ctx1); + let mut temp = 0; + assert_eq!( + T::cuCtxGetApiVersion(ctx2, &mut temp), + CUresult::CUDA_ERROR_INVALID_CONTEXT + ); + assert_eq!( + T::cuCtxPopCurrent_v2(&mut ptr::null_mut()), + CUresult::CUDA_ERROR_INVALID_CONTEXT + ); + } + + cuda_driver_test!(empty_pop_fails); + + fn empty_pop_fails<T: CudaDriverFns>() { + assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); + let mut ctx = ptr::null_mut(); + assert_eq!( + T::cuCtxPopCurrent_v2(&mut ctx), + CUresult::CUDA_ERROR_INVALID_CONTEXT + ); + } + + cuda_driver_test!(destroy_pops_top_of_stack); + + fn destroy_pops_top_of_stack<T: CudaDriverFns>() { + assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); + let mut ctx1 = 
ptr::null_mut(); + let mut ctx2 = ptr::null_mut(); + assert_eq!(T::cuCtxCreate_v2(&mut ctx1, 0, 0), CUresult::CUDA_SUCCESS); + assert_eq!(T::cuCtxCreate_v2(&mut ctx2, 0, 0), CUresult::CUDA_SUCCESS); + assert_eq!(T::cuCtxDestroy_v2(ctx2), CUresult::CUDA_SUCCESS); + let mut popped_ctx1 = ptr::null_mut(); + assert_eq!( + T::cuCtxPopCurrent_v2(&mut popped_ctx1), + CUresult::CUDA_SUCCESS + ); + assert_eq!(popped_ctx1, ctx1); + let mut popped_ctx2 = ptr::null_mut(); + assert_eq!( + T::cuCtxPopCurrent_v2(&mut popped_ctx2), + CUresult::CUDA_ERROR_INVALID_CONTEXT + ); + } + + cuda_driver_test!(double_destroy_fails); + + fn double_destroy_fails<T: CudaDriverFns>() { + assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); + let mut ctx = ptr::null_mut(); + assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS); + assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS); + let destroy_result = T::cuCtxDestroy_v2(ctx); + // original CUDA impl returns randomly one or the other + assert!( + destroy_result == CUresult::CUDA_ERROR_INVALID_CONTEXT + || destroy_result == CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED + ); + } + + cuda_driver_test!(no_current_on_init); + + fn no_current_on_init<T: CudaDriverFns>() { + assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); + let mut ctx = 1 as *mut c_void; + assert_eq!(T::cuCtxGetCurrent(&mut ctx), CUresult::CUDA_SUCCESS); + assert_eq!(ctx, ptr::null_mut()); + } +} diff --git a/zluda/src/impl/device.rs b/zluda/src/impl/device.rs new file mode 100644 index 0000000..23b75f0 --- /dev/null +++ b/zluda/src/impl/device.rs @@ -0,0 +1,397 @@ +use super::{context, CUresult, GlobalState}; +use crate::cuda; +use cuda::{CUdevice_attribute, CUuuid_st}; +use std::{ + cmp, mem, + os::raw::{c_char, c_int}, + ptr, + sync::atomic::{AtomicU32, Ordering}, +}; + +const PROJECT_URL_SUFFIX: &'static str = " [github.com/vosen/ZLUDA]"; + +#[repr(transparent)] +#[derive(Clone, Copy, Eq, PartialEq, Hash)] +pub struct Index(pub c_int); + +pub struct Device { 
+ pub index: Index, + pub base: l0::Device, + pub default_queue: l0::CommandQueue, + pub l0_context: l0::Context, + pub primary_context: context::Context, + properties: Option<Box<l0::sys::ze_device_properties_t>>, + image_properties: Option<Box<l0::sys::ze_device_image_properties_t>>, + memory_properties: Option<Vec<l0::sys::ze_device_memory_properties_t>>, + compute_properties: Option<Box<l0::sys::ze_device_compute_properties_t>>, +} + +unsafe impl Send for Device {} + +impl Device { + // Unsafe because it does not fully initalize primary_context + unsafe fn new(drv: &l0::Driver, l0_dev: l0::Device, idx: usize) -> Result<Self, CUresult> { + let mut ctx = l0::Context::new(drv)?; + let queue = l0::CommandQueue::new(&mut ctx, &l0_dev)?; + let primary_context = context::Context::new(context::ContextData::new( + &mut ctx, + &l0_dev, + 0, + true, + ptr::null_mut(), + )?); + Ok(Self { + index: Index(idx as c_int), + base: l0_dev, + default_queue: queue, + l0_context: ctx, + primary_context: primary_context, + properties: None, + image_properties: None, + memory_properties: None, + compute_properties: None, + }) + } + + fn get_properties<'a>(&'a mut self) -> l0::Result<&'a l0::sys::ze_device_properties_t> { + if let Some(ref prop) = self.properties { + return Ok(prop); + } + match self.base.get_properties() { + Ok(prop) => Ok(self.properties.get_or_insert(prop)), + Err(e) => Err(e), + } + } + + fn get_image_properties(&mut self) -> l0::Result<&l0::sys::ze_device_image_properties_t> { + if let Some(ref prop) = self.image_properties { + return Ok(prop); + } + match self.base.get_image_properties() { + Ok(prop) => Ok(self.image_properties.get_or_insert(prop)), + Err(e) => Err(e), + } + } + + fn get_memory_properties(&mut self) -> l0::Result<&[l0::sys::ze_device_memory_properties_t]> { + if let Some(ref prop) = self.memory_properties { + return Ok(prop); + } + match self.base.get_memory_properties() { + Ok(prop) => Ok(self.memory_properties.get_or_insert(prop)), + Err(e) => 
Err(e), + } + } + + fn get_compute_properties(&mut self) -> l0::Result<&l0::sys::ze_device_compute_properties_t> { + if let Some(ref prop) = self.compute_properties { + return Ok(prop); + } + match self.base.get_compute_properties() { + Ok(prop) => Ok(self.compute_properties.get_or_insert(prop)), + Err(e) => Err(e), + } + } + + pub fn late_init(&mut self) { + self.primary_context.as_option_mut().unwrap().device = self as *mut _; + } + + fn get_max_simd(&mut self) -> l0::Result<u32> { + let props = self.get_compute_properties()?; + Ok(*props.subGroupSizes[0..props.numSubGroupSizes as usize] + .iter() + .max() + .unwrap()) + } +} + +pub fn init(driver: &l0::Driver) -> Result<Vec<Device>, CUresult> { + let ze_devices = driver.devices()?; + let mut devices = ze_devices + .into_iter() + .enumerate() + .map(|(idx, d)| unsafe { Device::new(driver, d, idx) }) + .collect::<Result<Vec<_>, _>>()?; + for dev in devices.iter_mut() { + dev.late_init(); + dev.primary_context.late_init(); + } + Ok(devices) +} + +pub fn get_count(count: *mut c_int) -> Result<(), CUresult> { + let len = GlobalState::lock(|state| state.devices.len())?; + unsafe { *count = len as c_int }; + Ok(()) +} + +pub fn get(device: *mut Index, ordinal: c_int) -> Result<(), CUresult> { + if device == ptr::null_mut() || ordinal < 0 { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + let len = GlobalState::lock(|state| state.devices.len())?; + if ordinal < (len as i32) { + unsafe { *device = Index(ordinal) }; + Ok(()) + } else { + Err(CUresult::CUDA_ERROR_INVALID_VALUE) + } +} + +pub fn get_name(name: *mut c_char, len: i32, dev_idx: Index) -> Result<(), CUresult> { + if name == ptr::null_mut() || len < 0 { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + let name_ptr = GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_properties()?; + Ok::<_, l0::sys::ze_result_t>(props.name.as_ptr()) + })??; + let name_len = (0..256) + .position(|i| unsafe { *name_ptr.add(i) } == 0) + .unwrap_or(256); + 
let mut dst_null_pos = cmp::min((len - 1) as usize, name_len); + unsafe { std::ptr::copy_nonoverlapping(name_ptr, name, dst_null_pos) }; + if name_len + PROJECT_URL_SUFFIX.len() < (len as usize) { + unsafe { + std::ptr::copy_nonoverlapping( + PROJECT_URL_SUFFIX.as_ptr(), + name.add(name_len) as *mut _, + PROJECT_URL_SUFFIX.len(), + ) + }; + dst_null_pos += PROJECT_URL_SUFFIX.len(); + } + unsafe { *(name.add(dst_null_pos)) = 0 }; + Ok(()) +} + +pub fn total_mem_v2(bytes: *mut usize, dev_idx: Index) -> Result<(), CUresult> { + if bytes == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + let mem_props = GlobalState::lock_device(dev_idx, |dev| { + let mem_props = dev.get_memory_properties()?; + Ok::<_, l0::sys::ze_result_t>(mem_props) + })??; + let max_mem = mem_props + .iter() + .map(|p| p.totalSize) + .max() + .ok_or(CUresult::CUDA_ERROR_ILLEGAL_STATE)?; + unsafe { *bytes = max_mem as usize }; + Ok(()) +} + +impl CUdevice_attribute { + fn get_static_value(self) -> Option<i32> { + match self { + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP => Some(1), + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT => Some(1), + // TODO: fix this for DG1 + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_INTEGRATED => Some(1), + // TODO: go back to this once we have more funcitonality implemented + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => Some(8), + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR => Some(0), + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY => Some(1), + _ => None, + } + } +} + +pub fn get_attribute( + pi: *mut i32, + attrib: CUdevice_attribute, + dev_idx: Index, +) -> Result<(), CUresult> { + if pi == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + if let Some(value) = attrib.get_static_value() { + unsafe { *pi = value }; + return Ok(()); + } + let value = match attrib { + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT => { + 
GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_properties()?; + Ok::<_, l0::sys::ze_result_t>(props.maxHardwareContexts as i32) + })?? + } + // Streaming Multiprocessor corresponds roughly to a sub-slice (thread group can't cross either) + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT => { + GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_properties()?; + Ok::<_, l0::sys::ze_result_t>((props.numSlices * props.numSubslicesPerSlice) as i32) + })?? + } + // I honestly don't know how to answer this query + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR => { + GlobalState::lock_device(dev_idx, |dev| { + let max_simd = dev.get_max_simd()?; + let props = dev.get_properties()?; + Ok::<_, l0::sys::ze_result_t>( + (props.numEUsPerSubslice * props.numThreadsPerEU * max_simd) as i32, + ) + })?? + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK => { + GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_compute_properties()?; + Ok::<_, l0::sys::ze_result_t>(cmp::min( + i32::max_value() as u32, + props.maxTotalGroupSize, + ) as i32) + })?? + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH => { + GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_image_properties()?; + Ok::<_, l0::sys::ze_result_t>(cmp::min( + props.maxImageDims1D, + c_int::max_value() as u32, + ) as c_int) + })?? + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X => { + GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_compute_properties()?; + Ok::<_, l0::sys::ze_result_t>(cmp::min( + i32::max_value() as u32, + props.maxGroupCountX, + ) as i32) + })?? + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y => { + GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_compute_properties()?; + Ok::<_, l0::sys::ze_result_t>(cmp::min( + i32::max_value() as u32, + props.maxGroupCountY, + ) as i32) + })?? 
+ } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z => { + GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_compute_properties()?; + Ok::<_, l0::sys::ze_result_t>(cmp::min( + i32::max_value() as u32, + props.maxGroupCountZ, + ) as i32) + })?? + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X => { + GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_compute_properties()?; + Ok::<_, l0::sys::ze_result_t>( + cmp::min(i32::max_value() as u32, props.maxGroupSizeX) as i32, + ) + })?? + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y => { + GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_compute_properties()?; + Ok::<_, l0::sys::ze_result_t>( + cmp::min(i32::max_value() as u32, props.maxGroupSizeY) as i32, + ) + })?? + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z => { + GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_compute_properties()?; + Ok::<_, l0::sys::ze_result_t>( + cmp::min(i32::max_value() as u32, props.maxGroupSizeZ) as i32, + ) + })?? + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK => { + GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_compute_properties()?; + Ok::<_, l0::sys::ze_result_t>(props.maxSharedLocalMemory as i32) + })?? + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_WARP_SIZE => { + GlobalState::lock_device(dev_idx, |dev| Ok::<_, CUresult>(dev.get_max_simd()? as i32))?? 
+ } + _ => { + // TODO: support more attributes for CUDA runtime + /* + return Err(l0::Error( + l0::sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, + )) + */ + return Ok(()); + } + }; + unsafe { *pi = value }; + Ok(()) +} + +pub fn get_uuid(uuid: *mut CUuuid_st, dev_idx: Index) -> Result<(), CUresult> { + let ze_uuid = GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_properties()?; + Ok::<_, l0::sys::ze_result_t>(props.uuid) + })??; + unsafe { + *uuid = CUuuid_st { + bytes: mem::transmute(ze_uuid.id), + } + }; + Ok(()) +} + +pub fn primary_ctx_get_state( + dev_idx: Index, + flags: *mut u32, + active: *mut i32, +) -> Result<(), CUresult> { + let (is_active, flags_value) = GlobalState::lock_device(dev_idx, |dev| { + // This is safe because primary context can't be dropped + let ctx_ptr = &mut dev.primary_context as *mut _; + let flags_ptr = + (&unsafe { dev.primary_context.as_ref_unchecked() }.flags) as *const AtomicU32; + let is_active = context::CONTEXT_STACK + .with(|stack| stack.borrow().last().map(|x| *x)) + .map(|current| current == ctx_ptr) + .unwrap_or(false); + let flags_value = unsafe { &*flags_ptr }.load(Ordering::Relaxed); + Ok::<_, l0::sys::ze_result_t>((is_active, flags_value)) + })??; + unsafe { *active = if is_active { 1 } else { 0 } }; + unsafe { *flags = flags_value }; + Ok(()) +} + +pub fn primary_ctx_retain( + pctx: *mut *mut context::Context, + dev_idx: Index, +) -> Result<(), CUresult> { + let ctx_ptr = GlobalState::lock_device(dev_idx, |dev| &mut dev.primary_context as *mut _)?; + unsafe { *pctx = ctx_ptr }; + Ok(()) +} + +// TODO: allow for retain/reset/release of primary context +pub(crate) fn primary_ctx_release_v2(_dev_idx: Index) -> CUresult { + CUresult::CUDA_SUCCESS +} + +#[cfg(test)] +mod test { + use super::super::test::CudaDriverFns; + use super::super::CUresult; + + cuda_driver_test!(primary_ctx_default_inactive); + + fn primary_ctx_default_inactive<T: CudaDriverFns>() { + assert_eq!(T::cuInit(0), 
CUresult::CUDA_SUCCESS); + let mut flags = u32::max_value(); + let mut active = i32::max_value(); + assert_eq!( + T::cuDevicePrimaryCtxGetState(0, &mut flags, &mut active), + CUresult::CUDA_SUCCESS + ); + assert_eq!(flags, 0); + assert_eq!(active, 0); + } +} diff --git a/zluda/src/impl/export_table.rs b/zluda/src/impl/export_table.rs new file mode 100644 index 0000000..87d7f40 --- /dev/null +++ b/zluda/src/impl/export_table.rs @@ -0,0 +1,372 @@ +use crate::cuda::CUresult;
+use crate::{
+ cuda::{CUcontext, CUdevice, CUmodule, CUuuid},
+ cuda_impl,
+};
+
+use super::{context, context::ContextData, device, module, Decuda, Encuda, GlobalState};
+use std::mem;
+use std::os::raw::{c_uint, c_ulong, c_ushort};
+use std::{
+ ffi::{c_void, CStr},
+ ptr, slice,
+};
+
+// Looks up one of the private driver export tables by its GUID.
+//
+// On a match the table address is written through `table` and `CUDA_SUCCESS`
+// is returned. `CUDA_ERROR_INVALID_VALUE` is returned when either pointer is
+// null, and `CUDA_ERROR_NOT_SUPPORTED` (leaving `*table` untouched) when the
+// GUID is not one of the tables defined in this file.
+pub fn get(table: *mut *const std::os::raw::c_void, id: *const CUuuid) -> CUresult {
+    if table.is_null() || id.is_null() {
+        return CUresult::CUDA_ERROR_INVALID_VALUE;
+    }
+    let requested = unsafe { *id };
+    let vtable: *const std::os::raw::c_void = match requested {
+        TOOLS_RUNTIME_CALLBACK_HOOKS_GUID => {
+            TOOLS_RUNTIME_CALLBACK_HOOKS_VTABLE.as_ptr() as *const _
+        }
+        CUDART_INTERFACE_GUID => CUDART_INTERFACE_VTABLE.as_ptr() as *const _,
+        // No table is defined for this GUID; the caller receives the
+        // placeholder address 1.
+        TOOLS_TLS_GUID => 1 as _,
+        CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID => {
+            CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE.as_ptr() as *const _
+        }
+        _ => return CUresult::CUDA_ERROR_NOT_SUPPORTED,
+    };
+    unsafe { *table = vtable };
+    CUresult::CUDA_SUCCESS
+}
+
+// GUID that `get` maps to `TOOLS_RUNTIME_CALLBACK_HOOKS_VTABLE`.
+const TOOLS_RUNTIME_CALLBACK_HOOKS_GUID: CUuuid = CUuuid {
+ bytes: [
+ 0xa0, 0x94, 0x79, 0x8c, 0x2e, 0x74, 0x2e, 0x74, 0x93, 0xf2, 0x08, 0x00, 0x20, 0x0c, 0x0a,
+ 0x66,
+ ],
+};
+// One pointer-sized slot of an export table. A slot holds either a type-erased
+// function pointer (`ptr`) or, in the leading slot of the length-prefixed
+// tables in this file, the size of the table in bytes (`length`).
+#[repr(C)]
+union VTableEntry {
+ ptr: *const (),
+ length: usize,
+}
+// Raw pointers are not `Sync`; this impl is what lets `VTableEntry` values be
+// stored in the `static` tables of this file.
+unsafe impl Sync for VTableEntry {}
+// Number of slots in the table below, including the leading length slot.
+const TOOLS_RUNTIME_CALLBACK_HOOKS_LENGTH: usize = 7;
+// Export table handed out by `get` for `TOOLS_RUNTIME_CALLBACK_HOOKS_GUID`.
+// Only slots 2 and 6 hold functions; every other function slot is null.
+static TOOLS_RUNTIME_CALLBACK_HOOKS_VTABLE: [VTableEntry; TOOLS_RUNTIME_CALLBACK_HOOKS_LENGTH] = [
+ // Slot 0: size of the whole table in bytes.
+ VTableEntry {
+ length: mem::size_of::<[VTableEntry; TOOLS_RUNTIME_CALLBACK_HOOKS_LENGTH]>(),
+ },
+ VTableEntry { ptr: ptr::null() },
+ // Slot 2.
+ VTableEntry {
+ ptr: runtime_callback_hooks_fn1 as *const (),
+ },
+ VTableEntry { ptr: ptr::null() },
+ VTableEntry { ptr: ptr::null() },
+ VTableEntry { ptr: ptr::null() },
+ // Slot 6.
+ VTableEntry {
+ ptr: runtime_callback_hooks_fn5 as *const (),
+ },
+];
+// Scratch buffer handed out by `runtime_callback_hooks_fn1`.
+static mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN1_SPACE: [usize; 512] = [0; 512];
+
+// Slot 2 of `TOOLS_RUNTIME_CALLBACK_HOOKS_VTABLE`: reports the static scratch
+// buffer by storing its address in `*ptr` and its element count in `*size`.
+// Both pointers must be valid for writes.
+unsafe extern "C" fn runtime_callback_hooks_fn1(ptr: *mut *mut usize, size: *mut usize) {
+    let scratch = &mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN1_SPACE;
+    *ptr = scratch.as_mut_ptr();
+    *size = scratch.len();
+}
+
+// Two-byte scratch buffer handed out by `runtime_callback_hooks_fn5`.
+static mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE: [u8; 2] = [0; 2];
+
+// Slot 6 of `TOOLS_RUNTIME_CALLBACK_HOOKS_VTABLE`: stores the address of the
+// static scratch buffer in `*ptr` and its length in `*size`, and also returns
+// that address. Both pointers must be valid for writes.
+unsafe extern "C" fn runtime_callback_hooks_fn5(ptr: *mut *mut u8, size: *mut usize) -> *mut u8 {
+    let scratch = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
+    *ptr = scratch;
+    *size = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.len();
+    scratch
+}
+
+// GUID that `get` maps to `CUDART_INTERFACE_VTABLE`.
+const CUDART_INTERFACE_GUID: CUuuid = CUuuid {
+ bytes: [
+ 0x6b, 0xd5, 0xfb, 0x6c, 0x5b, 0xf4, 0xe7, 0x4a, 0x89, 0x87, 0xd9, 0x39, 0x12, 0xfd, 0x9d,
+ 0xf9,
+ ],
+};
+
+// Number of slots in the table below, including the leading length slot.
+const CUDART_INTERFACE_LENGTH: usize = 10;
+// Export table handed out by `get` for `CUDART_INTERFACE_GUID`.
+// Slots 2, 6 and 7 hold functions; every other function slot is null.
+static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [
+ // Slot 0: size of the whole table in bytes.
+ VTableEntry {
+ length: mem::size_of::<[VTableEntry; CUDART_INTERFACE_LENGTH]>(),
+ },
+ VTableEntry { ptr: ptr::null() },
+ // Slot 2: fetch the primary context of a device.
+ VTableEntry {
+ ptr: cudart_interface_fn1 as *const (),
+ },
+ VTableEntry { ptr: ptr::null() },
+ VTableEntry { ptr: ptr::null() },
+ VTableEntry { ptr: ptr::null() },
+ // Slot 6: build a module from a fat binary.
+ VTableEntry {
+ ptr: get_module_from_cubin as *const (),
+ },
+ // Slot 7: no-op stub.
+ VTableEntry {
+ ptr: cudart_interface_fn6 as *const (),
+ },
+ VTableEntry { ptr: ptr::null() },
+ VTableEntry { ptr: ptr::null() },
+];
+
+// Slot 2 of `CUDART_INTERFACE_VTABLE`: FFI shim that converts the CUDA handle
+// types to their implementation types and forwards to
+// `cudart_interface_fn1_impl`.
+unsafe extern "C" fn cudart_interface_fn1(pctx: *mut CUcontext, dev: CUdevice) -> CUresult {
+    cudart_interface_fn1_impl(pctx.decuda(), dev.decuda()).encuda()
+}
+
+// Stores a pointer to the primary context of device `dev` in `*pctx`.
+//
+// Returns `CUDA_ERROR_INVALID_VALUE` when `pctx` is null; any error reported
+// by `GlobalState::lock_device` is propagated unchanged.
+fn cudart_interface_fn1_impl(
+    pctx: *mut *mut context::Context,
+    dev: device::Index,
+) -> Result<(), CUresult> {
+    // Reject a null out-pointer instead of writing through it.
+    if pctx.is_null() {
+        return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
+    }
+    let ctx_ptr = GlobalState::lock_device(dev, |d| &mut d.primary_context as *mut _)?;
+    unsafe { *pctx = ctx_ptr };
+    Ok(())
+}
+
+/*
+fat_cubin:
+typedef struct {
+ int magic;
+ int version;
+ const unsigned long long* data;
+ void *filename_or_fatbins; /* version 1: offline filename,
+ * version 2: array of prelinked fatbins */
+} __fatBinC_Wrapper_t;
+
+data starts with this header:
+#define FATBIN_MAGIC 0xBA55ED50U
+#define OLD_STYLE_FATBIN_MAGIC 0x1EE55A01U
+#define FATBIN_VERSION 0x0001U
+
+struct fatbinary_ALIGN_(8) fatBinaryHeader
+{
+ unsigned int magic; // FATBIN_MAGIC
+ unsigned short version; // FATBIN_VERSION
+ unsigned short headerSize;
+ unsigned long long int fatSize; // size of the entire fat binary excluding this header
+};
+
+the binary payload follows the header
+
+*/
+
+// Magic number and version that `get_module_from_cubin` requires in the wrapper.
+const FATBINC_MAGIC: c_uint = 0x466243B1;
+const FATBINC_VERSION: c_uint = 0x1;
+
+// Rust mirror of `__fatBinC_Wrapper_t` from the layout comment above.
+// `data` points at the fat binary header; `filename_or_fatbins` is not read
+// anywhere in this file.
+#[repr(C)]
+struct FatbincWrapper {
+ magic: c_uint,
+ version: c_uint,
+ data: *const FatbinHeader,
+ filename_or_fatbins: *const c_void,
+}
+
+// Magic number and version expected at the start of the fat binary data
+// (`FATBIN_MAGIC` / `FATBIN_VERSION` in the layout comment above).
+const FATBIN_MAGIC: c_uint = 0xBA55ED50;
+const FATBIN_VERSION: c_ushort = 0x01;
+
+// Rust mirror of `struct fatBinaryHeader` from the layout comment above.
+#[repr(C, align(8))]
+struct FatbinHeader {
+    magic: c_uint,
+    version: c_ushort,
+    header_size: c_ushort,
+    // `unsigned long long` in the C layout, i.e. 64 bits on every platform.
+    // `c_ulong` is only 32 bits wide on Windows, so a fixed-width type is used.
+    files_size: u64, // excluding frame header, size of all blocks framed by this frame
+}
+
+// `kind` / `version` values that mark an entry as PTX in the header format
+// understood by this file (see `get_ptx_files`).
+const FATBIN_FILE_HEADER_KIND_PTX: c_ushort = 0x01;
+const FATBIN_FILE_HEADER_VERSION_CURRENT: c_ushort = 0x101;
+
+// Header that precedes every file stored inside a fat binary.
+//
+// assembly file header is a bit different, but we don't care
+#[repr(C)]
+struct FatbinFileHeader {
+    kind: c_ushort,
+    version: c_ushort,
+    // Offset from the start of this header to the payload.
+    header_size: c_uint,
+    // `header_size + padded_payload_size` is the distance to the next entry.
+    padded_payload_size: c_uint,
+    unknown0: c_uint, // check if it's written into separately
+    // Number of (compressed) payload bytes.
+    payload_size: c_uint,
+    unknown1: c_uint,
+    unknown2: c_uint,
+    sm_version: c_uint,
+    bit_width: c_uint,
+    unknown3: c_uint,
+    // The file layout must not depend on the host platform. `c_ulong` is
+    // 64 bits on LP64 targets but 32 bits on Windows, which would move
+    // `uncompressed_payload` to a different offset there, so the trailing
+    // fields use a fixed 64-bit type.
+    unknown4: u64,
+    unknown5: u64,
+    // Size of the payload after decompression.
+    uncompressed_payload: u64,
+}
+
+// Slot 6 of `CUDART_INTERFACE_VTABLE`: builds a module from the PTX embedded
+// in a fat binary.
+//
+// After validating the wrapper and header magic/version, the PTX entries are
+// tried from highest to lowest `sm_version`. The first entry that
+// decompresses, is NUL-terminated UTF-8 and translates successfully is loaded
+// into `*result`.
+//
+// Returns:
+// * `CUDA_ERROR_NOT_SUPPORTED` if `ptr1` or `ptr2` is non-null,
+// * `CUDA_ERROR_INVALID_VALUE` for a null pointer or a magic/version mismatch,
+// * the error from `module::load_data_impl` if loading the translated module fails,
+// * `CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE` if no PTX entry was usable.
+unsafe extern "C" fn get_module_from_cubin(
+    result: *mut CUmodule,
+    fatbinc_wrapper: *const FatbincWrapper,
+    ptr1: *mut c_void,
+    ptr2: *mut c_void,
+) -> CUresult {
+    // Not sure what those two parameters are actually used for,
+    // they are somehow involved in __cudaRegisterHostVar
+    if !ptr1.is_null() || !ptr2.is_null() {
+        return CUresult::CUDA_ERROR_NOT_SUPPORTED;
+    }
+    // Null checks come first: nothing handed over by the caller may be
+    // dereferenced before it is known to be non-null.
+    if result.is_null() || fatbinc_wrapper.is_null() {
+        return CUresult::CUDA_ERROR_INVALID_VALUE;
+    }
+    if (*fatbinc_wrapper).magic != FATBINC_MAGIC
+        || (*fatbinc_wrapper).version != FATBINC_VERSION
+    {
+        return CUresult::CUDA_ERROR_INVALID_VALUE;
+    }
+    let result = result.decuda();
+    let fatbin_header = (*fatbinc_wrapper).data;
+    if fatbin_header.is_null()
+        || (*fatbin_header).magic != FATBIN_MAGIC
+        || (*fatbin_header).version != FATBIN_VERSION
+    {
+        return CUresult::CUDA_ERROR_INVALID_VALUE;
+    }
+    let first_file = (fatbin_header as *const u8).add((*fatbin_header).header_size as usize);
+    let end = first_file.add((*fatbin_header).files_size as usize);
+    let mut ptx_files = get_ptx_files(first_file, end);
+    // Try PTX built for the newest SM version first.
+    ptx_files.sort_unstable_by_key(|f| std::cmp::Reverse((**f).sm_version));
+    for file in ptx_files {
+        let compressed = slice::from_raw_parts(
+            (file as *const u8).add((*file).header_size as usize),
+            (*file).payload_size as usize,
+        );
+        // The lz4 API takes the size as `i32`; skip entries whose declared
+        // size does not fit instead of silently truncating it.
+        let uncompressed_size = (*file).uncompressed_payload as u64;
+        if uncompressed_size > i32::max_value() as u64 {
+            continue;
+        }
+        // A corrupt entry must not panic: a panic may not escape an
+        // `extern "C"` function. Fall through to the next entry instead.
+        let kernel_text =
+            match lz4::block::decompress(compressed, Some(uncompressed_size as i32)) {
+                Ok(text) => text,
+                Err(_) => continue,
+            };
+        let kernel_text_string = match CStr::from_bytes_with_nul(&kernel_text)
+            .ok()
+            .and_then(|c_str| c_str.to_str().ok())
+        {
+            Some(s) => s,
+            None => continue,
+        };
+        let module = match module::SpirvModule::new(kernel_text_string) {
+            Ok(module) => module,
+            Err(_) => continue,
+        };
+        // The first entry that translates decides the outcome.
+        return match module::load_data_impl(result, module) {
+            Ok(()) => CUresult::CUDA_SUCCESS,
+            Err(err) => err,
+        };
+    }
+    CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
+}
+
+// Walks the file entries stored in `[file, end)` and returns pointers to the
+// ones that are PTX with the current header version.
+//
+// Each entry is a `FatbinFileHeader` followed by its payload; the next entry
+// starts `header_size + padded_payload_size` bytes later. The walk stops early
+// on truncated or malformed data: when fewer bytes than one header remain,
+// when an entry declares a total size of zero (which would otherwise never
+// advance), or when an entry claims to extend past `end`.
+//
+// Safety: `file..end` must be a readable region and every entry must be
+// suitably aligned for `FatbinFileHeader`.
+unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
+    let mut index = file;
+    let mut result = Vec::new();
+    while index < end {
+        let remaining = end as usize - index as usize;
+        // Never read a header that is not completely inside the region.
+        if remaining < mem::size_of::<FatbinFileHeader>() {
+            break;
+        }
+        let file = index as *const FatbinFileHeader;
+        if (*file).kind == FATBIN_FILE_HEADER_KIND_PTX
+            && (*file).version == FATBIN_FILE_HEADER_VERSION_CURRENT
+        {
+            result.push(file);
+        }
+        let entry_size = (*file).header_size as usize + (*file).padded_payload_size as usize;
+        // A zero-sized entry would loop forever; an oversized one would move
+        // the cursor outside the region.
+        if entry_size == 0 || entry_size > remaining {
+            break;
+        }
+        index = index.add(entry_size);
+    }
+    result
+}
+
+// Slot 7 of `CUDART_INTERFACE_VTABLE`: a no-op stub that ignores its argument.
+unsafe extern "C" fn cudart_interface_fn6(_: u64) {}
+
+// GUID for which `get` returns the placeholder table address 1 rather than a
+// real table.
+const TOOLS_TLS_GUID: CUuuid = CUuuid {
+ bytes: [
+ 0x42, 0xd8, 0x5a, 0x81, 0x23, 0xf6, 0xcb, 0x47, 0x82, 0x98, 0xf6, 0xe7, 0x8a, 0x3a, 0xec,
+ 0xdc,
+ ],
+};
+
+// GUID that `get` maps to `CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE`.
+const CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID: CUuuid = CUuuid {
+ bytes: [
+ 0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, 0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95,
+ 0x93,
+ ],
+};
+
+// Export table handed out by `get` for
+// `CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID`. Unlike the other tables in
+// this file it has no leading length slot: slot 0 is already a function.
+// the table is much bigger and starts earlier
+static CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE: [VTableEntry; 4] = [
+ // Slot 0: record manager/state/destructor callback on a context.
+ VTableEntry {
+ ptr: context_local_storage_ctor as *const (),
+ },
+ // Slot 1: stub that always returns 0.
+ VTableEntry {
+ ptr: context_local_storage_dtor as *const (),
+ },
+ // Slot 2: read back the state recorded through slot 0.
+ VTableEntry {
+ ptr: context_local_storage_get_state as *const (),
+ },
+ VTableEntry { ptr: ptr::null() },
+];
+
+// Signature of the destructor callback that is passed to
+// `context_local_storage_ctor` and stored on the context.
+type ContextStateDtor = extern "C" fn(
+    CUcontext,
+    *mut cuda_impl::rt::ContextStateManager,
+    *mut cuda_impl::rt::ContextState,
+);
+
+// some kind of ctor
+//
+// FFI shim: converts the context handle and forwards to
+// `context_local_storage_ctor_impl`.
+unsafe extern "C" fn context_local_storage_ctor(
+    cu_ctx: CUcontext, // always zero
+    mgr: *mut cuda_impl::rt::ContextStateManager,
+    ctx_state: *mut cuda_impl::rt::ContextState,
+    // clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
+    dtor_cb: Option<ContextStateDtor>,
+) -> CUresult {
+    let outcome = context_local_storage_ctor_impl(cu_ctx.decuda(), mgr, ctx_state, dtor_cb);
+    outcome.encuda()
+}
+
+// Records the manager, the state and the destructor callback on the context
+// selected by `lock_context` (the current context when `cu_ctx` is null).
+fn context_local_storage_ctor_impl(
+    cu_ctx: *mut context::Context,
+    mgr: *mut cuda_impl::rt::ContextStateManager,
+    ctx_state: *mut cuda_impl::rt::ContextState,
+    dtor_cb: Option<ContextStateDtor>,
+) -> Result<(), CUresult> {
+    lock_context(cu_ctx, |target: &mut ContextData| {
+        target.cuda_dtor_cb = dtor_cb;
+        target.cuda_state = ctx_state;
+        target.cuda_manager = mgr;
+    })
+}
+
+// some kind of dtor
+// Slot 1 of `CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE`: ignores both
+// arguments and always returns 0.
+unsafe extern "C" fn context_local_storage_dtor(_: *mut usize, _: *mut ()) -> u32 {
+ 0
+}
+
+// Slot 2 of `CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE`: FFI shim that
+// converts the context handle and forwards to
+// `context_local_storage_get_state_impl`.
+unsafe extern "C" fn context_local_storage_get_state(
+    ctx_state: *mut *mut cuda_impl::rt::ContextState,
+    cu_ctx: CUcontext,
+    state_mgr: *mut cuda_impl::rt::ContextStateManager,
+) -> CUresult {
+    let outcome = context_local_storage_get_state_impl(ctx_state, cu_ctx.decuda(), state_mgr);
+    outcome.encuda()
+}
+
+// Writes the state pointer stored on the context to `*ctx_state`.
+//
+// Fails with `CUDA_ERROR_INVALID_VALUE` when no state has been stored (the
+// stored pointer is null); errors from `lock_context` are propagated. The
+// manager argument is not used.
+fn context_local_storage_get_state_impl(
+    ctx_state: *mut *mut cuda_impl::rt::ContextState,
+    cu_ctx: *mut context::Context,
+    _: *mut cuda_impl::rt::ContextStateManager,
+) -> Result<(), CUresult> {
+    let stored = lock_context(cu_ctx, |ctx: &mut ContextData| ctx.cuda_state)?;
+    if stored.is_null() {
+        return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
+    }
+    unsafe { *ctx_state = stored };
+    Ok(())
+}
+
+// Runs `fn_impl` on the data of a context while the global state is locked.
+//
+// A null `cu_ctx` selects the calling thread's current context; otherwise the
+// given context is used after its liveness check (`as_result_mut`). Errors
+// from locking or from the liveness check are returned to the caller.
+fn lock_context<T>(
+    cu_ctx: *mut context::Context,
+    fn_impl: impl FnOnce(&mut ContextData) -> T,
+) -> Result<T, CUresult> {
+    if cu_ctx.is_null() {
+        return GlobalState::lock_current_context(fn_impl);
+    }
+    GlobalState::lock(|_| {
+        let live_ctx = unsafe { &mut *cu_ctx }.as_result_mut();
+        live_ctx.map(fn_impl)
+    })?
+}
diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs new file mode 100644 index 0000000..27bf9b6 --- /dev/null +++ b/zluda/src/impl/function.rs @@ -0,0 +1,112 @@ +use ::std::os::raw::{c_uint, c_void}; +use std::{hint, ptr}; + +use crate::cuda::CUfunction_attribute; + +use super::{stream::Stream, CUresult, GlobalState, HasLivenessCookie, LiveCheck}; + +pub type Function = LiveCheck<FunctionData>; + +impl HasLivenessCookie for FunctionData { + #[cfg(target_pointer_width = "64")] + const COOKIE: usize = 0x5e2ab14d5840678e; + + #[cfg(target_pointer_width = "32")] + const COOKIE: usize = 0x33e6a1e6; + + const LIVENESS_FAIL: CUresult = CUresult::CUDA_ERROR_INVALID_HANDLE; + + fn try_drop(&mut self) -> Result<(), CUresult> { + Ok(()) + } +} + +pub struct FunctionData { + pub base: l0::Kernel<'static>, + pub arg_size: Vec<usize>, + pub use_shared_mem: bool, + pub properties: Option<Box<l0::sys::ze_kernel_properties_t>>, +} + +impl FunctionData { + fn get_properties(&mut self) -> Result<&l0::sys::ze_kernel_properties_t, l0::sys::ze_result_t> { + if let None = self.properties { + self.properties = Some(self.base.get_properties()?) 
+ } + match self.properties { + Some(ref props) => Ok(props.as_ref()), + None => unsafe { hint::unreachable_unchecked() }, + } + } +} + +pub fn launch_kernel( + f: *mut Function, + grid_dim_x: c_uint, + grid_dim_y: c_uint, + grid_dim_z: c_uint, + block_dim_x: c_uint, + block_dim_y: c_uint, + block_dim_z: c_uint, + shared_mem_bytes: c_uint, + hstream: *mut Stream, + kernel_params: *mut *mut c_void, + extra: *mut *mut c_void, +) -> Result<(), CUresult> { + if f == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + if extra != ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED); + } + GlobalState::lock_stream(hstream, |stream| { + let func: &mut FunctionData = unsafe { &mut *f }.as_result_mut()?; + for (i, arg_size) in func.arg_size.iter().enumerate() { + unsafe { + func.base + .set_arg_raw(i as u32, *arg_size, *kernel_params.add(i))? + }; + } + if func.use_shared_mem { + unsafe { + func.base.set_arg_raw( + func.arg_size.len() as u32, + shared_mem_bytes as usize, + ptr::null(), + )? + }; + } + func.base + .set_group_size(block_dim_x, block_dim_y, block_dim_z)?; + let mut cmd_list = stream.command_list()?; + cmd_list.append_launch_kernel( + &mut func.base, + &[grid_dim_x, grid_dim_y, grid_dim_z], + None, + &mut [], + )?; + stream.queue.execute(cmd_list)?; + Ok(()) + })? 
+} + +pub(crate) fn get_attribute( + pi: *mut i32, + attrib: CUfunction_attribute, + func: *mut Function, +) -> Result<(), CUresult> { + if pi == ptr::null_mut() || func == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + match attrib { + CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => { + let max_threads = GlobalState::lock_function(func, |func| { + let props = func.get_properties()?; + Ok::<_, CUresult>(props.maxSubgroupSize * props.maxNumSubgroups) + })??; + unsafe { *pi = max_threads as i32 }; + Ok(()) + } + _ => Err(CUresult::CUDA_ERROR_NOT_SUPPORTED), + } +} diff --git a/zluda/src/impl/memory.rs b/zluda/src/impl/memory.rs new file mode 100644 index 0000000..f33a08c --- /dev/null +++ b/zluda/src/impl/memory.rs @@ -0,0 +1,100 @@ +use super::{stream, CUresult, GlobalState};
+use std::{ffi::c_void, mem};
+
+// Allocates `bytesize` bytes of device memory on the device of the current
+// context and stores the address in `*dptr`.
+//
+// Returns `CUDA_ERROR_INVALID_VALUE` when `dptr` is null or `bytesize` is 0
+// (the CUDA driver API rejects zero-sized allocations). Errors from locking
+// the current context or from the allocation itself are propagated, and
+// `*dptr` is left untouched in every error case.
+pub fn alloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> Result<(), CUresult> {
+    if dptr.is_null() || bytesize == 0 {
+        return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
+    }
+    let ptr = GlobalState::lock_current_context(|ctx| {
+        let dev = unsafe { &mut *ctx.device };
+        Ok::<_, CUresult>(unsafe { dev.base.mem_alloc_device(&mut dev.l0_context, bytesize, 0) }?)
+    })??;
+    unsafe { *dptr = ptr };
+    Ok(())
+}
+
+// Copies `bytesize` bytes from `src` to `dst` by appending a memory copy to a
+// command list of the legacy default stream and executing it on that
+// stream's queue.
+pub fn copy_v2(dst: *mut c_void, src: *const c_void, bytesize: usize) -> Result<(), CUresult> {
+    GlobalState::lock_stream(
+        stream::CU_STREAM_LEGACY,
+        |legacy| -> Result<(), CUresult> {
+            let mut commands = legacy.command_list()?;
+            unsafe { commands.append_memory_copy_unsafe(dst, src, bytesize, None, &mut []) }?;
+            legacy.queue.execute(commands)?;
+            Ok(())
+        },
+    )?
+}
+
+// Frees device memory through the Level Zero context of the current
+// context's device.
+//
+// Any failure to lock the current context is reported as
+// `CUDA_ERROR_INVALID_VALUE`; an error from the free itself is converted to
+// `CUresult` and returned as is.
+pub fn free_v2(ptr: *mut c_void) -> Result<(), CUresult> {
+    let freed = GlobalState::lock_current_context(|ctx| -> Result<(), CUresult> {
+        let dev = unsafe { &mut *ctx.device };
+        unsafe { dev.l0_context.mem_free(ptr) }?;
+        Ok(())
+    });
+    match freed {
+        Ok(inner) => inner,
+        Err(_) => Err(CUresult::CUDA_ERROR_INVALID_VALUE),
+    }
+}
+
+// Fills `n` 32-bit words starting at `dst` with the value `ui`, using a
+// memory fill executed on the legacy default stream.
+pub(crate) fn set_d32_v2(dst: *mut c_void, ui: u32, n: usize) -> Result<(), CUresult> {
+    GlobalState::lock_stream(
+        stream::CU_STREAM_LEGACY,
+        |legacy| -> Result<(), CUresult> {
+            let mut commands = legacy.command_list()?;
+            let byte_count = mem::size_of::<u32>() * n;
+            unsafe { commands.append_memory_fill_unsafe(dst, &ui, byte_count, None, &mut []) }?;
+            legacy.queue.execute(commands)?;
+            Ok(())
+        },
+    )?
+}
+
+// Fills `n` bytes starting at `dst` with the value `uc`, using a memory fill
+// executed on the legacy default stream.
+pub(crate) fn set_d8_v2(dst: *mut c_void, uc: u8, n: usize) -> Result<(), CUresult> {
+    GlobalState::lock_stream(
+        stream::CU_STREAM_LEGACY,
+        |legacy| -> Result<(), CUresult> {
+            let mut commands = legacy.command_list()?;
+            let byte_count = mem::size_of::<u8>() * n;
+            unsafe { commands.append_memory_fill_unsafe(dst, &uc, byte_count, None, &mut []) }?;
+            legacy.queue.execute(commands)?;
+            Ok(())
+        },
+    )?
+}
+
+// Driver-level tests for allocation and freeing. Each test is generic over
+// `CudaDriverFns` and is registered through the `cuda_driver_test!` macro.
+#[cfg(test)]
+mod test {
+ use super::super::test::CudaDriverFns;
+ use super::super::CUresult;
+ use std::ptr;
+
+ cuda_driver_test!(alloc_without_ctx);
+
+ // Allocating with no current context must fail with
+ // `CUDA_ERROR_INVALID_CONTEXT` and must not touch the output pointer.
+ fn alloc_without_ctx<T: CudaDriverFns>() {
+ assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
+ let mut mem = ptr::null_mut();
+ assert_eq!(
+ T::cuMemAlloc_v2(&mut mem, std::mem::size_of::<usize>()),
+ CUresult::CUDA_ERROR_INVALID_CONTEXT
+ );
+ assert_eq!(mem, ptr::null_mut());
+ }
+
+ cuda_driver_test!(alloc_with_ctx);
+
+ // With a context created, allocation succeeds and yields a non-null pointer.
+ fn alloc_with_ctx<T: CudaDriverFns>() {
+ assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
+ let mut ctx = ptr::null_mut();
+ assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS);
+ let mut mem = ptr::null_mut();
+ assert_eq!(
+ T::cuMemAlloc_v2(&mut mem, std::mem::size_of::<usize>()),
+ CUresult::CUDA_SUCCESS
+ );
+ assert_ne!(mem, ptr::null_mut());
+ assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS);
+ }
+
+ cuda_driver_test!(free_without_ctx);
+
+ // Freeing after the owning context has been destroyed must fail with
+ // `CUDA_ERROR_INVALID_VALUE`.
+ fn free_without_ctx<T: CudaDriverFns>() {
+ assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
+ let mut ctx = ptr::null_mut();
+ assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS);
+ let mut mem = ptr::null_mut();
+ assert_eq!(
+ T::cuMemAlloc_v2(&mut mem, std::mem::size_of::<usize>()),
+ CUresult::CUDA_SUCCESS
+ );
+ assert_ne!(mem, ptr::null_mut());
+ assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS);
+ assert_eq!(T::cuMemFree_v2(mem), CUresult::CUDA_ERROR_INVALID_VALUE);
+ }
+}
diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs new file mode 100644 index 0000000..086d260 --- /dev/null +++ b/zluda/src/impl/mod.rs @@ -0,0 +1,351 @@ +use crate::{ + cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUmod_st, CUresult, CUstream_st}, + r#impl::device::Device, +}; +use std::{ + ffi::c_void, + mem::{self, ManuallyDrop}, + os::raw::c_int, + ptr, + sync::Mutex, + sync::TryLockError, +}; + +#[cfg(test)] +#[macro_use] +pub mod test; +pub mod context; +pub mod device; +pub mod export_table; +pub mod function; +pub mod memory; +pub mod module; +pub mod stream; + +#[cfg(debug_assertions)] +pub fn unimplemented() -> CUresult { + unimplemented!() +} + +#[cfg(not(debug_assertions))] +pub fn unimplemented() -> CUresult { + CUresult::CUDA_ERROR_NOT_SUPPORTED +} + +pub trait HasLivenessCookie: Sized { + const COOKIE: usize; + const LIVENESS_FAIL: CUresult; + + fn try_drop(&mut self) -> Result<(), CUresult>; +} + +// This struct is a best-effort check if wrapped value has been dropped, +// while it's inherently safe, its use coming from FFI is very unsafe +#[repr(C)] +pub struct LiveCheck<T: HasLivenessCookie> { + cookie: usize, + data: ManuallyDrop<T>, +} + +impl<T: HasLivenessCookie> LiveCheck<T> { + pub fn new(data: T) -> Self { + LiveCheck { + cookie: T::COOKIE, + data: ManuallyDrop::new(data), + } + } + + fn destroy_impl(this: *mut Self) -> Result<(), CUresult> { + let mut ctx_box = ManuallyDrop::new(unsafe { Box::from_raw(this) }); + ctx_box.try_drop()?; + unsafe { ManuallyDrop::drop(&mut ctx_box) }; + Ok(()) + } + + unsafe fn ptr_from_inner(this: *mut T) -> *mut Self { + let outer_ptr = (this as *mut u8).sub(mem::size_of::<usize>()); + outer_ptr as *mut Self + } + + pub unsafe fn as_ref_unchecked(&self) -> &T { + &self.data + } + + pub fn as_option_mut(&mut self) -> Option<&mut T> { + if self.cookie == T::COOKIE { + Some(&mut self.data) + } else { + None + } + } + + pub fn as_result(&self) -> Result<&T, CUresult> { + if self.cookie == T::COOKIE 
{ + Ok(&self.data) + } else { + Err(T::LIVENESS_FAIL) + } + } + + pub fn as_result_mut(&mut self) -> Result<&mut T, CUresult> { + if self.cookie == T::COOKIE { + Ok(&mut self.data) + } else { + Err(T::LIVENESS_FAIL) + } + } + + #[must_use] + pub fn try_drop(&mut self) -> Result<(), CUresult> { + if self.cookie == T::COOKIE { + self.cookie = 0; + self.data.try_drop()?; + unsafe { ManuallyDrop::drop(&mut self.data) }; + return Ok(()); + } + Err(T::LIVENESS_FAIL) + } +} + +impl<T: HasLivenessCookie> Drop for LiveCheck<T> { + fn drop(&mut self) { + self.cookie = 0; + } +} + +pub trait CudaRepr: Sized { + type Impl: Sized; +} + +impl<T: CudaRepr> CudaRepr for *mut T { + type Impl = *mut T::Impl; +} + +pub trait Decuda<To> { + fn decuda(self: Self) -> To; +} + +impl<T: CudaRepr> Decuda<*mut T::Impl> for *mut T { + fn decuda(self: Self) -> *mut T::Impl { + self as *mut _ + } +} + +impl From<l0::sys::ze_result_t> for CUresult { + fn from(result: l0::sys::ze_result_t) -> Self { + match result { + l0::sys::ze_result_t::ZE_RESULT_SUCCESS => CUresult::CUDA_SUCCESS, + l0_sys::ze_result_t::ZE_RESULT_ERROR_UNINITIALIZED => { + CUresult::CUDA_ERROR_NOT_INITIALIZED + } + l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ENUMERATION => { + CUresult::CUDA_ERROR_INVALID_VALUE + } + l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT => { + CUresult::CUDA_ERROR_INVALID_VALUE + } + l0_sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY => { + CUresult::CUDA_ERROR_OUT_OF_MEMORY + } + l0_sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE => { + CUresult::CUDA_ERROR_NOT_SUPPORTED + } + _ => CUresult::CUDA_ERROR_UNKNOWN, + } + } +} + +impl<T> From<TryLockError<T>> for CUresult { + fn from(_: TryLockError<T>) -> Self { + CUresult::CUDA_ERROR_ILLEGAL_STATE + } +} + +pub trait Encuda { + type To: Sized; + fn encuda(self: Self) -> Self::To; +} + +impl Encuda for CUresult { + type To = CUresult; + fn encuda(self: Self) -> Self::To { + self + } +} + +impl Encuda for l0::sys::ze_result_t { + type 
To = CUresult; + fn encuda(self: Self) -> Self::To { + self.into() + } +} + +impl Encuda for () { + type To = CUresult; + fn encuda(self: Self) -> Self::To { + CUresult::CUDA_SUCCESS + } +} + +impl<T1: Encuda<To = CUresult>, T2: Encuda<To = CUresult>> Encuda for Result<T1, T2> { + type To = CUresult; + fn encuda(self: Self) -> Self::To { + match self { + Ok(e) => e.encuda(), + Err(e) => e.encuda(), + } + } +} + +lazy_static! { + static ref GLOBAL_STATE: Mutex<Option<GlobalState>> = Mutex::new(None); +} + +struct GlobalState { + devices: Vec<Device>, +} + +unsafe impl Send for GlobalState {} + +impl GlobalState { + fn lock<T>(f: impl FnOnce(&mut GlobalState) -> T) -> Result<T, CUresult> { + let mut mutex = GLOBAL_STATE + .lock() + .unwrap_or_else(|poison| poison.into_inner()); + let global_state = mutex.as_mut().ok_or(CUresult::CUDA_ERROR_ILLEGAL_STATE)?; + Ok(f(global_state)) + } + + fn lock_device<T>( + device::Index(dev_idx): device::Index, + f: impl FnOnce(&'static mut device::Device) -> T, + ) -> Result<T, CUresult> { + if dev_idx < 0 { + return Err(CUresult::CUDA_ERROR_INVALID_DEVICE); + } + Self::lock(|global_state| { + if dev_idx >= global_state.devices.len() as c_int { + Err(CUresult::CUDA_ERROR_INVALID_DEVICE) + } else { + Ok(f(unsafe { + transmute_lifetime_mut(&mut global_state.devices[dev_idx as usize]) + })) + } + })? + } + + fn lock_current_context<F: FnOnce(&mut context::ContextData) -> R, R>( + f: F, + ) -> Result<R, CUresult> { + Self::lock_current_context_unchecked(|ctx| Ok(f(ctx.as_result_mut()?)))? + } + + fn lock_current_context_unchecked<F: FnOnce(&mut context::Context) -> R, R>( + f: F, + ) -> Result<R, CUresult> { + context::CONTEXT_STACK.with(|stack| { + stack + .borrow_mut() + .last_mut() + .ok_or(CUresult::CUDA_ERROR_INVALID_CONTEXT) + .map(|ctx| GlobalState::lock(|_| f(unsafe { &mut **ctx })))? 
+ }) + } + + fn lock_stream<T>( + stream: *mut stream::Stream, + f: impl FnOnce(&mut stream::StreamData) -> T, + ) -> Result<T, CUresult> { + if stream == ptr::null_mut() + || stream == stream::CU_STREAM_LEGACY + || stream == stream::CU_STREAM_PER_THREAD + { + Self::lock_current_context(|ctx| Ok(f(&mut ctx.default_stream)))? + } else { + Self::lock(|_| { + let stream = unsafe { &mut *stream }.as_result_mut()?; + Ok(f(stream)) + })? + } + } + + fn lock_function<T>( + func: *mut function::Function, + f: impl FnOnce(&mut function::FunctionData) -> T, + ) -> Result<T, CUresult> { + if func == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_INVALID_HANDLE); + } + Self::lock(|_| { + let func = unsafe { &mut *func }.as_result_mut()?; + Ok(f(func)) + })? + } +} + +// TODO: implement +fn is_intel_gpu_driver(_: &l0::Driver) -> bool { + true +} + +pub fn init() -> Result<(), CUresult> { + let mut global_state = GLOBAL_STATE + .lock() + .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; + if global_state.is_some() { + return Ok(()); + } + l0::init()?; + let drivers = l0::Driver::get()?; + let devices = match drivers.into_iter().find(is_intel_gpu_driver) { + None => return Err(CUresult::CUDA_ERROR_UNKNOWN), + Some(driver) => device::init(&driver)?, + }; + *global_state = Some(GlobalState { devices }); + drop(global_state); + Ok(()) +} + +unsafe fn transmute_lifetime_mut<'a, 'b, T: ?Sized>(t: &'a mut T) -> &'b mut T { + mem::transmute(t) +} + +pub fn driver_get_version() -> c_int { + i32::max_value() +} + +impl<'a> CudaRepr for CUctx_st { + type Impl = context::Context; +} + +impl<'a> CudaRepr for CUdevice { + type Impl = device::Index; +} + +impl Decuda<device::Index> for CUdevice { + fn decuda(self) -> device::Index { + device::Index(self.0) + } +} + +impl<'a> CudaRepr for CUdeviceptr { + type Impl = *mut c_void; +} + +impl Decuda<*mut c_void> for CUdeviceptr { + fn decuda(self) -> *mut c_void { + self.0 as *mut _ + } +} + +impl<'a> CudaRepr for CUmod_st { + type Impl = 
module::Module; +} + +impl<'a> CudaRepr for CUfunc_st { + type Impl = function::Function; +} + +impl<'a> CudaRepr for CUstream_st { + type Impl = stream::Stream; +} diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs new file mode 100644 index 0000000..cba030e --- /dev/null +++ b/zluda/src/impl/module.rs @@ -0,0 +1,188 @@ +use std::{ + collections::hash_map, collections::HashMap, ffi::c_void, ffi::CStr, ffi::CString, mem, + os::raw::c_char, ptr, slice, +}; + +use super::{ + device, function::Function, function::FunctionData, CUresult, GlobalState, HasLivenessCookie, + LiveCheck, +}; +use ptx; + +pub type Module = LiveCheck<ModuleData>; + +impl HasLivenessCookie for ModuleData { + #[cfg(target_pointer_width = "64")] + const COOKIE: usize = 0xf1313bd46505f98a; + + #[cfg(target_pointer_width = "32")] + const COOKIE: usize = 0xbdbe3f15; + + const LIVENESS_FAIL: CUresult = CUresult::CUDA_ERROR_INVALID_HANDLE; + + fn try_drop(&mut self) -> Result<(), CUresult> { + Ok(()) + } +} + +pub struct ModuleData { + pub spirv: SpirvModule, + // This should be a Vec<>, but I'm feeling lazy + pub device_binaries: HashMap<device::Index, CompiledModule>, +} + +pub struct SpirvModule { + pub binaries: Vec<u32>, + pub kernel_info: HashMap<String, ptx::KernelInfo>, + pub should_link_ptx_impl: Option<&'static [u8]>, + pub build_options: CString, +} + +pub struct CompiledModule { + pub base: l0::Module, + pub kernels: HashMap<CString, Box<Function>>, +} + +impl<L, T, E> From<ptx::ParseError<L, T, E>> for CUresult { + fn from(_: ptx::ParseError<L, T, E>) -> Self { + CUresult::CUDA_ERROR_INVALID_PTX + } +} + +impl From<ptx::TranslateError> for CUresult { + fn from(_: ptx::TranslateError) -> Self { + CUresult::CUDA_ERROR_INVALID_PTX + } +} + +impl SpirvModule { + pub fn new_raw<'a>(text: *const c_char) -> Result<Self, CUresult> { + let u8_text = unsafe { CStr::from_ptr(text) }; + let ptx_text = u8_text + .to_str() + .map_err(|_| CUresult::CUDA_ERROR_INVALID_PTX)?; + 
Self::new(ptx_text) + } + + pub fn new<'a>(ptx_text: &str) -> Result<Self, CUresult> { + let mut errors = Vec::new(); + let ast = ptx::ModuleParser::new().parse(&mut errors, ptx_text)?; + let spirv_module = ptx::to_spirv_module(ast)?; + Ok(SpirvModule { + binaries: spirv_module.assemble(), + kernel_info: spirv_module.kernel_info, + should_link_ptx_impl: spirv_module.should_link_ptx_impl, + build_options: spirv_module.build_options, + }) + } + + pub fn compile(&self, ctx: &mut l0::Context, dev: &l0::Device) -> Result<l0::Module, CUresult> { + let byte_il = unsafe { + slice::from_raw_parts( + self.binaries.as_ptr() as *const u8, + self.binaries.len() * mem::size_of::<u32>(), + ) + }; + let l0_module = match self.should_link_ptx_impl { + None => { + l0::Module::build_spirv(ctx, dev, byte_il, Some(self.build_options.as_c_str())).0 + } + Some(ptx_impl) => { + l0::Module::build_link_spirv( + ctx, + &dev, + &[ptx_impl, byte_il], + Some(self.build_options.as_c_str()), + ) + .0 + } + }; + Ok(l0_module?) 
+ } +} + +pub fn get_function( + hfunc: *mut *mut Function, + hmod: *mut Module, + name: *const c_char, +) -> Result<(), CUresult> { + if hfunc == ptr::null_mut() || hmod == ptr::null_mut() || name == ptr::null() { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + let name = unsafe { CStr::from_ptr(name) }.to_owned(); + let function: *mut Function = GlobalState::lock_current_context(|ctx| { + let module = unsafe { &mut *hmod }.as_result_mut()?; + let device = unsafe { &mut *ctx.device }; + let compiled_module = match module.device_binaries.entry(device.index) { + hash_map::Entry::Occupied(entry) => entry.into_mut(), + hash_map::Entry::Vacant(entry) => { + let new_module = CompiledModule { + base: module.spirv.compile(&mut device.l0_context, &device.base)?, + kernels: HashMap::new(), + }; + entry.insert(new_module) + } + }; + let kernel = match compiled_module.kernels.entry(name) { + hash_map::Entry::Occupied(entry) => entry.into_mut().as_mut(), + hash_map::Entry::Vacant(entry) => { + let kernel_info = module + .spirv + .kernel_info + .get(unsafe { + std::str::from_utf8_unchecked(entry.key().as_c_str().to_bytes()) + }) + .ok_or(CUresult::CUDA_ERROR_NOT_FOUND)?; + let mut kernel = + l0::Kernel::new_resident(&compiled_module.base, entry.key().as_c_str())?; + kernel.set_indirect_access( + l0::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE + | l0::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST + | l0::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED + )?; + entry.insert(Box::new(Function::new(FunctionData { + base: kernel, + arg_size: kernel_info.arguments_sizes.clone(), + use_shared_mem: kernel_info.uses_shared_mem, + properties: None, + }))) + } + }; + Ok::<_, CUresult>(kernel as *mut _) + })??; + unsafe { *hfunc = function }; + Ok(()) +} + +pub(crate) fn load_data(pmod: *mut *mut Module, image: *const c_void) -> Result<(), CUresult> { + let spirv_data = 
SpirvModule::new_raw(image as *const _)?; + load_data_impl(pmod, spirv_data) +} + +pub fn load_data_impl(pmod: *mut *mut Module, spirv_data: SpirvModule) -> Result<(), CUresult> { + let module = GlobalState::lock_current_context(|ctx| { + let device = unsafe { &mut *ctx.device }; + let l0_module = spirv_data.compile(&mut device.l0_context, &device.base)?; + let mut device_binaries = HashMap::new(); + let compiled_module = CompiledModule { + base: l0_module, + kernels: HashMap::new(), + }; + device_binaries.insert(device.index, compiled_module); + let module_data = ModuleData { + spirv: spirv_data, + device_binaries, + }; + Ok::<_, CUresult>(module_data) + })??; + let module_ptr = Box::into_raw(Box::new(Module::new(module))); + unsafe { *pmod = module_ptr }; + Ok(()) +} + +pub(crate) fn unload(module: *mut Module) -> Result<(), CUresult> { + if module == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + GlobalState::lock(|_| Module::destroy_impl(module))? +} diff --git a/zluda/src/impl/stream.rs b/zluda/src/impl/stream.rs new file mode 100644 index 0000000..e212dfc --- /dev/null +++ b/zluda/src/impl/stream.rs @@ -0,0 +1,242 @@ +use super::{ + context::{Context, ContextData}, + CUresult, GlobalState, +}; +use std::{mem, ptr}; + +use super::{HasLivenessCookie, LiveCheck}; + +pub type Stream = LiveCheck<StreamData>; + +pub const CU_STREAM_LEGACY: *mut Stream = 1 as *mut _; +pub const CU_STREAM_PER_THREAD: *mut Stream = 2 as *mut _; + +impl HasLivenessCookie for StreamData { + #[cfg(target_pointer_width = "64")] + const COOKIE: usize = 0x512097354de18d35; + + #[cfg(target_pointer_width = "32")] + const COOKIE: usize = 0x77d5cc0b; + + const LIVENESS_FAIL: CUresult = CUresult::CUDA_ERROR_INVALID_HANDLE; + + fn try_drop(&mut self) -> Result<(), CUresult> { + if self.context != ptr::null_mut() { + let context = unsafe { &mut *self.context }; + if !context.streams.remove(&(self as *mut _)) { + return Err(CUresult::CUDA_ERROR_UNKNOWN); + } + } + Ok(()) 
+ } +} + +pub struct StreamData { + pub context: *mut ContextData, + pub queue: l0::CommandQueue, +} + +impl StreamData { + pub fn new_unitialized(ctx: &mut l0::Context, dev: &l0::Device) -> Result<Self, CUresult> { + Ok(StreamData { + context: ptr::null_mut(), + queue: l0::CommandQueue::new(ctx, dev)?, + }) + } + pub fn new(ctx: &mut ContextData) -> Result<Self, CUresult> { + let l0_ctx = &mut unsafe { &mut *ctx.device }.l0_context; + let l0_dev = &unsafe { &*ctx.device }.base; + Ok(StreamData { + context: ctx as *mut _, + queue: l0::CommandQueue::new(l0_ctx, l0_dev)?, + }) + } + + pub fn command_list(&self) -> Result<l0::CommandList, l0::sys::_ze_result_t> { + let ctx = unsafe { &mut *self.context }; + let dev = unsafe { &mut *ctx.device }; + l0::CommandList::new(&mut dev.l0_context, &dev.base) + } +} + +impl Drop for StreamData { + fn drop(&mut self) { + if self.context == ptr::null_mut() { + return; + } + unsafe { (&mut *self.context).streams.remove(&(&mut *self as *mut _)) }; + } +} + +pub(crate) fn get_ctx(hstream: *mut Stream, pctx: *mut *mut Context) -> Result<(), CUresult> { + if pctx == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + let ctx_ptr = GlobalState::lock_stream(hstream, |stream| stream.context)?; + if ctx_ptr == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED); + } + unsafe { *pctx = Context::ptr_from_inner(ctx_ptr) }; + Ok(()) +} + +pub(crate) fn create(phstream: *mut *mut Stream, _flags: u32) -> Result<(), CUresult> { + let stream_ptr = GlobalState::lock_current_context(|ctx| { + let mut stream_box = Box::new(Stream::new(StreamData::new(ctx)?)); + let stream_ptr = stream_box.as_mut().as_option_mut().unwrap() as *mut _; + if !ctx.streams.insert(stream_ptr) { + return Err(CUresult::CUDA_ERROR_UNKNOWN); + } + mem::forget(stream_box); + Ok::<_, CUresult>(stream_ptr) + })??; + unsafe { *phstream = Stream::ptr_from_inner(stream_ptr) }; + Ok(()) +} + +pub(crate) fn destroy_v2(pstream: *mut Stream) 
-> Result<(), CUresult> { + if pstream == ptr::null_mut() || pstream == CU_STREAM_LEGACY || pstream == CU_STREAM_PER_THREAD + { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + GlobalState::lock(|_| Stream::destroy_impl(pstream))? +} + +#[cfg(test)] +mod test { + use crate::cuda::CUstream; + + use super::super::test::CudaDriverFns; + use super::super::CUresult; + use std::{ptr, thread}; + + const CU_STREAM_LEGACY: CUstream = 1 as *mut _; + const CU_STREAM_PER_THREAD: CUstream = 2 as *mut _; + + cuda_driver_test!(default_stream_uses_current_ctx_legacy); + cuda_driver_test!(default_stream_uses_current_ctx_ptsd); + + fn default_stream_uses_current_ctx_legacy<T: CudaDriverFns>() { + default_stream_uses_current_ctx_impl::<T>(CU_STREAM_LEGACY); + } + + fn default_stream_uses_current_ctx_ptsd<T: CudaDriverFns>() { + default_stream_uses_current_ctx_impl::<T>(CU_STREAM_PER_THREAD); + } + + fn default_stream_uses_current_ctx_impl<T: CudaDriverFns>(stream: CUstream) { + assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); + let mut ctx1 = ptr::null_mut(); + assert_eq!(T::cuCtxCreate_v2(&mut ctx1, 0, 0), CUresult::CUDA_SUCCESS); + let mut stream_ctx1 = ptr::null_mut(); + assert_eq!( + T::cuStreamGetCtx(stream, &mut stream_ctx1), + CUresult::CUDA_SUCCESS + ); + assert_eq!(ctx1, stream_ctx1); + let mut ctx2 = ptr::null_mut(); + assert_eq!(T::cuCtxCreate_v2(&mut ctx2, 0, 0), CUresult::CUDA_SUCCESS); + assert_ne!(ctx1, ctx2); + let mut stream_ctx2 = ptr::null_mut(); + assert_eq!( + T::cuStreamGetCtx(stream, &mut stream_ctx2), + CUresult::CUDA_SUCCESS + ); + assert_eq!(ctx2, stream_ctx2); + // Cleanup + assert_eq!(T::cuCtxDestroy_v2(ctx1), CUresult::CUDA_SUCCESS); + assert_eq!(T::cuCtxDestroy_v2(ctx2), CUresult::CUDA_SUCCESS); + } + + cuda_driver_test!(stream_context_destroyed); + + fn stream_context_destroyed<T: CudaDriverFns>() { + assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); + let mut ctx = ptr::null_mut(); + assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), 
CUresult::CUDA_SUCCESS); + let mut stream = ptr::null_mut(); + assert_eq!(T::cuStreamCreate(&mut stream, 0), CUresult::CUDA_SUCCESS); + let mut stream_ctx1 = ptr::null_mut(); + assert_eq!( + T::cuStreamGetCtx(stream, &mut stream_ctx1), + CUresult::CUDA_SUCCESS + ); + assert_eq!(stream_ctx1, ctx); + assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS); + let mut stream_ctx2 = ptr::null_mut(); + // When a context gets destroyed, its streams are also destroyed + let cuda_result = T::cuStreamGetCtx(stream, &mut stream_ctx2); + assert!( + cuda_result == CUresult::CUDA_ERROR_INVALID_HANDLE + || cuda_result == CUresult::CUDA_ERROR_INVALID_CONTEXT + || cuda_result == CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED + ); + assert_eq!( + T::cuStreamDestroy_v2(stream), + CUresult::CUDA_ERROR_INVALID_HANDLE + ); + // Check if creating another context is possible + let mut ctx2 = ptr::null_mut(); + assert_eq!(T::cuCtxCreate_v2(&mut ctx2, 0, 0), CUresult::CUDA_SUCCESS); + // Cleanup + assert_eq!(T::cuCtxDestroy_v2(ctx2), CUresult::CUDA_SUCCESS); + } + + cuda_driver_test!(stream_moves_context_to_another_thread); + + fn stream_moves_context_to_another_thread<T: CudaDriverFns>() { + assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); + let mut ctx = ptr::null_mut(); + assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS); + let mut stream = ptr::null_mut(); + assert_eq!(T::cuStreamCreate(&mut stream, 0), CUresult::CUDA_SUCCESS); + let mut stream_ctx1 = ptr::null_mut(); + assert_eq!( + T::cuStreamGetCtx(stream, &mut stream_ctx1), + CUresult::CUDA_SUCCESS + ); + assert_eq!(stream_ctx1, ctx); + let stream_ptr = stream as usize; + let stream_ctx_on_thread = thread::spawn(move || { + let mut stream_ctx2 = ptr::null_mut(); + assert_eq!( + T::cuStreamGetCtx(stream_ptr as *mut _, &mut stream_ctx2), + CUresult::CUDA_SUCCESS + ); + stream_ctx2 as usize + }) + .join() + .unwrap(); + assert_eq!(stream_ctx1, stream_ctx_on_thread as *mut _); + // Cleanup + 
assert_eq!(T::cuStreamDestroy_v2(stream), CUresult::CUDA_SUCCESS); + assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS); + } + + cuda_driver_test!(can_destroy_stream); + + fn can_destroy_stream<T: CudaDriverFns>() { + assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); + let mut ctx = ptr::null_mut(); + assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS); + let mut stream = ptr::null_mut(); + assert_eq!(T::cuStreamCreate(&mut stream, 0), CUresult::CUDA_SUCCESS); + assert_eq!(T::cuStreamDestroy_v2(stream), CUresult::CUDA_SUCCESS); + // Cleanup + assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS); + } + + cuda_driver_test!(cant_destroy_default_stream); + + fn cant_destroy_default_stream<T: CudaDriverFns>() { + assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); + let mut ctx = ptr::null_mut(); + assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS); + assert_ne!( + T::cuStreamDestroy_v2(super::CU_STREAM_LEGACY as *mut _), + CUresult::CUDA_SUCCESS + ); + // Cleanup + assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS); + } +} diff --git a/zluda/src/impl/test.rs b/zluda/src/impl/test.rs new file mode 100644 index 0000000..b36ccd8 --- /dev/null +++ b/zluda/src/impl/test.rs @@ -0,0 +1,157 @@ +#![allow(non_snake_case)] + +use crate::cuda as zluda; +use crate::cuda::CUstream; +use crate::cuda::CUuuid; +use crate::{ + cuda::{CUdevice, CUdeviceptr}, + r#impl::CUresult, +}; +use ::std::{ + ffi::c_void, + os::raw::{c_int, c_uint}, +}; +use cuda_driver_sys as cuda; + +#[macro_export] +macro_rules! cuda_driver_test { + ($func:ident) => { + paste! 
{ + #[test] + fn [<$func _zluda>]() { + $func::<crate::r#impl::test::Zluda>() + } + + #[test] + fn [<$func _cuda>]() { + $func::<crate::r#impl::test::Cuda>() + } + } + }; +} + +pub trait CudaDriverFns { + fn cuInit(flags: c_uint) -> CUresult; + fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult; + fn cuCtxDestroy_v2(ctx: *mut c_void) -> CUresult; + fn cuCtxPopCurrent_v2(pctx: *mut *mut c_void) -> CUresult; + fn cuCtxGetApiVersion(ctx: *mut c_void, version: *mut c_uint) -> CUresult; + fn cuCtxGetCurrent(pctx: *mut *mut c_void) -> CUresult; + fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult; + fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: c_int) -> CUresult; + fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult; + fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult; + fn cuStreamCreate(stream: *mut CUstream, flags: c_uint) -> CUresult; + fn cuMemFree_v2(mem: *mut c_void) -> CUresult; + fn cuStreamDestroy_v2(stream: CUstream) -> CUresult; +} + +pub struct Zluda(); + +impl CudaDriverFns for Zluda { + fn cuInit(_flags: c_uint) -> CUresult { + zluda::cuInit(_flags as _) + } + + fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult { + zluda::cuCtxCreate_v2(pctx as *mut _, flags, CUdevice(dev)) + } + + fn cuCtxDestroy_v2(ctx: *mut c_void) -> CUresult { + zluda::cuCtxDestroy_v2(ctx as *mut _) + } + + fn cuCtxPopCurrent_v2(pctx: *mut *mut c_void) -> CUresult { + zluda::cuCtxPopCurrent_v2(pctx as *mut _) + } + + fn cuCtxGetApiVersion(ctx: *mut c_void, version: *mut c_uint) -> CUresult { + zluda::cuCtxGetApiVersion(ctx as *mut _, version) + } + + fn cuCtxGetCurrent(pctx: *mut *mut c_void) -> CUresult { + zluda::cuCtxGetCurrent(pctx as *mut _) + } + fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult { + zluda::cuMemAlloc_v2(dptr as *mut _, bytesize) + } + + fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: c_int) -> CUresult { + 
zluda::cuDeviceGetUuid(uuid, CUdevice(dev)) + } + + fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult { + zluda::cuDevicePrimaryCtxGetState(CUdevice(dev), flags, active) + } + + fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult { + zluda::cuStreamGetCtx(hStream, pctx as _) + } + + fn cuStreamCreate(stream: *mut CUstream, flags: c_uint) -> CUresult { + zluda::cuStreamCreate(stream, flags) + } + + fn cuMemFree_v2(dptr: *mut c_void) -> CUresult { + zluda::cuMemFree_v2(CUdeviceptr(dptr as _)) + } + + fn cuStreamDestroy_v2(stream: CUstream) -> CUresult { + zluda::cuStreamDestroy_v2(stream) + } +} + +pub struct Cuda(); + +impl CudaDriverFns for Cuda { + fn cuInit(flags: c_uint) -> CUresult { + unsafe { CUresult(cuda::cuInit(flags) as c_uint) } + } + + fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult { + unsafe { CUresult(cuda::cuCtxCreate_v2(pctx as *mut _, flags, dev) as c_uint) } + } + + fn cuCtxDestroy_v2(ctx: *mut c_void) -> CUresult { + unsafe { CUresult(cuda::cuCtxDestroy_v2(ctx as *mut _) as c_uint) } + } + + fn cuCtxPopCurrent_v2(pctx: *mut *mut c_void) -> CUresult { + unsafe { CUresult(cuda::cuCtxPopCurrent_v2(pctx as *mut _) as c_uint) } + } + + fn cuCtxGetApiVersion(ctx: *mut c_void, version: *mut c_uint) -> CUresult { + unsafe { CUresult(cuda::cuCtxGetApiVersion(ctx as *mut _, version) as c_uint) } + } + + fn cuCtxGetCurrent(pctx: *mut *mut c_void) -> CUresult { + unsafe { CUresult(cuda::cuCtxGetCurrent(pctx as *mut _) as c_uint) } + } + fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult { + unsafe { CUresult(cuda::cuMemAlloc_v2(dptr as *mut _, bytesize) as c_uint) } + } + + fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: c_int) -> CUresult { + unsafe { CUresult(cuda::cuDeviceGetUuid(uuid as *mut _, dev) as c_uint) } + } + + fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult { + unsafe { 
CUresult(cuda::cuDevicePrimaryCtxGetState(dev, flags, active) as c_uint) } + } + + fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult { + unsafe { CUresult(cuda::cuStreamGetCtx(hStream as _, pctx as _) as c_uint) } + } + + fn cuStreamCreate(stream: *mut CUstream, flags: c_uint) -> CUresult { + unsafe { CUresult(cuda::cuStreamCreate(stream as _, flags as _) as c_uint) } + } + + fn cuMemFree_v2(mem: *mut c_void) -> CUresult { + unsafe { CUresult(cuda::cuMemFree_v2(mem as _) as c_uint) } + } + + fn cuStreamDestroy_v2(stream: CUstream) -> CUresult { + unsafe { CUresult(cuda::cuStreamDestroy_v2(stream as _) as c_uint) } + } +} diff --git a/zluda/src/lib.rs b/zluda/src/lib.rs new file mode 100644 index 0000000..0f7d014 --- /dev/null +++ b/zluda/src/lib.rs @@ -0,0 +1,16 @@ +extern crate level_zero as l0; +extern crate level_zero_sys as l0_sys; +#[macro_use] +extern crate lazy_static; +#[cfg(test)] +extern crate cuda_driver_sys; +extern crate lz4; +#[cfg(test)] +#[macro_use] +extern crate paste; +extern crate ptx; + +#[allow(warnings)] +mod cuda; +mod cuda_impl; +pub(crate) mod r#impl; |