diff options
author | Andrzej Janik <[email protected]> | 2024-12-02 00:29:57 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2024-12-02 00:29:57 +0100 |
commit | 7a6df9dcbf59edef371e7f63c16c64916ddb0c0b (patch) | |
tree | 7800524ba25d38c514f1c769c9c1b665542c5500 /cuda_base/src/cuda.rs | |
parent | 870fed4bb69d919a10822032d65ec20f385df9d7 (diff) | |
download | ZLUDA-7a6df9dcbf59edef371e7f63c16c64916ddb0c0b.tar.gz ZLUDA-7a6df9dcbf59edef371e7f63c16c64916ddb0c0b.zip |
Fix host code and update to CUDA 12.4 (#299)
Diffstat (limited to 'cuda_base/src/cuda.rs')
-rw-r--r-- | cuda_base/src/cuda.rs | 26688 |
1 files changed, 20852 insertions, 5836 deletions
diff --git a/cuda_base/src/cuda.rs b/cuda_base/src/cuda.rs index 6c141cf..2cc5a56 100644 --- a/cuda_base/src/cuda.rs +++ b/cuda_base/src/cuda.rs @@ -1,5836 +1,20852 @@ -/* automatically generated by rust-bindgen 0.59.2 */ - -pub const CUDA_VERSION: u32 = 11050;
-pub const CU_IPC_HANDLE_SIZE: u32 = 64;
-pub const CU_MEMHOSTALLOC_PORTABLE: u32 = 1;
-pub const CU_MEMHOSTALLOC_DEVICEMAP: u32 = 2;
-pub const CU_MEMHOSTALLOC_WRITECOMBINED: u32 = 4;
-pub const CU_MEMHOSTREGISTER_PORTABLE: u32 = 1;
-pub const CU_MEMHOSTREGISTER_DEVICEMAP: u32 = 2;
-pub const CU_MEMHOSTREGISTER_IOMEMORY: u32 = 4;
-pub const CU_MEMHOSTREGISTER_READ_ONLY: u32 = 8;
-pub const CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL: u32 = 1;
-pub const CUDA_EXTERNAL_MEMORY_DEDICATED: u32 = 1;
-pub const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC: u32 = 1;
-pub const CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC: u32 = 2;
-pub const CUDA_NVSCISYNC_ATTR_SIGNAL: u32 = 1;
-pub const CUDA_NVSCISYNC_ATTR_WAIT: u32 = 2;
-pub const CU_MEM_CREATE_USAGE_TILE_POOL: u32 = 1;
-pub const CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC: u32 = 1;
-pub const CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC: u32 = 2;
-pub const CUDA_ARRAY3D_LAYERED: u32 = 1;
-pub const CUDA_ARRAY3D_2DARRAY: u32 = 1;
-pub const CUDA_ARRAY3D_SURFACE_LDST: u32 = 2;
-pub const CUDA_ARRAY3D_CUBEMAP: u32 = 4;
-pub const CUDA_ARRAY3D_TEXTURE_GATHER: u32 = 8;
-pub const CUDA_ARRAY3D_DEPTH_TEXTURE: u32 = 16;
-pub const CUDA_ARRAY3D_COLOR_ATTACHMENT: u32 = 32;
-pub const CUDA_ARRAY3D_SPARSE: u32 = 64;
-pub const CU_TRSA_OVERRIDE_FORMAT: u32 = 1;
-pub const CU_TRSF_READ_AS_INTEGER: u32 = 1;
-pub const CU_TRSF_NORMALIZED_COORDINATES: u32 = 2;
-pub const CU_TRSF_SRGB: u32 = 16;
-pub const CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION: u32 = 32;
-pub const CU_PARAM_TR_DEFAULT: i32 = -1;
-pub const CURRENT_IMPORT_REDIRECTION_VERSION: u32 = 1;
-pub const CURVECAPS: u32 = 28;
-pub const CURSOR_SHOWING: u32 = 1;
-pub const CUR_BLOB_VERSION: u32 = 2;
-pub type cuuint32_t = u32;
-pub type cuuint64_t = u64;
-#[repr(transparent)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUdeviceptr_v2(pub ::std::os::raw::c_ulonglong);
-pub type CUdeviceptr = CUdeviceptr_v2;
-#[repr(transparent)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUdevice_v1(pub ::std::os::raw::c_int);
-pub type CUdevice = CUdevice_v1;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUctx_st {
- _unused: [u8; 0],
-}
-pub type CUcontext = *mut CUctx_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUmod_st {
- _unused: [u8; 0],
-}
-pub type CUmodule = *mut CUmod_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUfunc_st {
- _unused: [u8; 0],
-}
-pub type CUfunction = *mut CUfunc_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUarray_st {
- _unused: [u8; 0],
-}
-pub type CUarray = *mut CUarray_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUmipmappedArray_st {
- _unused: [u8; 0],
-}
-pub type CUmipmappedArray = *mut CUmipmappedArray_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUtexref_st {
- _unused: [u8; 0],
-}
-pub type CUtexref = *mut CUtexref_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUsurfref_st {
- _unused: [u8; 0],
-}
-pub type CUsurfref = *mut CUsurfref_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUevent_st {
- _unused: [u8; 0],
-}
-pub type CUevent = *mut CUevent_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUstream_st {
- _unused: [u8; 0],
-}
-pub type CUstream = *mut CUstream_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUgraphicsResource_st {
- _unused: [u8; 0],
-}
-pub type CUgraphicsResource = *mut CUgraphicsResource_st;
-pub type CUtexObject_v1 = ::std::os::raw::c_ulonglong;
-pub type CUtexObject = CUtexObject_v1;
-pub type CUsurfObject_v1 = ::std::os::raw::c_ulonglong;
-pub type CUsurfObject = CUsurfObject_v1;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUextMemory_st {
- _unused: [u8; 0],
-}
-pub type CUexternalMemory = *mut CUextMemory_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUextSemaphore_st {
- _unused: [u8; 0],
-}
-pub type CUexternalSemaphore = *mut CUextSemaphore_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUgraph_st {
- _unused: [u8; 0],
-}
-pub type CUgraph = *mut CUgraph_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUgraphNode_st {
- _unused: [u8; 0],
-}
-pub type CUgraphNode = *mut CUgraphNode_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUgraphExec_st {
- _unused: [u8; 0],
-}
-pub type CUgraphExec = *mut CUgraphExec_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUmemPoolHandle_st {
- _unused: [u8; 0],
-}
-pub type CUmemoryPool = *mut CUmemPoolHandle_st;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUuserObject_st {
- _unused: [u8; 0],
-}
-pub type CUuserObject = *mut CUuserObject_st;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUuuid_st {
- pub bytes: [::std::os::raw::c_char; 16usize],
-}
-pub type CUuuid = CUuuid_st;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUipcEventHandle_st {
- pub reserved: [::std::os::raw::c_char; 64usize],
-}
-pub type CUipcEventHandle_v1 = CUipcEventHandle_st;
-pub type CUipcEventHandle = CUipcEventHandle_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUipcMemHandle_st {
- pub reserved: [::std::os::raw::c_char; 64usize],
-}
-pub type CUipcMemHandle_v1 = CUipcMemHandle_st;
-pub type CUipcMemHandle = CUipcMemHandle_v1;
-impl CUstreamBatchMemOpType_enum {
- pub const CU_STREAM_MEM_OP_WAIT_VALUE_32: CUstreamBatchMemOpType_enum =
- CUstreamBatchMemOpType_enum(1);
-}
-impl CUstreamBatchMemOpType_enum {
- pub const CU_STREAM_MEM_OP_WRITE_VALUE_32: CUstreamBatchMemOpType_enum =
- CUstreamBatchMemOpType_enum(2);
-}
-impl CUstreamBatchMemOpType_enum {
- pub const CU_STREAM_MEM_OP_WAIT_VALUE_64: CUstreamBatchMemOpType_enum =
- CUstreamBatchMemOpType_enum(4);
-}
-impl CUstreamBatchMemOpType_enum {
- pub const CU_STREAM_MEM_OP_WRITE_VALUE_64: CUstreamBatchMemOpType_enum =
- CUstreamBatchMemOpType_enum(5);
-}
-impl CUstreamBatchMemOpType_enum {
- pub const CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES: CUstreamBatchMemOpType_enum =
- CUstreamBatchMemOpType_enum(3);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUstreamBatchMemOpType_enum(pub ::std::os::raw::c_uint);
-pub use self::CUstreamBatchMemOpType_enum as CUstreamBatchMemOpType;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUstreamBatchMemOpParams_union {
- pub operation: CUstreamBatchMemOpType,
- pub waitValue: CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st,
- pub writeValue: CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st,
- pub flushRemoteWrites: CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st,
- pub pad: [cuuint64_t; 6usize],
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st {
- pub operation: CUstreamBatchMemOpType,
- pub address: CUdeviceptr,
- pub __bindgen_anon_1:
- CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1,
- pub flags: ::std::os::raw::c_uint,
- pub alias: CUdeviceptr,
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1 {
- pub value: cuuint32_t,
- pub value64: cuuint64_t,
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st {
- pub operation: CUstreamBatchMemOpType,
- pub address: CUdeviceptr,
- pub __bindgen_anon_1:
- CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1,
- pub flags: ::std::os::raw::c_uint,
- pub alias: CUdeviceptr,
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st__bindgen_ty_1 {
- pub value: cuuint32_t,
- pub value64: cuuint64_t,
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUstreamBatchMemOpParams_union_CUstreamMemOpFlushRemoteWritesParams_st {
- pub operation: CUstreamBatchMemOpType,
- pub flags: ::std::os::raw::c_uint,
-}
-pub type CUstreamBatchMemOpParams_v1 = CUstreamBatchMemOpParams_union;
-pub type CUstreamBatchMemOpParams = CUstreamBatchMemOpParams_v1;
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_UNSIGNED_INT8: CUarray_format_enum = CUarray_format_enum(1);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_UNSIGNED_INT16: CUarray_format_enum = CUarray_format_enum(2);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_UNSIGNED_INT32: CUarray_format_enum = CUarray_format_enum(3);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_SIGNED_INT8: CUarray_format_enum = CUarray_format_enum(8);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_SIGNED_INT16: CUarray_format_enum = CUarray_format_enum(9);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_SIGNED_INT32: CUarray_format_enum = CUarray_format_enum(10);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_HALF: CUarray_format_enum = CUarray_format_enum(16);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_FLOAT: CUarray_format_enum = CUarray_format_enum(32);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_NV12: CUarray_format_enum = CUarray_format_enum(176);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_UNORM_INT8X1: CUarray_format_enum = CUarray_format_enum(192);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_UNORM_INT8X2: CUarray_format_enum = CUarray_format_enum(193);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_UNORM_INT8X4: CUarray_format_enum = CUarray_format_enum(194);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_UNORM_INT16X1: CUarray_format_enum = CUarray_format_enum(195);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_UNORM_INT16X2: CUarray_format_enum = CUarray_format_enum(196);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_UNORM_INT16X4: CUarray_format_enum = CUarray_format_enum(197);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_SNORM_INT8X1: CUarray_format_enum = CUarray_format_enum(198);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_SNORM_INT8X2: CUarray_format_enum = CUarray_format_enum(199);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_SNORM_INT8X4: CUarray_format_enum = CUarray_format_enum(200);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_SNORM_INT16X1: CUarray_format_enum = CUarray_format_enum(201);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_SNORM_INT16X2: CUarray_format_enum = CUarray_format_enum(202);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_SNORM_INT16X4: CUarray_format_enum = CUarray_format_enum(203);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC1_UNORM: CUarray_format_enum = CUarray_format_enum(145);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC1_UNORM_SRGB: CUarray_format_enum = CUarray_format_enum(146);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC2_UNORM: CUarray_format_enum = CUarray_format_enum(147);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC2_UNORM_SRGB: CUarray_format_enum = CUarray_format_enum(148);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC3_UNORM: CUarray_format_enum = CUarray_format_enum(149);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC3_UNORM_SRGB: CUarray_format_enum = CUarray_format_enum(150);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC4_UNORM: CUarray_format_enum = CUarray_format_enum(151);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC4_SNORM: CUarray_format_enum = CUarray_format_enum(152);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC5_UNORM: CUarray_format_enum = CUarray_format_enum(153);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC5_SNORM: CUarray_format_enum = CUarray_format_enum(154);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC6H_UF16: CUarray_format_enum = CUarray_format_enum(155);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC6H_SF16: CUarray_format_enum = CUarray_format_enum(156);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC7_UNORM: CUarray_format_enum = CUarray_format_enum(157);
-}
-impl CUarray_format_enum {
- pub const CU_AD_FORMAT_BC7_UNORM_SRGB: CUarray_format_enum = CUarray_format_enum(158);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUarray_format_enum(pub ::std::os::raw::c_uint);
-pub use self::CUarray_format_enum as CUarray_format;
-impl CUaddress_mode_enum {
- pub const CU_TR_ADDRESS_MODE_WRAP: CUaddress_mode_enum = CUaddress_mode_enum(0);
-}
-impl CUaddress_mode_enum {
- pub const CU_TR_ADDRESS_MODE_CLAMP: CUaddress_mode_enum = CUaddress_mode_enum(1);
-}
-impl CUaddress_mode_enum {
- pub const CU_TR_ADDRESS_MODE_MIRROR: CUaddress_mode_enum = CUaddress_mode_enum(2);
-}
-impl CUaddress_mode_enum {
- pub const CU_TR_ADDRESS_MODE_BORDER: CUaddress_mode_enum = CUaddress_mode_enum(3);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUaddress_mode_enum(pub ::std::os::raw::c_uint);
-pub use self::CUaddress_mode_enum as CUaddress_mode;
-impl CUfilter_mode_enum {
- pub const CU_TR_FILTER_MODE_POINT: CUfilter_mode_enum = CUfilter_mode_enum(0);
-}
-impl CUfilter_mode_enum {
- pub const CU_TR_FILTER_MODE_LINEAR: CUfilter_mode_enum = CUfilter_mode_enum(1);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUfilter_mode_enum(pub ::std::os::raw::c_uint);
-pub use self::CUfilter_mode_enum as CUfilter_mode;
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: CUdevice_attribute_enum =
- CUdevice_attribute_enum(1);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X: CUdevice_attribute_enum =
- CUdevice_attribute_enum(2);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y: CUdevice_attribute_enum =
- CUdevice_attribute_enum(3);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z: CUdevice_attribute_enum =
- CUdevice_attribute_enum(4);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X: CUdevice_attribute_enum =
- CUdevice_attribute_enum(5);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y: CUdevice_attribute_enum =
- CUdevice_attribute_enum(6);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z: CUdevice_attribute_enum =
- CUdevice_attribute_enum(7);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: CUdevice_attribute_enum =
- CUdevice_attribute_enum(8);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK: CUdevice_attribute_enum =
- CUdevice_attribute_enum(8);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: CUdevice_attribute_enum =
- CUdevice_attribute_enum(9);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_WARP_SIZE: CUdevice_attribute_enum = CUdevice_attribute_enum(10);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_PITCH: CUdevice_attribute_enum = CUdevice_attribute_enum(11);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: CUdevice_attribute_enum =
- CUdevice_attribute_enum(12);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK: CUdevice_attribute_enum =
- CUdevice_attribute_enum(12);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_CLOCK_RATE: CUdevice_attribute_enum = CUdevice_attribute_enum(13);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(14);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: CUdevice_attribute_enum =
- CUdevice_attribute_enum(15);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(16);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(17);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_INTEGRATED: CUdevice_attribute_enum = CUdevice_attribute_enum(18);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: CUdevice_attribute_enum =
- CUdevice_attribute_enum(19);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_COMPUTE_MODE: CUdevice_attribute_enum =
- CUdevice_attribute_enum(20);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(21);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(22);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(23);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(24);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(25);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(26);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(27);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(28);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS: CUdevice_attribute_enum =
- CUdevice_attribute_enum(29);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(27);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(28);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES: CUdevice_attribute_enum =
- CUdevice_attribute_enum(29);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(30);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: CUdevice_attribute_enum =
- CUdevice_attribute_enum(31);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_ECC_ENABLED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(32);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_PCI_BUS_ID: CUdevice_attribute_enum = CUdevice_attribute_enum(33);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID: CUdevice_attribute_enum =
- CUdevice_attribute_enum(34);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_TCC_DRIVER: CUdevice_attribute_enum = CUdevice_attribute_enum(35);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE: CUdevice_attribute_enum =
- CUdevice_attribute_enum(36);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(37);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE: CUdevice_attribute_enum =
- CUdevice_attribute_enum(38);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR: CUdevice_attribute_enum =
- CUdevice_attribute_enum(39);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(40);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING: CUdevice_attribute_enum =
- CUdevice_attribute_enum(41);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(42);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS: CUdevice_attribute_enum =
- CUdevice_attribute_enum(43);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER: CUdevice_attribute_enum =
- CUdevice_attribute_enum(44);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(45);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(46);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE: CUdevice_attribute_enum =
- CUdevice_attribute_enum(47);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE: CUdevice_attribute_enum =
- CUdevice_attribute_enum(48);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE: CUdevice_attribute_enum =
- CUdevice_attribute_enum(49);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID: CUdevice_attribute_enum =
- CUdevice_attribute_enum(50);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(51);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(52);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(53);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS: CUdevice_attribute_enum =
- CUdevice_attribute_enum(54);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(55);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(56);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(57);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(58);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(59);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(60);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(61);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS: CUdevice_attribute_enum =
- CUdevice_attribute_enum(62);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(63);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(64);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS: CUdevice_attribute_enum =
- CUdevice_attribute_enum(65);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(66);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(67);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS: CUdevice_attribute_enum =
- CUdevice_attribute_enum(68);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(69);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(70);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(71);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(72);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(73);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT: CUdevice_attribute_enum =
- CUdevice_attribute_enum(74);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR: CUdevice_attribute_enum =
- CUdevice_attribute_enum(75);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR: CUdevice_attribute_enum =
- CUdevice_attribute_enum(76);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(77);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(78);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(79);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(80);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR: CUdevice_attribute_enum =
- CUdevice_attribute_enum(81);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR: CUdevice_attribute_enum =
- CUdevice_attribute_enum(82);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY: CUdevice_attribute_enum =
- CUdevice_attribute_enum(83);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD: CUdevice_attribute_enum =
- CUdevice_attribute_enum(84);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID: CUdevice_attribute_enum =
- CUdevice_attribute_enum(85);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(86);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO: CUdevice_attribute_enum =
- CUdevice_attribute_enum(87);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS: CUdevice_attribute_enum =
- CUdevice_attribute_enum(88);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS: CUdevice_attribute_enum =
- CUdevice_attribute_enum(89);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(90);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM: CUdevice_attribute_enum =
- CUdevice_attribute_enum(91);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS: CUdevice_attribute_enum =
- CUdevice_attribute_enum(92);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS: CUdevice_attribute_enum =
- CUdevice_attribute_enum(93);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR: CUdevice_attribute_enum =
- CUdevice_attribute_enum(94);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(95);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH: CUdevice_attribute_enum =
- CUdevice_attribute_enum(96);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN: CUdevice_attribute_enum =
- CUdevice_attribute_enum(97);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES: CUdevice_attribute_enum =
- CUdevice_attribute_enum(98);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(99);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES:
- CUdevice_attribute_enum = CUdevice_attribute_enum(100);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST: CUdevice_attribute_enum =
- CUdevice_attribute_enum(101);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(102);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(102);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED:
- CUdevice_attribute_enum = CUdevice_attribute_enum(103);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(104);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(105);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR: CUdevice_attribute_enum =
- CUdevice_attribute_enum(106);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(107);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE: CUdevice_attribute_enum =
- CUdevice_attribute_enum(108);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE: CUdevice_attribute_enum =
- CUdevice_attribute_enum(109);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(110);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK: CUdevice_attribute_enum =
- CUdevice_attribute_enum(111);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(112);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(113);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(114);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(115);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED: CUdevice_attribute_enum =
- CUdevice_attribute_enum(116);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS: CUdevice_attribute_enum =
- CUdevice_attribute_enum(117);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING: CUdevice_attribute_enum =
- CUdevice_attribute_enum(118);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES: CUdevice_attribute_enum =
- CUdevice_attribute_enum(119);
-}
-impl CUdevice_attribute_enum {
- pub const CU_DEVICE_ATTRIBUTE_MAX: CUdevice_attribute_enum = CUdevice_attribute_enum(120);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUdevice_attribute_enum(pub ::std::os::raw::c_uint);
-pub use self::CUdevice_attribute_enum as CUdevice_attribute;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUdevprop_st {
- pub maxThreadsPerBlock: ::std::os::raw::c_int,
- pub maxThreadsDim: [::std::os::raw::c_int; 3usize],
- pub maxGridSize: [::std::os::raw::c_int; 3usize],
- pub sharedMemPerBlock: ::std::os::raw::c_int,
- pub totalConstantMemory: ::std::os::raw::c_int,
- pub SIMDWidth: ::std::os::raw::c_int,
- pub memPitch: ::std::os::raw::c_int,
- pub regsPerBlock: ::std::os::raw::c_int,
- pub clockRate: ::std::os::raw::c_int,
- pub textureAlign: ::std::os::raw::c_int,
-}
-pub type CUdevprop_v1 = CUdevprop_st;
-pub type CUdevprop = CUdevprop_v1;
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_CONTEXT: CUpointer_attribute_enum = CUpointer_attribute_enum(1);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_MEMORY_TYPE: CUpointer_attribute_enum =
- CUpointer_attribute_enum(2);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_DEVICE_POINTER: CUpointer_attribute_enum =
- CUpointer_attribute_enum(3);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_HOST_POINTER: CUpointer_attribute_enum =
- CUpointer_attribute_enum(4);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_P2P_TOKENS: CUpointer_attribute_enum =
- CUpointer_attribute_enum(5);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_SYNC_MEMOPS: CUpointer_attribute_enum =
- CUpointer_attribute_enum(6);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_BUFFER_ID: CUpointer_attribute_enum =
- CUpointer_attribute_enum(7);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_IS_MANAGED: CUpointer_attribute_enum =
- CUpointer_attribute_enum(8);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL: CUpointer_attribute_enum =
- CUpointer_attribute_enum(9);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE: CUpointer_attribute_enum =
- CUpointer_attribute_enum(10);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_RANGE_START_ADDR: CUpointer_attribute_enum =
- CUpointer_attribute_enum(11);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_RANGE_SIZE: CUpointer_attribute_enum =
- CUpointer_attribute_enum(12);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_MAPPED: CUpointer_attribute_enum = CUpointer_attribute_enum(13);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES: CUpointer_attribute_enum =
- CUpointer_attribute_enum(14);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE: CUpointer_attribute_enum =
- CUpointer_attribute_enum(15);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_ACCESS_FLAGS: CUpointer_attribute_enum =
- CUpointer_attribute_enum(16);
-}
-impl CUpointer_attribute_enum {
- pub const CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE: CUpointer_attribute_enum =
- CUpointer_attribute_enum(17);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUpointer_attribute_enum(pub ::std::os::raw::c_uint);
-pub use self::CUpointer_attribute_enum as CUpointer_attribute;
-impl CUfunction_attribute_enum {
- pub const CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: CUfunction_attribute_enum =
- CUfunction_attribute_enum(0);
-}
-impl CUfunction_attribute_enum {
- pub const CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: CUfunction_attribute_enum =
- CUfunction_attribute_enum(1);
-}
-impl CUfunction_attribute_enum {
- pub const CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: CUfunction_attribute_enum =
- CUfunction_attribute_enum(2);
-}
-impl CUfunction_attribute_enum {
- pub const CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: CUfunction_attribute_enum =
- CUfunction_attribute_enum(3);
-}
-impl CUfunction_attribute_enum {
- pub const CU_FUNC_ATTRIBUTE_NUM_REGS: CUfunction_attribute_enum = CUfunction_attribute_enum(4);
-}
-impl CUfunction_attribute_enum {
- pub const CU_FUNC_ATTRIBUTE_PTX_VERSION: CUfunction_attribute_enum =
- CUfunction_attribute_enum(5);
-}
-impl CUfunction_attribute_enum {
- pub const CU_FUNC_ATTRIBUTE_BINARY_VERSION: CUfunction_attribute_enum =
- CUfunction_attribute_enum(6);
-}
-impl CUfunction_attribute_enum {
- pub const CU_FUNC_ATTRIBUTE_CACHE_MODE_CA: CUfunction_attribute_enum =
- CUfunction_attribute_enum(7);
-}
-impl CUfunction_attribute_enum {
- pub const CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: CUfunction_attribute_enum =
- CUfunction_attribute_enum(8);
-}
-impl CUfunction_attribute_enum {
- pub const CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: CUfunction_attribute_enum =
- CUfunction_attribute_enum(9);
-}
-impl CUfunction_attribute_enum {
- pub const CU_FUNC_ATTRIBUTE_MAX: CUfunction_attribute_enum = CUfunction_attribute_enum(10);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUfunction_attribute_enum(pub ::std::os::raw::c_uint);
-pub use self::CUfunction_attribute_enum as CUfunction_attribute;
-impl CUfunc_cache_enum {
- pub const CU_FUNC_CACHE_PREFER_NONE: CUfunc_cache_enum = CUfunc_cache_enum(0);
-}
-impl CUfunc_cache_enum {
- pub const CU_FUNC_CACHE_PREFER_SHARED: CUfunc_cache_enum = CUfunc_cache_enum(1);
-}
-impl CUfunc_cache_enum {
- pub const CU_FUNC_CACHE_PREFER_L1: CUfunc_cache_enum = CUfunc_cache_enum(2);
-}
-impl CUfunc_cache_enum {
- pub const CU_FUNC_CACHE_PREFER_EQUAL: CUfunc_cache_enum = CUfunc_cache_enum(3);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUfunc_cache_enum(pub ::std::os::raw::c_uint);
-pub use self::CUfunc_cache_enum as CUfunc_cache;
-impl CUsharedconfig_enum {
- pub const CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: CUsharedconfig_enum = CUsharedconfig_enum(0);
-}
-impl CUsharedconfig_enum {
- pub const CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: CUsharedconfig_enum =
- CUsharedconfig_enum(1);
-}
-impl CUsharedconfig_enum {
- pub const CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: CUsharedconfig_enum =
- CUsharedconfig_enum(2);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUsharedconfig_enum(pub ::std::os::raw::c_uint);
-pub use self::CUsharedconfig_enum as CUsharedconfig;
-impl CUmemorytype_enum {
- pub const CU_MEMORYTYPE_HOST: CUmemorytype_enum = CUmemorytype_enum(1);
-}
-impl CUmemorytype_enum {
- pub const CU_MEMORYTYPE_DEVICE: CUmemorytype_enum = CUmemorytype_enum(2);
-}
-impl CUmemorytype_enum {
- pub const CU_MEMORYTYPE_ARRAY: CUmemorytype_enum = CUmemorytype_enum(3);
-}
-impl CUmemorytype_enum {
- pub const CU_MEMORYTYPE_UNIFIED: CUmemorytype_enum = CUmemorytype_enum(4);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUmemorytype_enum(pub ::std::os::raw::c_uint);
-pub use self::CUmemorytype_enum as CUmemorytype;
-impl CUmem_advise_enum {
- pub const CU_MEM_ADVISE_SET_READ_MOSTLY: CUmem_advise_enum = CUmem_advise_enum(1);
-}
-impl CUmem_advise_enum {
- pub const CU_MEM_ADVISE_UNSET_READ_MOSTLY: CUmem_advise_enum = CUmem_advise_enum(2);
-}
-impl CUmem_advise_enum {
- pub const CU_MEM_ADVISE_SET_PREFERRED_LOCATION: CUmem_advise_enum = CUmem_advise_enum(3);
-}
-impl CUmem_advise_enum {
- pub const CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: CUmem_advise_enum = CUmem_advise_enum(4);
-}
-impl CUmem_advise_enum {
- pub const CU_MEM_ADVISE_SET_ACCESSED_BY: CUmem_advise_enum = CUmem_advise_enum(5);
-}
-impl CUmem_advise_enum {
- pub const CU_MEM_ADVISE_UNSET_ACCESSED_BY: CUmem_advise_enum = CUmem_advise_enum(6);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUmem_advise_enum(pub ::std::os::raw::c_uint);
-pub use self::CUmem_advise_enum as CUmem_advise;
-impl CUmem_range_attribute_enum {
- pub const CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY: CUmem_range_attribute_enum =
- CUmem_range_attribute_enum(1);
-}
-impl CUmem_range_attribute_enum {
- pub const CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION: CUmem_range_attribute_enum =
- CUmem_range_attribute_enum(2);
-}
-impl CUmem_range_attribute_enum {
- pub const CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY: CUmem_range_attribute_enum =
- CUmem_range_attribute_enum(3);
-}
-impl CUmem_range_attribute_enum {
- pub const CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION: CUmem_range_attribute_enum =
- CUmem_range_attribute_enum(4);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUmem_range_attribute_enum(pub ::std::os::raw::c_uint);
-pub use self::CUmem_range_attribute_enum as CUmem_range_attribute;
-impl CUjit_option_enum {
- pub const CU_JIT_MAX_REGISTERS: CUjit_option_enum = CUjit_option_enum(0);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_THREADS_PER_BLOCK: CUjit_option_enum = CUjit_option_enum(1);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_WALL_TIME: CUjit_option_enum = CUjit_option_enum(2);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_INFO_LOG_BUFFER: CUjit_option_enum = CUjit_option_enum(3);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: CUjit_option_enum = CUjit_option_enum(4);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_ERROR_LOG_BUFFER: CUjit_option_enum = CUjit_option_enum(5);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: CUjit_option_enum = CUjit_option_enum(6);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_OPTIMIZATION_LEVEL: CUjit_option_enum = CUjit_option_enum(7);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_TARGET_FROM_CUCONTEXT: CUjit_option_enum = CUjit_option_enum(8);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_TARGET: CUjit_option_enum = CUjit_option_enum(9);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_FALLBACK_STRATEGY: CUjit_option_enum = CUjit_option_enum(10);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_GENERATE_DEBUG_INFO: CUjit_option_enum = CUjit_option_enum(11);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_LOG_VERBOSE: CUjit_option_enum = CUjit_option_enum(12);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_GENERATE_LINE_INFO: CUjit_option_enum = CUjit_option_enum(13);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_CACHE_MODE: CUjit_option_enum = CUjit_option_enum(14);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_NEW_SM3X_OPT: CUjit_option_enum = CUjit_option_enum(15);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_FAST_COMPILE: CUjit_option_enum = CUjit_option_enum(16);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_GLOBAL_SYMBOL_NAMES: CUjit_option_enum = CUjit_option_enum(17);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_GLOBAL_SYMBOL_ADDRESSES: CUjit_option_enum = CUjit_option_enum(18);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_GLOBAL_SYMBOL_COUNT: CUjit_option_enum = CUjit_option_enum(19);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_LTO: CUjit_option_enum = CUjit_option_enum(20);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_FTZ: CUjit_option_enum = CUjit_option_enum(21);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_PREC_DIV: CUjit_option_enum = CUjit_option_enum(22);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_PREC_SQRT: CUjit_option_enum = CUjit_option_enum(23);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_FMA: CUjit_option_enum = CUjit_option_enum(24);
-}
-impl CUjit_option_enum {
- pub const CU_JIT_NUM_OPTIONS: CUjit_option_enum = CUjit_option_enum(25);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUjit_option_enum(pub ::std::os::raw::c_uint);
-pub use self::CUjit_option_enum as CUjit_option;
-impl CUjitInputType_enum {
- pub const CU_JIT_INPUT_CUBIN: CUjitInputType_enum = CUjitInputType_enum(0);
-}
-impl CUjitInputType_enum {
- pub const CU_JIT_INPUT_PTX: CUjitInputType_enum = CUjitInputType_enum(1);
-}
-impl CUjitInputType_enum {
- pub const CU_JIT_INPUT_FATBINARY: CUjitInputType_enum = CUjitInputType_enum(2);
-}
-impl CUjitInputType_enum {
- pub const CU_JIT_INPUT_OBJECT: CUjitInputType_enum = CUjitInputType_enum(3);
-}
-impl CUjitInputType_enum {
- pub const CU_JIT_INPUT_LIBRARY: CUjitInputType_enum = CUjitInputType_enum(4);
-}
-impl CUjitInputType_enum {
- pub const CU_JIT_INPUT_NVVM: CUjitInputType_enum = CUjitInputType_enum(5);
-}
-impl CUjitInputType_enum {
- pub const CU_JIT_NUM_INPUT_TYPES: CUjitInputType_enum = CUjitInputType_enum(6);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUjitInputType_enum(pub ::std::os::raw::c_uint);
-pub use self::CUjitInputType_enum as CUjitInputType;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUlinkState_st {
- _unused: [u8; 0],
-}
-pub type CUlinkState = *mut CUlinkState_st;
-impl CUlimit_enum {
- pub const CU_LIMIT_STACK_SIZE: CUlimit_enum = CUlimit_enum(0);
-}
-impl CUlimit_enum {
- pub const CU_LIMIT_PRINTF_FIFO_SIZE: CUlimit_enum = CUlimit_enum(1);
-}
-impl CUlimit_enum {
- pub const CU_LIMIT_MALLOC_HEAP_SIZE: CUlimit_enum = CUlimit_enum(2);
-}
-impl CUlimit_enum {
- pub const CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH: CUlimit_enum = CUlimit_enum(3);
-}
-impl CUlimit_enum {
- pub const CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT: CUlimit_enum = CUlimit_enum(4);
-}
-impl CUlimit_enum {
- pub const CU_LIMIT_MAX_L2_FETCH_GRANULARITY: CUlimit_enum = CUlimit_enum(5);
-}
-impl CUlimit_enum {
- pub const CU_LIMIT_PERSISTING_L2_CACHE_SIZE: CUlimit_enum = CUlimit_enum(6);
-}
-impl CUlimit_enum {
- pub const CU_LIMIT_MAX: CUlimit_enum = CUlimit_enum(7);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUlimit_enum(pub ::std::os::raw::c_uint);
-pub use self::CUlimit_enum as CUlimit;
-impl CUresourcetype_enum {
- pub const CU_RESOURCE_TYPE_ARRAY: CUresourcetype_enum = CUresourcetype_enum(0);
-}
-impl CUresourcetype_enum {
- pub const CU_RESOURCE_TYPE_MIPMAPPED_ARRAY: CUresourcetype_enum = CUresourcetype_enum(1);
-}
-impl CUresourcetype_enum {
- pub const CU_RESOURCE_TYPE_LINEAR: CUresourcetype_enum = CUresourcetype_enum(2);
-}
-impl CUresourcetype_enum {
- pub const CU_RESOURCE_TYPE_PITCH2D: CUresourcetype_enum = CUresourcetype_enum(3);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUresourcetype_enum(pub ::std::os::raw::c_uint);
-pub use self::CUresourcetype_enum as CUresourcetype;
-pub type CUhostFn =
- ::std::option::Option<unsafe extern "C" fn(userData: *mut ::std::os::raw::c_void)>;
-impl CUaccessProperty_enum {
- pub const CU_ACCESS_PROPERTY_NORMAL: CUaccessProperty_enum = CUaccessProperty_enum(0);
-}
-impl CUaccessProperty_enum {
- pub const CU_ACCESS_PROPERTY_STREAMING: CUaccessProperty_enum = CUaccessProperty_enum(1);
-}
-impl CUaccessProperty_enum {
- pub const CU_ACCESS_PROPERTY_PERSISTING: CUaccessProperty_enum = CUaccessProperty_enum(2);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUaccessProperty_enum(pub ::std::os::raw::c_uint);
-pub use self::CUaccessProperty_enum as CUaccessProperty;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq)]
-pub struct CUaccessPolicyWindow_st {
- pub base_ptr: *mut ::std::os::raw::c_void,
- pub num_bytes: usize,
- pub hitRatio: f32,
- pub hitProp: CUaccessProperty,
- pub missProp: CUaccessProperty,
-}
-pub type CUaccessPolicyWindow_v1 = CUaccessPolicyWindow_st;
-pub type CUaccessPolicyWindow = CUaccessPolicyWindow_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_KERNEL_NODE_PARAMS_st {
- pub func: CUfunction,
- pub gridDimX: ::std::os::raw::c_uint,
- pub gridDimY: ::std::os::raw::c_uint,
- pub gridDimZ: ::std::os::raw::c_uint,
- pub blockDimX: ::std::os::raw::c_uint,
- pub blockDimY: ::std::os::raw::c_uint,
- pub blockDimZ: ::std::os::raw::c_uint,
- pub sharedMemBytes: ::std::os::raw::c_uint,
- pub kernelParams: *mut *mut ::std::os::raw::c_void,
- pub extra: *mut *mut ::std::os::raw::c_void,
-}
-pub type CUDA_KERNEL_NODE_PARAMS_v1 = CUDA_KERNEL_NODE_PARAMS_st;
-pub type CUDA_KERNEL_NODE_PARAMS = CUDA_KERNEL_NODE_PARAMS_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_MEMSET_NODE_PARAMS_st {
- pub dst: CUdeviceptr,
- pub pitch: usize,
- pub value: ::std::os::raw::c_uint,
- pub elementSize: ::std::os::raw::c_uint,
- pub width: usize,
- pub height: usize,
-}
-pub type CUDA_MEMSET_NODE_PARAMS_v1 = CUDA_MEMSET_NODE_PARAMS_st;
-pub type CUDA_MEMSET_NODE_PARAMS = CUDA_MEMSET_NODE_PARAMS_v1;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUDA_HOST_NODE_PARAMS_st {
- pub fn_: CUhostFn,
- pub userData: *mut ::std::os::raw::c_void,
-}
-pub type CUDA_HOST_NODE_PARAMS_v1 = CUDA_HOST_NODE_PARAMS_st;
-pub type CUDA_HOST_NODE_PARAMS = CUDA_HOST_NODE_PARAMS_v1;
-impl CUgraphNodeType_enum {
- pub const CU_GRAPH_NODE_TYPE_KERNEL: CUgraphNodeType_enum = CUgraphNodeType_enum(0);
-}
-impl CUgraphNodeType_enum {
- pub const CU_GRAPH_NODE_TYPE_MEMCPY: CUgraphNodeType_enum = CUgraphNodeType_enum(1);
-}
-impl CUgraphNodeType_enum {
- pub const CU_GRAPH_NODE_TYPE_MEMSET: CUgraphNodeType_enum = CUgraphNodeType_enum(2);
-}
-impl CUgraphNodeType_enum {
- pub const CU_GRAPH_NODE_TYPE_HOST: CUgraphNodeType_enum = CUgraphNodeType_enum(3);
-}
-impl CUgraphNodeType_enum {
- pub const CU_GRAPH_NODE_TYPE_GRAPH: CUgraphNodeType_enum = CUgraphNodeType_enum(4);
-}
-impl CUgraphNodeType_enum {
- pub const CU_GRAPH_NODE_TYPE_EMPTY: CUgraphNodeType_enum = CUgraphNodeType_enum(5);
-}
-impl CUgraphNodeType_enum {
- pub const CU_GRAPH_NODE_TYPE_WAIT_EVENT: CUgraphNodeType_enum = CUgraphNodeType_enum(6);
-}
-impl CUgraphNodeType_enum {
- pub const CU_GRAPH_NODE_TYPE_EVENT_RECORD: CUgraphNodeType_enum = CUgraphNodeType_enum(7);
-}
-impl CUgraphNodeType_enum {
- pub const CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL: CUgraphNodeType_enum = CUgraphNodeType_enum(8);
-}
-impl CUgraphNodeType_enum {
- pub const CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT: CUgraphNodeType_enum = CUgraphNodeType_enum(9);
-}
-impl CUgraphNodeType_enum {
- pub const CU_GRAPH_NODE_TYPE_MEM_ALLOC: CUgraphNodeType_enum = CUgraphNodeType_enum(10);
-}
-impl CUgraphNodeType_enum {
- pub const CU_GRAPH_NODE_TYPE_MEM_FREE: CUgraphNodeType_enum = CUgraphNodeType_enum(11);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUgraphNodeType_enum(pub ::std::os::raw::c_uint);
-pub use self::CUgraphNodeType_enum as CUgraphNodeType;
-impl CUsynchronizationPolicy_enum {
- pub const CU_SYNC_POLICY_AUTO: CUsynchronizationPolicy_enum = CUsynchronizationPolicy_enum(1);
-}
-impl CUsynchronizationPolicy_enum {
- pub const CU_SYNC_POLICY_SPIN: CUsynchronizationPolicy_enum = CUsynchronizationPolicy_enum(2);
-}
-impl CUsynchronizationPolicy_enum {
- pub const CU_SYNC_POLICY_YIELD: CUsynchronizationPolicy_enum = CUsynchronizationPolicy_enum(3);
-}
-impl CUsynchronizationPolicy_enum {
- pub const CU_SYNC_POLICY_BLOCKING_SYNC: CUsynchronizationPolicy_enum =
- CUsynchronizationPolicy_enum(4);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUsynchronizationPolicy_enum(pub ::std::os::raw::c_uint);
-pub use self::CUsynchronizationPolicy_enum as CUsynchronizationPolicy;
-impl CUkernelNodeAttrID_enum {
- pub const CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW: CUkernelNodeAttrID_enum =
- CUkernelNodeAttrID_enum(1);
-}
-impl CUkernelNodeAttrID_enum {
- pub const CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE: CUkernelNodeAttrID_enum =
- CUkernelNodeAttrID_enum(2);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUkernelNodeAttrID_enum(pub ::std::os::raw::c_uint);
-pub use self::CUkernelNodeAttrID_enum as CUkernelNodeAttrID;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUkernelNodeAttrValue_union {
- pub accessPolicyWindow: CUaccessPolicyWindow,
- pub cooperative: ::std::os::raw::c_int,
-}
-pub type CUkernelNodeAttrValue_v1 = CUkernelNodeAttrValue_union;
-pub type CUkernelNodeAttrValue = CUkernelNodeAttrValue_v1;
-impl CUstreamCaptureStatus_enum {
- pub const CU_STREAM_CAPTURE_STATUS_NONE: CUstreamCaptureStatus_enum =
- CUstreamCaptureStatus_enum(0);
-}
-impl CUstreamCaptureStatus_enum {
- pub const CU_STREAM_CAPTURE_STATUS_ACTIVE: CUstreamCaptureStatus_enum =
- CUstreamCaptureStatus_enum(1);
-}
-impl CUstreamCaptureStatus_enum {
- pub const CU_STREAM_CAPTURE_STATUS_INVALIDATED: CUstreamCaptureStatus_enum =
- CUstreamCaptureStatus_enum(2);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUstreamCaptureStatus_enum(pub ::std::os::raw::c_uint);
-pub use self::CUstreamCaptureStatus_enum as CUstreamCaptureStatus;
-impl CUstreamCaptureMode_enum {
- pub const CU_STREAM_CAPTURE_MODE_GLOBAL: CUstreamCaptureMode_enum = CUstreamCaptureMode_enum(0);
-}
-impl CUstreamCaptureMode_enum {
- pub const CU_STREAM_CAPTURE_MODE_THREAD_LOCAL: CUstreamCaptureMode_enum =
- CUstreamCaptureMode_enum(1);
-}
-impl CUstreamCaptureMode_enum {
- pub const CU_STREAM_CAPTURE_MODE_RELAXED: CUstreamCaptureMode_enum =
- CUstreamCaptureMode_enum(2);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUstreamCaptureMode_enum(pub ::std::os::raw::c_uint);
-pub use self::CUstreamCaptureMode_enum as CUstreamCaptureMode;
-impl CUstreamAttrID_enum {
- pub const CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW: CUstreamAttrID_enum =
- CUstreamAttrID_enum(1);
-}
-impl CUstreamAttrID_enum {
- pub const CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY: CUstreamAttrID_enum =
- CUstreamAttrID_enum(3);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUstreamAttrID_enum(pub ::std::os::raw::c_uint);
-pub use self::CUstreamAttrID_enum as CUstreamAttrID;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUstreamAttrValue_union {
- pub accessPolicyWindow: CUaccessPolicyWindow,
- pub syncPolicy: CUsynchronizationPolicy,
-}
-pub type CUstreamAttrValue_v1 = CUstreamAttrValue_union;
-pub type CUstreamAttrValue = CUstreamAttrValue_v1;
-impl CUexecAffinityType_enum {
- pub const CU_EXEC_AFFINITY_TYPE_SM_COUNT: CUexecAffinityType_enum = CUexecAffinityType_enum(0);
-}
-impl CUexecAffinityType_enum {
- pub const CU_EXEC_AFFINITY_TYPE_MAX: CUexecAffinityType_enum = CUexecAffinityType_enum(1);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUexecAffinityType_enum(pub ::std::os::raw::c_uint);
-pub use self::CUexecAffinityType_enum as CUexecAffinityType;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUexecAffinitySmCount_st {
- pub val: ::std::os::raw::c_uint,
-}
-pub type CUexecAffinitySmCount_v1 = CUexecAffinitySmCount_st;
-pub type CUexecAffinitySmCount = CUexecAffinitySmCount_v1;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUexecAffinityParam_st {
- pub type_: CUexecAffinityType,
- pub param: CUexecAffinityParam_st__bindgen_ty_1,
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUexecAffinityParam_st__bindgen_ty_1 {
- pub smCount: CUexecAffinitySmCount,
-}
-pub type CUexecAffinityParam_v1 = CUexecAffinityParam_st;
-pub type CUexecAffinityParam = CUexecAffinityParam_v1;
-impl cudaError_enum {
- pub const CUDA_SUCCESS: cudaError_enum = cudaError_enum(0);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_INVALID_VALUE: cudaError_enum = cudaError_enum(1);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_OUT_OF_MEMORY: cudaError_enum = cudaError_enum(2);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_NOT_INITIALIZED: cudaError_enum = cudaError_enum(3);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_DEINITIALIZED: cudaError_enum = cudaError_enum(4);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_PROFILER_DISABLED: cudaError_enum = cudaError_enum(5);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_PROFILER_NOT_INITIALIZED: cudaError_enum = cudaError_enum(6);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_PROFILER_ALREADY_STARTED: cudaError_enum = cudaError_enum(7);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_PROFILER_ALREADY_STOPPED: cudaError_enum = cudaError_enum(8);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_STUB_LIBRARY: cudaError_enum = cudaError_enum(34);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_NO_DEVICE: cudaError_enum = cudaError_enum(100);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_INVALID_DEVICE: cudaError_enum = cudaError_enum(101);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_DEVICE_NOT_LICENSED: cudaError_enum = cudaError_enum(102);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_INVALID_IMAGE: cudaError_enum = cudaError_enum(200);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_INVALID_CONTEXT: cudaError_enum = cudaError_enum(201);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_CONTEXT_ALREADY_CURRENT: cudaError_enum = cudaError_enum(202);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_MAP_FAILED: cudaError_enum = cudaError_enum(205);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_UNMAP_FAILED: cudaError_enum = cudaError_enum(206);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_ARRAY_IS_MAPPED: cudaError_enum = cudaError_enum(207);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_ALREADY_MAPPED: cudaError_enum = cudaError_enum(208);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_NO_BINARY_FOR_GPU: cudaError_enum = cudaError_enum(209);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_ALREADY_ACQUIRED: cudaError_enum = cudaError_enum(210);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_NOT_MAPPED: cudaError_enum = cudaError_enum(211);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_NOT_MAPPED_AS_ARRAY: cudaError_enum = cudaError_enum(212);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_NOT_MAPPED_AS_POINTER: cudaError_enum = cudaError_enum(213);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_ECC_UNCORRECTABLE: cudaError_enum = cudaError_enum(214);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_UNSUPPORTED_LIMIT: cudaError_enum = cudaError_enum(215);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_CONTEXT_ALREADY_IN_USE: cudaError_enum = cudaError_enum(216);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: cudaError_enum = cudaError_enum(217);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_INVALID_PTX: cudaError_enum = cudaError_enum(218);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: cudaError_enum = cudaError_enum(219);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_NVLINK_UNCORRECTABLE: cudaError_enum = cudaError_enum(220);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_JIT_COMPILER_NOT_FOUND: cudaError_enum = cudaError_enum(221);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_UNSUPPORTED_PTX_VERSION: cudaError_enum = cudaError_enum(222);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_JIT_COMPILATION_DISABLED: cudaError_enum = cudaError_enum(223);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY: cudaError_enum = cudaError_enum(224);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_INVALID_SOURCE: cudaError_enum = cudaError_enum(300);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_FILE_NOT_FOUND: cudaError_enum = cudaError_enum(301);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: cudaError_enum = cudaError_enum(302);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: cudaError_enum = cudaError_enum(303);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_OPERATING_SYSTEM: cudaError_enum = cudaError_enum(304);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_INVALID_HANDLE: cudaError_enum = cudaError_enum(400);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_ILLEGAL_STATE: cudaError_enum = cudaError_enum(401);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_NOT_FOUND: cudaError_enum = cudaError_enum(500);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_NOT_READY: cudaError_enum = cudaError_enum(600);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_ILLEGAL_ADDRESS: cudaError_enum = cudaError_enum(700);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: cudaError_enum = cudaError_enum(701);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_LAUNCH_TIMEOUT: cudaError_enum = cudaError_enum(702);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: cudaError_enum = cudaError_enum(703);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: cudaError_enum = cudaError_enum(704);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: cudaError_enum = cudaError_enum(705);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: cudaError_enum = cudaError_enum(708);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_CONTEXT_IS_DESTROYED: cudaError_enum = cudaError_enum(709);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_ASSERT: cudaError_enum = cudaError_enum(710);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_TOO_MANY_PEERS: cudaError_enum = cudaError_enum(711);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: cudaError_enum = cudaError_enum(712);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: cudaError_enum = cudaError_enum(713);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_HARDWARE_STACK_ERROR: cudaError_enum = cudaError_enum(714);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_ILLEGAL_INSTRUCTION: cudaError_enum = cudaError_enum(715);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_MISALIGNED_ADDRESS: cudaError_enum = cudaError_enum(716);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_INVALID_ADDRESS_SPACE: cudaError_enum = cudaError_enum(717);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_INVALID_PC: cudaError_enum = cudaError_enum(718);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_LAUNCH_FAILED: cudaError_enum = cudaError_enum(719);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE: cudaError_enum = cudaError_enum(720);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_NOT_PERMITTED: cudaError_enum = cudaError_enum(800);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_NOT_SUPPORTED: cudaError_enum = cudaError_enum(801);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_SYSTEM_NOT_READY: cudaError_enum = cudaError_enum(802);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_SYSTEM_DRIVER_MISMATCH: cudaError_enum = cudaError_enum(803);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: cudaError_enum = cudaError_enum(804);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_MPS_CONNECTION_FAILED: cudaError_enum = cudaError_enum(805);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_MPS_RPC_FAILURE: cudaError_enum = cudaError_enum(806);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_MPS_SERVER_NOT_READY: cudaError_enum = cudaError_enum(807);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_MPS_MAX_CLIENTS_REACHED: cudaError_enum = cudaError_enum(808);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED: cudaError_enum = cudaError_enum(809);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED: cudaError_enum = cudaError_enum(900);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_STREAM_CAPTURE_INVALIDATED: cudaError_enum = cudaError_enum(901);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_STREAM_CAPTURE_MERGE: cudaError_enum = cudaError_enum(902);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_STREAM_CAPTURE_UNMATCHED: cudaError_enum = cudaError_enum(903);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_STREAM_CAPTURE_UNJOINED: cudaError_enum = cudaError_enum(904);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_STREAM_CAPTURE_ISOLATION: cudaError_enum = cudaError_enum(905);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_STREAM_CAPTURE_IMPLICIT: cudaError_enum = cudaError_enum(906);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_CAPTURED_EVENT: cudaError_enum = cudaError_enum(907);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD: cudaError_enum = cudaError_enum(908);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_TIMEOUT: cudaError_enum = cudaError_enum(909);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE: cudaError_enum = cudaError_enum(910);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_EXTERNAL_DEVICE: cudaError_enum = cudaError_enum(911);
-}
-impl cudaError_enum {
- pub const CUDA_ERROR_UNKNOWN: cudaError_enum = cudaError_enum(999);
-}
-#[repr(transparent)]
-#[must_use]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct cudaError_enum(pub ::std::os::raw::c_uint);
-pub use self::cudaError_enum as CUresult;
-impl CUdevice_P2PAttribute_enum {
- pub const CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK: CUdevice_P2PAttribute_enum =
- CUdevice_P2PAttribute_enum(1);
-}
-impl CUdevice_P2PAttribute_enum {
- pub const CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED: CUdevice_P2PAttribute_enum =
- CUdevice_P2PAttribute_enum(2);
-}
-impl CUdevice_P2PAttribute_enum {
- pub const CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED: CUdevice_P2PAttribute_enum =
- CUdevice_P2PAttribute_enum(3);
-}
-impl CUdevice_P2PAttribute_enum {
- pub const CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED: CUdevice_P2PAttribute_enum =
- CUdevice_P2PAttribute_enum(4);
-}
-impl CUdevice_P2PAttribute_enum {
- pub const CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED: CUdevice_P2PAttribute_enum =
- CUdevice_P2PAttribute_enum(4);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUdevice_P2PAttribute_enum(pub ::std::os::raw::c_uint);
-pub use self::CUdevice_P2PAttribute_enum as CUdevice_P2PAttribute;
-pub type CUstreamCallback = ::std::option::Option<
- unsafe extern "C" fn(
- hStream: CUstream,
- status: CUresult,
- userData: *mut ::std::os::raw::c_void,
- ),
->;
-pub type CUoccupancyB2DSize =
- ::std::option::Option<unsafe extern "C" fn(blockSize: ::std::os::raw::c_int) -> usize>;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_MEMCPY2D_st {
- pub srcXInBytes: usize,
- pub srcY: usize,
- pub srcMemoryType: CUmemorytype,
- pub srcHost: *const ::std::os::raw::c_void,
- pub srcDevice: CUdeviceptr,
- pub srcArray: CUarray,
- pub srcPitch: usize,
- pub dstXInBytes: usize,
- pub dstY: usize,
- pub dstMemoryType: CUmemorytype,
- pub dstHost: *mut ::std::os::raw::c_void,
- pub dstDevice: CUdeviceptr,
- pub dstArray: CUarray,
- pub dstPitch: usize,
- pub WidthInBytes: usize,
- pub Height: usize,
-}
-pub type CUDA_MEMCPY2D_v2 = CUDA_MEMCPY2D_st;
-pub type CUDA_MEMCPY2D = CUDA_MEMCPY2D_v2;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_MEMCPY3D_st {
- pub srcXInBytes: usize,
- pub srcY: usize,
- pub srcZ: usize,
- pub srcLOD: usize,
- pub srcMemoryType: CUmemorytype,
- pub srcHost: *const ::std::os::raw::c_void,
- pub srcDevice: CUdeviceptr,
- pub srcArray: CUarray,
- pub reserved0: *mut ::std::os::raw::c_void,
- pub srcPitch: usize,
- pub srcHeight: usize,
- pub dstXInBytes: usize,
- pub dstY: usize,
- pub dstZ: usize,
- pub dstLOD: usize,
- pub dstMemoryType: CUmemorytype,
- pub dstHost: *mut ::std::os::raw::c_void,
- pub dstDevice: CUdeviceptr,
- pub dstArray: CUarray,
- pub reserved1: *mut ::std::os::raw::c_void,
- pub dstPitch: usize,
- pub dstHeight: usize,
- pub WidthInBytes: usize,
- pub Height: usize,
- pub Depth: usize,
-}
-pub type CUDA_MEMCPY3D_v2 = CUDA_MEMCPY3D_st;
-pub type CUDA_MEMCPY3D = CUDA_MEMCPY3D_v2;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_MEMCPY3D_PEER_st {
- pub srcXInBytes: usize,
- pub srcY: usize,
- pub srcZ: usize,
- pub srcLOD: usize,
- pub srcMemoryType: CUmemorytype,
- pub srcHost: *const ::std::os::raw::c_void,
- pub srcDevice: CUdeviceptr,
- pub srcArray: CUarray,
- pub srcContext: CUcontext,
- pub srcPitch: usize,
- pub srcHeight: usize,
- pub dstXInBytes: usize,
- pub dstY: usize,
- pub dstZ: usize,
- pub dstLOD: usize,
- pub dstMemoryType: CUmemorytype,
- pub dstHost: *mut ::std::os::raw::c_void,
- pub dstDevice: CUdeviceptr,
- pub dstArray: CUarray,
- pub dstContext: CUcontext,
- pub dstPitch: usize,
- pub dstHeight: usize,
- pub WidthInBytes: usize,
- pub Height: usize,
- pub Depth: usize,
-}
-pub type CUDA_MEMCPY3D_PEER_v1 = CUDA_MEMCPY3D_PEER_st;
-pub type CUDA_MEMCPY3D_PEER = CUDA_MEMCPY3D_PEER_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_ARRAY_DESCRIPTOR_st {
- pub Width: usize,
- pub Height: usize,
- pub Format: CUarray_format,
- pub NumChannels: ::std::os::raw::c_uint,
-}
-pub type CUDA_ARRAY_DESCRIPTOR_v2 = CUDA_ARRAY_DESCRIPTOR_st;
-pub type CUDA_ARRAY_DESCRIPTOR = CUDA_ARRAY_DESCRIPTOR_v2;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_ARRAY3D_DESCRIPTOR_st {
- pub Width: usize,
- pub Height: usize,
- pub Depth: usize,
- pub Format: CUarray_format,
- pub NumChannels: ::std::os::raw::c_uint,
- pub Flags: ::std::os::raw::c_uint,
-}
-pub type CUDA_ARRAY3D_DESCRIPTOR_v2 = CUDA_ARRAY3D_DESCRIPTOR_st;
-pub type CUDA_ARRAY3D_DESCRIPTOR = CUDA_ARRAY3D_DESCRIPTOR_v2;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_ARRAY_SPARSE_PROPERTIES_st {
- pub tileExtent: CUDA_ARRAY_SPARSE_PROPERTIES_st__bindgen_ty_1,
- pub miptailFirstLevel: ::std::os::raw::c_uint,
- pub miptailSize: ::std::os::raw::c_ulonglong,
- pub flags: ::std::os::raw::c_uint,
- pub reserved: [::std::os::raw::c_uint; 4usize],
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_ARRAY_SPARSE_PROPERTIES_st__bindgen_ty_1 {
- pub width: ::std::os::raw::c_uint,
- pub height: ::std::os::raw::c_uint,
- pub depth: ::std::os::raw::c_uint,
-}
-pub type CUDA_ARRAY_SPARSE_PROPERTIES_v1 = CUDA_ARRAY_SPARSE_PROPERTIES_st;
-pub type CUDA_ARRAY_SPARSE_PROPERTIES = CUDA_ARRAY_SPARSE_PROPERTIES_v1;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUDA_RESOURCE_DESC_st {
- pub resType: CUresourcetype,
- pub res: CUDA_RESOURCE_DESC_st__bindgen_ty_1,
- pub flags: ::std::os::raw::c_uint,
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUDA_RESOURCE_DESC_st__bindgen_ty_1 {
- pub array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1,
- pub mipmap: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_2,
- pub linear: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3,
- pub pitch2D: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4,
- pub reserved: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_5,
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 {
- pub hArray: CUarray,
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_2 {
- pub hMipmappedArray: CUmipmappedArray,
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_3 {
- pub devPtr: CUdeviceptr,
- pub format: CUarray_format,
- pub numChannels: ::std::os::raw::c_uint,
- pub sizeInBytes: usize,
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_4 {
- pub devPtr: CUdeviceptr,
- pub format: CUarray_format,
- pub numChannels: ::std::os::raw::c_uint,
- pub width: usize,
- pub height: usize,
- pub pitchInBytes: usize,
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_5 {
- pub reserved: [::std::os::raw::c_int; 32usize],
-}
-pub type CUDA_RESOURCE_DESC_v1 = CUDA_RESOURCE_DESC_st;
-pub type CUDA_RESOURCE_DESC = CUDA_RESOURCE_DESC_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq)]
-pub struct CUDA_TEXTURE_DESC_st {
- pub addressMode: [CUaddress_mode; 3usize],
- pub filterMode: CUfilter_mode,
- pub flags: ::std::os::raw::c_uint,
- pub maxAnisotropy: ::std::os::raw::c_uint,
- pub mipmapFilterMode: CUfilter_mode,
- pub mipmapLevelBias: f32,
- pub minMipmapLevelClamp: f32,
- pub maxMipmapLevelClamp: f32,
- pub borderColor: [f32; 4usize],
- pub reserved: [::std::os::raw::c_int; 12usize],
-}
-pub type CUDA_TEXTURE_DESC_v1 = CUDA_TEXTURE_DESC_st;
-pub type CUDA_TEXTURE_DESC = CUDA_TEXTURE_DESC_v1;
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_NONE: CUresourceViewFormat_enum = CUresourceViewFormat_enum(0);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UINT_1X8: CUresourceViewFormat_enum = CUresourceViewFormat_enum(1);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UINT_2X8: CUresourceViewFormat_enum = CUresourceViewFormat_enum(2);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UINT_4X8: CUresourceViewFormat_enum = CUresourceViewFormat_enum(3);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_SINT_1X8: CUresourceViewFormat_enum = CUresourceViewFormat_enum(4);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_SINT_2X8: CUresourceViewFormat_enum = CUresourceViewFormat_enum(5);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_SINT_4X8: CUresourceViewFormat_enum = CUresourceViewFormat_enum(6);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UINT_1X16: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(7);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UINT_2X16: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(8);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UINT_4X16: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(9);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_SINT_1X16: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(10);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_SINT_2X16: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(11);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_SINT_4X16: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(12);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UINT_1X32: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(13);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UINT_2X32: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(14);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UINT_4X32: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(15);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_SINT_1X32: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(16);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_SINT_2X32: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(17);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_SINT_4X32: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(18);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_FLOAT_1X16: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(19);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_FLOAT_2X16: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(20);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_FLOAT_4X16: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(21);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_FLOAT_1X32: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(22);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_FLOAT_2X32: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(23);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_FLOAT_4X32: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(24);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC1: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(25);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC2: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(26);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC3: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(27);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC4: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(28);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_SIGNED_BC4: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(29);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC5: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(30);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_SIGNED_BC5: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(31);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC6H: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(32);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_SIGNED_BC6H: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(33);
-}
-impl CUresourceViewFormat_enum {
- pub const CU_RES_VIEW_FORMAT_UNSIGNED_BC7: CUresourceViewFormat_enum =
- CUresourceViewFormat_enum(34);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUresourceViewFormat_enum(pub ::std::os::raw::c_uint);
-pub use self::CUresourceViewFormat_enum as CUresourceViewFormat;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_RESOURCE_VIEW_DESC_st {
- pub format: CUresourceViewFormat,
- pub width: usize,
- pub height: usize,
- pub depth: usize,
- pub firstMipmapLevel: ::std::os::raw::c_uint,
- pub lastMipmapLevel: ::std::os::raw::c_uint,
- pub firstLayer: ::std::os::raw::c_uint,
- pub lastLayer: ::std::os::raw::c_uint,
- pub reserved: [::std::os::raw::c_uint; 16usize],
-}
-pub type CUDA_RESOURCE_VIEW_DESC_v1 = CUDA_RESOURCE_VIEW_DESC_st;
-pub type CUDA_RESOURCE_VIEW_DESC = CUDA_RESOURCE_VIEW_DESC_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_LAUNCH_PARAMS_st {
- pub function: CUfunction,
- pub gridDimX: ::std::os::raw::c_uint,
- pub gridDimY: ::std::os::raw::c_uint,
- pub gridDimZ: ::std::os::raw::c_uint,
- pub blockDimX: ::std::os::raw::c_uint,
- pub blockDimY: ::std::os::raw::c_uint,
- pub blockDimZ: ::std::os::raw::c_uint,
- pub sharedMemBytes: ::std::os::raw::c_uint,
- pub hStream: CUstream,
- pub kernelParams: *mut *mut ::std::os::raw::c_void,
-}
-pub type CUDA_LAUNCH_PARAMS_v1 = CUDA_LAUNCH_PARAMS_st;
-pub type CUDA_LAUNCH_PARAMS = CUDA_LAUNCH_PARAMS_v1;
-impl CUexternalMemoryHandleType_enum {
- pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: CUexternalMemoryHandleType_enum =
- CUexternalMemoryHandleType_enum(1);
-}
-impl CUexternalMemoryHandleType_enum {
- pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32: CUexternalMemoryHandleType_enum =
- CUexternalMemoryHandleType_enum(2);
-}
-impl CUexternalMemoryHandleType_enum {
- pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: CUexternalMemoryHandleType_enum =
- CUexternalMemoryHandleType_enum(3);
-}
-impl CUexternalMemoryHandleType_enum {
- pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP: CUexternalMemoryHandleType_enum =
- CUexternalMemoryHandleType_enum(4);
-}
-impl CUexternalMemoryHandleType_enum {
- pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE: CUexternalMemoryHandleType_enum =
- CUexternalMemoryHandleType_enum(5);
-}
-impl CUexternalMemoryHandleType_enum {
- pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE: CUexternalMemoryHandleType_enum =
- CUexternalMemoryHandleType_enum(6);
-}
-impl CUexternalMemoryHandleType_enum {
- pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT: CUexternalMemoryHandleType_enum =
- CUexternalMemoryHandleType_enum(7);
-}
-impl CUexternalMemoryHandleType_enum {
- pub const CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF: CUexternalMemoryHandleType_enum =
- CUexternalMemoryHandleType_enum(8);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUexternalMemoryHandleType_enum(pub ::std::os::raw::c_uint);
-pub use self::CUexternalMemoryHandleType_enum as CUexternalMemoryHandleType;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
- pub type_: CUexternalMemoryHandleType,
- pub handle: CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1,
- pub size: ::std::os::raw::c_ulonglong,
- pub flags: ::std::os::raw::c_uint,
- pub reserved: [::std::os::raw::c_uint; 16usize],
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1 {
- pub fd: ::std::os::raw::c_int,
- pub win32: CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1,
- pub nvSciBufObject: *const ::std::os::raw::c_void,
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1 {
- pub handle: *mut ::std::os::raw::c_void,
- pub name: *const ::std::os::raw::c_void,
-}
-pub type CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1 = CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st;
-pub type CUDA_EXTERNAL_MEMORY_HANDLE_DESC = CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st {
- pub offset: ::std::os::raw::c_ulonglong,
- pub size: ::std::os::raw::c_ulonglong,
- pub flags: ::std::os::raw::c_uint,
- pub reserved: [::std::os::raw::c_uint; 16usize],
-}
-pub type CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1 = CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st;
-pub type CUDA_EXTERNAL_MEMORY_BUFFER_DESC = CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st {
- pub offset: ::std::os::raw::c_ulonglong,
- pub arrayDesc: CUDA_ARRAY3D_DESCRIPTOR,
- pub numLevels: ::std::os::raw::c_uint,
- pub reserved: [::std::os::raw::c_uint; 16usize],
-}
-pub type CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1 =
- CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st;
-pub type CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC = CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1;
-impl CUexternalSemaphoreHandleType_enum {
- pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: CUexternalSemaphoreHandleType_enum =
- CUexternalSemaphoreHandleType_enum(1);
-}
-impl CUexternalSemaphoreHandleType_enum {
- pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32: CUexternalSemaphoreHandleType_enum =
- CUexternalSemaphoreHandleType_enum(2);
-}
-impl CUexternalSemaphoreHandleType_enum {
- pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT:
- CUexternalSemaphoreHandleType_enum = CUexternalSemaphoreHandleType_enum(3);
-}
-impl CUexternalSemaphoreHandleType_enum {
- pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE: CUexternalSemaphoreHandleType_enum =
- CUexternalSemaphoreHandleType_enum(4);
-}
-impl CUexternalSemaphoreHandleType_enum {
- pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE: CUexternalSemaphoreHandleType_enum =
- CUexternalSemaphoreHandleType_enum(5);
-}
-impl CUexternalSemaphoreHandleType_enum {
- pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC: CUexternalSemaphoreHandleType_enum =
- CUexternalSemaphoreHandleType_enum(6);
-}
-impl CUexternalSemaphoreHandleType_enum {
- pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX:
- CUexternalSemaphoreHandleType_enum = CUexternalSemaphoreHandleType_enum(7);
-}
-impl CUexternalSemaphoreHandleType_enum {
- pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT:
- CUexternalSemaphoreHandleType_enum = CUexternalSemaphoreHandleType_enum(8);
-}
-impl CUexternalSemaphoreHandleType_enum {
- pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD:
- CUexternalSemaphoreHandleType_enum = CUexternalSemaphoreHandleType_enum(9);
-}
-impl CUexternalSemaphoreHandleType_enum {
- pub const CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32:
- CUexternalSemaphoreHandleType_enum = CUexternalSemaphoreHandleType_enum(10);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUexternalSemaphoreHandleType_enum(pub ::std::os::raw::c_uint);
-pub use self::CUexternalSemaphoreHandleType_enum as CUexternalSemaphoreHandleType;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
- pub type_: CUexternalSemaphoreHandleType,
- pub handle: CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1,
- pub flags: ::std::os::raw::c_uint,
- pub reserved: [::std::os::raw::c_uint; 16usize],
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1 {
- pub fd: ::std::os::raw::c_int,
- pub win32: CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1,
- pub nvSciSyncObj: *const ::std::os::raw::c_void,
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1 {
- pub handle: *mut ::std::os::raw::c_void,
- pub name: *const ::std::os::raw::c_void,
-}
-pub type CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1 = CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st;
-pub type CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC = CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st {
- pub params: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1,
- pub flags: ::std::os::raw::c_uint,
- pub reserved: [::std::os::raw::c_uint; 16usize],
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1 {
- pub fence: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1,
- pub nvSciSync: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2,
- pub keyedMutex: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3,
- pub reserved: [::std::os::raw::c_uint; 12usize],
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1 {
- pub value: ::std::os::raw::c_ulonglong,
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2 {
- pub fence: *mut ::std::os::raw::c_void,
- pub reserved: ::std::os::raw::c_ulonglong,
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3 {
- pub key: ::std::os::raw::c_ulonglong,
-}
-pub type CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1 = CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st;
-pub type CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS = CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st {
- pub params: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1,
- pub flags: ::std::os::raw::c_uint,
- pub reserved: [::std::os::raw::c_uint; 16usize],
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1 {
- pub fence: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1,
- pub nvSciSync: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2,
- pub keyedMutex: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3,
- pub reserved: [::std::os::raw::c_uint; 10usize],
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1 {
- pub value: ::std::os::raw::c_ulonglong,
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2 {
- pub fence: *mut ::std::os::raw::c_void,
- pub reserved: ::std::os::raw::c_ulonglong,
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3 {
- pub key: ::std::os::raw::c_ulonglong,
- pub timeoutMs: ::std::os::raw::c_uint,
-}
-pub type CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1 = CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st;
-pub type CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS = CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st {
- pub extSemArray: *mut CUexternalSemaphore,
- pub paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,
- pub numExtSems: ::std::os::raw::c_uint,
-}
-pub type CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1 = CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st;
-pub type CUDA_EXT_SEM_SIGNAL_NODE_PARAMS = CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_st {
- pub extSemArray: *mut CUexternalSemaphore,
- pub paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,
- pub numExtSems: ::std::os::raw::c_uint,
-}
-pub type CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1 = CUDA_EXT_SEM_WAIT_NODE_PARAMS_st;
-pub type CUDA_EXT_SEM_WAIT_NODE_PARAMS = CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1;
-pub type CUmemGenericAllocationHandle_v1 = ::std::os::raw::c_ulonglong;
-pub type CUmemGenericAllocationHandle = CUmemGenericAllocationHandle_v1;
-impl CUmemAllocationHandleType_enum {
- pub const CU_MEM_HANDLE_TYPE_NONE: CUmemAllocationHandleType_enum =
- CUmemAllocationHandleType_enum(0);
-}
-impl CUmemAllocationHandleType_enum {
- pub const CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR: CUmemAllocationHandleType_enum =
- CUmemAllocationHandleType_enum(1);
-}
-impl CUmemAllocationHandleType_enum {
- pub const CU_MEM_HANDLE_TYPE_WIN32: CUmemAllocationHandleType_enum =
- CUmemAllocationHandleType_enum(2);
-}
-impl CUmemAllocationHandleType_enum {
- pub const CU_MEM_HANDLE_TYPE_WIN32_KMT: CUmemAllocationHandleType_enum =
- CUmemAllocationHandleType_enum(4);
-}
-impl CUmemAllocationHandleType_enum {
- pub const CU_MEM_HANDLE_TYPE_MAX: CUmemAllocationHandleType_enum =
- CUmemAllocationHandleType_enum(2147483647);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUmemAllocationHandleType_enum(pub ::std::os::raw::c_uint);
-pub use self::CUmemAllocationHandleType_enum as CUmemAllocationHandleType;
-impl CUmemAccess_flags_enum {
- pub const CU_MEM_ACCESS_FLAGS_PROT_NONE: CUmemAccess_flags_enum = CUmemAccess_flags_enum(0);
-}
-impl CUmemAccess_flags_enum {
- pub const CU_MEM_ACCESS_FLAGS_PROT_READ: CUmemAccess_flags_enum = CUmemAccess_flags_enum(1);
-}
-impl CUmemAccess_flags_enum {
- pub const CU_MEM_ACCESS_FLAGS_PROT_READWRITE: CUmemAccess_flags_enum =
- CUmemAccess_flags_enum(3);
-}
-impl CUmemAccess_flags_enum {
- pub const CU_MEM_ACCESS_FLAGS_PROT_MAX: CUmemAccess_flags_enum =
- CUmemAccess_flags_enum(2147483647);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUmemAccess_flags_enum(pub ::std::os::raw::c_uint);
-pub use self::CUmemAccess_flags_enum as CUmemAccess_flags;
-impl CUmemLocationType_enum {
- pub const CU_MEM_LOCATION_TYPE_INVALID: CUmemLocationType_enum = CUmemLocationType_enum(0);
-}
-impl CUmemLocationType_enum {
- pub const CU_MEM_LOCATION_TYPE_DEVICE: CUmemLocationType_enum = CUmemLocationType_enum(1);
-}
-impl CUmemLocationType_enum {
- pub const CU_MEM_LOCATION_TYPE_MAX: CUmemLocationType_enum = CUmemLocationType_enum(2147483647);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUmemLocationType_enum(pub ::std::os::raw::c_uint);
-pub use self::CUmemLocationType_enum as CUmemLocationType;
-impl CUmemAllocationType_enum {
- pub const CU_MEM_ALLOCATION_TYPE_INVALID: CUmemAllocationType_enum =
- CUmemAllocationType_enum(0);
-}
-impl CUmemAllocationType_enum {
- pub const CU_MEM_ALLOCATION_TYPE_PINNED: CUmemAllocationType_enum = CUmemAllocationType_enum(1);
-}
-impl CUmemAllocationType_enum {
- pub const CU_MEM_ALLOCATION_TYPE_MAX: CUmemAllocationType_enum =
- CUmemAllocationType_enum(2147483647);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUmemAllocationType_enum(pub ::std::os::raw::c_uint);
-pub use self::CUmemAllocationType_enum as CUmemAllocationType;
-impl CUmemAllocationGranularity_flags_enum {
- pub const CU_MEM_ALLOC_GRANULARITY_MINIMUM: CUmemAllocationGranularity_flags_enum =
- CUmemAllocationGranularity_flags_enum(0);
-}
-impl CUmemAllocationGranularity_flags_enum {
- pub const CU_MEM_ALLOC_GRANULARITY_RECOMMENDED: CUmemAllocationGranularity_flags_enum =
- CUmemAllocationGranularity_flags_enum(1);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUmemAllocationGranularity_flags_enum(pub ::std::os::raw::c_uint);
-pub use self::CUmemAllocationGranularity_flags_enum as CUmemAllocationGranularity_flags;
-impl CUarraySparseSubresourceType_enum {
- pub const CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL: CUarraySparseSubresourceType_enum =
- CUarraySparseSubresourceType_enum(0);
-}
-impl CUarraySparseSubresourceType_enum {
- pub const CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL: CUarraySparseSubresourceType_enum =
- CUarraySparseSubresourceType_enum(1);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUarraySparseSubresourceType_enum(pub ::std::os::raw::c_uint);
-pub use self::CUarraySparseSubresourceType_enum as CUarraySparseSubresourceType;
-impl CUmemOperationType_enum {
- pub const CU_MEM_OPERATION_TYPE_MAP: CUmemOperationType_enum = CUmemOperationType_enum(1);
-}
-impl CUmemOperationType_enum {
- pub const CU_MEM_OPERATION_TYPE_UNMAP: CUmemOperationType_enum = CUmemOperationType_enum(2);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUmemOperationType_enum(pub ::std::os::raw::c_uint);
-pub use self::CUmemOperationType_enum as CUmemOperationType;
-impl CUmemHandleType_enum {
- pub const CU_MEM_HANDLE_TYPE_GENERIC: CUmemHandleType_enum = CUmemHandleType_enum(0);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUmemHandleType_enum(pub ::std::os::raw::c_uint);
-pub use self::CUmemHandleType_enum as CUmemHandleType;
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct CUarrayMapInfo_st {
- pub resourceType: CUresourcetype,
- pub resource: CUarrayMapInfo_st__bindgen_ty_1,
- pub subresourceType: CUarraySparseSubresourceType,
- pub subresource: CUarrayMapInfo_st__bindgen_ty_2,
- pub memOperationType: CUmemOperationType,
- pub memHandleType: CUmemHandleType,
- pub memHandle: CUarrayMapInfo_st__bindgen_ty_3,
- pub offset: ::std::os::raw::c_ulonglong,
- pub deviceBitMask: ::std::os::raw::c_uint,
- pub flags: ::std::os::raw::c_uint,
- pub reserved: [::std::os::raw::c_uint; 2usize],
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUarrayMapInfo_st__bindgen_ty_1 {
- pub mipmap: CUmipmappedArray,
- pub array: CUarray,
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUarrayMapInfo_st__bindgen_ty_2 {
- pub sparseLevel: CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1,
- pub miptail: CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_2,
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_1 {
- pub level: ::std::os::raw::c_uint,
- pub layer: ::std::os::raw::c_uint,
- pub offsetX: ::std::os::raw::c_uint,
- pub offsetY: ::std::os::raw::c_uint,
- pub offsetZ: ::std::os::raw::c_uint,
- pub extentWidth: ::std::os::raw::c_uint,
- pub extentHeight: ::std::os::raw::c_uint,
- pub extentDepth: ::std::os::raw::c_uint,
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUarrayMapInfo_st__bindgen_ty_2__bindgen_ty_2 {
- pub layer: ::std::os::raw::c_uint,
- pub offset: ::std::os::raw::c_ulonglong,
- pub size: ::std::os::raw::c_ulonglong,
-}
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub union CUarrayMapInfo_st__bindgen_ty_3 {
- pub memHandle: CUmemGenericAllocationHandle,
-}
-pub type CUarrayMapInfo_v1 = CUarrayMapInfo_st;
-pub type CUarrayMapInfo = CUarrayMapInfo_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUmemLocation_st {
- pub type_: CUmemLocationType,
- pub id: ::std::os::raw::c_int,
-}
-pub type CUmemLocation_v1 = CUmemLocation_st;
-pub type CUmemLocation = CUmemLocation_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUmemAllocationProp_st {
- pub type_: CUmemAllocationType,
- pub requestedHandleTypes: CUmemAllocationHandleType,
- pub location: CUmemLocation,
- pub win32HandleMetaData: *mut ::std::os::raw::c_void,
- pub allocFlags: CUmemAllocationProp_st__bindgen_ty_1,
-}
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUmemAllocationProp_st__bindgen_ty_1 {
- pub compressionType: ::std::os::raw::c_uchar,
- pub gpuDirectRDMACapable: ::std::os::raw::c_uchar,
- pub usage: ::std::os::raw::c_ushort,
- pub reserved: [::std::os::raw::c_uchar; 4usize],
-}
-pub type CUmemAllocationProp_v1 = CUmemAllocationProp_st;
-pub type CUmemAllocationProp = CUmemAllocationProp_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUmemAccessDesc_st {
- pub location: CUmemLocation,
- pub flags: CUmemAccess_flags,
-}
-pub type CUmemAccessDesc_v1 = CUmemAccessDesc_st;
-pub type CUmemAccessDesc = CUmemAccessDesc_v1;
-impl CUgraphExecUpdateResult_enum {
- pub const CU_GRAPH_EXEC_UPDATE_SUCCESS: CUgraphExecUpdateResult_enum =
- CUgraphExecUpdateResult_enum(0);
-}
-impl CUgraphExecUpdateResult_enum {
- pub const CU_GRAPH_EXEC_UPDATE_ERROR: CUgraphExecUpdateResult_enum =
- CUgraphExecUpdateResult_enum(1);
-}
-impl CUgraphExecUpdateResult_enum {
- pub const CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED: CUgraphExecUpdateResult_enum =
- CUgraphExecUpdateResult_enum(2);
-}
-impl CUgraphExecUpdateResult_enum {
- pub const CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED: CUgraphExecUpdateResult_enum =
- CUgraphExecUpdateResult_enum(3);
-}
-impl CUgraphExecUpdateResult_enum {
- pub const CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED: CUgraphExecUpdateResult_enum =
- CUgraphExecUpdateResult_enum(4);
-}
-impl CUgraphExecUpdateResult_enum {
- pub const CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED: CUgraphExecUpdateResult_enum =
- CUgraphExecUpdateResult_enum(5);
-}
-impl CUgraphExecUpdateResult_enum {
- pub const CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED: CUgraphExecUpdateResult_enum =
- CUgraphExecUpdateResult_enum(6);
-}
-impl CUgraphExecUpdateResult_enum {
- pub const CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE: CUgraphExecUpdateResult_enum =
- CUgraphExecUpdateResult_enum(7);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUgraphExecUpdateResult_enum(pub ::std::os::raw::c_uint);
-pub use self::CUgraphExecUpdateResult_enum as CUgraphExecUpdateResult;
-impl CUmemPool_attribute_enum {
- pub const CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: CUmemPool_attribute_enum =
- CUmemPool_attribute_enum(1);
-}
-impl CUmemPool_attribute_enum {
- pub const CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: CUmemPool_attribute_enum =
- CUmemPool_attribute_enum(2);
-}
-impl CUmemPool_attribute_enum {
- pub const CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: CUmemPool_attribute_enum =
- CUmemPool_attribute_enum(3);
-}
-impl CUmemPool_attribute_enum {
- pub const CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: CUmemPool_attribute_enum =
- CUmemPool_attribute_enum(4);
-}
-impl CUmemPool_attribute_enum {
- pub const CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT: CUmemPool_attribute_enum =
- CUmemPool_attribute_enum(5);
-}
-impl CUmemPool_attribute_enum {
- pub const CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH: CUmemPool_attribute_enum =
- CUmemPool_attribute_enum(6);
-}
-impl CUmemPool_attribute_enum {
- pub const CU_MEMPOOL_ATTR_USED_MEM_CURRENT: CUmemPool_attribute_enum =
- CUmemPool_attribute_enum(7);
-}
-impl CUmemPool_attribute_enum {
- pub const CU_MEMPOOL_ATTR_USED_MEM_HIGH: CUmemPool_attribute_enum = CUmemPool_attribute_enum(8);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUmemPool_attribute_enum(pub ::std::os::raw::c_uint);
-pub use self::CUmemPool_attribute_enum as CUmemPool_attribute;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUmemPoolProps_st {
- pub allocType: CUmemAllocationType,
- pub handleTypes: CUmemAllocationHandleType,
- pub location: CUmemLocation,
- pub win32SecurityAttributes: *mut ::std::os::raw::c_void,
- pub reserved: [::std::os::raw::c_uchar; 64usize],
-}
-pub type CUmemPoolProps_v1 = CUmemPoolProps_st;
-pub type CUmemPoolProps = CUmemPoolProps_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUmemPoolPtrExportData_st {
- pub reserved: [::std::os::raw::c_uchar; 64usize],
-}
-pub type CUmemPoolPtrExportData_v1 = CUmemPoolPtrExportData_st;
-pub type CUmemPoolPtrExportData = CUmemPoolPtrExportData_v1;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_MEM_ALLOC_NODE_PARAMS_st {
- pub poolProps: CUmemPoolProps,
- pub accessDescs: *const CUmemAccessDesc,
- pub accessDescCount: usize,
- pub bytesize: usize,
- pub dptr: CUdeviceptr,
-}
-pub type CUDA_MEM_ALLOC_NODE_PARAMS = CUDA_MEM_ALLOC_NODE_PARAMS_st;
-impl CUgraphMem_attribute_enum {
- pub const CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT: CUgraphMem_attribute_enum =
- CUgraphMem_attribute_enum(0);
-}
-impl CUgraphMem_attribute_enum {
- pub const CU_GRAPH_MEM_ATTR_USED_MEM_HIGH: CUgraphMem_attribute_enum =
- CUgraphMem_attribute_enum(1);
-}
-impl CUgraphMem_attribute_enum {
- pub const CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT: CUgraphMem_attribute_enum =
- CUgraphMem_attribute_enum(2);
-}
-impl CUgraphMem_attribute_enum {
- pub const CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH: CUgraphMem_attribute_enum =
- CUgraphMem_attribute_enum(3);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUgraphMem_attribute_enum(pub ::std::os::raw::c_uint);
-pub use self::CUgraphMem_attribute_enum as CUgraphMem_attribute;
-impl CUflushGPUDirectRDMAWritesScope_enum {
- pub const CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER: CUflushGPUDirectRDMAWritesScope_enum =
- CUflushGPUDirectRDMAWritesScope_enum(100);
-}
-impl CUflushGPUDirectRDMAWritesScope_enum {
- pub const CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES: CUflushGPUDirectRDMAWritesScope_enum =
- CUflushGPUDirectRDMAWritesScope_enum(200);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUflushGPUDirectRDMAWritesScope_enum(pub ::std::os::raw::c_uint);
-pub use self::CUflushGPUDirectRDMAWritesScope_enum as CUflushGPUDirectRDMAWritesScope;
-impl CUflushGPUDirectRDMAWritesTarget_enum {
- pub const CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX:
- CUflushGPUDirectRDMAWritesTarget_enum = CUflushGPUDirectRDMAWritesTarget_enum(0);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUflushGPUDirectRDMAWritesTarget_enum(pub ::std::os::raw::c_uint);
-pub use self::CUflushGPUDirectRDMAWritesTarget_enum as CUflushGPUDirectRDMAWritesTarget;
-extern "C" {
- pub fn cuGetErrorString(error: CUresult, pStr: *mut *const ::std::os::raw::c_char) -> CUresult;
-}
-extern "C" {
- pub fn cuGetErrorName(error: CUresult, pStr: *mut *const ::std::os::raw::c_char) -> CUresult;
-}
-extern "C" {
- pub fn cuInit(Flags: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuDriverGetVersion(driverVersion: *mut ::std::os::raw::c_int) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGet(device: *mut CUdevice, ordinal: ::std::os::raw::c_int) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetCount(count: *mut ::std::os::raw::c_int) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetName(
- name: *mut ::std::os::raw::c_char,
- len: ::std::os::raw::c_int,
- dev: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetUuid_v2(uuid: *mut CUuuid, dev: CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetLuid(
- luid: *mut ::std::os::raw::c_char,
- deviceNodeMask: *mut ::std::os::raw::c_uint,
- dev: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetTexture1DLinearMaxWidth(
- maxWidthInElements: *mut usize,
- format: CUarray_format,
- numChannels: ::std::os::raw::c_uint,
- dev: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetAttribute(
- pi: *mut ::std::os::raw::c_int,
- attrib: CUdevice_attribute,
- dev: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetNvSciSyncAttributes(
- nvSciSyncAttrList: *mut ::std::os::raw::c_void,
- dev: CUdevice,
- flags: ::std::os::raw::c_int,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceSetMemPool(dev: CUdevice, pool: CUmemoryPool) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetMemPool(pool: *mut CUmemoryPool, dev: CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetDefaultMemPool(pool_out: *mut CUmemoryPool, dev: CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuFlushGPUDirectRDMAWrites(
- target: CUflushGPUDirectRDMAWritesTarget,
- scope: CUflushGPUDirectRDMAWritesScope,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetProperties(prop: *mut CUdevprop, dev: CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceComputeCapability(
- major: *mut ::std::os::raw::c_int,
- minor: *mut ::std::os::raw::c_int,
- dev: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDevicePrimaryCtxRetain(pctx: *mut CUcontext, dev: CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuDevicePrimaryCtxRelease_v2(dev: CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuDevicePrimaryCtxSetFlags_v2(dev: CUdevice, flags: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuDevicePrimaryCtxGetState(
- dev: CUdevice,
- flags: *mut ::std::os::raw::c_uint,
- active: *mut ::std::os::raw::c_int,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDevicePrimaryCtxReset_v2(dev: CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetExecAffinitySupport(
- pi: *mut ::std::os::raw::c_int,
- type_: CUexecAffinityType,
- dev: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxCreate_v2(
- pctx: *mut CUcontext,
- flags: ::std::os::raw::c_uint,
- dev: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxCreate_v3(
- pctx: *mut CUcontext,
- paramsArray: *mut CUexecAffinityParam,
- numParams: ::std::os::raw::c_int,
- flags: ::std::os::raw::c_uint,
- dev: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxPushCurrent_v2(ctx: CUcontext) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxPopCurrent_v2(pctx: *mut CUcontext) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxSetCurrent(ctx: CUcontext) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxGetCurrent(pctx: *mut CUcontext) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxGetDevice(device: *mut CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxGetFlags(flags: *mut ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxSynchronize() -> CUresult;
-}
-extern "C" {
- pub fn cuCtxSetLimit(limit: CUlimit, value: usize) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxGetLimit(pvalue: *mut usize, limit: CUlimit) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxGetCacheConfig(pconfig: *mut CUfunc_cache) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxSetCacheConfig(config: CUfunc_cache) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxGetSharedMemConfig(pConfig: *mut CUsharedconfig) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxSetSharedMemConfig(config: CUsharedconfig) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxGetApiVersion(ctx: CUcontext, version: *mut ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxGetStreamPriorityRange(
- leastPriority: *mut ::std::os::raw::c_int,
- greatestPriority: *mut ::std::os::raw::c_int,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxResetPersistingL2Cache() -> CUresult;
-}
-extern "C" {
- pub fn cuCtxGetExecAffinity(
- pExecAffinity: *mut CUexecAffinityParam,
- type_: CUexecAffinityType,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxAttach(pctx: *mut CUcontext, flags: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxDetach(ctx: CUcontext) -> CUresult;
-}
-extern "C" {
- pub fn cuModuleLoad(module: *mut CUmodule, fname: *const ::std::os::raw::c_char) -> CUresult;
-}
-extern "C" {
- pub fn cuModuleLoadData(
- module: *mut CUmodule,
- image: *const ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuModuleLoadDataEx(
- module: *mut CUmodule,
- image: *const ::std::os::raw::c_void,
- numOptions: ::std::os::raw::c_uint,
- options: *mut CUjit_option,
- optionValues: *mut *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuModuleLoadFatBinary(
- module: *mut CUmodule,
- fatCubin: *const ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuModuleUnload(hmod: CUmodule) -> CUresult;
-}
-extern "C" {
- pub fn cuModuleGetFunction(
- hfunc: *mut CUfunction,
- hmod: CUmodule,
- name: *const ::std::os::raw::c_char,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuModuleGetGlobal_v2(
- dptr: *mut CUdeviceptr,
- bytes: *mut usize,
- hmod: CUmodule,
- name: *const ::std::os::raw::c_char,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuModuleGetTexRef(
- pTexRef: *mut CUtexref,
- hmod: CUmodule,
- name: *const ::std::os::raw::c_char,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuModuleGetSurfRef(
- pSurfRef: *mut CUsurfref,
- hmod: CUmodule,
- name: *const ::std::os::raw::c_char,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLinkCreate_v2(
- numOptions: ::std::os::raw::c_uint,
- options: *mut CUjit_option,
- optionValues: *mut *mut ::std::os::raw::c_void,
- stateOut: *mut CUlinkState,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLinkAddData_v2(
- state: CUlinkState,
- type_: CUjitInputType,
- data: *mut ::std::os::raw::c_void,
- size: usize,
- name: *const ::std::os::raw::c_char,
- numOptions: ::std::os::raw::c_uint,
- options: *mut CUjit_option,
- optionValues: *mut *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLinkAddFile_v2(
- state: CUlinkState,
- type_: CUjitInputType,
- path: *const ::std::os::raw::c_char,
- numOptions: ::std::os::raw::c_uint,
- options: *mut CUjit_option,
- optionValues: *mut *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLinkComplete(
- state: CUlinkState,
- cubinOut: *mut *mut ::std::os::raw::c_void,
- sizeOut: *mut usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLinkDestroy(state: CUlinkState) -> CUresult;
-}
-extern "C" {
- pub fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAllocPitch_v2(
- dptr: *mut CUdeviceptr,
- pPitch: *mut usize,
- WidthInBytes: usize,
- Height: usize,
- ElementSizeBytes: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult;
-}
-extern "C" {
- pub fn cuMemGetAddressRange_v2(
- pbase: *mut CUdeviceptr,
- psize: *mut usize,
- dptr: CUdeviceptr,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAllocHost_v2(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult;
-}
-extern "C" {
- pub fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult;
-}
-extern "C" {
- pub fn cuMemHostAlloc(
- pp: *mut *mut ::std::os::raw::c_void,
- bytesize: usize,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemHostGetDevicePointer_v2(
- pdptr: *mut CUdeviceptr,
- p: *mut ::std::os::raw::c_void,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemHostGetFlags(
- pFlags: *mut ::std::os::raw::c_uint,
- p: *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAllocManaged(
- dptr: *mut CUdeviceptr,
- bytesize: usize,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetByPCIBusId(
- dev: *mut CUdevice,
- pciBusId: *const ::std::os::raw::c_char,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetPCIBusId(
- pciBusId: *mut ::std::os::raw::c_char,
- len: ::std::os::raw::c_int,
- dev: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuIpcGetEventHandle(pHandle: *mut CUipcEventHandle, event: CUevent) -> CUresult;
-}
-extern "C" {
- pub fn cuIpcOpenEventHandle(phEvent: *mut CUevent, handle: CUipcEventHandle) -> CUresult;
-}
-extern "C" {
- pub fn cuIpcGetMemHandle(pHandle: *mut CUipcMemHandle, dptr: CUdeviceptr) -> CUresult;
-}
-extern "C" {
- pub fn cuIpcOpenMemHandle_v2(
- pdptr: *mut CUdeviceptr,
- handle: CUipcMemHandle,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuIpcCloseMemHandle(dptr: CUdeviceptr) -> CUresult;
-}
-extern "C" {
- pub fn cuMemHostRegister_v2(
- p: *mut ::std::os::raw::c_void,
- bytesize: usize,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemHostUnregister(p: *mut ::std::os::raw::c_void) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy_ptds(dst: CUdeviceptr, src: CUdeviceptr, ByteCount: usize) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyPeer_ptds(
- dstDevice: CUdeviceptr,
- dstContext: CUcontext,
- srcDevice: CUdeviceptr,
- srcContext: CUcontext,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyHtoD_v2_ptds(
- dstDevice: CUdeviceptr,
- srcHost: *const ::std::os::raw::c_void,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoH_v2_ptds(
- dstHost: *mut ::std::os::raw::c_void,
- srcDevice: CUdeviceptr,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoD_v2_ptds(
- dstDevice: CUdeviceptr,
- srcDevice: CUdeviceptr,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoA_v2_ptds(
- dstArray: CUarray,
- dstOffset: usize,
- srcDevice: CUdeviceptr,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAtoD_v2_ptds(
- dstDevice: CUdeviceptr,
- srcArray: CUarray,
- srcOffset: usize,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyHtoA_v2_ptds(
- dstArray: CUarray,
- dstOffset: usize,
- srcHost: *const ::std::os::raw::c_void,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAtoH_v2_ptds(
- dstHost: *mut ::std::os::raw::c_void,
- srcArray: CUarray,
- srcOffset: usize,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAtoA_v2_ptds(
- dstArray: CUarray,
- dstOffset: usize,
- srcArray: CUarray,
- srcOffset: usize,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy2D_v2_ptds(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy2DUnaligned_v2_ptds(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy3D_v2_ptds(pCopy: *const CUDA_MEMCPY3D) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy3DPeer_ptds(pCopy: *const CUDA_MEMCPY3D_PEER) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAsync_ptsz(
- dst: CUdeviceptr,
- src: CUdeviceptr,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyPeerAsync_ptsz(
- dstDevice: CUdeviceptr,
- dstContext: CUcontext,
- srcDevice: CUdeviceptr,
- srcContext: CUcontext,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyHtoDAsync_v2_ptsz(
- dstDevice: CUdeviceptr,
- srcHost: *const ::std::os::raw::c_void,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoHAsync_v2_ptsz(
- dstHost: *mut ::std::os::raw::c_void,
- srcDevice: CUdeviceptr,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoDAsync_v2_ptsz(
- dstDevice: CUdeviceptr,
- srcDevice: CUdeviceptr,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyHtoAAsync_v2_ptsz(
- dstArray: CUarray,
- dstOffset: usize,
- srcHost: *const ::std::os::raw::c_void,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAtoHAsync_v2_ptsz(
- dstHost: *mut ::std::os::raw::c_void,
- srcArray: CUarray,
- srcOffset: usize,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy2DAsync_v2_ptsz(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy3DAsync_v2_ptsz(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy3DPeerAsync_ptsz(
- pCopy: *const CUDA_MEMCPY3D_PEER,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD8_v2_ptds(
- dstDevice: CUdeviceptr,
- uc: ::std::os::raw::c_uchar,
- N: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD16_v2_ptds(
- dstDevice: CUdeviceptr,
- us: ::std::os::raw::c_ushort,
- N: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD32_v2_ptds(
- dstDevice: CUdeviceptr,
- ui: ::std::os::raw::c_uint,
- N: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D8_v2_ptds(
- dstDevice: CUdeviceptr,
- dstPitch: usize,
- uc: ::std::os::raw::c_uchar,
- Width: usize,
- Height: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D16_v2_ptds(
- dstDevice: CUdeviceptr,
- dstPitch: usize,
- us: ::std::os::raw::c_ushort,
- Width: usize,
- Height: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D32_v2_ptds(
- dstDevice: CUdeviceptr,
- dstPitch: usize,
- ui: ::std::os::raw::c_uint,
- Width: usize,
- Height: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD8Async_ptsz(
- dstDevice: CUdeviceptr,
- uc: ::std::os::raw::c_uchar,
- N: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD16Async_ptsz(
- dstDevice: CUdeviceptr,
- us: ::std::os::raw::c_ushort,
- N: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD32Async_ptsz(
- dstDevice: CUdeviceptr,
- ui: ::std::os::raw::c_uint,
- N: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D8Async_ptsz(
- dstDevice: CUdeviceptr,
- dstPitch: usize,
- uc: ::std::os::raw::c_uchar,
- Width: usize,
- Height: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D16Async_ptsz(
- dstDevice: CUdeviceptr,
- dstPitch: usize,
- us: ::std::os::raw::c_ushort,
- Width: usize,
- Height: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D32Async_ptsz(
- dstDevice: CUdeviceptr,
- dstPitch: usize,
- ui: ::std::os::raw::c_uint,
- Width: usize,
- Height: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuArrayCreate_v2(
- pHandle: *mut CUarray,
- pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuArrayGetDescriptor_v2(
- pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR,
- hArray: CUarray,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuArrayGetSparseProperties(
- sparseProperties: *mut CUDA_ARRAY_SPARSE_PROPERTIES,
- array: CUarray,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMipmappedArrayGetSparseProperties(
- sparseProperties: *mut CUDA_ARRAY_SPARSE_PROPERTIES,
- mipmap: CUmipmappedArray,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuArrayGetPlane(
- pPlaneArray: *mut CUarray,
- hArray: CUarray,
- planeIdx: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuArrayDestroy(hArray: CUarray) -> CUresult;
-}
-extern "C" {
- pub fn cuArray3DCreate_v2(
- pHandle: *mut CUarray,
- pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuArray3DGetDescriptor_v2(
- pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR,
- hArray: CUarray,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMipmappedArrayCreate(
- pHandle: *mut CUmipmappedArray,
- pMipmappedArrayDesc: *const CUDA_ARRAY3D_DESCRIPTOR,
- numMipmapLevels: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMipmappedArrayGetLevel(
- pLevelArray: *mut CUarray,
- hMipmappedArray: CUmipmappedArray,
- level: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMipmappedArrayDestroy(hMipmappedArray: CUmipmappedArray) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAddressReserve(
- ptr: *mut CUdeviceptr,
- size: usize,
- alignment: usize,
- addr: CUdeviceptr,
- flags: ::std::os::raw::c_ulonglong,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAddressFree(ptr: CUdeviceptr, size: usize) -> CUresult;
-}
-extern "C" {
- pub fn cuMemCreate(
- handle: *mut CUmemGenericAllocationHandle,
- size: usize,
- prop: *const CUmemAllocationProp,
- flags: ::std::os::raw::c_ulonglong,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemRelease(handle: CUmemGenericAllocationHandle) -> CUresult;
-}
-extern "C" {
- pub fn cuMemMap(
- ptr: CUdeviceptr,
- size: usize,
- offset: usize,
- handle: CUmemGenericAllocationHandle,
- flags: ::std::os::raw::c_ulonglong,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemMapArrayAsync_ptsz(
- mapInfoList: *mut CUarrayMapInfo,
- count: ::std::os::raw::c_uint,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemUnmap(ptr: CUdeviceptr, size: usize) -> CUresult;
-}
-extern "C" {
- pub fn cuMemSetAccess(
- ptr: CUdeviceptr,
- size: usize,
- desc: *const CUmemAccessDesc,
- count: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemGetAccess(
- flags: *mut ::std::os::raw::c_ulonglong,
- location: *const CUmemLocation,
- ptr: CUdeviceptr,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemExportToShareableHandle(
- shareableHandle: *mut ::std::os::raw::c_void,
- handle: CUmemGenericAllocationHandle,
- handleType: CUmemAllocationHandleType,
- flags: ::std::os::raw::c_ulonglong,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemImportFromShareableHandle(
- handle: *mut CUmemGenericAllocationHandle,
- osHandle: *mut ::std::os::raw::c_void,
- shHandleType: CUmemAllocationHandleType,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemGetAllocationGranularity(
- granularity: *mut usize,
- prop: *const CUmemAllocationProp,
- option: CUmemAllocationGranularity_flags,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemGetAllocationPropertiesFromHandle(
- prop: *mut CUmemAllocationProp,
- handle: CUmemGenericAllocationHandle,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemRetainAllocationHandle(
- handle: *mut CUmemGenericAllocationHandle,
- addr: *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemFreeAsync_ptsz(dptr: CUdeviceptr, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAllocAsync_ptsz(
- dptr: *mut CUdeviceptr,
- bytesize: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemPoolTrimTo(pool: CUmemoryPool, minBytesToKeep: usize) -> CUresult;
-}
-extern "C" {
- pub fn cuMemPoolSetAttribute(
- pool: CUmemoryPool,
- attr: CUmemPool_attribute,
- value: *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemPoolGetAttribute(
- pool: CUmemoryPool,
- attr: CUmemPool_attribute,
- value: *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemPoolSetAccess(
- pool: CUmemoryPool,
- map: *const CUmemAccessDesc,
- count: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemPoolGetAccess(
- flags: *mut CUmemAccess_flags,
- memPool: CUmemoryPool,
- location: *mut CUmemLocation,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemPoolCreate(pool: *mut CUmemoryPool, poolProps: *const CUmemPoolProps) -> CUresult;
-}
-extern "C" {
- pub fn cuMemPoolDestroy(pool: CUmemoryPool) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAllocFromPoolAsync_ptsz(
- dptr: *mut CUdeviceptr,
- bytesize: usize,
- pool: CUmemoryPool,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemPoolExportToShareableHandle(
- handle_out: *mut ::std::os::raw::c_void,
- pool: CUmemoryPool,
- handleType: CUmemAllocationHandleType,
- flags: ::std::os::raw::c_ulonglong,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemPoolImportFromShareableHandle(
- pool_out: *mut CUmemoryPool,
- handle: *mut ::std::os::raw::c_void,
- handleType: CUmemAllocationHandleType,
- flags: ::std::os::raw::c_ulonglong,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemPoolExportPointer(
- shareData_out: *mut CUmemPoolPtrExportData,
- ptr: CUdeviceptr,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemPoolImportPointer(
- ptr_out: *mut CUdeviceptr,
- pool: CUmemoryPool,
- shareData: *mut CUmemPoolPtrExportData,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuPointerGetAttribute(
- data: *mut ::std::os::raw::c_void,
- attribute: CUpointer_attribute,
- ptr: CUdeviceptr,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemPrefetchAsync_ptsz(
- devPtr: CUdeviceptr,
- count: usize,
- dstDevice: CUdevice,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAdvise(
- devPtr: CUdeviceptr,
- count: usize,
- advice: CUmem_advise,
- device: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemRangeGetAttribute(
- data: *mut ::std::os::raw::c_void,
- dataSize: usize,
- attribute: CUmem_range_attribute,
- devPtr: CUdeviceptr,
- count: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemRangeGetAttributes(
- data: *mut *mut ::std::os::raw::c_void,
- dataSizes: *mut usize,
- attributes: *mut CUmem_range_attribute,
- numAttributes: usize,
- devPtr: CUdeviceptr,
- count: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuPointerSetAttribute(
- value: *const ::std::os::raw::c_void,
- attribute: CUpointer_attribute,
- ptr: CUdeviceptr,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuPointerGetAttributes(
- numAttributes: ::std::os::raw::c_uint,
- attributes: *mut CUpointer_attribute,
- data: *mut *mut ::std::os::raw::c_void,
- ptr: CUdeviceptr,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamCreate(phStream: *mut CUstream, Flags: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamCreateWithPriority(
- phStream: *mut CUstream,
- flags: ::std::os::raw::c_uint,
- priority: ::std::os::raw::c_int,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamGetPriority_ptsz(
- hStream: CUstream,
- priority: *mut ::std::os::raw::c_int,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamGetFlags_ptsz(hStream: CUstream, flags: *mut ::std::os::raw::c_uint)
- -> CUresult;
-}
-extern "C" {
- pub fn cuStreamGetCtx_ptsz(hStream: CUstream, pctx: *mut CUcontext) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamWaitEvent_ptsz(
- hStream: CUstream,
- hEvent: CUevent,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamAddCallback_ptsz(
- hStream: CUstream,
- callback: CUstreamCallback,
- userData: *mut ::std::os::raw::c_void,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamBeginCapture_v2_ptsz(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult;
-}
-extern "C" {
- pub fn cuThreadExchangeStreamCaptureMode(mode: *mut CUstreamCaptureMode) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamEndCapture_ptsz(hStream: CUstream, phGraph: *mut CUgraph) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamIsCapturing_ptsz(
- hStream: CUstream,
- captureStatus: *mut CUstreamCaptureStatus,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamGetCaptureInfo_ptsz(
- hStream: CUstream,
- captureStatus_out: *mut CUstreamCaptureStatus,
- id_out: *mut cuuint64_t,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamGetCaptureInfo_v2_ptsz(
- hStream: CUstream,
- captureStatus_out: *mut CUstreamCaptureStatus,
- id_out: *mut cuuint64_t,
- graph_out: *mut CUgraph,
- dependencies_out: *mut *const CUgraphNode,
- numDependencies_out: *mut usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamUpdateCaptureDependencies_ptsz(
- hStream: CUstream,
- dependencies: *mut CUgraphNode,
- numDependencies: usize,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamAttachMemAsync_ptsz(
- hStream: CUstream,
- dptr: CUdeviceptr,
- length: usize,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamQuery_ptsz(hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamSynchronize_ptsz(hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamDestroy_v2(hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamCopyAttributes_ptsz(dst: CUstream, src: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamGetAttribute_ptsz(
- hStream: CUstream,
- attr: CUstreamAttrID,
- value_out: *mut CUstreamAttrValue,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamSetAttribute_ptsz(
- hStream: CUstream,
- attr: CUstreamAttrID,
- value: *const CUstreamAttrValue,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuEventCreate(phEvent: *mut CUevent, Flags: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuEventRecord_ptsz(hEvent: CUevent, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuEventRecordWithFlags_ptsz(
- hEvent: CUevent,
- hStream: CUstream,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuEventQuery(hEvent: CUevent) -> CUresult;
-}
-extern "C" {
- pub fn cuEventSynchronize(hEvent: CUevent) -> CUresult;
-}
-extern "C" {
- pub fn cuEventDestroy_v2(hEvent: CUevent) -> CUresult;
-}
-extern "C" {
- pub fn cuEventElapsedTime(pMilliseconds: *mut f32, hStart: CUevent, hEnd: CUevent) -> CUresult;
-}
-extern "C" {
- pub fn cuImportExternalMemory(
- extMem_out: *mut CUexternalMemory,
- memHandleDesc: *const CUDA_EXTERNAL_MEMORY_HANDLE_DESC,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuExternalMemoryGetMappedBuffer(
- devPtr: *mut CUdeviceptr,
- extMem: CUexternalMemory,
- bufferDesc: *const CUDA_EXTERNAL_MEMORY_BUFFER_DESC,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuExternalMemoryGetMappedMipmappedArray(
- mipmap: *mut CUmipmappedArray,
- extMem: CUexternalMemory,
- mipmapDesc: *const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDestroyExternalMemory(extMem: CUexternalMemory) -> CUresult;
-}
-extern "C" {
- pub fn cuImportExternalSemaphore(
- extSem_out: *mut CUexternalSemaphore,
- semHandleDesc: *const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuSignalExternalSemaphoresAsync_ptsz(
- extSemArray: *const CUexternalSemaphore,
- paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,
- numExtSems: ::std::os::raw::c_uint,
- stream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuWaitExternalSemaphoresAsync_ptsz(
- extSemArray: *const CUexternalSemaphore,
- paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,
- numExtSems: ::std::os::raw::c_uint,
- stream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDestroyExternalSemaphore(extSem: CUexternalSemaphore) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamWaitValue32_ptsz(
- stream: CUstream,
- addr: CUdeviceptr,
- value: cuuint32_t,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamWaitValue64_ptsz(
- stream: CUstream,
- addr: CUdeviceptr,
- value: cuuint64_t,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamWriteValue32_ptsz(
- stream: CUstream,
- addr: CUdeviceptr,
- value: cuuint32_t,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamWriteValue64_ptsz(
- stream: CUstream,
- addr: CUdeviceptr,
- value: cuuint64_t,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamBatchMemOp_ptsz(
- stream: CUstream,
- count: ::std::os::raw::c_uint,
- paramArray: *mut CUstreamBatchMemOpParams,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuFuncGetAttribute(
- pi: *mut ::std::os::raw::c_int,
- attrib: CUfunction_attribute,
- hfunc: CUfunction,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuFuncSetAttribute(
- hfunc: CUfunction,
- attrib: CUfunction_attribute,
- value: ::std::os::raw::c_int,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuFuncSetCacheConfig(hfunc: CUfunction, config: CUfunc_cache) -> CUresult;
-}
-extern "C" {
- pub fn cuFuncSetSharedMemConfig(hfunc: CUfunction, config: CUsharedconfig) -> CUresult;
-}
-extern "C" {
- pub fn cuFuncGetModule(hmod: *mut CUmodule, hfunc: CUfunction) -> CUresult;
-}
-extern "C" {
- pub fn cuLaunchKernel_ptsz(
- f: CUfunction,
- gridDimX: ::std::os::raw::c_uint,
- gridDimY: ::std::os::raw::c_uint,
- gridDimZ: ::std::os::raw::c_uint,
- blockDimX: ::std::os::raw::c_uint,
- blockDimY: ::std::os::raw::c_uint,
- blockDimZ: ::std::os::raw::c_uint,
- sharedMemBytes: ::std::os::raw::c_uint,
- hStream: CUstream,
- kernelParams: *mut *mut ::std::os::raw::c_void,
- extra: *mut *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLaunchCooperativeKernel_ptsz(
- f: CUfunction,
- gridDimX: ::std::os::raw::c_uint,
- gridDimY: ::std::os::raw::c_uint,
- gridDimZ: ::std::os::raw::c_uint,
- blockDimX: ::std::os::raw::c_uint,
- blockDimY: ::std::os::raw::c_uint,
- blockDimZ: ::std::os::raw::c_uint,
- sharedMemBytes: ::std::os::raw::c_uint,
- hStream: CUstream,
- kernelParams: *mut *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLaunchCooperativeKernelMultiDevice(
- launchParamsList: *mut CUDA_LAUNCH_PARAMS,
- numDevices: ::std::os::raw::c_uint,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLaunchHostFunc_ptsz(
- hStream: CUstream,
- fn_: CUhostFn,
- userData: *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuFuncSetBlockShape(
- hfunc: CUfunction,
- x: ::std::os::raw::c_int,
- y: ::std::os::raw::c_int,
- z: ::std::os::raw::c_int,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuFuncSetSharedSize(hfunc: CUfunction, bytes: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuParamSetSize(hfunc: CUfunction, numbytes: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuParamSeti(
- hfunc: CUfunction,
- offset: ::std::os::raw::c_int,
- value: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuParamSetf(hfunc: CUfunction, offset: ::std::os::raw::c_int, value: f32) -> CUresult;
-}
-extern "C" {
- pub fn cuParamSetv(
- hfunc: CUfunction,
- offset: ::std::os::raw::c_int,
- ptr: *mut ::std::os::raw::c_void,
- numbytes: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLaunch(f: CUfunction) -> CUresult;
-}
-extern "C" {
- pub fn cuLaunchGrid(
- f: CUfunction,
- grid_width: ::std::os::raw::c_int,
- grid_height: ::std::os::raw::c_int,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLaunchGridAsync(
- f: CUfunction,
- grid_width: ::std::os::raw::c_int,
- grid_height: ::std::os::raw::c_int,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuParamSetTexRef(
- hfunc: CUfunction,
- texunit: ::std::os::raw::c_int,
- hTexRef: CUtexref,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphCreate(phGraph: *mut CUgraph, flags: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphAddKernelNode(
- phGraphNode: *mut CUgraphNode,
- hGraph: CUgraph,
- dependencies: *const CUgraphNode,
- numDependencies: usize,
- nodeParams: *const CUDA_KERNEL_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphKernelNodeGetParams(
- hNode: CUgraphNode,
- nodeParams: *mut CUDA_KERNEL_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphKernelNodeSetParams(
- hNode: CUgraphNode,
- nodeParams: *const CUDA_KERNEL_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphAddMemcpyNode(
- phGraphNode: *mut CUgraphNode,
- hGraph: CUgraph,
- dependencies: *const CUgraphNode,
- numDependencies: usize,
- copyParams: *const CUDA_MEMCPY3D,
- ctx: CUcontext,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphMemcpyNodeGetParams(
- hNode: CUgraphNode,
- nodeParams: *mut CUDA_MEMCPY3D,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphMemcpyNodeSetParams(
- hNode: CUgraphNode,
- nodeParams: *const CUDA_MEMCPY3D,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphAddMemsetNode(
- phGraphNode: *mut CUgraphNode,
- hGraph: CUgraph,
- dependencies: *const CUgraphNode,
- numDependencies: usize,
- memsetParams: *const CUDA_MEMSET_NODE_PARAMS,
- ctx: CUcontext,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphMemsetNodeGetParams(
- hNode: CUgraphNode,
- nodeParams: *mut CUDA_MEMSET_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphMemsetNodeSetParams(
- hNode: CUgraphNode,
- nodeParams: *const CUDA_MEMSET_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphAddHostNode(
- phGraphNode: *mut CUgraphNode,
- hGraph: CUgraph,
- dependencies: *const CUgraphNode,
- numDependencies: usize,
- nodeParams: *const CUDA_HOST_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphHostNodeGetParams(
- hNode: CUgraphNode,
- nodeParams: *mut CUDA_HOST_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphHostNodeSetParams(
- hNode: CUgraphNode,
- nodeParams: *const CUDA_HOST_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphAddChildGraphNode(
- phGraphNode: *mut CUgraphNode,
- hGraph: CUgraph,
- dependencies: *const CUgraphNode,
- numDependencies: usize,
- childGraph: CUgraph,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphChildGraphNodeGetGraph(hNode: CUgraphNode, phGraph: *mut CUgraph) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphAddEmptyNode(
- phGraphNode: *mut CUgraphNode,
- hGraph: CUgraph,
- dependencies: *const CUgraphNode,
- numDependencies: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphAddEventRecordNode(
- phGraphNode: *mut CUgraphNode,
- hGraph: CUgraph,
- dependencies: *const CUgraphNode,
- numDependencies: usize,
- event: CUevent,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphEventRecordNodeGetEvent(hNode: CUgraphNode, event_out: *mut CUevent) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphEventRecordNodeSetEvent(hNode: CUgraphNode, event: CUevent) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphAddEventWaitNode(
- phGraphNode: *mut CUgraphNode,
- hGraph: CUgraph,
- dependencies: *const CUgraphNode,
- numDependencies: usize,
- event: CUevent,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphEventWaitNodeGetEvent(hNode: CUgraphNode, event_out: *mut CUevent) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphEventWaitNodeSetEvent(hNode: CUgraphNode, event: CUevent) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphAddExternalSemaphoresSignalNode(
- phGraphNode: *mut CUgraphNode,
- hGraph: CUgraph,
- dependencies: *const CUgraphNode,
- numDependencies: usize,
- nodeParams: *const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExternalSemaphoresSignalNodeGetParams(
- hNode: CUgraphNode,
- params_out: *mut CUDA_EXT_SEM_SIGNAL_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExternalSemaphoresSignalNodeSetParams(
- hNode: CUgraphNode,
- nodeParams: *const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphAddExternalSemaphoresWaitNode(
- phGraphNode: *mut CUgraphNode,
- hGraph: CUgraph,
- dependencies: *const CUgraphNode,
- numDependencies: usize,
- nodeParams: *const CUDA_EXT_SEM_WAIT_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExternalSemaphoresWaitNodeGetParams(
- hNode: CUgraphNode,
- params_out: *mut CUDA_EXT_SEM_WAIT_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExternalSemaphoresWaitNodeSetParams(
- hNode: CUgraphNode,
- nodeParams: *const CUDA_EXT_SEM_WAIT_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphAddMemAllocNode(
- phGraphNode: *mut CUgraphNode,
- hGraph: CUgraph,
- dependencies: *const CUgraphNode,
- numDependencies: usize,
- nodeParams: *mut CUDA_MEM_ALLOC_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphMemAllocNodeGetParams(
- hNode: CUgraphNode,
- params_out: *mut CUDA_MEM_ALLOC_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphAddMemFreeNode(
- phGraphNode: *mut CUgraphNode,
- hGraph: CUgraph,
- dependencies: *const CUgraphNode,
- numDependencies: usize,
- dptr: CUdeviceptr,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphMemFreeNodeGetParams(hNode: CUgraphNode, dptr_out: *mut CUdeviceptr) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGraphMemTrim(device: CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetGraphMemAttribute(
- device: CUdevice,
- attr: CUgraphMem_attribute,
- value: *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceSetGraphMemAttribute(
- device: CUdevice,
- attr: CUgraphMem_attribute,
- value: *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphClone(phGraphClone: *mut CUgraph, originalGraph: CUgraph) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphNodeFindInClone(
- phNode: *mut CUgraphNode,
- hOriginalNode: CUgraphNode,
- hClonedGraph: CUgraph,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphNodeGetType(hNode: CUgraphNode, type_: *mut CUgraphNodeType) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphGetNodes(
- hGraph: CUgraph,
- nodes: *mut CUgraphNode,
- numNodes: *mut usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphGetRootNodes(
- hGraph: CUgraph,
- rootNodes: *mut CUgraphNode,
- numRootNodes: *mut usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphGetEdges(
- hGraph: CUgraph,
- from: *mut CUgraphNode,
- to: *mut CUgraphNode,
- numEdges: *mut usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphNodeGetDependencies(
- hNode: CUgraphNode,
- dependencies: *mut CUgraphNode,
- numDependencies: *mut usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphNodeGetDependentNodes(
- hNode: CUgraphNode,
- dependentNodes: *mut CUgraphNode,
- numDependentNodes: *mut usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphAddDependencies(
- hGraph: CUgraph,
- from: *const CUgraphNode,
- to: *const CUgraphNode,
- numDependencies: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphRemoveDependencies(
- hGraph: CUgraph,
- from: *const CUgraphNode,
- to: *const CUgraphNode,
- numDependencies: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphDestroyNode(hNode: CUgraphNode) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphInstantiate_v2(
- phGraphExec: *mut CUgraphExec,
- hGraph: CUgraph,
- phErrorNode: *mut CUgraphNode,
- logBuffer: *mut ::std::os::raw::c_char,
- bufferSize: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphInstantiateWithFlags(
- phGraphExec: *mut CUgraphExec,
- hGraph: CUgraph,
- flags: ::std::os::raw::c_ulonglong,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExecKernelNodeSetParams(
- hGraphExec: CUgraphExec,
- hNode: CUgraphNode,
- nodeParams: *const CUDA_KERNEL_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExecMemcpyNodeSetParams(
- hGraphExec: CUgraphExec,
- hNode: CUgraphNode,
- copyParams: *const CUDA_MEMCPY3D,
- ctx: CUcontext,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExecMemsetNodeSetParams(
- hGraphExec: CUgraphExec,
- hNode: CUgraphNode,
- memsetParams: *const CUDA_MEMSET_NODE_PARAMS,
- ctx: CUcontext,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExecHostNodeSetParams(
- hGraphExec: CUgraphExec,
- hNode: CUgraphNode,
- nodeParams: *const CUDA_HOST_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExecChildGraphNodeSetParams(
- hGraphExec: CUgraphExec,
- hNode: CUgraphNode,
- childGraph: CUgraph,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExecEventRecordNodeSetEvent(
- hGraphExec: CUgraphExec,
- hNode: CUgraphNode,
- event: CUevent,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExecEventWaitNodeSetEvent(
- hGraphExec: CUgraphExec,
- hNode: CUgraphNode,
- event: CUevent,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExecExternalSemaphoresSignalNodeSetParams(
- hGraphExec: CUgraphExec,
- hNode: CUgraphNode,
- nodeParams: *const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExecExternalSemaphoresWaitNodeSetParams(
- hGraphExec: CUgraphExec,
- hNode: CUgraphNode,
- nodeParams: *const CUDA_EXT_SEM_WAIT_NODE_PARAMS,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphUpload_ptsz(hGraphExec: CUgraphExec, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphLaunch_ptsz(hGraphExec: CUgraphExec, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExecDestroy(hGraphExec: CUgraphExec) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphDestroy(hGraph: CUgraph) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphExecUpdate(
- hGraphExec: CUgraphExec,
- hGraph: CUgraph,
- hErrorNode_out: *mut CUgraphNode,
- updateResult_out: *mut CUgraphExecUpdateResult,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphKernelNodeCopyAttributes(dst: CUgraphNode, src: CUgraphNode) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphKernelNodeGetAttribute(
- hNode: CUgraphNode,
- attr: CUkernelNodeAttrID,
- value_out: *mut CUkernelNodeAttrValue,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphKernelNodeSetAttribute(
- hNode: CUgraphNode,
- attr: CUkernelNodeAttrID,
- value: *const CUkernelNodeAttrValue,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphDebugDotPrint(
- hGraph: CUgraph,
- path: *const ::std::os::raw::c_char,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuUserObjectCreate(
- object_out: *mut CUuserObject,
- ptr: *mut ::std::os::raw::c_void,
- destroy: CUhostFn,
- initialRefcount: ::std::os::raw::c_uint,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuUserObjectRetain(object: CUuserObject, count: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuUserObjectRelease(object: CUuserObject, count: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphRetainUserObject(
- graph: CUgraph,
- object: CUuserObject,
- count: ::std::os::raw::c_uint,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphReleaseUserObject(
- graph: CUgraph,
- object: CUuserObject,
- count: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuOccupancyMaxActiveBlocksPerMultiprocessor(
- numBlocks: *mut ::std::os::raw::c_int,
- func: CUfunction,
- blockSize: ::std::os::raw::c_int,
- dynamicSMemSize: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
- numBlocks: *mut ::std::os::raw::c_int,
- func: CUfunction,
- blockSize: ::std::os::raw::c_int,
- dynamicSMemSize: usize,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuOccupancyMaxPotentialBlockSize(
- minGridSize: *mut ::std::os::raw::c_int,
- blockSize: *mut ::std::os::raw::c_int,
- func: CUfunction,
- blockSizeToDynamicSMemSize: CUoccupancyB2DSize,
- dynamicSMemSize: usize,
- blockSizeLimit: ::std::os::raw::c_int,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuOccupancyMaxPotentialBlockSizeWithFlags(
- minGridSize: *mut ::std::os::raw::c_int,
- blockSize: *mut ::std::os::raw::c_int,
- func: CUfunction,
- blockSizeToDynamicSMemSize: CUoccupancyB2DSize,
- dynamicSMemSize: usize,
- blockSizeLimit: ::std::os::raw::c_int,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuOccupancyAvailableDynamicSMemPerBlock(
- dynamicSmemSize: *mut usize,
- func: CUfunction,
- numBlocks: ::std::os::raw::c_int,
- blockSize: ::std::os::raw::c_int,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetArray(
- hTexRef: CUtexref,
- hArray: CUarray,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetMipmappedArray(
- hTexRef: CUtexref,
- hMipmappedArray: CUmipmappedArray,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetAddress_v2(
- ByteOffset: *mut usize,
- hTexRef: CUtexref,
- dptr: CUdeviceptr,
- bytes: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetAddress2D_v3(
- hTexRef: CUtexref,
- desc: *const CUDA_ARRAY_DESCRIPTOR,
- dptr: CUdeviceptr,
- Pitch: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetFormat(
- hTexRef: CUtexref,
- fmt: CUarray_format,
- NumPackedComponents: ::std::os::raw::c_int,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetAddressMode(
- hTexRef: CUtexref,
- dim: ::std::os::raw::c_int,
- am: CUaddress_mode,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetFilterMode(hTexRef: CUtexref, fm: CUfilter_mode) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetMipmapFilterMode(hTexRef: CUtexref, fm: CUfilter_mode) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetMipmapLevelBias(hTexRef: CUtexref, bias: f32) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetMipmapLevelClamp(
- hTexRef: CUtexref,
- minMipmapLevelClamp: f32,
- maxMipmapLevelClamp: f32,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetMaxAnisotropy(
- hTexRef: CUtexref,
- maxAniso: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetBorderColor(hTexRef: CUtexref, pBorderColor: *mut f32) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetFlags(hTexRef: CUtexref, Flags: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefGetAddress_v2(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefGetArray(phArray: *mut CUarray, hTexRef: CUtexref) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefGetMipmappedArray(
- phMipmappedArray: *mut CUmipmappedArray,
- hTexRef: CUtexref,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefGetAddressMode(
- pam: *mut CUaddress_mode,
- hTexRef: CUtexref,
- dim: ::std::os::raw::c_int,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefGetFilterMode(pfm: *mut CUfilter_mode, hTexRef: CUtexref) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefGetFormat(
- pFormat: *mut CUarray_format,
- pNumChannels: *mut ::std::os::raw::c_int,
- hTexRef: CUtexref,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefGetMipmapFilterMode(pfm: *mut CUfilter_mode, hTexRef: CUtexref) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefGetMipmapLevelBias(pbias: *mut f32, hTexRef: CUtexref) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefGetMipmapLevelClamp(
- pminMipmapLevelClamp: *mut f32,
- pmaxMipmapLevelClamp: *mut f32,
- hTexRef: CUtexref,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefGetMaxAnisotropy(
- pmaxAniso: *mut ::std::os::raw::c_int,
- hTexRef: CUtexref,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefGetBorderColor(pBorderColor: *mut f32, hTexRef: CUtexref) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefGetFlags(pFlags: *mut ::std::os::raw::c_uint, hTexRef: CUtexref) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefCreate(pTexRef: *mut CUtexref) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefDestroy(hTexRef: CUtexref) -> CUresult;
-}
-extern "C" {
- pub fn cuSurfRefSetArray(
- hSurfRef: CUsurfref,
- hArray: CUarray,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuSurfRefGetArray(phArray: *mut CUarray, hSurfRef: CUsurfref) -> CUresult;
-}
-extern "C" {
- pub fn cuTexObjectCreate(
- pTexObject: *mut CUtexObject,
- pResDesc: *const CUDA_RESOURCE_DESC,
- pTexDesc: *const CUDA_TEXTURE_DESC,
- pResViewDesc: *const CUDA_RESOURCE_VIEW_DESC,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexObjectDestroy(texObject: CUtexObject) -> CUresult;
-}
-extern "C" {
- pub fn cuTexObjectGetResourceDesc(
- pResDesc: *mut CUDA_RESOURCE_DESC,
- texObject: CUtexObject,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexObjectGetTextureDesc(
- pTexDesc: *mut CUDA_TEXTURE_DESC,
- texObject: CUtexObject,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexObjectGetResourceViewDesc(
- pResViewDesc: *mut CUDA_RESOURCE_VIEW_DESC,
- texObject: CUtexObject,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuSurfObjectCreate(
- pSurfObject: *mut CUsurfObject,
- pResDesc: *const CUDA_RESOURCE_DESC,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuSurfObjectDestroy(surfObject: CUsurfObject) -> CUresult;
-}
-extern "C" {
- pub fn cuSurfObjectGetResourceDesc(
- pResDesc: *mut CUDA_RESOURCE_DESC,
- surfObject: CUsurfObject,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceCanAccessPeer(
- canAccessPeer: *mut ::std::os::raw::c_int,
- dev: CUdevice,
- peerDev: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxEnablePeerAccess(peerContext: CUcontext, Flags: ::std::os::raw::c_uint)
- -> CUresult;
-}
-extern "C" {
- pub fn cuCtxDisablePeerAccess(peerContext: CUcontext) -> CUresult;
-}
-extern "C" {
- pub fn cuDeviceGetP2PAttribute(
- value: *mut ::std::os::raw::c_int,
- attrib: CUdevice_P2PAttribute,
- srcDevice: CUdevice,
- dstDevice: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphicsUnregisterResource(resource: CUgraphicsResource) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphicsSubResourceGetMappedArray(
- pArray: *mut CUarray,
- resource: CUgraphicsResource,
- arrayIndex: ::std::os::raw::c_uint,
- mipLevel: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphicsResourceGetMappedMipmappedArray(
- pMipmappedArray: *mut CUmipmappedArray,
- resource: CUgraphicsResource,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphicsResourceGetMappedPointer_v2(
- pDevPtr: *mut CUdeviceptr,
- pSize: *mut usize,
- resource: CUgraphicsResource,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphicsResourceSetMapFlags_v2(
- resource: CUgraphicsResource,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphicsMapResources_ptsz(
- count: ::std::os::raw::c_uint,
- resources: *mut CUgraphicsResource,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphicsUnmapResources_ptsz(
- count: ::std::os::raw::c_uint,
- resources: *mut CUgraphicsResource,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGetProcAddress(
- symbol: *const ::std::os::raw::c_char,
- pfn: *mut *mut ::std::os::raw::c_void,
- cudaVersion: ::std::os::raw::c_int,
- flags: cuuint64_t,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGetExportTable(
- ppExportTable: *mut *const ::std::os::raw::c_void,
- pExportTableId: *const CUuuid,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemHostRegister(
- p: *mut ::std::os::raw::c_void,
- bytesize: usize,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphicsResourceSetMapFlags(
- resource: CUgraphicsResource,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLinkCreate(
- numOptions: ::std::os::raw::c_uint,
- options: *mut CUjit_option,
- optionValues: *mut *mut ::std::os::raw::c_void,
- stateOut: *mut CUlinkState,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLinkAddData(
- state: CUlinkState,
- type_: CUjitInputType,
- data: *mut ::std::os::raw::c_void,
- size: usize,
- name: *const ::std::os::raw::c_char,
- numOptions: ::std::os::raw::c_uint,
- options: *mut CUjit_option,
- optionValues: *mut *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLinkAddFile(
- state: CUlinkState,
- type_: CUjitInputType,
- path: *const ::std::os::raw::c_char,
- numOptions: ::std::os::raw::c_uint,
- options: *mut CUjit_option,
- optionValues: *mut *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetAddress2D_v2(
- hTexRef: CUtexref,
- desc: *const CUDA_ARRAY_DESCRIPTOR,
- dptr: CUdeviceptr,
- Pitch: usize,
- ) -> CUresult;
-}
-#[repr(transparent)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUdeviceptr_v1(pub ::std::os::raw::c_uint);
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_MEMCPY2D_v1_st {
- pub srcXInBytes: ::std::os::raw::c_uint,
- pub srcY: ::std::os::raw::c_uint,
- pub srcMemoryType: CUmemorytype,
- pub srcHost: *const ::std::os::raw::c_void,
- pub srcDevice: CUdeviceptr_v1,
- pub srcArray: CUarray,
- pub srcPitch: ::std::os::raw::c_uint,
- pub dstXInBytes: ::std::os::raw::c_uint,
- pub dstY: ::std::os::raw::c_uint,
- pub dstMemoryType: CUmemorytype,
- pub dstHost: *mut ::std::os::raw::c_void,
- pub dstDevice: CUdeviceptr_v1,
- pub dstArray: CUarray,
- pub dstPitch: ::std::os::raw::c_uint,
- pub WidthInBytes: ::std::os::raw::c_uint,
- pub Height: ::std::os::raw::c_uint,
-}
-pub type CUDA_MEMCPY2D_v1 = CUDA_MEMCPY2D_v1_st;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_MEMCPY3D_v1_st {
- pub srcXInBytes: ::std::os::raw::c_uint,
- pub srcY: ::std::os::raw::c_uint,
- pub srcZ: ::std::os::raw::c_uint,
- pub srcLOD: ::std::os::raw::c_uint,
- pub srcMemoryType: CUmemorytype,
- pub srcHost: *const ::std::os::raw::c_void,
- pub srcDevice: CUdeviceptr_v1,
- pub srcArray: CUarray,
- pub reserved0: *mut ::std::os::raw::c_void,
- pub srcPitch: ::std::os::raw::c_uint,
- pub srcHeight: ::std::os::raw::c_uint,
- pub dstXInBytes: ::std::os::raw::c_uint,
- pub dstY: ::std::os::raw::c_uint,
- pub dstZ: ::std::os::raw::c_uint,
- pub dstLOD: ::std::os::raw::c_uint,
- pub dstMemoryType: CUmemorytype,
- pub dstHost: *mut ::std::os::raw::c_void,
- pub dstDevice: CUdeviceptr_v1,
- pub dstArray: CUarray,
- pub reserved1: *mut ::std::os::raw::c_void,
- pub dstPitch: ::std::os::raw::c_uint,
- pub dstHeight: ::std::os::raw::c_uint,
- pub WidthInBytes: ::std::os::raw::c_uint,
- pub Height: ::std::os::raw::c_uint,
- pub Depth: ::std::os::raw::c_uint,
-}
-pub type CUDA_MEMCPY3D_v1 = CUDA_MEMCPY3D_v1_st;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_ARRAY_DESCRIPTOR_v1_st {
- pub Width: ::std::os::raw::c_uint,
- pub Height: ::std::os::raw::c_uint,
- pub Format: CUarray_format,
- pub NumChannels: ::std::os::raw::c_uint,
-}
-pub type CUDA_ARRAY_DESCRIPTOR_v1 = CUDA_ARRAY_DESCRIPTOR_v1_st;
-#[repr(C)]
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct CUDA_ARRAY3D_DESCRIPTOR_v1_st {
- pub Width: ::std::os::raw::c_uint,
- pub Height: ::std::os::raw::c_uint,
- pub Depth: ::std::os::raw::c_uint,
- pub Format: CUarray_format,
- pub NumChannels: ::std::os::raw::c_uint,
- pub Flags: ::std::os::raw::c_uint,
-}
-pub type CUDA_ARRAY3D_DESCRIPTOR_v1 = CUDA_ARRAY3D_DESCRIPTOR_v1_st;
-extern "C" {
- pub fn cuDeviceTotalMem(bytes: *mut ::std::os::raw::c_uint, dev: CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxCreate(
- pctx: *mut CUcontext,
- flags: ::std::os::raw::c_uint,
- dev: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuModuleGetGlobal(
- dptr: *mut CUdeviceptr_v1,
- bytes: *mut ::std::os::raw::c_uint,
- hmod: CUmodule,
- name: *const ::std::os::raw::c_char,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemGetInfo(
- free: *mut ::std::os::raw::c_uint,
- total: *mut ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAlloc(dptr: *mut CUdeviceptr_v1, bytesize: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAllocPitch(
- dptr: *mut CUdeviceptr_v1,
- pPitch: *mut ::std::os::raw::c_uint,
- WidthInBytes: ::std::os::raw::c_uint,
- Height: ::std::os::raw::c_uint,
- ElementSizeBytes: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemFree(dptr: CUdeviceptr_v1) -> CUresult;
-}
-extern "C" {
- pub fn cuMemGetAddressRange(
- pbase: *mut CUdeviceptr_v1,
- psize: *mut ::std::os::raw::c_uint,
- dptr: CUdeviceptr_v1,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAllocHost(
- pp: *mut *mut ::std::os::raw::c_void,
- bytesize: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemHostGetDevicePointer(
- pdptr: *mut CUdeviceptr_v1,
- p: *mut ::std::os::raw::c_void,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyHtoD(
- dstDevice: CUdeviceptr_v1,
- srcHost: *const ::std::os::raw::c_void,
- ByteCount: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoH(
- dstHost: *mut ::std::os::raw::c_void,
- srcDevice: CUdeviceptr_v1,
- ByteCount: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoD(
- dstDevice: CUdeviceptr_v1,
- srcDevice: CUdeviceptr_v1,
- ByteCount: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoA(
- dstArray: CUarray,
- dstOffset: ::std::os::raw::c_uint,
- srcDevice: CUdeviceptr_v1,
- ByteCount: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAtoD(
- dstDevice: CUdeviceptr_v1,
- srcArray: CUarray,
- srcOffset: ::std::os::raw::c_uint,
- ByteCount: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyHtoA(
- dstArray: CUarray,
- dstOffset: ::std::os::raw::c_uint,
- srcHost: *const ::std::os::raw::c_void,
- ByteCount: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAtoH(
- dstHost: *mut ::std::os::raw::c_void,
- srcArray: CUarray,
- srcOffset: ::std::os::raw::c_uint,
- ByteCount: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAtoA(
- dstArray: CUarray,
- dstOffset: ::std::os::raw::c_uint,
- srcArray: CUarray,
- srcOffset: ::std::os::raw::c_uint,
- ByteCount: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyHtoAAsync(
- dstArray: CUarray,
- dstOffset: ::std::os::raw::c_uint,
- srcHost: *const ::std::os::raw::c_void,
- ByteCount: ::std::os::raw::c_uint,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAtoHAsync(
- dstHost: *mut ::std::os::raw::c_void,
- srcArray: CUarray,
- srcOffset: ::std::os::raw::c_uint,
- ByteCount: ::std::os::raw::c_uint,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy2D(pCopy: *const CUDA_MEMCPY2D_v1) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy2DUnaligned(pCopy: *const CUDA_MEMCPY2D_v1) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy3D(pCopy: *const CUDA_MEMCPY3D_v1) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyHtoDAsync(
- dstDevice: CUdeviceptr_v1,
- srcHost: *const ::std::os::raw::c_void,
- ByteCount: ::std::os::raw::c_uint,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoHAsync(
- dstHost: *mut ::std::os::raw::c_void,
- srcDevice: CUdeviceptr_v1,
- ByteCount: ::std::os::raw::c_uint,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoDAsync(
- dstDevice: CUdeviceptr_v1,
- srcDevice: CUdeviceptr_v1,
- ByteCount: ::std::os::raw::c_uint,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy2DAsync(pCopy: *const CUDA_MEMCPY2D_v1, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy3DAsync(pCopy: *const CUDA_MEMCPY3D_v1, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD8(
- dstDevice: CUdeviceptr_v1,
- uc: ::std::os::raw::c_uchar,
- N: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD16(
- dstDevice: CUdeviceptr_v1,
- us: ::std::os::raw::c_ushort,
- N: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD32(
- dstDevice: CUdeviceptr_v1,
- ui: ::std::os::raw::c_uint,
- N: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D8(
- dstDevice: CUdeviceptr_v1,
- dstPitch: ::std::os::raw::c_uint,
- uc: ::std::os::raw::c_uchar,
- Width: ::std::os::raw::c_uint,
- Height: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D16(
- dstDevice: CUdeviceptr_v1,
- dstPitch: ::std::os::raw::c_uint,
- us: ::std::os::raw::c_ushort,
- Width: ::std::os::raw::c_uint,
- Height: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D32(
- dstDevice: CUdeviceptr_v1,
- dstPitch: ::std::os::raw::c_uint,
- ui: ::std::os::raw::c_uint,
- Width: ::std::os::raw::c_uint,
- Height: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuArrayCreate(
- pHandle: *mut CUarray,
- pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR_v1,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuArrayGetDescriptor(
- pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR_v1,
- hArray: CUarray,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuArray3DCreate(
- pHandle: *mut CUarray,
- pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR_v1,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuArray3DGetDescriptor(
- pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR_v1,
- hArray: CUarray,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetAddress(
- ByteOffset: *mut ::std::os::raw::c_uint,
- hTexRef: CUtexref,
- dptr: CUdeviceptr_v1,
- bytes: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefSetAddress2D(
- hTexRef: CUtexref,
- desc: *const CUDA_ARRAY_DESCRIPTOR_v1,
- dptr: CUdeviceptr_v1,
- Pitch: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuTexRefGetAddress(pdptr: *mut CUdeviceptr_v1, hTexRef: CUtexref) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphicsResourceGetMappedPointer(
- pDevPtr: *mut CUdeviceptr_v1,
- pSize: *mut ::std::os::raw::c_uint,
- resource: CUgraphicsResource,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxDestroy(ctx: CUcontext) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxPopCurrent(pctx: *mut CUcontext) -> CUresult;
-}
-extern "C" {
- pub fn cuCtxPushCurrent(ctx: CUcontext) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamDestroy(hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuEventDestroy(hEvent: CUevent) -> CUresult;
-}
-extern "C" {
- pub fn cuDevicePrimaryCtxRelease(dev: CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuDevicePrimaryCtxReset(dev: CUdevice) -> CUresult;
-}
-extern "C" {
- pub fn cuDevicePrimaryCtxSetFlags(dev: CUdevice, flags: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyHtoD_v2(
- dstDevice: CUdeviceptr,
- srcHost: *const ::std::os::raw::c_void,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoH_v2(
- dstHost: *mut ::std::os::raw::c_void,
- srcDevice: CUdeviceptr,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoD_v2(
- dstDevice: CUdeviceptr,
- srcDevice: CUdeviceptr,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoA_v2(
- dstArray: CUarray,
- dstOffset: usize,
- srcDevice: CUdeviceptr,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAtoD_v2(
- dstDevice: CUdeviceptr,
- srcArray: CUarray,
- srcOffset: usize,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyHtoA_v2(
- dstArray: CUarray,
- dstOffset: usize,
- srcHost: *const ::std::os::raw::c_void,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAtoH_v2(
- dstHost: *mut ::std::os::raw::c_void,
- srcArray: CUarray,
- srcOffset: usize,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAtoA_v2(
- dstArray: CUarray,
- dstOffset: usize,
- srcArray: CUarray,
- srcOffset: usize,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyHtoAAsync_v2(
- dstArray: CUarray,
- dstOffset: usize,
- srcHost: *const ::std::os::raw::c_void,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAtoHAsync_v2(
- dstHost: *mut ::std::os::raw::c_void,
- srcArray: CUarray,
- srcOffset: usize,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy2D_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyHtoDAsync_v2(
- dstDevice: CUdeviceptr,
- srcHost: *const ::std::os::raw::c_void,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoHAsync_v2(
- dstHost: *mut ::std::os::raw::c_void,
- srcDevice: CUdeviceptr,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyDtoDAsync_v2(
- dstDevice: CUdeviceptr,
- srcDevice: CUdeviceptr,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy2DAsync_v2(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy3DAsync_v2(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD8_v2(dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, N: usize)
- -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD16_v2(
- dstDevice: CUdeviceptr,
- us: ::std::os::raw::c_ushort,
- N: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD32_v2(dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, N: usize)
- -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D8_v2(
- dstDevice: CUdeviceptr,
- dstPitch: usize,
- uc: ::std::os::raw::c_uchar,
- Width: usize,
- Height: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D16_v2(
- dstDevice: CUdeviceptr,
- dstPitch: usize,
- us: ::std::os::raw::c_ushort,
- Width: usize,
- Height: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D32_v2(
- dstDevice: CUdeviceptr,
- dstPitch: usize,
- ui: ::std::os::raw::c_uint,
- Width: usize,
- Height: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy(dst: CUdeviceptr, src: CUdeviceptr, ByteCount: usize) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyAsync(
- dst: CUdeviceptr,
- src: CUdeviceptr,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyPeer(
- dstDevice: CUdeviceptr,
- dstContext: CUcontext,
- srcDevice: CUdeviceptr,
- srcContext: CUcontext,
- ByteCount: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpyPeerAsync(
- dstDevice: CUdeviceptr,
- dstContext: CUcontext,
- srcDevice: CUdeviceptr,
- srcContext: CUcontext,
- ByteCount: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy3DPeer(pCopy: *const CUDA_MEMCPY3D_PEER) -> CUresult;
-}
-extern "C" {
- pub fn cuMemcpy3DPeerAsync(pCopy: *const CUDA_MEMCPY3D_PEER, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD8Async(
- dstDevice: CUdeviceptr,
- uc: ::std::os::raw::c_uchar,
- N: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD16Async(
- dstDevice: CUdeviceptr,
- us: ::std::os::raw::c_ushort,
- N: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD32Async(
- dstDevice: CUdeviceptr,
- ui: ::std::os::raw::c_uint,
- N: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D8Async(
- dstDevice: CUdeviceptr,
- dstPitch: usize,
- uc: ::std::os::raw::c_uchar,
- Width: usize,
- Height: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D16Async(
- dstDevice: CUdeviceptr,
- dstPitch: usize,
- us: ::std::os::raw::c_ushort,
- Width: usize,
- Height: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemsetD2D32Async(
- dstDevice: CUdeviceptr,
- dstPitch: usize,
- ui: ::std::os::raw::c_uint,
- Width: usize,
- Height: usize,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamGetPriority(hStream: CUstream, priority: *mut ::std::os::raw::c_int)
- -> CUresult;
-}
-extern "C" {
- pub fn cuStreamGetFlags(hStream: CUstream, flags: *mut ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamGetCtx(hStream: CUstream, pctx: *mut CUcontext) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamWaitEvent(
- hStream: CUstream,
- hEvent: CUevent,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamAddCallback(
- hStream: CUstream,
- callback: CUstreamCallback,
- userData: *mut ::std::os::raw::c_void,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamAttachMemAsync(
- hStream: CUstream,
- dptr: CUdeviceptr,
- length: usize,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamQuery(hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamSynchronize(hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuEventRecord(hEvent: CUevent, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuEventRecordWithFlags(
- hEvent: CUevent,
- hStream: CUstream,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLaunchKernel(
- f: CUfunction,
- gridDimX: ::std::os::raw::c_uint,
- gridDimY: ::std::os::raw::c_uint,
- gridDimZ: ::std::os::raw::c_uint,
- blockDimX: ::std::os::raw::c_uint,
- blockDimY: ::std::os::raw::c_uint,
- blockDimZ: ::std::os::raw::c_uint,
- sharedMemBytes: ::std::os::raw::c_uint,
- hStream: CUstream,
- kernelParams: *mut *mut ::std::os::raw::c_void,
- extra: *mut *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLaunchHostFunc(
- hStream: CUstream,
- fn_: CUhostFn,
- userData: *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphicsMapResources(
- count: ::std::os::raw::c_uint,
- resources: *mut CUgraphicsResource,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphicsUnmapResources(
- count: ::std::os::raw::c_uint,
- resources: *mut CUgraphicsResource,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamWriteValue32(
- stream: CUstream,
- addr: CUdeviceptr,
- value: cuuint32_t,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamWaitValue32(
- stream: CUstream,
- addr: CUdeviceptr,
- value: cuuint32_t,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamWriteValue64(
- stream: CUstream,
- addr: CUdeviceptr,
- value: cuuint64_t,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamWaitValue64(
- stream: CUstream,
- addr: CUdeviceptr,
- value: cuuint64_t,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamBatchMemOp(
- stream: CUstream,
- count: ::std::os::raw::c_uint,
- paramArray: *mut CUstreamBatchMemOpParams,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemPrefetchAsync(
- devPtr: CUdeviceptr,
- count: usize,
- dstDevice: CUdevice,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuLaunchCooperativeKernel(
- f: CUfunction,
- gridDimX: ::std::os::raw::c_uint,
- gridDimY: ::std::os::raw::c_uint,
- gridDimZ: ::std::os::raw::c_uint,
- blockDimX: ::std::os::raw::c_uint,
- blockDimY: ::std::os::raw::c_uint,
- blockDimZ: ::std::os::raw::c_uint,
- sharedMemBytes: ::std::os::raw::c_uint,
- hStream: CUstream,
- kernelParams: *mut *mut ::std::os::raw::c_void,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuSignalExternalSemaphoresAsync(
- extSemArray: *const CUexternalSemaphore,
- paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,
- numExtSems: ::std::os::raw::c_uint,
- stream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuWaitExternalSemaphoresAsync(
- extSemArray: *const CUexternalSemaphore,
- paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,
- numExtSems: ::std::os::raw::c_uint,
- stream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamBeginCapture(hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamBeginCapture_ptsz(hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamBeginCapture_v2(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamEndCapture(hStream: CUstream, phGraph: *mut CUgraph) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamIsCapturing(
- hStream: CUstream,
- captureStatus: *mut CUstreamCaptureStatus,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamGetCaptureInfo(
- hStream: CUstream,
- captureStatus_out: *mut CUstreamCaptureStatus,
- id_out: *mut cuuint64_t,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamGetCaptureInfo_v2(
- hStream: CUstream,
- captureStatus_out: *mut CUstreamCaptureStatus,
- id_out: *mut cuuint64_t,
- graph_out: *mut CUgraph,
- dependencies_out: *mut *const CUgraphNode,
- numDependencies_out: *mut usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphUpload(hGraph: CUgraphExec, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphLaunch(hGraph: CUgraphExec, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamCopyAttributes(dstStream: CUstream, srcStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamGetAttribute(
- hStream: CUstream,
- attr: CUstreamAttrID,
- value: *mut CUstreamAttrValue,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamSetAttribute(
- hStream: CUstream,
- attr: CUstreamAttrID,
- param: *const CUstreamAttrValue,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuIpcOpenMemHandle(
- pdptr: *mut CUdeviceptr,
- handle: CUipcMemHandle,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphInstantiate(
- phGraphExec: *mut CUgraphExec,
- hGraph: CUgraph,
- phErrorNode: *mut CUgraphNode,
- logBuffer: *mut ::std::os::raw::c_char,
- bufferSize: usize,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemMapArrayAsync(
- mapInfoList: *mut CUarrayMapInfo,
- count: ::std::os::raw::c_uint,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuMemFreeAsync(dptr: CUdeviceptr, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAllocAsync(dptr: *mut CUdeviceptr, bytesize: usize, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuMemAllocFromPoolAsync(
- dptr: *mut CUdeviceptr,
- bytesize: usize,
- pool: CUmemoryPool,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuStreamUpdateCaptureDependencies(
- hStream: CUstream,
- dependencies: *mut CUgraphNode,
- numDependencies: usize,
- flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-pub type GLenum = ::std::os::raw::c_uint;
-pub type GLuint = ::std::os::raw::c_uint;
-pub type HGPUNV = *mut ::std::os::raw::c_void;
-extern "C" {
- pub fn cuGraphicsGLRegisterBuffer(
- pCudaResource: *mut CUgraphicsResource,
- buffer: GLuint,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGraphicsGLRegisterImage(
- pCudaResource: *mut CUgraphicsResource,
- image: GLuint,
- target: GLenum,
- Flags: ::std::os::raw::c_uint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuWGLGetDevice(pDevice: *mut CUdevice, hGpu: HGPUNV) -> CUresult;
-}
-impl CUGLDeviceList_enum {
- pub const CU_GL_DEVICE_LIST_ALL: CUGLDeviceList_enum = CUGLDeviceList_enum(1);
-}
-impl CUGLDeviceList_enum {
- pub const CU_GL_DEVICE_LIST_CURRENT_FRAME: CUGLDeviceList_enum = CUGLDeviceList_enum(2);
-}
-impl CUGLDeviceList_enum {
- pub const CU_GL_DEVICE_LIST_NEXT_FRAME: CUGLDeviceList_enum = CUGLDeviceList_enum(3);
-}
-#[repr(transparent)]
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-pub struct CUGLDeviceList_enum(pub ::std::os::raw::c_uint);
-pub use self::CUGLDeviceList_enum as CUGLDeviceList;
-extern "C" {
- pub fn cuGLGetDevices_v2(
- pCudaDeviceCount: *mut ::std::os::raw::c_uint,
- pCudaDevices: *mut CUdevice,
- cudaDeviceCount: ::std::os::raw::c_uint,
- deviceList: CUGLDeviceList,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGLCtxCreate_v2(
- pCtx: *mut CUcontext,
- Flags: ::std::os::raw::c_uint,
- device: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGLInit() -> CUresult;
-}
-extern "C" {
- pub fn cuGLRegisterBufferObject(buffer: GLuint) -> CUresult;
-}
-extern "C" {
- pub fn cuGLMapBufferObject_v2_ptds(
- dptr: *mut CUdeviceptr,
- size: *mut usize,
- buffer: GLuint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGLUnmapBufferObject(buffer: GLuint) -> CUresult;
-}
-extern "C" {
- pub fn cuGLUnregisterBufferObject(buffer: GLuint) -> CUresult;
-}
-extern "C" {
- pub fn cuGLSetBufferObjectMapFlags(buffer: GLuint, Flags: ::std::os::raw::c_uint) -> CUresult;
-}
-extern "C" {
- pub fn cuGLMapBufferObjectAsync_v2_ptsz(
- dptr: *mut CUdeviceptr,
- size: *mut usize,
- buffer: GLuint,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGLUnmapBufferObjectAsync(buffer: GLuint, hStream: CUstream) -> CUresult;
-}
-extern "C" {
- pub fn cuGLGetDevices(
- pCudaDeviceCount: *mut ::std::os::raw::c_uint,
- pCudaDevices: *mut CUdevice,
- cudaDeviceCount: ::std::os::raw::c_uint,
- deviceList: CUGLDeviceList,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGLMapBufferObject_v2(
- dptr: *mut CUdeviceptr,
- size: *mut usize,
- buffer: GLuint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGLMapBufferObjectAsync_v2(
- dptr: *mut CUdeviceptr,
- size: *mut usize,
- buffer: GLuint,
- hStream: CUstream,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGLCtxCreate(
- pCtx: *mut CUcontext,
- Flags: ::std::os::raw::c_uint,
- device: CUdevice,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGLMapBufferObject(
- dptr: *mut CUdeviceptr_v1,
- size: *mut ::std::os::raw::c_uint,
- buffer: GLuint,
- ) -> CUresult;
-}
-extern "C" {
- pub fn cuGLMapBufferObjectAsync(
- dptr: *mut CUdeviceptr_v1,
- size: *mut ::std::os::raw::c_uint,
- buffer: GLuint,
- hStream: CUstream,
- ) -> CUresult;
-}
+// Generated automatically by zluda_bindgen +// DO NOT EDIT MANUALLY +#![allow(warnings)] +extern "system" { + /** \brief Gets the string description of an error code + + Sets \p *pStr to the address of a NULL-terminated string description + of the error code \p error. + If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE + will be returned and \p *pStr will be set to the NULL address. + + \param error - Error code to convert to string + \param pStr - Address of the string pointer. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::CUresult, + ::cudaGetErrorString*/ + fn cuGetErrorString( + error: cuda_types::CUresult, + pStr: *mut *const ::core::ffi::c_char, + ) -> cuda_types::CUresult; + /** \brief Gets the string representation of an error code enum name + + Sets \p *pStr to the address of a NULL-terminated string representation + of the name of the enum error code \p error. + If the error code is not recognized, ::CUDA_ERROR_INVALID_VALUE + will be returned and \p *pStr will be set to the NULL address. + + \param error - Error code to convert to string + \param pStr - Address of the string pointer. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::CUresult, + ::cudaGetErrorName*/ + fn cuGetErrorName( + error: cuda_types::CUresult, + pStr: *mut *const ::core::ffi::c_char, + ) -> cuda_types::CUresult; + /** \brief Initialize the CUDA driver API + Initializes the driver API and must be called before any other function from + the driver API in the current process. Currently, the \p Flags parameter must be 0. If ::cuInit() + has not been called, any function from the driver API will return + ::CUDA_ERROR_NOT_INITIALIZED. + + \param Flags - Initialization flag for CUDA. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_SYSTEM_DRIVER_MISMATCH, + ::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE + \notefnerr*/ + fn cuInit(Flags: ::core::ffi::c_uint) -> cuda_types::CUresult; + /** \brief Returns the latest CUDA version supported by driver + + Returns in \p *driverVersion the version of CUDA supported by + the driver. The version is returned as + (1000 × major + 10 × minor). For example, CUDA 9.2 + would be represented by 9020. + + This function automatically returns ::CUDA_ERROR_INVALID_VALUE if + \p driverVersion is NULL. + + \param driverVersion - Returns the CUDA driver version + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa + ::cudaDriverGetVersion, + ::cudaRuntimeGetVersion*/ + fn cuDriverGetVersion( + driverVersion: *mut ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Returns a handle to a compute device + + Returns in \p *device a device handle given an ordinal in the range <b>[0, + ::cuDeviceGetCount()-1]</b>. + + \param device - Returned device handle + \param ordinal - Device number to get handle for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuDeviceGetAttribute, + ::cuDeviceGetCount, + ::cuDeviceGetName, + ::cuDeviceGetUuid, + ::cuDeviceGetLuid, + ::cuDeviceTotalMem, + ::cuDeviceGetExecAffinitySupport*/ + fn cuDeviceGet( + device: *mut cuda_types::CUdevice, + ordinal: ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Returns the number of compute-capable devices + + Returns in \p *count the number of devices with compute capability greater + than or equal to 2.0 that are available for execution. If there is no such + device, ::cuDeviceGetCount() returns 0. + + \param count - Returned number of compute-capable devices + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa + ::cuDeviceGetAttribute, + ::cuDeviceGetName, + ::cuDeviceGetUuid, + ::cuDeviceGetLuid, + ::cuDeviceGet, + ::cuDeviceTotalMem, + ::cuDeviceGetExecAffinitySupport, + ::cudaGetDeviceCount*/ + fn cuDeviceGetCount(count: *mut ::core::ffi::c_int) -> cuda_types::CUresult; + /** \brief Returns an identifier string for the device + + Returns an ASCII string identifying the device \p dev in the NULL-terminated + string pointed to by \p name. \p len specifies the maximum length of the + string that may be returned. + + \param name - Returned identifier string for the device + \param len - Maximum length of string to store in \p name + \param dev - Device to get identifier string for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuDeviceGetAttribute, + ::cuDeviceGetUuid, + ::cuDeviceGetLuid, + ::cuDeviceGetCount, + ::cuDeviceGet, + ::cuDeviceTotalMem, + ::cuDeviceGetExecAffinitySupport, + ::cudaGetDeviceProperties*/ + fn cuDeviceGetName( + name: *mut ::core::ffi::c_char, + len: ::core::ffi::c_int, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Return an UUID for the device + + Note there is a later version of this API, ::cuDeviceGetUuid_v2. It will + supplant this version in 12.0, which is retained for minor version compatibility. + + Returns 16-octets identifying the device \p dev in the structure + pointed by the \p uuid. + + \param uuid - Returned UUID + \param dev - Device to get identifier string for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuDeviceGetUuid_v2 + ::cuDeviceGetAttribute, + ::cuDeviceGetCount, + ::cuDeviceGetName, + ::cuDeviceGetLuid, + ::cuDeviceGet, + ::cuDeviceTotalMem, + ::cuDeviceGetExecAffinitySupport, + ::cudaGetDeviceProperties*/ + fn cuDeviceGetUuid( + uuid: *mut cuda_types::CUuuid, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Return an UUID for the device (11.4+) + + Returns 16-octets identifying the device \p dev in the structure + pointed by the \p uuid. If the device is in MIG mode, returns its + MIG UUID which uniquely identifies the subscribed MIG compute instance. + + \param uuid - Returned UUID + \param dev - Device to get identifier string for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuDeviceGetAttribute, + ::cuDeviceGetCount, + ::cuDeviceGetName, + ::cuDeviceGetLuid, + ::cuDeviceGet, + ::cuDeviceTotalMem, + ::cudaGetDeviceProperties*/ + fn cuDeviceGetUuid_v2( + uuid: *mut cuda_types::CUuuid, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Return an LUID and device node mask for the device + + Return identifying information (\p luid and \p deviceNodeMask) to allow + matching device with graphics APIs. + + \param luid - Returned LUID + \param deviceNodeMask - Returned device node mask + \param dev - Device to get identifier string for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuDeviceGetAttribute, + ::cuDeviceGetCount, + ::cuDeviceGetName, + ::cuDeviceGet, + ::cuDeviceTotalMem, + ::cuDeviceGetExecAffinitySupport, + ::cudaGetDeviceProperties*/ + fn cuDeviceGetLuid( + luid: *mut ::core::ffi::c_char, + deviceNodeMask: *mut ::core::ffi::c_uint, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Returns the total amount of memory on the device + + Returns in \p *bytes the total amount of memory available on the device + \p dev in bytes. + + \param bytes - Returned memory available on device in bytes + \param dev - Device handle + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuDeviceGetAttribute, + ::cuDeviceGetCount, + ::cuDeviceGetName, + ::cuDeviceGetUuid, + ::cuDeviceGet, + ::cuDeviceGetExecAffinitySupport, + ::cudaMemGetInfo*/ + fn cuDeviceTotalMem_v2( + bytes: *mut usize, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Returns the maximum number of elements allocatable in a 1D linear texture for a given texture element size. + + Returns in \p maxWidthInElements the maximum number of texture elements allocatable in a 1D linear texture + for given \p format and \p numChannels. + + \param maxWidthInElements - Returned maximum number of texture elements allocatable for given \p format and \p numChannels. + \param format - Texture format. + \param numChannels - Number of channels per texture element. + \param dev - Device handle. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuDeviceGetAttribute, + ::cuDeviceGetCount, + ::cuDeviceGetName, + ::cuDeviceGetUuid, + ::cuDeviceGet, + ::cudaMemGetInfo, + ::cuDeviceTotalMem*/ + fn cuDeviceGetTexture1DLinearMaxWidth( + maxWidthInElements: *mut usize, + format: cuda_types::CUarray_format, + numChannels: ::core::ffi::c_uint, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Returns information about the device + + Returns in \p *pi the integer value of the attribute \p attrib on device + \p dev. The supported attributes are: + - ::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads per + block; + - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X: Maximum x-dimension of a block + - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y: Maximum y-dimension of a block + - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z: Maximum z-dimension of a block + - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X: Maximum x-dimension of a grid + - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y: Maximum y-dimension of a grid + - ::CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z: Maximum z-dimension of a grid + - ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK: Maximum amount of + shared memory available to a thread block in bytes + - ::CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY: Memory available on device for + __constant__ variables in a CUDA C kernel in bytes + - ::CU_DEVICE_ATTRIBUTE_WARP_SIZE: Warp size in threads + - ::CU_DEVICE_ATTRIBUTE_MAX_PITCH: Maximum pitch in bytes allowed by the + memory copy functions that involve memory regions allocated through + ::cuMemAllocPitch() + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH: Maximum 1D + texture width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH: Maximum width + for a 1D texture bound to linear memory + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH: Maximum + mipmapped 1D texture width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH: Maximum 2D + texture width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT: Maximum 2D + texture height + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH: Maximum width + for a 2D texture bound to linear memory + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT: Maximum height + for a 2D texture bound to linear memory + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH: Maximum pitch + in bytes for a 2D texture bound to linear memory + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH: Maximum + mipmapped 2D texture width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT: Maximum + mipmapped 2D texture height + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH: Maximum 3D + texture width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT: Maximum 3D + texture height + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH: Maximum 3D + texture depth + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE: + Alternate maximum 3D texture width, 0 if no alternate + maximum 3D texture size is supported + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE: + Alternate maximum 3D texture height, 0 if no alternate + maximum 3D texture size is supported + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE: + Alternate maximum 3D texture depth, 0 if no alternate + maximum 3D texture size is supported + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH: + Maximum cubemap texture width or height + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH: + Maximum 1D layered texture width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS: + Maximum layers in a 1D layered texture + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH: + Maximum 2D layered texture width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT: + Maximum 2D layered texture height + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS: + Maximum layers in a 2D layered texture + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH: + Maximum cubemap layered texture width or height + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS: + Maximum layers in a cubemap layered texture + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH: + Maximum 1D surface width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH: + Maximum 2D surface width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT: + Maximum 2D surface height + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH: + Maximum 3D surface width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT: + Maximum 3D surface height + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH: + Maximum 3D surface depth + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH: + Maximum 1D layered surface width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS: + Maximum layers in a 1D layered surface + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH: + Maximum 2D layered surface width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT: + Maximum 2D layered surface height + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS: + Maximum layers in a 2D layered surface + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH: + Maximum cubemap surface width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH: + Maximum cubemap layered surface width + - ::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS: + Maximum layers in a cubemap layered surface + - ::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK: Maximum number of 32-bit + registers available to a thread block + - ::CU_DEVICE_ATTRIBUTE_CLOCK_RATE: The typical clock frequency in kilohertz + - ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT: Alignment requirement; texture + base addresses aligned to ::textureAlign bytes do not need an offset + applied to texture fetches + - ::CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT: Pitch alignment requirement + for 2D texture references bound to pitched memory + - ::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: 1 if the device can concurrently copy + memory between host and device while executing a kernel, or 0 if not + - ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT: Number of multiprocessors on + the device + - ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT: 1 if there is a run time limit + for kernels executed on the device, or 0 if not + - ::CU_DEVICE_ATTRIBUTE_INTEGRATED: 1 if the device is integrated with the + memory subsystem, or 0 if not + - ::CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY: 1 if the device can map host + memory into the CUDA address space, or 0 if not + - ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE: Compute mode that device is currently + in. Available modes are as follows: + - ::CU_COMPUTEMODE_DEFAULT: Default mode - Device is not restricted and + can have multiple CUDA contexts present at a single time. + - ::CU_COMPUTEMODE_PROHIBITED: Compute-prohibited mode - Device is + prohibited from creating new CUDA contexts. + - ::CU_COMPUTEMODE_EXCLUSIVE_PROCESS: Compute-exclusive-process mode - Device + can have only one context used by a single process at a time. + - ::CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS: 1 if the device supports + executing multiple kernels within the same context simultaneously, or 0 if + not. It is not guaranteed that multiple kernels will be resident + on the device concurrently so this feature should not be relied upon for + correctness. + - ::CU_DEVICE_ATTRIBUTE_ECC_ENABLED: 1 if error correction is enabled on the + device, 0 if error correction is disabled or not supported by the device + - ::CU_DEVICE_ATTRIBUTE_PCI_BUS_ID: PCI bus identifier of the device + - ::CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID: PCI device (also known as slot) identifier + of the device + - ::CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID: PCI domain identifier of the device + - ::CU_DEVICE_ATTRIBUTE_TCC_DRIVER: 1 if the device is using a TCC driver. TCC + is only available on Tesla hardware running Windows Vista or later + - ::CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE: Peak memory clock frequency in kilohertz + - ::CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH: Global memory bus width in bits + - ::CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE: Size of L2 cache in bytes. 0 if the device doesn't have L2 cache + - ::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR: Maximum resident threads per multiprocessor + - ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING: 1 if the device shares a unified address space with + the host, or 0 if not + - ::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR: Major compute capability version number + - ::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR: Minor compute capability version number + - ::CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED: 1 if device supports caching globals + in L1 cache, 0 if caching globals in L1 cache is not supported by the device + - ::CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED: 1 if device supports caching locals + in L1 cache, 0 if caching locals in L1 cache is not supported by the device + - ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR: Maximum amount of + shared memory available to a multiprocessor in bytes; this amount is shared + by all thread blocks simultaneously resident on a multiprocessor + - ::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR: Maximum number of 32-bit + registers available to a multiprocessor; this number is shared by all thread + blocks simultaneously resident on a multiprocessor + - ::CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY: 1 if device supports allocating managed memory + on this system, 0 if allocating managed memory is not supported by the device on this system. + - ::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD: 1 if device is on a multi-GPU board, 0 if not. + - ::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID: Unique identifier for a group of devices + associated with the same board. Devices on the same multi-GPU board will share the same identifier. + - ::CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED: 1 if Link between the device and the host + supports native atomic operations. + - ::CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO: Ratio of single precision performance + (in floating-point operations per second) to double precision performance. + - ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS: Device supports coherently accessing + pageable memory without calling cudaHostRegister on it. + - ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS: Device can coherently access managed memory + concurrently with the CPU. + - ::CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED: Device supports Compute Preemption. + - ::CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM: Device can access host registered + memory at the same virtual address as the CPU. + - ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN: The maximum per block shared memory size + supported on this device. This is the maximum value that can be opted into when using the cuFuncSetAttribute() or cuKernelSetAttribute() call. + For more details see ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES + - ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES: Device accesses pageable memory via the host's + page tables. + - ::CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST: The host can directly access managed memory on the device without migration. + - ::CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED: Device supports virtual memory management APIs like ::cuMemAddressReserve, ::cuMemCreate, ::cuMemMap and related APIs + - ::CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED: Device supports exporting memory to a posix file descriptor with ::cuMemExportToShareableHandle, if requested via ::cuMemCreate + - ::CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED: Device supports exporting memory to a Win32 NT handle with ::cuMemExportToShareableHandle, if requested via ::cuMemCreate + - ::CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED: Device supports exporting memory to a Win32 KMT handle with ::cuMemExportToShareableHandle, if requested via ::cuMemCreate + - ::CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR: Maximum number of thread blocks that can reside on a multiprocessor + - ::CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED: Device supports compressible memory allocation via ::cuMemCreate + - ::CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE: Maximum L2 persisting lines capacity setting in bytes + - ::CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE: Maximum value of CUaccessPolicyWindow::num_bytes + - ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED: Device supports specifying the GPUDirect RDMA flag with ::cuMemCreate. + - ::CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK: Amount of shared memory per block reserved by CUDA driver in bytes + - ::CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED: Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays. + - ::CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED: Device supports using the ::cuMemHostRegister flag ::CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU + - ::CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED: Device supports using the ::cuMemAllocAsync and ::cuMemPool family of APIs + - ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED: Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information) + - ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS: The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the ::CUflushGPUDirectRDMAWritesOptions enum + - ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING: GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See ::CUGPUDirectRDMAWritesOrdering for the numerical values returned here. + - ::CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES: Bitmask of handle types supported with mempool based IPC + - ::CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED: Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays. + + \param pi - Returned device attribute value + \param attrib - Device attribute to query + \param dev - Device handle + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuDeviceGetCount, + ::cuDeviceGetName, + ::cuDeviceGetUuid, + ::cuDeviceGet, + ::cuDeviceTotalMem, + ::cuDeviceGetExecAffinitySupport, + ::cudaDeviceGetAttribute, + ::cudaGetDeviceProperties*/ + fn cuDeviceGetAttribute( + pi: *mut ::core::ffi::c_int, + attrib: cuda_types::CUdevice_attribute, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Return NvSciSync attributes that this device can support. + + Returns in \p nvSciSyncAttrList, the properties of NvSciSync that + this CUDA device, \p dev can support. The returned \p nvSciSyncAttrList + can be used to create an NvSciSync object that matches this device's capabilities. + + If NvSciSyncAttrKey_RequiredPerm field in \p nvSciSyncAttrList is + already set this API will return ::CUDA_ERROR_INVALID_VALUE. + + The applications should set \p nvSciSyncAttrList to a valid + NvSciSyncAttrList failing which this API will return + ::CUDA_ERROR_INVALID_HANDLE. + + The \p flags controls how applications intends to use + the NvSciSync created from the \p nvSciSyncAttrList. The valid flags are: + - ::CUDA_NVSCISYNC_ATTR_SIGNAL, specifies that the applications intends to + signal an NvSciSync on this CUDA device. + - ::CUDA_NVSCISYNC_ATTR_WAIT, specifies that the applications intends to + wait on an NvSciSync on this CUDA device. + + At least one of these flags must be set, failing which the API + returns ::CUDA_ERROR_INVALID_VALUE. Both the flags are orthogonal + to one another: a developer may set both these flags that allows to + set both wait and signal specific attributes in the same \p nvSciSyncAttrList. + + Note that this API updates the input \p nvSciSyncAttrList with values equivalent + to the following public attribute key-values: + NvSciSyncAttrKey_RequiredPerm is set to + - NvSciSyncAccessPerm_SignalOnly if ::CUDA_NVSCISYNC_ATTR_SIGNAL is set in \p flags. + - NvSciSyncAccessPerm_WaitOnly if ::CUDA_NVSCISYNC_ATTR_WAIT is set in \p flags. + - NvSciSyncAccessPerm_WaitSignal if both ::CUDA_NVSCISYNC_ATTR_WAIT and + ::CUDA_NVSCISYNC_ATTR_SIGNAL are set in \p flags. + NvSciSyncAttrKey_PrimitiveInfo is set to + - NvSciSyncAttrValPrimitiveType_SysmemSemaphore on any valid \p device. + - NvSciSyncAttrValPrimitiveType_Syncpoint if \p device is a Tegra device. + - NvSciSyncAttrValPrimitiveType_SysmemSemaphorePayload64b if \p device is GA10X+. + NvSciSyncAttrKey_GpuId is set to the same UUID that is returned for this + \p device from ::cuDeviceGetUuid. + + \param nvSciSyncAttrList - Return NvSciSync attributes supported. + \param dev - Valid Cuda Device to get NvSciSync attributes for. + \param flags - flags describing NvSciSync usage. + + \return + + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_OUT_OF_MEMORY + + \sa + ::cuImportExternalSemaphore, + ::cuDestroyExternalSemaphore, + ::cuSignalExternalSemaphoresAsync, + ::cuWaitExternalSemaphoresAsync*/ + fn cuDeviceGetNvSciSyncAttributes( + nvSciSyncAttrList: *mut ::core::ffi::c_void, + dev: cuda_types::CUdevice, + flags: ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Sets the current memory pool of a device + + The memory pool must be local to the specified device. + ::cuMemAllocAsync allocates from the current mempool of the provided stream's device. + By default, a device's current memory pool is its default memory pool. + + \note Use ::cuMemAllocFromPoolAsync to specify asynchronous allocations from a device different + than the one the stream runs on. + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuDeviceGetDefaultMemPool, ::cuDeviceGetMemPool, ::cuMemPoolCreate, ::cuMemPoolDestroy, ::cuMemAllocFromPoolAsync*/ + fn cuDeviceSetMemPool( + dev: cuda_types::CUdevice, + pool: cuda_types::CUmemoryPool, + ) -> cuda_types::CUresult; + /** \brief Gets the current mempool for a device + + Returns the last pool provided to ::cuDeviceSetMemPool for this device + or the device's default memory pool if ::cuDeviceSetMemPool has never been called. + By default the current mempool is the default mempool for a device. + Otherwise the returned pool must have been set with ::cuDeviceSetMemPool. + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuDeviceGetDefaultMemPool, ::cuMemPoolCreate, ::cuDeviceSetMemPool*/ + fn cuDeviceGetMemPool( + pool: *mut cuda_types::CUmemoryPool, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Returns the default mempool of a device + + The default mempool of a device contains device memory from that device. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + + \sa ::cuMemAllocAsync, ::cuMemPoolTrimTo, ::cuMemPoolGetAttribute, ::cuMemPoolSetAttribute, cuMemPoolSetAccess, ::cuDeviceGetMemPool, ::cuMemPoolCreate*/ + fn cuDeviceGetDefaultMemPool( + pool_out: *mut cuda_types::CUmemoryPool, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Returns information about the execution affinity support of the device. + + Returns in \p *pi whether execution affinity type \p type is supported by device \p dev. + The supported types are: + - ::CU_EXEC_AFFINITY_TYPE_SM_COUNT: 1 if context with limited SMs is supported by the device, + or 0 if not; + + \param pi - 1 if the execution affinity type \p type is supported by the device, or 0 if not + \param type - Execution affinity type to query + \param dev - Device handle + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuDeviceGetAttribute, + ::cuDeviceGetCount, + ::cuDeviceGetName, + ::cuDeviceGetUuid, + ::cuDeviceGet, + ::cuDeviceTotalMem*/ + fn cuDeviceGetExecAffinitySupport( + pi: *mut ::core::ffi::c_int, + type_: cuda_types::CUexecAffinityType, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Blocks until remote writes are visible to the specified scope + + Blocks until GPUDirect RDMA writes to the target context via mappings + created through APIs like nvidia_p2p_get_pages (see + https://docs.nvidia.com/cuda/gpudirect-rdma for more information), are + visible to the specified scope. + + If the scope equals or lies within the scope indicated by + ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, the call + will be a no-op and can be safely omitted for performance. This can be + determined by comparing the numerical values between the two enums, with + smaller scopes having smaller values. + + Users may query support for this API via + ::CU_DEVICE_ATTRIBUTE_FLUSH_FLUSH_GPU_DIRECT_RDMA_OPTIONS. + + \param target - The target of the operation, see ::CUflushGPUDirectRDMAWritesTarget + \param scope - The scope of the operation, see ::CUflushGPUDirectRDMAWritesScope + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + \notefnerr +*/ + fn cuFlushGPUDirectRDMAWrites( + target: cuda_types::CUflushGPUDirectRDMAWritesTarget, + scope: cuda_types::CUflushGPUDirectRDMAWritesScope, + ) -> cuda_types::CUresult; + /** \brief Returns properties for a selected device + + \deprecated + + This function was deprecated as of CUDA 5.0 and replaced by ::cuDeviceGetAttribute(). + + Returns in \p *prop the properties of device \p dev. The ::CUdevprop + structure is defined as: + + \code +typedef struct CUdevprop_st { +int maxThreadsPerBlock; +int maxThreadsDim[3]; +int maxGridSize[3]; +int sharedMemPerBlock; +int totalConstantMemory; +int SIMDWidth; +int memPitch; +int regsPerBlock; +int clockRate; +int textureAlign +} CUdevprop; + \endcode + where: + + - ::maxThreadsPerBlock is the maximum number of threads per block; + - ::maxThreadsDim[3] is the maximum sizes of each dimension of a block; + - ::maxGridSize[3] is the maximum sizes of each dimension of a grid; + - ::sharedMemPerBlock is the total amount of shared memory available per + block in bytes; + - ::totalConstantMemory is the total amount of constant memory available on + the device in bytes; + - ::SIMDWidth is the warp size; + - ::memPitch is the maximum pitch allowed by the memory copy functions that + involve memory regions allocated through ::cuMemAllocPitch(); + - ::regsPerBlock is the total number of registers available per block; + - ::clockRate is the clock frequency in kilohertz; + - ::textureAlign is the alignment requirement; texture base addresses that + are aligned to ::textureAlign bytes do not need an offset applied to + texture fetches. + + \param prop - Returned properties of device + \param dev - Device to get properties for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuDeviceGetAttribute, + ::cuDeviceGetCount, + ::cuDeviceGetName, + ::cuDeviceGetUuid, + ::cuDeviceGet, + ::cuDeviceTotalMem*/ + fn cuDeviceGetProperties( + prop: *mut cuda_types::CUdevprop, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Returns the compute capability of the device + + \deprecated + + This function was deprecated as of CUDA 5.0 and its functionality superseded + by ::cuDeviceGetAttribute(). + + Returns in \p *major and \p *minor the major and minor revision numbers that + define the compute capability of the device \p dev. + + \param major - Major revision number + \param minor - Minor revision number + \param dev - Device handle + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuDeviceGetAttribute, + ::cuDeviceGetCount, + ::cuDeviceGetName, + ::cuDeviceGetUuid, + ::cuDeviceGet, + ::cuDeviceTotalMem*/ + fn cuDeviceComputeCapability( + major: *mut ::core::ffi::c_int, + minor: *mut ::core::ffi::c_int, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Retain the primary context on the GPU + + Retains the primary context on the device. + Once the user successfully retains the primary context, the primary context + will be active and available to the user until the user releases it + with ::cuDevicePrimaryCtxRelease() or resets it with ::cuDevicePrimaryCtxReset(). + Unlike ::cuCtxCreate() the newly retained context is not pushed onto the stack. + + Retaining the primary context for the first time will fail with ::CUDA_ERROR_UNKNOWN + if the compute mode of the device is ::CU_COMPUTEMODE_PROHIBITED. The function + ::cuDeviceGetAttribute() can be used with ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to + determine the compute mode of the device. + The <i>nvidia-smi</i> tool can be used to set the compute mode for + devices. Documentation for <i>nvidia-smi</i> can be obtained by passing a + -h option to it. + + Please note that the primary context always supports pinned allocations. Other + flags can be specified by ::cuDevicePrimaryCtxSetFlags(). + + \param pctx - Returned context handle of the new context + \param dev - Device for which primary context is requested + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa ::cuDevicePrimaryCtxRelease, + ::cuDevicePrimaryCtxSetFlags, + ::cuCtxCreate, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCtxSynchronize*/ + fn cuDevicePrimaryCtxRetain( + pctx: *mut cuda_types::CUcontext, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Release the primary context on the GPU + + Releases the primary context interop on the device. + A retained context should always be released once the user is done using + it. The context is automatically reset once the last reference to it is + released. This behavior is different when the primary context was retained + by the CUDA runtime from CUDA 4.0 and earlier. In this case, the primary + context remains always active. + + Releasing a primary context that has not been previously retained will + fail with ::CUDA_ERROR_INVALID_CONTEXT. + + Please note that unlike ::cuCtxDestroy() this method does not pop the context + from stack in any circumstances. + + \param dev - Device which primary context is released + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_INVALID_CONTEXT + \notefnerr + + \sa ::cuDevicePrimaryCtxRetain, + ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCtxSynchronize*/ + fn cuDevicePrimaryCtxRelease_v2(dev: cuda_types::CUdevice) -> cuda_types::CUresult; + /** \brief Set flags for the primary context + + Sets the flags for the primary context on the device overwriting perviously + set ones. + + The three LSBs of the \p flags parameter can be used to control how the OS + thread, which owns the CUDA context at the time of an API call, interacts + with the OS scheduler when waiting for results from the GPU. Only one of + the scheduling flags can be set when creating a context. + + - ::CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for + results from the GPU. This can decrease latency when waiting for the GPU, + but may lower the performance of CPU threads if they are performing work in + parallel with the CUDA thread. + + - ::CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for + results from the GPU. This can increase latency when waiting for the GPU, + but can increase the performance of CPU threads performing work in parallel + with the GPU. + + - ::CU_CTX_SCHED_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a + synchronization primitive when waiting for the GPU to finish work. + + - ::CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a + synchronization primitive when waiting for the GPU to finish work. <br> + <b>Deprecated:</b> This flag was deprecated as of CUDA 4.0 and was + replaced with ::CU_CTX_SCHED_BLOCKING_SYNC. + + - ::CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is zero, + uses a heuristic based on the number of active CUDA contexts in the + process \e C and the number of logical processors in the system \e P. If + \e C > \e P, then CUDA will yield to other OS threads when waiting for + the GPU (::CU_CTX_SCHED_YIELD), otherwise CUDA will not yield while + waiting for results and actively spin on the processor (::CU_CTX_SCHED_SPIN). + Additionally, on Tegra devices, ::CU_CTX_SCHED_AUTO uses a heuristic based on + the power profile of the platform and may choose ::CU_CTX_SCHED_BLOCKING_SYNC + for low-powered devices. + + - ::CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory + after resizing local memory for a kernel. This can prevent thrashing by + local memory allocations when launching many kernels with high local + memory usage at the cost of potentially increased memory usage. <br> + <b>Deprecated:</b> This flag is deprecated and the behavior enabled + by this flag is now the default and cannot be disabled. + + - ::CU_CTX_COREDUMP_ENABLE: If GPU coredumps have not been enabled globally + with ::cuCoredumpSetAttributeGlobal or environment variables, this flag can + be set during context creation to instruct CUDA to create a coredump if + this context raises an exception during execution. These environment variables + are described in the CUDA-GDB user guide under the "GPU core dump support" + section. + The initial settings will be taken from the global settings at the time of + context creation. The other settings that control coredump output can be + modified by calling ::cuCoredumpSetAttribute from the created context after + it becomes current. + + - ::CU_CTX_USER_COREDUMP_ENABLE: If user-triggered GPU coredumps have not + been enabled globally with ::cuCoredumpSetAttributeGlobal or environment + variables, this flag can be set during context creation to instruct CUDA to + create a coredump if data is written to a certain pipe that is present in the + OS space. These environment variables are described in the CUDA-GDB user + guide under the "GPU core dump support" section. + It is important to note that the pipe name *must* be set with + ::cuCoredumpSetAttributeGlobal before creating the context if this flag is + used. Setting this flag implies that ::CU_CTX_COREDUMP_ENABLE is set. + The initial settings will be taken from the global settings at the time of + context creation. The other settings that control coredump output can be + modified by calling ::cuCoredumpSetAttribute from the created context after + it becomes current. + + - ::CU_CTX_SYNC_MEMOPS: Ensures that synchronous memory operations initiated + on this context will always synchronize. See further documentation in the + section titled "API Synchronization behavior" to learn more about cases when + synchronous memory operations can exhibit asynchronous behavior. + + \param dev - Device for which the primary context flags are set + \param flags - New flags for the device + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_INVALID_VALUE, + \notefnerr + + \sa ::cuDevicePrimaryCtxRetain, + ::cuDevicePrimaryCtxGetState, + ::cuCtxCreate, + ::cuCtxGetFlags, + ::cuCtxSetFlags, + ::cudaSetDeviceFlags*/ + fn cuDevicePrimaryCtxSetFlags_v2( + dev: cuda_types::CUdevice, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Get the state of the primary context + + Returns in \p *flags the flags for the primary context of \p dev, and in + \p *active whether it is active. See ::cuDevicePrimaryCtxSetFlags for flag + values. + + \param dev - Device to get primary context flags for + \param flags - Pointer to store flags + \param active - Pointer to store context state; 0 = inactive, 1 = active + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_INVALID_VALUE, + \notefnerr + + \sa + ::cuDevicePrimaryCtxSetFlags, + ::cuCtxGetFlags, + ::cuCtxSetFlags, + ::cudaGetDeviceFlags*/ + fn cuDevicePrimaryCtxGetState( + dev: cuda_types::CUdevice, + flags: *mut ::core::ffi::c_uint, + active: *mut ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Destroy all allocations and reset all state on the primary context + + Explicitly destroys and cleans up all resources associated with the current + device in the current process. + + Note that it is responsibility of the calling function to ensure that no + other module in the process is using the device any more. For that reason + it is recommended to use ::cuDevicePrimaryCtxRelease() in most cases. + However it is safe for other modules to call ::cuDevicePrimaryCtxRelease() + even after resetting the device. + Resetting the primary context does not release it, an application that has + retained the primary context should explicitly release its usage. + + \param dev - Device for which primary context is destroyed + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE + \notefnerr + + \sa ::cuDevicePrimaryCtxRetain, + ::cuDevicePrimaryCtxRelease, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCtxSynchronize, + ::cudaDeviceReset*/ + fn cuDevicePrimaryCtxReset_v2(dev: cuda_types::CUdevice) -> cuda_types::CUresult; + /** \brief Create a CUDA context + + \note In most cases it is recommended to use ::cuDevicePrimaryCtxRetain. + + Creates a new CUDA context and associates it with the calling thread. The + \p flags parameter is described below. The context is created with a usage + count of 1 and the caller of ::cuCtxCreate() must call ::cuCtxDestroy() + when done using the context. If a context is already current to the thread, + it is supplanted by the newly created context and may be restored by a subsequent + call to ::cuCtxPopCurrent(). + + The three LSBs of the \p flags parameter can be used to control how the OS + thread, which owns the CUDA context at the time of an API call, interacts + with the OS scheduler when waiting for results from the GPU. Only one of + the scheduling flags can be set when creating a context. + + - ::CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for + results from the GPU. This can decrease latency when waiting for the GPU, + but may lower the performance of CPU threads if they are performing work in + parallel with the CUDA thread. + + - ::CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for + results from the GPU. This can increase latency when waiting for the GPU, + but can increase the performance of CPU threads performing work in parallel + with the GPU. + + - ::CU_CTX_SCHED_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a + synchronization primitive when waiting for the GPU to finish work. + + - ::CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a + synchronization primitive when waiting for the GPU to finish work. <br> + <b>Deprecated:</b> This flag was deprecated as of CUDA 4.0 and was + replaced with ::CU_CTX_SCHED_BLOCKING_SYNC. + + - ::CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is zero, + uses a heuristic based on the number of active CUDA contexts in the + process \e C and the number of logical processors in the system \e P. If + \e C > \e P, then CUDA will yield to other OS threads when waiting for + the GPU (::CU_CTX_SCHED_YIELD), otherwise CUDA will not yield while + waiting for results and actively spin on the processor (::CU_CTX_SCHED_SPIN). + Additionally, on Tegra devices, ::CU_CTX_SCHED_AUTO uses a heuristic based on + the power profile of the platform and may choose ::CU_CTX_SCHED_BLOCKING_SYNC + for low-powered devices. + + - ::CU_CTX_MAP_HOST: Instruct CUDA to support mapped pinned allocations. + This flag must be set in order to allocate pinned host memory that is + accessible to the GPU. + + - ::CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory + after resizing local memory for a kernel. This can prevent thrashing by + local memory allocations when launching many kernels with high local + memory usage at the cost of potentially increased memory usage. <br> + <b>Deprecated:</b> This flag is deprecated and the behavior enabled + by this flag is now the default and cannot be disabled. + Instead, the per-thread stack size can be controlled with ::cuCtxSetLimit(). + + - ::CU_CTX_COREDUMP_ENABLE: If GPU coredumps have not been enabled globally + with ::cuCoredumpSetAttributeGlobal or environment variables, this flag can + be set during context creation to instruct CUDA to create a coredump if + this context raises an exception during execution. These environment variables + are described in the CUDA-GDB user guide under the "GPU core dump support" + section. + The initial attributes will be taken from the global attributes at the time of + context creation. The other attributes that control coredump output can be + modified by calling ::cuCoredumpSetAttribute from the created context after + it becomes current. + + - ::CU_CTX_USER_COREDUMP_ENABLE: If user-triggered GPU coredumps have not + been enabled globally with ::cuCoredumpSetAttributeGlobal or environment + variables, this flag can be set during context creation to instruct CUDA to + create a coredump if data is written to a certain pipe that is present in the + OS space. These environment variables are described in the CUDA-GDB user + guide under the "GPU core dump support" section. + It is important to note that the pipe name *must* be set with + ::cuCoredumpSetAttributeGlobal before creating the context if this flag is + used. Setting this flag implies that ::CU_CTX_COREDUMP_ENABLE is set. + The initial attributes will be taken from the global attributes at the time of + context creation. The other attributes that control coredump output can be + modified by calling ::cuCoredumpSetAttribute from the created context after + it becomes current. + Setting this flag on any context creation is equivalent to setting the + ::CU_COREDUMP_ENABLE_USER_TRIGGER attribute to \p true globally. + + - ::CU_CTX_SYNC_MEMOPS: Ensures that synchronous memory operations initiated + on this context will always synchronize. See further documentation in the + section titled "API Synchronization behavior" to learn more about cases when + synchronous memory operations can exhibit asynchronous behavior. + + Context creation will fail with ::CUDA_ERROR_UNKNOWN if the compute mode of + the device is ::CU_COMPUTEMODE_PROHIBITED. The function ::cuDeviceGetAttribute() + can be used with ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the + compute mode of the device. The <i>nvidia-smi</i> tool can be used to set + the compute mode for * devices. + Documentation for <i>nvidia-smi</i> can be obtained by passing a + -h option to it. + + \param pctx - Returned context handle of the new context + \param flags - Context creation flags + \param dev - Device to create context on + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCoredumpSetAttributeGlobal, + ::cuCoredumpSetAttribute, + ::cuCtxSynchronize*/ + fn cuCtxCreate_v2( + pctx: *mut cuda_types::CUcontext, + flags: ::core::ffi::c_uint, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Create a CUDA context with execution affinity + + Creates a new CUDA context with execution affinity and associates it with + the calling thread. The \p paramsArray and \p flags parameter are described below. + The context is created with a usage count of 1 and the caller of ::cuCtxCreate() must + call ::cuCtxDestroy() when done using the context. If a context is already + current to the thread, it is supplanted by the newly created context and may + be restored by a subsequent call to ::cuCtxPopCurrent(). + + The type and the amount of execution resource the context can use is limited by \p paramsArray + and \p numParams. The \p paramsArray is an array of \p CUexecAffinityParam and the \p numParams + describes the size of the array. If two \p CUexecAffinityParam in the array have the same type, + the latter execution affinity parameter overrides the former execution affinity parameter. + The supported execution affinity types are: + - ::CU_EXEC_AFFINITY_TYPE_SM_COUNT limits the portion of SMs that the context can use. The portion + of SMs is specified as the number of SMs via \p CUexecAffinitySmCount. This limit will be internally + rounded up to the next hardware-supported amount. Hence, it is imperative to query the actual execution + affinity of the context via \p cuCtxGetExecAffinity after context creation. Currently, this attribute + is only supported under Volta+ MPS. + + The three LSBs of the \p flags parameter can be used to control how the OS + thread, which owns the CUDA context at the time of an API call, interacts + with the OS scheduler when waiting for results from the GPU. Only one of + the scheduling flags can be set when creating a context. + + - ::CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for + results from the GPU. This can decrease latency when waiting for the GPU, + but may lower the performance of CPU threads if they are performing work in + parallel with the CUDA thread. + + - ::CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for + results from the GPU. This can increase latency when waiting for the GPU, + but can increase the performance of CPU threads performing work in parallel + with the GPU. + + - ::CU_CTX_SCHED_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a + synchronization primitive when waiting for the GPU to finish work. + + - ::CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a + synchronization primitive when waiting for the GPU to finish work. <br> + <b>Deprecated:</b> This flag was deprecated as of CUDA 4.0 and was + replaced with ::CU_CTX_SCHED_BLOCKING_SYNC. + + - ::CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is zero, + uses a heuristic based on the number of active CUDA contexts in the + process \e C and the number of logical processors in the system \e P. If + \e C > \e P, then CUDA will yield to other OS threads when waiting for + the GPU (::CU_CTX_SCHED_YIELD), otherwise CUDA will not yield while + waiting for results and actively spin on the processor (::CU_CTX_SCHED_SPIN). + Additionally, on Tegra devices, ::CU_CTX_SCHED_AUTO uses a heuristic based on + the power profile of the platform and may choose ::CU_CTX_SCHED_BLOCKING_SYNC + for low-powered devices. + + - ::CU_CTX_MAP_HOST: Instruct CUDA to support mapped pinned allocations. + This flag must be set in order to allocate pinned host memory that is + accessible to the GPU. + + - ::CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory + after resizing local memory for a kernel. This can prevent thrashing by + local memory allocations when launching many kernels with high local + memory usage at the cost of potentially increased memory usage. <br> + <b>Deprecated:</b> This flag is deprecated and the behavior enabled + by this flag is now the default and cannot be disabled. + Instead, the per-thread stack size can be controlled with ::cuCtxSetLimit(). + + - ::CU_CTX_COREDUMP_ENABLE: If GPU coredumps have not been enabled globally + with ::cuCoredumpSetAttributeGlobal or environment variables, this flag can + be set during context creation to instruct CUDA to create a coredump if + this context raises an exception during execution. These environment variables + are described in the CUDA-GDB user guide under the "GPU core dump support" + section. + The initial attributes will be taken from the global attributes at the time of + context creation. The other attributes that control coredump output can be + modified by calling ::cuCoredumpSetAttribute from the created context after + it becomes current. + + - ::CU_CTX_USER_COREDUMP_ENABLE: If user-triggered GPU coredumps have not + been enabled globally with ::cuCoredumpSetAttributeGlobal or environment + variables, this flag can be set during context creation to instruct CUDA to + create a coredump if data is written to a certain pipe that is present in the + OS space. These environment variables are described in the CUDA-GDB user + guide under the "GPU core dump support" section. + It is important to note that the pipe name *must* be set with + ::cuCoredumpSetAttributeGlobal before creating the context if this flag is + used. Setting this flag implies that ::CU_CTX_COREDUMP_ENABLE is set. + The initial attributes will be taken from the global attributes at the time of + context creation. The other attributes that control coredump output can be + modified by calling ::cuCoredumpSetAttribute from the created context after + it becomes current. + Setting this flag on any context creation is equivalent to setting the + ::CU_COREDUMP_ENABLE_USER_TRIGGER attribute to \p true globally. + + Context creation will fail with ::CUDA_ERROR_UNKNOWN if the compute mode of + the device is ::CU_COMPUTEMODE_PROHIBITED. The function ::cuDeviceGetAttribute() + can be used with ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the + compute mode of the device. The <i>nvidia-smi</i> tool can be used to set + the compute mode for * devices. + Documentation for <i>nvidia-smi</i> can be obtained by passing a + -h option to it. + + \param pctx - Returned context handle of the new context + \param paramsArray - Execution affinity parameters + \param numParams - Number of execution affinity parameters + \param flags - Context creation flags + \param dev - Device to create context on + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCtxSynchronize, + ::cuCoredumpSetAttributeGlobal, + ::cuCoredumpSetAttribute, + ::CUexecAffinityParam*/ + fn cuCtxCreate_v3( + pctx: *mut cuda_types::CUcontext, + paramsArray: *mut cuda_types::CUexecAffinityParam, + numParams: ::core::ffi::c_int, + flags: ::core::ffi::c_uint, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Destroy a CUDA context + + Destroys the CUDA context specified by \p ctx. The context \p ctx will be + destroyed regardless of how many threads it is current to. + It is the responsibility of the calling function to ensure that no API + call issues using \p ctx while ::cuCtxDestroy() is executing. + + Destroys and cleans up all resources associated with the context. + It is the caller's responsibility to ensure that the context or its resources + are not accessed or passed in subsequent API calls and doing so will result in undefined behavior. + These resources include CUDA types such as ::CUmodule, ::CUfunction, ::CUstream, ::CUevent, + ::CUarray, ::CUmipmappedArray, ::CUtexObject, ::CUsurfObject, ::CUtexref, ::CUsurfref, + ::CUgraphicsResource, ::CUlinkState, ::CUexternalMemory and ::CUexternalSemaphore. + + If \p ctx is current to the calling thread then \p ctx will also be + popped from the current thread's context stack (as though ::cuCtxPopCurrent() + were called). If \p ctx is current to other threads, then \p ctx will + remain current to those threads, and attempting to access \p ctx from + those threads will result in the error ::CUDA_ERROR_CONTEXT_IS_DESTROYED. + + \param ctx - Context to destroy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCtxSynchronize*/ + fn cuCtxDestroy_v2(ctx: cuda_types::CUcontext) -> cuda_types::CUresult; + /** \brief Pushes a context on the current CPU thread + + Pushes the given context \p ctx onto the CPU thread's stack of current + contexts. The specified context becomes the CPU thread's current context, so + all CUDA functions that operate on the current context are affected. + + The previous current context may be made current again by calling + ::cuCtxDestroy() or ::cuCtxPopCurrent(). + + \param ctx - Context to push + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCtxSynchronize*/ + fn cuCtxPushCurrent_v2(ctx: cuda_types::CUcontext) -> cuda_types::CUresult; + /** \brief Pops the current CUDA context from the current CPU thread. + + Pops the current CUDA context from the CPU thread and passes back the + old context handle in \p *pctx. That context may then be made current + to a different CPU thread by calling ::cuCtxPushCurrent(). + + If a context was current to the CPU thread before ::cuCtxCreate() or + ::cuCtxPushCurrent() was called, this function makes that context current to + the CPU thread again. + + \param pctx - Returned popped context handle + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCtxSynchronize*/ + fn cuCtxPopCurrent_v2(pctx: *mut cuda_types::CUcontext) -> cuda_types::CUresult; + /** \brief Binds the specified CUDA context to the calling CPU thread + + Binds the specified CUDA context to the calling CPU thread. + If \p ctx is NULL then the CUDA context previously bound to the + calling CPU thread is unbound and ::CUDA_SUCCESS is returned. + + If there exists a CUDA context stack on the calling CPU thread, this + will replace the top of that stack with \p ctx. + If \p ctx is NULL then this will be equivalent to popping the top + of the calling CPU thread's CUDA context stack (or a no-op if the + calling CPU thread's CUDA context stack is empty). + + \param ctx - Context to bind to the calling CPU thread + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT + \notefnerr + + \sa + ::cuCtxGetCurrent, + ::cuCtxCreate, + ::cuCtxDestroy, + ::cudaSetDevice*/ + fn cuCtxSetCurrent(ctx: cuda_types::CUcontext) -> cuda_types::CUresult; + /** \brief Returns the CUDA context bound to the calling CPU thread. + + Returns in \p *pctx the CUDA context bound to the calling CPU thread. + If no context is bound to the calling CPU thread then \p *pctx is + set to NULL and ::CUDA_SUCCESS is returned. + + \param pctx - Returned context handle + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + \notefnerr + + \sa + ::cuCtxSetCurrent, + ::cuCtxCreate, + ::cuCtxDestroy, + ::cudaGetDevice*/ + fn cuCtxGetCurrent(pctx: *mut cuda_types::CUcontext) -> cuda_types::CUresult; + /** \brief Returns the device ID for the current context + + Returns in \p *device the ordinal of the current context's device. + + \param device - Returned device ID for the current context + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCtxSynchronize, + ::cudaGetDevice*/ + fn cuCtxGetDevice(device: *mut cuda_types::CUdevice) -> cuda_types::CUresult; + /** \brief Returns the flags for the current context + + Returns in \p *flags the flags of the current context. See ::cuCtxCreate + for flag values. + + \param flags - Pointer to store flags of current context + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetCurrent, + ::cuCtxGetDevice, + ::cuCtxGetLimit, + ::cuCtxGetSharedMemConfig, + ::cuCtxGetStreamPriorityRange, + ::cuCtxSetFlags, + ::cudaGetDeviceFlags*/ + fn cuCtxGetFlags(flags: *mut ::core::ffi::c_uint) -> cuda_types::CUresult; + /** \brief Sets the flags for the current context + + Sets the flags for the current context overwriting previously set ones. See + ::cuDevicePrimaryCtxSetFlags for flag values. + + \param flags - Flags to set on the current context + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetCurrent, + ::cuCtxGetDevice, + ::cuCtxGetLimit, + ::cuCtxGetSharedMemConfig, + ::cuCtxGetStreamPriorityRange, + ::cuCtxGetFlags, + ::cudaGetDeviceFlags, + ::cuDevicePrimaryCtxSetFlags,*/ + fn cuCtxSetFlags(flags: ::core::ffi::c_uint) -> cuda_types::CUresult; + /** \brief Returns the unique Id associated with the context supplied + + Returns in \p ctxId the unique Id which is associated with a given context. + The Id is unique for the life of the program for this instance of CUDA. + If context is supplied as NULL and there is one current, the Id of the + current context is returned. + + \param ctx - Context for which to obtain the Id + \param ctxId - Pointer to store the Id of the context + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_CONTEXT_IS_DESTROYED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPushCurrent*/ + fn cuCtxGetId( + ctx: cuda_types::CUcontext, + ctxId: *mut ::core::ffi::c_ulonglong, + ) -> cuda_types::CUresult; + /** \brief Block for a context's tasks to complete + + Blocks until the device has completed all preceding requested tasks. + ::cuCtxSynchronize() returns an error if one of the preceding tasks failed. + If the context was created with the ::CU_CTX_SCHED_BLOCKING_SYNC flag, the + CPU thread will block until the GPU context has finished its work. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cudaDeviceSynchronize*/ + fn cuCtxSynchronize() -> cuda_types::CUresult; + /** \brief Set resource limits + + Setting \p limit to \p value is a request by the application to update + the current limit maintained by the context. The driver is free to + modify the requested value to meet h/w requirements (this could be + clamping to minimum or maximum values, rounding up to nearest element + size, etc). The application can use ::cuCtxGetLimit() to find out exactly + what the limit has been set to. + + Setting each ::CUlimit has its own specific restrictions, so each is + discussed here. + + - ::CU_LIMIT_STACK_SIZE controls the stack size in bytes of each GPU thread. + The driver automatically increases the per-thread stack size + for each kernel launch as needed. This size isn't reset back to the + original value after each launch. Setting this value will take effect + immediately, and if necessary, the device will block until all preceding + requested tasks are complete. + + - ::CU_LIMIT_PRINTF_FIFO_SIZE controls the size in bytes of the FIFO used + by the ::printf() device system call. Setting ::CU_LIMIT_PRINTF_FIFO_SIZE + must be performed before launching any kernel that uses the ::printf() + device system call, otherwise ::CUDA_ERROR_INVALID_VALUE will be returned. + + - ::CU_LIMIT_MALLOC_HEAP_SIZE controls the size in bytes of the heap used + by the ::malloc() and ::free() device system calls. Setting + ::CU_LIMIT_MALLOC_HEAP_SIZE must be performed before launching any kernel + that uses the ::malloc() or ::free() device system calls, otherwise + ::CUDA_ERROR_INVALID_VALUE will be returned. + + - ::CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH controls the maximum nesting depth of + a grid at which a thread can safely call ::cudaDeviceSynchronize(). Setting + this limit must be performed before any launch of a kernel that uses the + device runtime and calls ::cudaDeviceSynchronize() above the default sync + depth, two levels of grids. Calls to ::cudaDeviceSynchronize() will fail + with error code ::cudaErrorSyncDepthExceeded if the limitation is + violated. This limit can be set smaller than the default or up the maximum + launch depth of 24. When setting this limit, keep in mind that additional + levels of sync depth require the driver to reserve large amounts of device + memory which can no longer be used for user allocations. If these + reservations of device memory fail, ::cuCtxSetLimit() will return + ::CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower value. + This limit is only applicable to devices of compute capability < 9.0. + Attempting to set this limit on devices of other compute capability + versions will result in the error ::CUDA_ERROR_UNSUPPORTED_LIMIT being + returned. + + - ::CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT controls the maximum number of + outstanding device runtime launches that can be made from the current + context. A grid is outstanding from the point of launch up until the grid + is known to have been completed. Device runtime launches which violate + this limitation fail and return ::cudaErrorLaunchPendingCountExceeded when + ::cudaGetLastError() is called after launch. If more pending launches than + the default (2048 launches) are needed for a module using the device + runtime, this limit can be increased. Keep in mind that being able to + sustain additional pending launches will require the driver to reserve + larger amounts of device memory upfront which can no longer be used for + allocations. If these reservations fail, ::cuCtxSetLimit() will return + ::CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower value. + This limit is only applicable to devices of compute capability 3.5 and + higher. Attempting to set this limit on devices of compute capability less + than 3.5 will result in the error ::CUDA_ERROR_UNSUPPORTED_LIMIT being + returned. + + - ::CU_LIMIT_MAX_L2_FETCH_GRANULARITY controls the L2 cache fetch granularity. + Values can range from 0B to 128B. This is purely a performance hint and + it can be ignored or clamped depending on the platform. + + - ::CU_LIMIT_PERSISTING_L2_CACHE_SIZE controls size in bytes available for + persisting L2 cache. This is purely a performance hint and it can be + ignored or clamped depending on the platform. + + \param limit - Limit to set + \param value - Size of limit + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_UNSUPPORTED_LIMIT, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_INVALID_CONTEXT + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSynchronize, + ::cudaDeviceSetLimit*/ + fn cuCtxSetLimit(limit: cuda_types::CUlimit, value: usize) -> cuda_types::CUresult; + /** \brief Returns resource limits + + Returns in \p *pvalue the current size of \p limit. The supported + ::CUlimit values are: + - ::CU_LIMIT_STACK_SIZE: stack size in bytes of each GPU thread. + - ::CU_LIMIT_PRINTF_FIFO_SIZE: size in bytes of the FIFO used by the + ::printf() device system call. + - ::CU_LIMIT_MALLOC_HEAP_SIZE: size in bytes of the heap used by the + ::malloc() and ::free() device system calls. + - ::CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH: maximum grid depth at which a thread + can issue the device runtime call ::cudaDeviceSynchronize() to wait on + child grid launches to complete. + - ::CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT: maximum number of outstanding + device runtime launches that can be made from this context. + - ::CU_LIMIT_MAX_L2_FETCH_GRANULARITY: L2 cache fetch granularity. + - ::CU_LIMIT_PERSISTING_L2_CACHE_SIZE: Persisting L2 cache size in bytes + + \param limit - Limit to query + \param pvalue - Returned size of limit + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_UNSUPPORTED_LIMIT + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCtxSynchronize, + ::cudaDeviceGetLimit*/ + fn cuCtxGetLimit( + pvalue: *mut usize, + limit: cuda_types::CUlimit, + ) -> cuda_types::CUresult; + /** \brief Returns the preferred cache configuration for the current context. + + On devices where the L1 cache and shared memory use the same hardware + resources, this function returns through \p pconfig the preferred cache configuration + for the current context. This is only a preference. The driver will use + the requested configuration if possible, but it is free to choose a different + configuration if required to execute functions. + + This will return a \p pconfig of ::CU_FUNC_CACHE_PREFER_NONE on devices + where the size of the L1 cache and shared memory are fixed. + + The supported cache configurations are: + - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default) + - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache + - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory + - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory + + \param pconfig - Returned cache configuration + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCtxSynchronize, + ::cuFuncSetCacheConfig, + ::cudaDeviceGetCacheConfig*/ + fn cuCtxGetCacheConfig( + pconfig: *mut cuda_types::CUfunc_cache, + ) -> cuda_types::CUresult; + /** \brief Sets the preferred cache configuration for the current context. + + On devices where the L1 cache and shared memory use the same hardware + resources, this sets through \p config the preferred cache configuration for + the current context. This is only a preference. The driver will use + the requested configuration if possible, but it is free to choose a different + configuration if required to execute the function. Any function preference + set via ::cuFuncSetCacheConfig() or ::cuKernelSetCacheConfig() will be preferred over this context-wide + setting. Setting the context-wide cache configuration to + ::CU_FUNC_CACHE_PREFER_NONE will cause subsequent kernel launches to prefer + to not change the cache configuration unless required to launch the kernel. + + This setting does nothing on devices where the size of the L1 cache and + shared memory are fixed. + + Launching a kernel with a different preference than the most recent + preference setting may insert a device-side synchronization point. + + The supported cache configurations are: + - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default) + - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache + - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory + - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory + + \param config - Requested cache configuration + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetLimit, + ::cuCtxSynchronize, + ::cuFuncSetCacheConfig, + ::cudaDeviceSetCacheConfig, + ::cuKernelSetCacheConfig*/ + fn cuCtxSetCacheConfig(config: cuda_types::CUfunc_cache) -> cuda_types::CUresult; + /** \brief Gets the context's API version. + + Returns a version number in \p version corresponding to the capabilities of + the context (e.g. 3010 or 3020), which library developers can use to direct + callers to a specific API version. If \p ctx is NULL, returns the API version + used to create the currently bound context. + + Note that new API versions are only introduced when context capabilities are + changed that break binary compatibility, so the API version and driver version + may be different. For example, it is valid for the API version to be 3020 while + the driver version is 4020. + + \param ctx - Context to check + \param version - Pointer to version + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCtxSynchronize*/ + fn cuCtxGetApiVersion( + ctx: cuda_types::CUcontext, + version: *mut ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Returns numerical values that correspond to the least and + greatest stream priorities. + + Returns in \p *leastPriority and \p *greatestPriority the numerical values that correspond + to the least and greatest stream priorities respectively. Stream priorities + follow a convention where lower numbers imply greater priorities. The range of + meaningful stream priorities is given by [\p *greatestPriority, \p *leastPriority]. + If the user attempts to create a stream with a priority value that is + outside the meaningful range as specified by this API, the priority is + automatically clamped down or up to either \p *leastPriority or \p *greatestPriority + respectively. See ::cuStreamCreateWithPriority for details on creating a + priority stream. + A NULL may be passed in for \p *leastPriority or \p *greatestPriority if the value + is not desired. + + This function will return '0' in both \p *leastPriority and \p *greatestPriority if + the current context's device does not support stream priorities + (see ::cuDeviceGetAttribute). + + \param leastPriority - Pointer to an int in which the numerical value for least + stream priority is returned + \param greatestPriority - Pointer to an int in which the numerical value for greatest + stream priority is returned + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \notefnerr + + \sa ::cuStreamCreateWithPriority, + ::cuStreamGetPriority, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxSetLimit, + ::cuCtxSynchronize, + ::cudaDeviceGetStreamPriorityRange*/ + fn cuCtxGetStreamPriorityRange( + leastPriority: *mut ::core::ffi::c_int, + greatestPriority: *mut ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Resets all persisting lines in cache to normal status. + + ::cuCtxResetPersistingL2Cache Resets all persisting lines in cache to normal + status. Takes effect on function return. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + + \sa + ::CUaccessPolicyWindow*/ + fn cuCtxResetPersistingL2Cache() -> cuda_types::CUresult; + /** \brief Returns the execution affinity setting for the current context. + + Returns in \p *pExecAffinity the current value of \p type. The supported + ::CUexecAffinityType values are: + - ::CU_EXEC_AFFINITY_TYPE_SM_COUNT: number of SMs the context is limited to use. + + \param type - Execution affinity type to query + \param pExecAffinity - Returned execution affinity + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY + \notefnerr + + \sa + ::CUexecAffinityParam*/ + fn cuCtxGetExecAffinity( + pExecAffinity: *mut cuda_types::CUexecAffinityParam, + type_: cuda_types::CUexecAffinityType, + ) -> cuda_types::CUresult; + /** \brief Increment a context's usage-count + + \deprecated + + Note that this function is deprecated and should not be used. + + Increments the usage count of the context and passes back a context handle + in \p *pctx that must be passed to ::cuCtxDetach() when the application is + done with the context. ::cuCtxAttach() fails if there is no context current + to the thread. + + Currently, the \p flags parameter must be 0. + + \param pctx - Returned context handle of the current context + \param flags - Context attach flags (must be 0) + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxDetach, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCtxSynchronize*/ + fn cuCtxAttach( + pctx: *mut cuda_types::CUcontext, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Decrement a context's usage-count + + \deprecated + + Note that this function is deprecated and should not be used. + + Decrements the usage count of the context \p ctx, and destroys the context + if the usage count goes to 0. The context must be a handle that was passed + back by ::cuCtxCreate() or ::cuCtxAttach(), and must be current to the + calling thread. + + \param ctx - Context to destroy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCtxSynchronize*/ + fn cuCtxDetach(ctx: cuda_types::CUcontext) -> cuda_types::CUresult; + /** \brief Returns the current shared memory configuration for the current context. + + \deprecated + + This function will return in \p pConfig the current size of shared memory banks + in the current context. On devices with configurable shared memory banks, + ::cuCtxSetSharedMemConfig can be used to change this setting, so that all + subsequent kernel launches will by default use the new bank size. When + ::cuCtxGetSharedMemConfig is called on devices without configurable shared + memory, it will return the fixed bank size of the hardware. + + The returned bank configurations can be either: + - ::CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: shared memory bank width is + four bytes. + - ::CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: shared memory bank width will + eight bytes. + + \param pConfig - returned shared memory configuration + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetLimit, + ::cuCtxSynchronize, + ::cuCtxGetSharedMemConfig, + ::cuFuncSetCacheConfig, + ::cudaDeviceGetSharedMemConfig*/ + fn cuCtxGetSharedMemConfig( + pConfig: *mut cuda_types::CUsharedconfig, + ) -> cuda_types::CUresult; + /** \brief Sets the shared memory configuration for the current context. + + \deprecated + + On devices with configurable shared memory banks, this function will set + the context's shared memory bank size which is used for subsequent kernel + launches. + + Changed the shared memory configuration between launches may insert a device + side synchronization point between those launches. + + Changing the shared memory bank size will not increase shared memory usage + or affect occupancy of kernels, but may have major effects on performance. + Larger bank sizes will allow for greater potential bandwidth to shared memory, + but will change what kinds of accesses to shared memory will result in bank + conflicts. + + This function will do nothing on devices with fixed shared memory bank size. + + The supported bank configurations are: + - ::CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: set bank width to the default initial + setting (currently, four bytes). + - ::CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width to + be natively four bytes. + - ::CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank width to + be natively eight bytes. + + \param config - requested shared memory configuration + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuCtxCreate, + ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetLimit, + ::cuCtxSynchronize, + ::cuCtxGetSharedMemConfig, + ::cuFuncSetCacheConfig, + ::cudaDeviceSetSharedMemConfig*/ + fn cuCtxSetSharedMemConfig( + config: cuda_types::CUsharedconfig, + ) -> cuda_types::CUresult; + /** \brief Loads a compute module + + Takes a filename \p fname and loads the corresponding module \p module into + the current context. The CUDA driver API does not attempt to lazily + allocate the resources needed by a module; if the memory for functions and + data (constant and global) needed by the module cannot be allocated, + ::cuModuleLoad() fails. The file should be a \e cubin file as output by + \b nvcc, or a \e PTX file either as output by \b nvcc or handwritten, or + a \e fatbin file as output by \b nvcc from toolchain 4.0 or later. + + \param module - Returned module + \param fname - Filename of module to load + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_PTX, + ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION, + ::CUDA_ERROR_NOT_FOUND, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_FILE_NOT_FOUND, + ::CUDA_ERROR_NO_BINARY_FOR_GPU, + ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, + ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, + ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND + \notefnerr + + \sa ::cuModuleGetFunction, + ::cuModuleGetGlobal, + ::cuModuleGetTexRef, + ::cuModuleLoadData, + ::cuModuleLoadDataEx, + ::cuModuleLoadFatBinary, + ::cuModuleUnload*/ + fn cuModuleLoad( + module: *mut cuda_types::CUmodule, + fname: *const ::core::ffi::c_char, + ) -> cuda_types::CUresult; + /** \brief Load a module's data + + Takes a pointer \p image and loads the corresponding module \p module into + the current context. The \p image may be a \e cubin or \e fatbin + as output by \b nvcc, or a NULL-terminated \e PTX, either as output by \b nvcc + or hand-written. + + \param module - Returned module + \param image - Module data to load + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_PTX, + ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_NO_BINARY_FOR_GPU, + ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, + ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, + ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND + \notefnerr + + \sa ::cuModuleGetFunction, + ::cuModuleGetGlobal, + ::cuModuleGetTexRef, + ::cuModuleLoad, + ::cuModuleLoadDataEx, + ::cuModuleLoadFatBinary, + ::cuModuleUnload*/ + fn cuModuleLoadData( + module: *mut cuda_types::CUmodule, + image: *const ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Load a module's data with options + + Takes a pointer \p image and loads the corresponding module \p module into + the current context. The \p image may be a \e cubin or \e fatbin + as output by \b nvcc, or a NULL-terminated \e PTX, either as output by \b nvcc + or hand-written. + + \param module - Returned module + \param image - Module data to load + \param numOptions - Number of options + \param options - Options for JIT + \param optionValues - Option values for JIT + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_PTX, + ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_NO_BINARY_FOR_GPU, + ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, + ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, + ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND + \notefnerr + + \sa ::cuModuleGetFunction, + ::cuModuleGetGlobal, + ::cuModuleGetTexRef, + ::cuModuleLoad, + ::cuModuleLoadData, + ::cuModuleLoadFatBinary, + ::cuModuleUnload*/ + fn cuModuleLoadDataEx( + module: *mut cuda_types::CUmodule, + image: *const ::core::ffi::c_void, + numOptions: ::core::ffi::c_uint, + options: *mut cuda_types::CUjit_option, + optionValues: *mut *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Load a module's data + + Takes a pointer \p fatCubin and loads the corresponding module \p module + into the current context. The pointer represents a <i>fat binary</i> object, + which is a collection of different \e cubin and/or \e PTX files, all + representing the same device code, but compiled and optimized for different + architectures. + + Prior to CUDA 4.0, there was no documented API for constructing and using + fat binary objects by programmers. Starting with CUDA 4.0, fat binary + objects can be constructed by providing the <i>-fatbin option</i> to \b nvcc. + More information can be found in the \b nvcc document. + + \param module - Returned module + \param fatCubin - Fat binary to load + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_PTX, + ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION, + ::CUDA_ERROR_NOT_FOUND, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_NO_BINARY_FOR_GPU, + ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, + ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, + ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND + \notefnerr + + \sa ::cuModuleGetFunction, + ::cuModuleGetGlobal, + ::cuModuleGetTexRef, + ::cuModuleLoad, + ::cuModuleLoadData, + ::cuModuleLoadDataEx, + ::cuModuleUnload*/ + fn cuModuleLoadFatBinary( + module: *mut cuda_types::CUmodule, + fatCubin: *const ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Unloads a module + + Unloads a module \p hmod from the current context. Attempting to unload + a module which was obtained from the Library Management API such as + ::cuLibraryGetModule will return ::CUDA_ERROR_NOT_PERMITTED. + + \param hmod - Module to unload + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_PERMITTED + \notefnerr + \note_destroy_ub + + \sa ::cuModuleGetFunction, + ::cuModuleGetGlobal, + ::cuModuleGetTexRef, + ::cuModuleLoad, + ::cuModuleLoadData, + ::cuModuleLoadDataEx, + ::cuModuleLoadFatBinary*/ + fn cuModuleUnload(hmod: cuda_types::CUmodule) -> cuda_types::CUresult; + /** \brief Query lazy loading mode + + Returns lazy loading mode + Module loading mode is controlled by CUDA_MODULE_LOADING env variable + + \param mode - Returns the lazy loading mode + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \notefnerr + + \sa + ::cuModuleLoad,*/ + fn cuModuleGetLoadingMode( + mode: *mut cuda_types::CUmoduleLoadingMode, + ) -> cuda_types::CUresult; + /** \brief Returns a function handle + + Returns in \p *hfunc the handle of the function of name \p name located in + module \p hmod. If no function of that name exists, ::cuModuleGetFunction() + returns ::CUDA_ERROR_NOT_FOUND. + + \param hfunc - Returned function handle + \param hmod - Module to retrieve function from + \param name - Name of function to retrieve + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_FOUND + \notefnerr + + \sa ::cuModuleGetGlobal, + ::cuModuleGetTexRef, + ::cuModuleLoad, + ::cuModuleLoadData, + ::cuModuleLoadDataEx, + ::cuModuleLoadFatBinary, + ::cuModuleUnload*/ + fn cuModuleGetFunction( + hfunc: *mut cuda_types::CUfunction, + hmod: cuda_types::CUmodule, + name: *const ::core::ffi::c_char, + ) -> cuda_types::CUresult; + /** \brief Returns the number of functions within a module + + Returns in \p count the number of functions in \p mod. + + \param count - Number of functions found within the module + \param mod - Module to query + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE*/ + fn cuModuleGetFunctionCount( + count: *mut ::core::ffi::c_uint, + mod_: cuda_types::CUmodule, + ) -> cuda_types::CUresult; + /** \brief Returns the function handles within a module. + + Returns in \p functions a maximum number of \p numFunctions function handles within \p mod. When + function loading mode is set to LAZY the function retrieved may be partially loaded. The loading + state of a function can be queried using ::cuFunctionIsLoaded. CUDA APIs may load the function + automatically when called with partially loaded function handle which may incur additional + latency. Alternatively, ::cuFunctionLoad can be used to explicitly load a function. The returned + function handles become invalid when the module is unloaded. + + \param functions - Buffer where the function handles are returned to + \param numFunctions - Maximum number of function handles may be returned to the buffer + \param mod - Module to query from + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuModuleGetFunction, + ::cuModuleGetFunctionCount, + ::cuFuncIsLoaded, + ::cuFuncLoad*/ + fn cuModuleEnumerateFunctions( + functions: *mut cuda_types::CUfunction, + numFunctions: ::core::ffi::c_uint, + mod_: cuda_types::CUmodule, + ) -> cuda_types::CUresult; + /** \brief Returns a global pointer from a module + + Returns in \p *dptr and \p *bytes the base pointer and size of the + global of name \p name located in module \p hmod. If no variable of that name + exists, ::cuModuleGetGlobal() returns ::CUDA_ERROR_NOT_FOUND. + One of the parameters \p dptr or \p bytes (not both) can be NULL in which + case it is ignored. + + \param dptr - Returned global device pointer + \param bytes - Returned global size in bytes + \param hmod - Module to retrieve global from + \param name - Name of global to retrieve + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_FOUND + \notefnerr + + \sa ::cuModuleGetFunction, + ::cuModuleGetTexRef, + ::cuModuleLoad, + ::cuModuleLoadData, + ::cuModuleLoadDataEx, + ::cuModuleLoadFatBinary, + ::cuModuleUnload, + ::cudaGetSymbolAddress, + ::cudaGetSymbolSize*/ + fn cuModuleGetGlobal_v2( + dptr: *mut cuda_types::CUdeviceptr, + bytes: *mut usize, + hmod: cuda_types::CUmodule, + name: *const ::core::ffi::c_char, + ) -> cuda_types::CUresult; + /** \brief Creates a pending JIT linker invocation. + + If the call is successful, the caller owns the returned CUlinkState, which + should eventually be destroyed with ::cuLinkDestroy. The + device code machine size (32 or 64 bit) will match the calling application. + + Both linker and compiler options may be specified. Compiler options will + be applied to inputs to this linker action which must be compiled from PTX. + The options ::CU_JIT_WALL_TIME, + ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, and ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES + will accumulate data until the CUlinkState is destroyed. + + \p optionValues must remain valid for the life of the CUlinkState if output + options are used. No other references to inputs are maintained after this + call returns. + + \note For LTO-IR input, only LTO-IR compiled with toolkits prior to CUDA 12.0 will be accepted + + \param numOptions Size of options arrays + \param options Array of linker and compiler options + \param optionValues Array of option values, each cast to void * + \param stateOut On success, this will contain a CUlinkState to specify + and complete this action + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND + \notefnerr + + \sa ::cuLinkAddData, + ::cuLinkAddFile, + ::cuLinkComplete, + ::cuLinkDestroy*/ + fn cuLinkCreate_v2( + numOptions: ::core::ffi::c_uint, + options: *mut cuda_types::CUjit_option, + optionValues: *mut *mut ::core::ffi::c_void, + stateOut: *mut cuda_types::CUlinkState, + ) -> cuda_types::CUresult; + /** \brief Add an input to a pending linker invocation + + Ownership of \p data is retained by the caller. No reference is retained to any + inputs after this call returns. + + This method accepts only compiler options, which are used if the data must + be compiled from PTX, and does not accept any of + ::CU_JIT_WALL_TIME, ::CU_JIT_INFO_LOG_BUFFER, ::CU_JIT_ERROR_LOG_BUFFER, + ::CU_JIT_TARGET_FROM_CUCONTEXT, or ::CU_JIT_TARGET. + + \note For LTO-IR input, only LTO-IR compiled with toolkits prior to CUDA 12.0 will be accepted + + \param state A pending linker action. + \param type The type of the input data. + \param data The input data. PTX must be NULL-terminated. + \param size The length of the input data. + \param name An optional name for this input in log messages. + \param numOptions Size of options. + \param options Options to be applied only for this input (overrides options from ::cuLinkCreate). + \param optionValues Array of option values, each cast to void *. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_IMAGE, + ::CUDA_ERROR_INVALID_PTX, + ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_NO_BINARY_FOR_GPU + + \sa ::cuLinkCreate, + ::cuLinkAddFile, + ::cuLinkComplete, + ::cuLinkDestroy*/ + fn cuLinkAddData_v2( + state: cuda_types::CUlinkState, + type_: cuda_types::CUjitInputType, + data: *mut ::core::ffi::c_void, + size: usize, + name: *const ::core::ffi::c_char, + numOptions: ::core::ffi::c_uint, + options: *mut cuda_types::CUjit_option, + optionValues: *mut *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Add a file input to a pending linker invocation + + No reference is retained to any inputs after this call returns. + + This method accepts only compiler options, which are used if the input + must be compiled from PTX, and does not accept any of + ::CU_JIT_WALL_TIME, ::CU_JIT_INFO_LOG_BUFFER, ::CU_JIT_ERROR_LOG_BUFFER, + ::CU_JIT_TARGET_FROM_CUCONTEXT, or ::CU_JIT_TARGET. + + This method is equivalent to invoking ::cuLinkAddData on the contents + of the file. + + \note For LTO-IR input, only LTO-IR compiled with toolkits prior to CUDA 12.0 will be accepted + + \param state A pending linker action + \param type The type of the input data + \param path Path to the input file + \param numOptions Size of options + \param options Options to be applied only for this input (overrides options from ::cuLinkCreate) + \param optionValues Array of option values, each cast to void * + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_FILE_NOT_FOUND + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_IMAGE, + ::CUDA_ERROR_INVALID_PTX, + ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_NO_BINARY_FOR_GPU + + \sa ::cuLinkCreate, + ::cuLinkAddData, + ::cuLinkComplete, + ::cuLinkDestroy*/ + fn cuLinkAddFile_v2( + state: cuda_types::CUlinkState, + type_: cuda_types::CUjitInputType, + path: *const ::core::ffi::c_char, + numOptions: ::core::ffi::c_uint, + options: *mut cuda_types::CUjit_option, + optionValues: *mut *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Complete a pending linker invocation + + Completes the pending linker action and returns the cubin image for the linked + device code, which can be used with ::cuModuleLoadData. The cubin is owned by + \p state, so it should be loaded before \p state is destroyed via ::cuLinkDestroy. + This call does not destroy \p state. + + \param state A pending linker invocation + \param cubinOut On success, this will point to the output image + \param sizeOut Optional parameter to receive the size of the generated image + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_OUT_OF_MEMORY + + \sa ::cuLinkCreate, + ::cuLinkAddData, + ::cuLinkAddFile, + ::cuLinkDestroy, + ::cuModuleLoadData*/ + fn cuLinkComplete( + state: cuda_types::CUlinkState, + cubinOut: *mut *mut ::core::ffi::c_void, + sizeOut: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Destroys state for a JIT linker invocation. + + \param state State object for the linker invocation + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE + + \sa ::cuLinkCreate*/ + fn cuLinkDestroy(state: cuda_types::CUlinkState) -> cuda_types::CUresult; + /** \brief Returns a handle to a texture reference + + \deprecated + + Returns in \p *pTexRef the handle of the texture reference of name \p name + in the module \p hmod. If no texture reference of that name exists, + ::cuModuleGetTexRef() returns ::CUDA_ERROR_NOT_FOUND. This texture reference + handle should not be destroyed, since it will be destroyed when the module + is unloaded. + + \param pTexRef - Returned texture reference + \param hmod - Module to retrieve texture reference from + \param name - Name of texture reference to retrieve + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_FOUND + \notefnerr + + \sa + ::cuModuleGetFunction, + ::cuModuleGetGlobal, + ::cuModuleGetSurfRef, + ::cuModuleLoad, + ::cuModuleLoadData, + ::cuModuleLoadDataEx, + ::cuModuleLoadFatBinary, + ::cuModuleUnload*/ + fn cuModuleGetTexRef( + pTexRef: *mut cuda_types::CUtexref, + hmod: cuda_types::CUmodule, + name: *const ::core::ffi::c_char, + ) -> cuda_types::CUresult; + /** \brief Returns a handle to a surface reference + + \deprecated + + Returns in \p *pSurfRef the handle of the surface reference of name \p name + in the module \p hmod. If no surface reference of that name exists, + ::cuModuleGetSurfRef() returns ::CUDA_ERROR_NOT_FOUND. + + \param pSurfRef - Returned surface reference + \param hmod - Module to retrieve surface reference from + \param name - Name of surface reference to retrieve + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_FOUND + \notefnerr + + \sa + ::cuModuleGetFunction, + ::cuModuleGetGlobal, + ::cuModuleGetTexRef, + ::cuModuleLoad, + ::cuModuleLoadData, + ::cuModuleLoadDataEx, + ::cuModuleLoadFatBinary, + ::cuModuleUnload*/ + fn cuModuleGetSurfRef( + pSurfRef: *mut cuda_types::CUsurfref, + hmod: cuda_types::CUmodule, + name: *const ::core::ffi::c_char, + ) -> cuda_types::CUresult; + /** \brief Load a library with specified code and options + + Takes a pointer \p code and loads the corresponding library \p library based on + the application defined library loading mode: + - If module loading is set to EAGER, via the environment variables described in "Module loading", + \p library is loaded eagerly into all contexts at the time of the call and future contexts + at the time of creation until the library is unloaded with ::cuLibraryUnload(). + - If the environment variables are set to LAZY, \p library + is not immediately loaded onto all existent contexts and will only be + loaded when a function is needed for that context, such as a kernel launch. + + These environment variables are described in the CUDA programming guide under the + "CUDA environment variables" section. + + The \p code may be a \e cubin or \e fatbin as output by \b nvcc, + or a NULL-terminated \e PTX, either as output by \b nvcc or hand-written. + + Options are passed as an array via \p jitOptions and any corresponding parameters are passed in + \p jitOptionsValues. The number of total JIT options is supplied via \p numJitOptions. + Any outputs will be returned via \p jitOptionsValues. + + Library load options are passed as an array via \p libraryOptions and any corresponding parameters are passed in + \p libraryOptionValues. The number of total library load options is supplied via \p numLibraryOptions. + + \param library - Returned library + \param code - Code to load + \param jitOptions - Options for JIT + \param jitOptionsValues - Option values for JIT + \param numJitOptions - Number of options + \param libraryOptions - Options for loading + \param libraryOptionValues - Option values for loading + \param numLibraryOptions - Number of options for loading + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_PTX, + ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_NO_BINARY_FOR_GPU, + ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, + ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, + ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND + + \sa ::cuLibraryLoadFromFile, + ::cuLibraryUnload, + ::cuModuleLoad, + ::cuModuleLoadData, + ::cuModuleLoadDataEx*/ + fn cuLibraryLoadData( + library: *mut cuda_types::CUlibrary, + code: *const ::core::ffi::c_void, + jitOptions: *mut cuda_types::CUjit_option, + jitOptionsValues: *mut *mut ::core::ffi::c_void, + numJitOptions: ::core::ffi::c_uint, + libraryOptions: *mut cuda_types::CUlibraryOption, + libraryOptionValues: *mut *mut ::core::ffi::c_void, + numLibraryOptions: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Load a library with specified file and options + + Takes a pointer \p code and loads the corresponding library \p library based on + the application defined library loading mode: + - If module loading is set to EAGER, via the environment variables described in "Module loading", + \p library is loaded eagerly into all contexts at the time of the call and future contexts + at the time of creation until the library is unloaded with ::cuLibraryUnload(). + - If the environment variables are set to LAZY, \p library + is not immediately loaded onto all existent contexts and will only be + loaded when a function is needed for that context, such as a kernel launch. + + These environment variables are described in the CUDA programming guide under the + "CUDA environment variables" section. + + The file should be a \e cubin file as output by \b nvcc, or a \e PTX file either + as output by \b nvcc or handwritten, or a \e fatbin file as output by \b nvcc. + + Options are passed as an array via \p jitOptions and any corresponding parameters are + passed in \p jitOptionsValues. The number of total options is supplied via \p numJitOptions. + Any outputs will be returned via \p jitOptionsValues. + + Library load options are passed as an array via \p libraryOptions and any corresponding parameters are passed in + \p libraryOptionValues. The number of total library load options is supplied via \p numLibraryOptions. + + \param library - Returned library + \param fileName - File to load from + \param jitOptions - Options for JIT + \param jitOptionsValues - Option values for JIT + \param numJitOptions - Number of options + \param libraryOptions - Options for loading + \param libraryOptionValues - Option values for loading + \param numLibraryOptions - Number of options for loading + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_PTX, + ::CUDA_ERROR_UNSUPPORTED_PTX_VERSION, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_NO_BINARY_FOR_GPU, + ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, + ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, + ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND + + \sa ::cuLibraryLoadData, + ::cuLibraryUnload, + ::cuModuleLoad, + ::cuModuleLoadData, + ::cuModuleLoadDataEx*/ + fn cuLibraryLoadFromFile( + library: *mut cuda_types::CUlibrary, + fileName: *const ::core::ffi::c_char, + jitOptions: *mut cuda_types::CUjit_option, + jitOptionsValues: *mut *mut ::core::ffi::c_void, + numJitOptions: ::core::ffi::c_uint, + libraryOptions: *mut cuda_types::CUlibraryOption, + libraryOptionValues: *mut *mut ::core::ffi::c_void, + numLibraryOptions: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Unloads a library + + Unloads the library specified with \p library + + \param library - Library to unload + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuLibraryLoadData, + ::cuLibraryLoadFromFile, + ::cuModuleUnload*/ + fn cuLibraryUnload(library: cuda_types::CUlibrary) -> cuda_types::CUresult; + /** \brief Returns a kernel handle + + Returns in \p pKernel the handle of the kernel with name \p name located in library \p library. + If kernel handle is not found, the call returns ::CUDA_ERROR_NOT_FOUND. + + \param pKernel - Returned kernel handle + \param library - Library to retrieve kernel from + \param name - Name of kernel to retrieve + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_FOUND + + \sa ::cuLibraryLoadData, + ::cuLibraryLoadFromFile, + ::cuLibraryUnload, + ::cuKernelGetFunction, + ::cuLibraryGetModule, + ::cuModuleGetFunction*/ + fn cuLibraryGetKernel( + pKernel: *mut cuda_types::CUkernel, + library: cuda_types::CUlibrary, + name: *const ::core::ffi::c_char, + ) -> cuda_types::CUresult; + /** \brief Returns the number of kernels within a library + + Returns in \p count the number of kernels in \p lib. + + \param count - Number of kernels found within the library + \param lib - Library to query + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE*/ + fn cuLibraryGetKernelCount( + count: *mut ::core::ffi::c_uint, + lib: cuda_types::CUlibrary, + ) -> cuda_types::CUresult; + /** \brief Retrieve the kernel handles within a library. + + Returns in \p kernels a maximum number of \p numKernels kernel handles within \p lib. + The returned kernel handle becomes invalid when the library is unloaded. + + \param kernels - Buffer where the kernel handles are returned to + \param numKernels - Maximum number of kernel handles may be returned to the buffer + \param lib - Library to query from + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuLibraryGetKernelCount*/ + fn cuLibraryEnumerateKernels( + kernels: *mut cuda_types::CUkernel, + numKernels: ::core::ffi::c_uint, + lib: cuda_types::CUlibrary, + ) -> cuda_types::CUresult; + /** \brief Returns a module handle + + Returns in \p pMod the module handle associated with the current context located in + library \p library. If module handle is not found, the call returns ::CUDA_ERROR_NOT_FOUND. + + \param pMod - Returned module handle + \param library - Library to retrieve module from + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_FOUND, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_CONTEXT_IS_DESTROYED + + \sa ::cuLibraryLoadData, + ::cuLibraryLoadFromFile, + ::cuLibraryUnload, + ::cuModuleGetFunction*/ + fn cuLibraryGetModule( + pMod: *mut cuda_types::CUmodule, + library: cuda_types::CUlibrary, + ) -> cuda_types::CUresult; + /** \brief Returns a function handle + + Returns in \p pFunc the handle of the function for the requested kernel \p kernel and + the current context. If function handle is not found, the call returns ::CUDA_ERROR_NOT_FOUND. + + \param pFunc - Returned function handle + \param kernel - Kernel to retrieve function for the requested context + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_FOUND, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_CONTEXT_IS_DESTROYED + + \sa ::cuLibraryLoadData, + ::cuLibraryLoadFromFile, + ::cuLibraryUnload, + ::cuLibraryGetKernel, + ::cuLibraryGetModule, + ::cuModuleGetFunction*/ + fn cuKernelGetFunction( + pFunc: *mut cuda_types::CUfunction, + kernel: cuda_types::CUkernel, + ) -> cuda_types::CUresult; + /** \brief Returns a global device pointer + + Returns in \p *dptr and \p *bytes the base pointer and size of the global with + name \p name for the requested library \p library and the current context. + If no global for the requested name \p name exists, the call returns ::CUDA_ERROR_NOT_FOUND. + One of the parameters \p dptr or \p bytes (not both) can be NULL in which + case it is ignored. + + \param dptr - Returned global device pointer for the requested context + \param bytes - Returned global size in bytes + \param library - Library to retrieve global from + \param name - Name of global to retrieve + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_FOUND, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_CONTEXT_IS_DESTROYED + + \sa ::cuLibraryLoadData, + ::cuLibraryLoadFromFile, + ::cuLibraryUnload, + ::cuLibraryGetModule, + cuModuleGetGlobal*/ + fn cuLibraryGetGlobal( + dptr: *mut cuda_types::CUdeviceptr, + bytes: *mut usize, + library: cuda_types::CUlibrary, + name: *const ::core::ffi::c_char, + ) -> cuda_types::CUresult; + /** \brief Returns a pointer to managed memory + + Returns in \p *dptr and \p *bytes the base pointer and size of the managed memory with + name \p name for the requested library \p library. If no managed memory with the + requested name \p name exists, the call returns ::CUDA_ERROR_NOT_FOUND. One of the parameters + \p dptr or \p bytes (not both) can be NULL in which case it is ignored. + Note that managed memory for library \p library is shared across devices and is registered + when the library is loaded into atleast one context. + + \note The API requires a CUDA context to be present and initialized on at least one device. + If no context is present, the call returns ::CUDA_ERROR_NOT_FOUND. + + \param dptr - Returned pointer to the managed memory + \param bytes - Returned memory size in bytes + \param library - Library to retrieve managed memory from + \param name - Name of managed memory to retrieve + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_FOUND + + \sa ::cuLibraryLoadData, + ::cuLibraryLoadFromFile, + ::cuLibraryUnload*/ + fn cuLibraryGetManaged( + dptr: *mut cuda_types::CUdeviceptr, + bytes: *mut usize, + library: cuda_types::CUlibrary, + name: *const ::core::ffi::c_char, + ) -> cuda_types::CUresult; + /** \brief Returns a pointer to a unified function + + Returns in \p *fptr the function pointer to a unified function denoted by \p symbol. + If no unified function with name \p symbol exists, the call returns ::CUDA_ERROR_NOT_FOUND. + If there is no device with attribute ::CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS present in the system, + the call may return ::CUDA_ERROR_NOT_FOUND. + + \param fptr - Returned pointer to a unified function + \param library - Library to retrieve function pointer memory from + \param symbol - Name of function pointer to retrieve + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_FOUND + + \sa ::cuLibraryLoadData, + ::cuLibraryLoadFromFile, + ::cuLibraryUnload*/ + fn cuLibraryGetUnifiedFunction( + fptr: *mut *mut ::core::ffi::c_void, + library: cuda_types::CUlibrary, + symbol: *const ::core::ffi::c_char, + ) -> cuda_types::CUresult; + /** \brief Returns information about a kernel + + Returns in \p *pi the integer value of the attribute \p attrib for the kernel + \p kernel for the requested device \p dev. The supported attributes are: + - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: The maximum number of threads + per block, beyond which a launch of the kernel would fail. This number + depends on both the kernel and the requested device. + - ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: The size in bytes of + statically-allocated shared memory per block required by this kernel. + This does not include dynamically-allocated shared memory requested by + the user at runtime. + - ::CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: The size in bytes of user-allocated + constant memory required by this kernel. + - ::CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: The size in bytes of local memory + used by each thread of this kernel. + - ::CU_FUNC_ATTRIBUTE_NUM_REGS: The number of registers used by each thread + of this kernel. + - ::CU_FUNC_ATTRIBUTE_PTX_VERSION: The PTX virtual architecture version for + which the kernel was compiled. This value is the major PTX version * 10 + + the minor PTX version, so a PTX version 1.3 function would return the + value 13. Note that this may return the undefined value of 0 for cubins + compiled prior to CUDA 3.0. + - ::CU_FUNC_ATTRIBUTE_BINARY_VERSION: The binary architecture version for + which the kernel was compiled. This value is the major binary + version * 10 + the minor binary version, so a binary version 1.3 function + would return the value 13. Note that this will return a value of 10 for + legacy cubins that do not have a properly-encoded binary architecture + version. + - ::CU_FUNC_CACHE_MODE_CA: The attribute to indicate whether the kernel has + been compiled with user specified option "-Xptxas --dlcm=ca" set. + - ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: The maximum size in bytes of + dynamically-allocated shared memory. + - ::CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: Preferred shared memory-L1 + cache split ratio in percent of total shared memory. + - ::CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET: If this attribute is set, the + kernel must launch with a valid cluster size specified. + - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH: The required cluster width in + blocks. + - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT: The required cluster height in + blocks. + - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH: The required cluster depth in + blocks. + - ::CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED: Indicates whether + the function can be launched with non-portable cluster size. 1 is allowed, + 0 is disallowed. A non-portable cluster size may only function on the + specific SKUs the program is tested on. The launch might fail if the + program is run on a different hardware platform. CUDA API provides + cudaOccupancyMaxActiveClusters to assist with checking whether the desired + size can be launched on the current device. A portable cluster size is + guaranteed to be functional on all compute capabilities higher than the + target compute capability. The portable cluster size for sm_90 is 8 blocks + per cluster. This value may increase for future compute capabilities. The + specific hardware unit may support higher cluster sizes that’s not + guaranteed to be portable. + - ::CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE: The block + scheduling policy of a function. The value type is CUclusterSchedulingPolicy. + + \note If another thread is trying to set the same attribute on the same device using + ::cuKernelSetAttribute() simultaneously, the attribute query will give the old or new + value depending on the interleavings chosen by the OS scheduler and memory consistency. + + \param pi - Returned attribute value + \param attrib - Attribute requested + \param kernel - Kernel to query attribute of + \param dev - Device to query attribute of + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + + \sa ::cuLibraryLoadData, + ::cuLibraryLoadFromFile, + ::cuLibraryUnload, + ::cuKernelSetAttribute, + ::cuLibraryGetKernel, + ::cuLaunchKernel, + ::cuKernelGetFunction, + ::cuLibraryGetModule, + ::cuModuleGetFunction, + ::cuFuncGetAttribute*/ + fn cuKernelGetAttribute( + pi: *mut ::core::ffi::c_int, + attrib: cuda_types::CUfunction_attribute, + kernel: cuda_types::CUkernel, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Sets information about a kernel + + This call sets the value of a specified attribute \p attrib on the kernel \p kernel + for the requested device \p dev to an integer value specified by \p val. + This function returns CUDA_SUCCESS if the new value of the attribute could be + successfully set. If the set fails, this call will return an error. + Not all attributes can have values set. Attempting to set a value on a read-only + attribute will result in an error (CUDA_ERROR_INVALID_VALUE) + + Note that attributes set using ::cuFuncSetAttribute() will override the attribute + set by this API irrespective of whether the call to ::cuFuncSetAttribute() is made + before or after this API call. However, ::cuKernelGetAttribute() will always + return the attribute value set by this API. + + Supported attributes are: + - ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: This is the maximum size in bytes of + dynamically-allocated shared memory. The value should contain the requested + maximum size of dynamically-allocated shared memory. The sum of this value and + the function attribute ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES cannot exceed the + device attribute ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN. + The maximal size of requestable dynamic shared memory may differ by GPU + architecture. + - ::CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: On devices where the L1 + cache and shared memory use the same hardware resources, this sets the shared memory + carveout preference, in percent of the total shared memory. + See ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR + This is only a hint, and the driver can choose a different ratio if required to execute the function. + - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH: The required cluster width in + blocks. The width, height, and depth values must either all be 0 or all be + positive. The validity of the cluster dimensions is checked at launch time. + If the value is set during compile time, it cannot be set at runtime. + Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED. + - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT: The required cluster height in + blocks. The width, height, and depth values must either all be 0 or all be + positive. The validity of the cluster dimensions is checked at launch time. + If the value is set during compile time, it cannot be set at runtime. + Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED. + - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH: The required cluster depth in + blocks. The width, height, and depth values must either all be 0 or all be + positive. The validity of the cluster dimensions is checked at launch time. + If the value is set during compile time, it cannot be set at runtime. + Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED. + - ::CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE: The block + scheduling policy of a function. The value type is CUclusterSchedulingPolicy. + + \note The API has stricter locking requirements in comparison to its legacy counterpart + ::cuFuncSetAttribute() due to device-wide semantics. If multiple threads are trying to + set the same attribute on the same device simultaneously, the attribute setting will depend + on the interleavings chosen by the OS scheduler and memory consistency. + + \param attrib - Attribute requested + \param val - Value to set + \param kernel - Kernel to set attribute of + \param dev - Device to set attribute of + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_OUT_OF_MEMORY + + \sa ::cuLibraryLoadData, + ::cuLibraryLoadFromFile, + ::cuLibraryUnload, + ::cuKernelGetAttribute, + ::cuLibraryGetKernel, + ::cuLaunchKernel, + ::cuKernelGetFunction, + ::cuLibraryGetModule, + ::cuModuleGetFunction, + ::cuFuncSetAttribute*/ + fn cuKernelSetAttribute( + attrib: cuda_types::CUfunction_attribute, + val: ::core::ffi::c_int, + kernel: cuda_types::CUkernel, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Sets the preferred cache configuration for a device kernel. + + On devices where the L1 cache and shared memory use the same hardware + resources, this sets through \p config the preferred cache configuration for + the device kernel \p kernel on the requested device \p dev. This is only a preference. + The driver will use the requested configuration if possible, but it is free to choose a different + configuration if required to execute \p kernel. Any context-wide preference + set via ::cuCtxSetCacheConfig() will be overridden by this per-kernel + setting. + + Note that attributes set using ::cuFuncSetCacheConfig() will override the attribute + set by this API irrespective of whether the call to ::cuFuncSetCacheConfig() is made + before or after this API call. + + This setting does nothing on devices where the size of the L1 cache and + shared memory are fixed. + + Launching a kernel with a different preference than the most recent + preference setting may insert a device-side synchronization point. + + + The supported cache configurations are: + - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default) + - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache + - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory + - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory + + \note The API has stricter locking requirements in comparison to its legacy counterpart + ::cuFuncSetCacheConfig() due to device-wide semantics. If multiple threads are trying to + set a config on the same device simultaneously, the cache config setting will depend + on the interleavings chosen by the OS scheduler and memory consistency. + + \param kernel - Kernel to configure cache for + \param config - Requested cache configuration + \param dev - Device to set attribute of + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_OUT_OF_MEMORY + + \sa ::cuLibraryLoadData, + ::cuLibraryLoadFromFile, + ::cuLibraryUnload, + ::cuLibraryGetKernel, + ::cuKernelGetFunction, + ::cuLibraryGetModule, + ::cuModuleGetFunction, + ::cuFuncSetCacheConfig, + ::cuCtxSetCacheConfig, + ::cuLaunchKernel*/ + fn cuKernelSetCacheConfig( + kernel: cuda_types::CUkernel, + config: cuda_types::CUfunc_cache, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Returns the function name for a ::CUkernel handle + + Returns in \p **name the function name associated with the kernel handle \p hfunc . + The function name is returned as a null-terminated string. The returned name is only + valid when the kernel handle is valid. If the library is unloaded or reloaded, one + must call the API again to get the updated name. This API may return a mangled name if + the function is not declared as having C linkage. If either \p **name or \p hfunc + is NULL, ::CUDA_ERROR_INVALID_VALUE is returned. + + \param name - The returned name of the function + \param hfunc - The function handle to retrieve the name for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr +*/ + fn cuKernelGetName( + name: *mut *const ::core::ffi::c_char, + hfunc: cuda_types::CUkernel, + ) -> cuda_types::CUresult; + /** \brief Returns the offset and size of a kernel parameter in the device-side parameter layout + + Queries the kernel parameter at \p paramIndex into \p kernel's list of parameters, and returns + in \p paramOffset and \p paramSize the offset and size, respectively, where the parameter + will reside in the device-side parameter layout. This information can be used to update kernel + node parameters from the device via ::cudaGraphKernelNodeSetParam() and + ::cudaGraphKernelNodeUpdatesApply(). \p paramIndex must be less than the number of parameters + that \p kernel takes. \p paramSize can be set to NULL if only the parameter offset is desired. + + \param kernel - The kernel to query + \param paramIndex - The parameter index to query + \param paramOffset - Returns the offset into the device-side parameter layout at which the parameter resides + \param paramSize - Optionally returns the size of the parameter in the device-side parameter layout + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \notefnerr + + \sa ::cuFuncGetParamInfo*/ + fn cuKernelGetParamInfo( + kernel: cuda_types::CUkernel, + paramIndex: usize, + paramOffset: *mut usize, + paramSize: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Gets free and total memory + + Returns in \p *total the total amount of memory available to the the current context. + Returns in \p *free the amount of memory on the device that is free according to the OS. + CUDA is not guaranteed to be able to allocate all of the memory that the OS reports as free. + In a multi-tenet situation, free estimate returned is prone to race condition where + a new allocation/free done by a different process or a different thread in the same + process between the time when free memory was estimated and reported, will result in + deviation in free value reported and actual free memory. + + The integrated GPU on Tegra shares memory with CPU and other component + of the SoC. The free and total values returned by the API excludes + the SWAP memory space maintained by the OS on some platforms. + The OS may move some of the memory pages into swap area as the GPU or + CPU allocate or access memory. See Tegra app note on how to calculate + total and free memory on Tegra. + + \param free - Returned free memory in bytes + \param total - Returned total memory in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMemGetInfo*/ + fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> cuda_types::CUresult; + /** \brief Allocates device memory + + Allocates \p bytesize bytes of linear memory on the device and returns in + \p *dptr a pointer to the allocated memory. The allocated memory is suitably + aligned for any kind of variable. The memory is not cleared. If \p bytesize + is 0, ::cuMemAlloc() returns ::CUDA_ERROR_INVALID_VALUE. + + \param dptr - Returned device pointer + \param bytesize - Requested allocation size in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY + \notefnerr + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMalloc*/ + fn cuMemAlloc_v2( + dptr: *mut cuda_types::CUdeviceptr, + bytesize: usize, + ) -> cuda_types::CUresult; + /** \brief Allocates pitched device memory + + Allocates at least \p WidthInBytes * \p Height bytes of linear memory on + the device and returns in \p *dptr a pointer to the allocated memory. The + function may pad the allocation to ensure that corresponding pointers in + any given row will continue to meet the alignment requirements for + coalescing as the address is updated from row to row. \p ElementSizeBytes + specifies the size of the largest reads and writes that will be performed + on the memory range. \p ElementSizeBytes may be 4, 8 or 16 (since coalesced + memory transactions are not possible on other data sizes). If + \p ElementSizeBytes is smaller than the actual read/write size of a kernel, + the kernel will run correctly, but possibly at reduced speed. The pitch + returned in \p *pPitch by ::cuMemAllocPitch() is the width in bytes of the + allocation. The intended usage of pitch is as a separate parameter of the + allocation, used to compute addresses within the 2D array. Given the row + and column of an array element of type \b T, the address is computed as: + \code +T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; + \endcode + + The pitch returned by ::cuMemAllocPitch() is guaranteed to work with + ::cuMemcpy2D() under all circumstances. For allocations of 2D arrays, it is + recommended that programmers consider performing pitch allocations using + ::cuMemAllocPitch(). Due to alignment restrictions in the hardware, this is + especially true if the application will be performing 2D memory copies + between different regions of device memory (whether linear memory or CUDA + arrays). + + The byte alignment of the pitch returned by ::cuMemAllocPitch() is guaranteed + to match or exceed the alignment requirement for texture binding with + ::cuTexRefSetAddress2D(). + + \param dptr - Returned device pointer + \param pPitch - Returned pitch of allocation in bytes + \param WidthInBytes - Requested allocation width in bytes + \param Height - Requested allocation height in rows + \param ElementSizeBytes - Size of largest reads/writes for range + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY + \notefnerr + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMallocPitch*/ + fn cuMemAllocPitch_v2( + dptr: *mut cuda_types::CUdeviceptr, + pPitch: *mut usize, + WidthInBytes: usize, + Height: usize, + ElementSizeBytes: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Frees device memory + + Frees the memory space pointed to by \p dptr, which must have been returned + by a previous call to one of the following memory allocation APIs - ::cuMemAlloc(), + ::cuMemAllocPitch(), ::cuMemAllocManaged(), ::cuMemAllocAsync(), ::cuMemAllocFromPoolAsync() + + Note - This API will not perform any implict synchronization when the pointer was allocated with + ::cuMemAllocAsync or ::cuMemAllocFromPoolAsync. Callers must ensure that all accesses to the + pointer have completed before invoking ::cuMemFree. For best performance and memory reuse, users + should use ::cuMemFreeAsync to free memory allocated via the stream ordered memory allocator. + + \param dptr - Pointer to memory to free + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemAllocManaged, ::cuMemAllocAsync, ::cuMemAllocFromPoolAsync, + ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, ::cuMemcpy3D, ::cuMemcpy3DAsync, + ::cuMemcpyAtoA, ::cuMemcpyAtoD, ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, + ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, + ::cuMemcpyHtoAAsync, ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, ::cuMemFreeAsync, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaFree*/ + fn cuMemFree_v2(dptr: cuda_types::CUdeviceptr) -> cuda_types::CUresult; + /** \brief Get information on memory allocations + + Returns the base address in \p *pbase and size in \p *psize of the + allocation by ::cuMemAlloc() or ::cuMemAllocPitch() that contains the input + pointer \p dptr. Both parameters \p pbase and \p psize are optional. If one + of them is NULL, it is ignored. + + \param pbase - Returned base address + \param psize - Returned size of device memory allocation + \param dptr - Device pointer to query + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_NOT_FOUND, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32*/ + fn cuMemGetAddressRange_v2( + pbase: *mut cuda_types::CUdeviceptr, + psize: *mut usize, + dptr: cuda_types::CUdeviceptr, + ) -> cuda_types::CUresult; + /** \brief Allocates page-locked host memory + + Allocates \p bytesize bytes of host memory that is page-locked and + accessible to the device. The driver tracks the virtual memory ranges + allocated with this function and automatically accelerates calls to + functions such as ::cuMemcpy(). Since the memory can be accessed directly by + the device, it can be read or written with much higher bandwidth than + pageable memory obtained with functions such as ::malloc(). + + On systems where ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES + is true, ::cuMemAllocHost may not page-lock the allocated memory. + + Page-locking excessive amounts of memory with ::cuMemAllocHost() may degrade system + performance, since it reduces the amount of memory available to the system + for paging. As a result, this function is best used sparingly to allocate + staging areas for data exchange between host and device. + + Note all host memory allocated using ::cuMemAllocHost() will automatically + be immediately accessible to all contexts on all devices which support unified + addressing (as may be queried using ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING). + The device pointer that may be used to access this host memory from those + contexts is always equal to the returned host pointer \p *pp. + See \ref CUDA_UNIFIED for additional details. + + \param pp - Returned pointer to host memory + \param bytesize - Requested allocation size in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY + \notefnerr + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMallocHost*/ + fn cuMemAllocHost_v2( + pp: *mut *mut ::core::ffi::c_void, + bytesize: usize, + ) -> cuda_types::CUresult; + /** \brief Frees page-locked host memory + + Frees the memory space pointed to by \p p, which must have been returned by + a previous call to ::cuMemAllocHost(). + + \param p - Pointer to memory to free + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaFreeHost*/ + fn cuMemFreeHost(p: *mut ::core::ffi::c_void) -> cuda_types::CUresult; + /** \brief Allocates page-locked host memory + + Allocates \p bytesize bytes of host memory that is page-locked and accessible + to the device. The driver tracks the virtual memory ranges allocated with + this function and automatically accelerates calls to functions such as + ::cuMemcpyHtoD(). Since the memory can be accessed directly by the device, + it can be read or written with much higher bandwidth than pageable memory + obtained with functions such as ::malloc(). + + On systems where ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES + is true, ::cuMemHostAlloc may not page-lock the allocated memory. + + Page-locking excessive amounts of memory may degrade system performance, + since it reduces the amount of memory available to the system for paging. + As a result, this function is best used sparingly to allocate staging areas + for data exchange between host and device. + + The \p Flags parameter enables different options to be specified that + affect the allocation, as follows. + + - ::CU_MEMHOSTALLOC_PORTABLE: The memory returned by this call will be + considered as pinned memory by all CUDA contexts, not just the one that + performed the allocation. + + - ::CU_MEMHOSTALLOC_DEVICEMAP: Maps the allocation into the CUDA address + space. The device pointer to the memory may be obtained by calling + ::cuMemHostGetDevicePointer(). + + - ::CU_MEMHOSTALLOC_WRITECOMBINED: Allocates the memory as write-combined + (WC). WC memory can be transferred across the PCI Express bus more + quickly on some system configurations, but cannot be read efficiently by + most CPUs. WC memory is a good option for buffers that will be written by + the CPU and read by the GPU via mapped pinned memory or host->device + transfers. + + All of these flags are orthogonal to one another: a developer may allocate + memory that is portable, mapped and/or write-combined with no restrictions. + + The ::CU_MEMHOSTALLOC_DEVICEMAP flag may be specified on CUDA contexts for + devices that do not support mapped pinned memory. The failure is deferred + to ::cuMemHostGetDevicePointer() because the memory may be mapped into + other CUDA contexts via the ::CU_MEMHOSTALLOC_PORTABLE flag. + + The memory allocated by this function must be freed with ::cuMemFreeHost(). + + Note all host memory allocated using ::cuMemHostAlloc() will automatically + be immediately accessible to all contexts on all devices which support unified + addressing (as may be queried using ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING). + Unless the flag ::CU_MEMHOSTALLOC_WRITECOMBINED is specified, the device pointer + that may be used to access this host memory from those contexts is always equal + to the returned host pointer \p *pp. If the flag ::CU_MEMHOSTALLOC_WRITECOMBINED + is specified, then the function ::cuMemHostGetDevicePointer() must be used + to query the device pointer, even if the context supports unified addressing. + See \ref CUDA_UNIFIED for additional details. + + \param pp - Returned pointer to host memory + \param bytesize - Requested allocation size in bytes + \param Flags - Flags for allocation request + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY + \notefnerr + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaHostAlloc*/ + fn cuMemHostAlloc( + pp: *mut *mut ::core::ffi::c_void, + bytesize: usize, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Passes back device pointer of mapped pinned memory + + Passes back the device pointer \p pdptr corresponding to the mapped, pinned + host buffer \p p allocated by ::cuMemHostAlloc. + + ::cuMemHostGetDevicePointer() will fail if the ::CU_MEMHOSTALLOC_DEVICEMAP + flag was not specified at the time the memory was allocated, or if the + function is called on a GPU that does not support mapped pinned memory. + + For devices that have a non-zero value for the device attribute + ::CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, the memory + can also be accessed from the device using the host pointer \p p. + The device pointer returned by ::cuMemHostGetDevicePointer() may or may not + match the original host pointer \p p and depends on the devices visible to the + application. If all devices visible to the application have a non-zero value for the + device attribute, the device pointer returned by ::cuMemHostGetDevicePointer() + will match the original pointer \p p. If any device visible to the application + has a zero value for the device attribute, the device pointer returned by + ::cuMemHostGetDevicePointer() will not match the original host pointer \p p, + but it will be suitable for use on all devices provided Unified Virtual Addressing + is enabled. In such systems, it is valid to access the memory using either pointer + on devices that have a non-zero value for the device attribute. Note however that + such devices should access the memory using only one of the two pointers and not both. + + \p Flags provides for future releases. For now, it must be set to 0. + + \param pdptr - Returned device pointer + \param p - Host pointer + \param Flags - Options (must be 0) + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaHostGetDevicePointer*/ + fn cuMemHostGetDevicePointer_v2( + pdptr: *mut cuda_types::CUdeviceptr, + p: *mut ::core::ffi::c_void, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Passes back flags that were used for a pinned allocation + + Passes back the flags \p pFlags that were specified when allocating + the pinned host buffer \p p allocated by ::cuMemHostAlloc. + + ::cuMemHostGetFlags() will fail if the pointer does not reside in + an allocation performed by ::cuMemAllocHost() or ::cuMemHostAlloc(). + + \param pFlags - Returned flags word + \param p - Host pointer + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa + ::cuMemAllocHost, + ::cuMemHostAlloc, + ::cudaHostGetFlags*/ + fn cuMemHostGetFlags( + pFlags: *mut ::core::ffi::c_uint, + p: *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Allocates memory that will be automatically managed by the Unified Memory system + + Allocates \p bytesize bytes of managed memory on the device and returns in + \p *dptr a pointer to the allocated memory. If the device doesn't support + allocating managed memory, ::CUDA_ERROR_NOT_SUPPORTED is returned. Support + for managed memory can be queried using the device attribute + ::CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY. The allocated memory is suitably + aligned for any kind of variable. The memory is not cleared. If \p bytesize + is 0, ::cuMemAllocManaged returns ::CUDA_ERROR_INVALID_VALUE. The pointer + is valid on the CPU and on all GPUs in the system that support managed memory. + All accesses to this pointer must obey the Unified Memory programming model. + + \p flags specifies the default stream association for this allocation. + \p flags must be one of ::CU_MEM_ATTACH_GLOBAL or ::CU_MEM_ATTACH_HOST. If + ::CU_MEM_ATTACH_GLOBAL is specified, then this memory is accessible from + any stream on any device. If ::CU_MEM_ATTACH_HOST is specified, then the + allocation should not be accessed from devices that have a zero value for the + device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS; an explicit call to + ::cuStreamAttachMemAsync will be required to enable access on such devices. + + If the association is later changed via ::cuStreamAttachMemAsync to + a single stream, the default association as specified during ::cuMemAllocManaged + is restored when that stream is destroyed. For __managed__ variables, the + default association is always ::CU_MEM_ATTACH_GLOBAL. Note that destroying a + stream is an asynchronous operation, and as a result, the change to default + association won't happen until all work in the stream has completed. + + Memory allocated with ::cuMemAllocManaged should be released with ::cuMemFree. + + Device memory oversubscription is possible for GPUs that have a non-zero value for the + device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. Managed memory on + such GPUs may be evicted from device memory to host memory at any time by the Unified + Memory driver in order to make room for other allocations. + + In a system where all GPUs have a non-zero value for the device attribute + ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, managed memory may not be populated when this + API returns and instead may be populated on access. In such systems, managed memory can + migrate to any processor's memory at any time. The Unified Memory driver will employ heuristics to + maintain data locality and prevent excessive page faults to the extent possible. The application + can also guide the driver about memory usage patterns via ::cuMemAdvise. The application + can also explicitly migrate memory to a desired processor's memory via + ::cuMemPrefetchAsync. + + In a multi-GPU system where all of the GPUs have a zero value for the device attribute + ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS and all the GPUs have peer-to-peer support + with each other, the physical storage for managed memory is created on the GPU which is active + at the time ::cuMemAllocManaged is called. All other GPUs will reference the data at reduced + bandwidth via peer mappings over the PCIe bus. The Unified Memory driver does not migrate + memory among such GPUs. + + In a multi-GPU system where not all GPUs have peer-to-peer support with each other and + where the value of the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS + is zero for at least one of those GPUs, the location chosen for physical storage of managed + memory is system-dependent. + - On Linux, the location chosen will be device memory as long as the current set of active + contexts are on devices that either have peer-to-peer support with each other or have a + non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. + If there is an active context on a GPU that does not have a non-zero value for that device + attribute and it does not have peer-to-peer support with the other devices that have active + contexts on them, then the location for physical storage will be 'zero-copy' or host memory. + Note that this means that managed memory that is located in device memory is migrated to + host memory if a new context is created on a GPU that doesn't have a non-zero value for + the device attribute and does not support peer-to-peer with at least one of the other devices + that has an active context. This in turn implies that context creation may fail if there is + insufficient host memory to migrate all managed allocations. + - On Windows, the physical storage is always created in 'zero-copy' or host memory. + All GPUs will reference the data at reduced bandwidth over the PCIe bus. In these + circumstances, use of the environment variable CUDA_VISIBLE_DEVICES is recommended to + restrict CUDA to only use those GPUs that have peer-to-peer support. + Alternatively, users can also set CUDA_MANAGED_FORCE_DEVICE_ALLOC to a + non-zero value to force the driver to always use device memory for physical storage. + When this environment variable is set to a non-zero value, all contexts created in + that process on devices that support managed memory have to be peer-to-peer compatible + with each other. Context creation will fail if a context is created on a device that + supports managed memory and is not peer-to-peer compatible with any of the other + managed memory supporting devices on which contexts were previously created, even if + those contexts have been destroyed. These environment variables are described + in the CUDA programming guide under the "CUDA environment variables" section. + - On ARM, managed memory is not available on discrete gpu with Drive PX-2. + + \param dptr - Returned device pointer + \param bytesize - Requested allocation size in bytes + \param flags - Must be one of ::CU_MEM_ATTACH_GLOBAL or ::CU_MEM_ATTACH_HOST + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY + \notefnerr + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cuDeviceGetAttribute, ::cuStreamAttachMemAsync, + ::cudaMallocManaged*/ + fn cuMemAllocManaged( + dptr: *mut cuda_types::CUdeviceptr, + bytesize: usize, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Registers a callback function to receive async notifications + + Registers \p callbackFunc to receive async notifications. + + The \p userData parameter is passed to the callback function at async notification time. + Likewise, \p callback is also passed to the callback function to distinguish between + multiple registered callbacks. + + The callback function being registered should be designed to return quickly (~10ms). + Any long running tasks should be queued for execution on an application thread. + + Callbacks may not call cuDeviceRegisterAsyncNotification or cuDeviceUnregisterAsyncNotification. + Doing so will result in ::CUDA_ERROR_NOT_PERMITTED. Async notification callbacks execute + in an undefined order and may be serialized. + + Returns in \p *callback a handle representing the registered callback instance. + + \param device - The device on which to register the callback + \param callbackFunc - The function to register as a callback + \param userData - A generic pointer to user data. This is passed into the callback function. + \param callback - A handle representing the registered callback instance + + \return + ::CUDA_SUCCESS + ::CUDA_ERROR_NOT_SUPPORTED + ::CUDA_ERROR_INVALID_DEVICE + ::CUDA_ERROR_INVALID_VALUE + ::CUDA_ERROR_NOT_PERMITTED + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa + ::cuDeviceUnregisterAsyncNotification*/ + fn cuDeviceRegisterAsyncNotification( + device: cuda_types::CUdevice, + callbackFunc: cuda_types::CUasyncCallback, + userData: *mut ::core::ffi::c_void, + callback: *mut cuda_types::CUasyncCallbackHandle, + ) -> cuda_types::CUresult; + /** \brief Unregisters an async notification callback + + Unregisters \p callback so that the corresponding callback function will stop receiving + async notifications. + + \param device - The device from which to remove \p callback. + \param callback - The callback instance to unregister from receiving async notifications. + + \return + ::CUDA_SUCCESS + ::CUDA_ERROR_NOT_SUPPORTED + ::CUDA_ERROR_INVALID_DEVICE + ::CUDA_ERROR_INVALID_VALUE + ::CUDA_ERROR_NOT_PERMITTED + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa + ::cuDeviceRegisterAsyncNotification*/ + fn cuDeviceUnregisterAsyncNotification( + device: cuda_types::CUdevice, + callback: cuda_types::CUasyncCallbackHandle, + ) -> cuda_types::CUresult; + /** \brief Returns a handle to a compute device + + Returns in \p *device a device handle given a PCI bus ID string. + + \param dev - Returned device handle + + \param pciBusId - String in one of the following forms: + [domain]:[bus]:[device].[function] + [domain]:[bus]:[device] + [bus]:[device].[function] + where \p domain, \p bus, \p device, and \p function are all hexadecimal values + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuDeviceGet, + ::cuDeviceGetAttribute, + ::cuDeviceGetPCIBusId, + ::cudaDeviceGetByPCIBusId*/ + fn cuDeviceGetByPCIBusId( + dev: *mut cuda_types::CUdevice, + pciBusId: *const ::core::ffi::c_char, + ) -> cuda_types::CUresult; + /** \brief Returns a PCI Bus Id string for the device + + Returns an ASCII string identifying the device \p dev in the NULL-terminated + string pointed to by \p pciBusId. \p len specifies the maximum length of the + string that may be returned. + + \param pciBusId - Returned identifier string for the device in the following format + [domain]:[bus]:[device].[function] + where \p domain, \p bus, \p device, and \p function are all hexadecimal values. + pciBusId should be large enough to store 13 characters including the NULL-terminator. + + \param len - Maximum length of string to store in \p name + + \param dev - Device to get identifier string for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuDeviceGet, + ::cuDeviceGetAttribute, + ::cuDeviceGetByPCIBusId, + ::cudaDeviceGetPCIBusId*/ + fn cuDeviceGetPCIBusId( + pciBusId: *mut ::core::ffi::c_char, + len: ::core::ffi::c_int, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Gets an interprocess handle for a previously allocated event + + Takes as input a previously allocated event. This event must have been + created with the ::CU_EVENT_INTERPROCESS and ::CU_EVENT_DISABLE_TIMING + flags set. This opaque handle may be copied into other processes and + opened with ::cuIpcOpenEventHandle to allow efficient hardware + synchronization between GPU work in different processes. + + After the event has been opened in the importing process, + ::cuEventRecord, ::cuEventSynchronize, ::cuStreamWaitEvent and + ::cuEventQuery may be used in either process. Performing operations + on the imported event after the exported event has been freed + with ::cuEventDestroy will result in undefined behavior. + + IPC functionality is restricted to devices with support for unified + addressing on Linux and Windows operating systems. + IPC functionality on Windows is restricted to GPUs in TCC mode + Users can test their device for IPC functionality by calling + ::cuapiDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED + + \param pHandle - Pointer to a user allocated CUipcEventHandle + in which to return the opaque event handle + \param event - Event allocated with ::CU_EVENT_INTERPROCESS and + ::CU_EVENT_DISABLE_TIMING flags. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_MAP_FAILED, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuEventCreate, + ::cuEventDestroy, + ::cuEventSynchronize, + ::cuEventQuery, + ::cuStreamWaitEvent, + ::cuIpcOpenEventHandle, + ::cuIpcGetMemHandle, + ::cuIpcOpenMemHandle, + ::cuIpcCloseMemHandle, + ::cudaIpcGetEventHandle*/ + fn cuIpcGetEventHandle( + pHandle: *mut cuda_types::CUipcEventHandle, + event: cuda_types::CUevent, + ) -> cuda_types::CUresult; + /** \brief Opens an interprocess event handle for use in the current process + + Opens an interprocess event handle exported from another process with + ::cuIpcGetEventHandle. This function returns a ::CUevent that behaves like + a locally created event with the ::CU_EVENT_DISABLE_TIMING flag specified. + This event must be freed with ::cuEventDestroy. + + Performing operations on the imported event after the exported event has + been freed with ::cuEventDestroy will result in undefined behavior. + + IPC functionality is restricted to devices with support for unified + addressing on Linux and Windows operating systems. + IPC functionality on Windows is restricted to GPUs in TCC mode + Users can test their device for IPC functionality by calling + ::cuapiDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED + + \param phEvent - Returns the imported event + \param handle - Interprocess handle to open + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_MAP_FAILED, + ::CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuEventCreate, + ::cuEventDestroy, + ::cuEventSynchronize, + ::cuEventQuery, + ::cuStreamWaitEvent, + ::cuIpcGetEventHandle, + ::cuIpcGetMemHandle, + ::cuIpcOpenMemHandle, + ::cuIpcCloseMemHandle, + ::cudaIpcOpenEventHandle*/ + fn cuIpcOpenEventHandle( + phEvent: *mut cuda_types::CUevent, + handle: cuda_types::CUipcEventHandle, + ) -> cuda_types::CUresult; + /** \brief Gets an interprocess memory handle for an existing device memory + allocation + + Takes a pointer to the base of an existing device memory allocation created + with ::cuMemAlloc and exports it for use in another process. This is a + lightweight operation and may be called multiple times on an allocation + without adverse effects. + + If a region of memory is freed with ::cuMemFree and a subsequent call + to ::cuMemAlloc returns memory with the same device address, + ::cuIpcGetMemHandle will return a unique handle for the + new memory. + + IPC functionality is restricted to devices with support for unified + addressing on Linux and Windows operating systems. + IPC functionality on Windows is restricted to GPUs in TCC mode + Users can test their device for IPC functionality by calling + ::cuapiDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED + + \param pHandle - Pointer to user allocated ::CUipcMemHandle to return + the handle in. + \param dptr - Base pointer to previously allocated device memory + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_MAP_FAILED, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuMemAlloc, + ::cuMemFree, + ::cuIpcGetEventHandle, + ::cuIpcOpenEventHandle, + ::cuIpcOpenMemHandle, + ::cuIpcCloseMemHandle, + ::cudaIpcGetMemHandle*/ + fn cuIpcGetMemHandle( + pHandle: *mut cuda_types::CUipcMemHandle, + dptr: cuda_types::CUdeviceptr, + ) -> cuda_types::CUresult; + /** \brief Opens an interprocess memory handle exported from another process + and returns a device pointer usable in the local process. + + Maps memory exported from another process with ::cuIpcGetMemHandle into + the current device address space. For contexts on different devices + ::cuIpcOpenMemHandle can attempt to enable peer access between the + devices as if the user called ::cuCtxEnablePeerAccess. This behavior is + controlled by the ::CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS flag. + ::cuDeviceCanAccessPeer can determine if a mapping is possible. + + Contexts that may open ::CUipcMemHandles are restricted in the following way. + ::CUipcMemHandles from each ::CUdevice in a given process may only be opened + by one ::CUcontext per ::CUdevice per other process. + + If the memory handle has already been opened by the current context, the + reference count on the handle is incremented by 1 and the existing device pointer + is returned. + + Memory returned from ::cuIpcOpenMemHandle must be freed with + ::cuIpcCloseMemHandle. + + Calling ::cuMemFree on an exported memory region before calling + ::cuIpcCloseMemHandle in the importing context will result in undefined + behavior. + + IPC functionality is restricted to devices with support for unified + addressing on Linux and Windows operating systems. + IPC functionality on Windows is restricted to GPUs in TCC mode + Users can test their device for IPC functionality by calling + ::cuapiDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED + + \param pdptr - Returned device pointer + \param handle - ::CUipcMemHandle to open + \param Flags - Flags for this operation. Must be specified as ::CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_MAP_FAILED, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_TOO_MANY_PEERS, + ::CUDA_ERROR_INVALID_VALUE + + \note No guarantees are made about the address returned in \p *pdptr. + In particular, multiple processes may not receive the same address for the same \p handle. + + \sa + ::cuMemAlloc, + ::cuMemFree, + ::cuIpcGetEventHandle, + ::cuIpcOpenEventHandle, + ::cuIpcGetMemHandle, + ::cuIpcCloseMemHandle, + ::cuCtxEnablePeerAccess, + ::cuDeviceCanAccessPeer, + ::cudaIpcOpenMemHandle*/ + fn cuIpcOpenMemHandle_v2( + pdptr: *mut cuda_types::CUdeviceptr, + handle: cuda_types::CUipcMemHandle, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Attempts to close memory mapped with ::cuIpcOpenMemHandle + + Decrements the reference count of the memory returned by ::cuIpcOpenMemHandle by 1. + When the reference count reaches 0, this API unmaps the memory. The original allocation + in the exporting process as well as imported mappings in other processes + will be unaffected. + + Any resources used to enable peer access will be freed if this is the + last mapping using them. + + IPC functionality is restricted to devices with support for unified + addressing on Linux and Windows operating systems. + IPC functionality on Windows is restricted to GPUs in TCC mode + Users can test their device for IPC functionality by calling + ::cuapiDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED + + \param dptr - Device pointer returned by ::cuIpcOpenMemHandle + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_MAP_FAILED, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE + \sa + ::cuMemAlloc, + ::cuMemFree, + ::cuIpcGetEventHandle, + ::cuIpcOpenEventHandle, + ::cuIpcGetMemHandle, + ::cuIpcOpenMemHandle, + ::cudaIpcCloseMemHandle*/ + fn cuIpcCloseMemHandle(dptr: cuda_types::CUdeviceptr) -> cuda_types::CUresult; + /** \brief Registers an existing host memory range for use by CUDA + + Page-locks the memory range specified by \p p and \p bytesize and maps it + for the device(s) as specified by \p Flags. This memory range also is added + to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate + calls to functions such as ::cuMemcpyHtoD(). Since the memory can be accessed + directly by the device, it can be read or written with much higher bandwidth + than pageable memory that has not been registered. Page-locking excessive + amounts of memory may degrade system performance, since it reduces the amount + of memory available to the system for paging. As a result, this function is + best used sparingly to register staging areas for data exchange between + host and device. + + On systems where ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES + is true, ::cuMemHostRegister will not page-lock the memory range specified + by \p ptr but only populate unpopulated pages. + + The \p Flags parameter enables different options to be specified that + affect the allocation, as follows. + + - ::CU_MEMHOSTREGISTER_PORTABLE: The memory returned by this call will be + considered as pinned memory by all CUDA contexts, not just the one that + performed the allocation. + + - ::CU_MEMHOSTREGISTER_DEVICEMAP: Maps the allocation into the CUDA address + space. The device pointer to the memory may be obtained by calling + ::cuMemHostGetDevicePointer(). + + - ::CU_MEMHOSTREGISTER_IOMEMORY: The pointer is treated as pointing to some + I/O memory space, e.g. the PCI Express resource of a 3rd party device. + + - ::CU_MEMHOSTREGISTER_READ_ONLY: The pointer is treated as pointing to memory + that is considered read-only by the device. On platforms without + ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, this flag is + required in order to register memory mapped to the CPU as read-only. Support + for the use of this flag can be queried from the device attribute + ::CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED. Using this flag with + a current context associated with a device that does not have this attribute + set will cause ::cuMemHostRegister to error with CUDA_ERROR_NOT_SUPPORTED. + + All of these flags are orthogonal to one another: a developer may page-lock + memory that is portable or mapped with no restrictions. + + The ::CU_MEMHOSTREGISTER_DEVICEMAP flag may be specified on CUDA contexts for + devices that do not support mapped pinned memory. The failure is deferred + to ::cuMemHostGetDevicePointer() because the memory may be mapped into + other CUDA contexts via the ::CU_MEMHOSTREGISTER_PORTABLE flag. + + For devices that have a non-zero value for the device attribute + ::CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, the memory + can also be accessed from the device using the host pointer \p p. + The device pointer returned by ::cuMemHostGetDevicePointer() may or may not + match the original host pointer \p ptr and depends on the devices visible to the + application. If all devices visible to the application have a non-zero value for the + device attribute, the device pointer returned by ::cuMemHostGetDevicePointer() + will match the original pointer \p ptr. If any device visible to the application + has a zero value for the device attribute, the device pointer returned by + ::cuMemHostGetDevicePointer() will not match the original host pointer \p ptr, + but it will be suitable for use on all devices provided Unified Virtual Addressing + is enabled. In such systems, it is valid to access the memory using either pointer + on devices that have a non-zero value for the device attribute. Note however that + such devices should access the memory using only of the two pointers and not both. + + The memory page-locked by this function must be unregistered with + ::cuMemHostUnregister(). + + \param p - Host pointer to memory to page-lock + \param bytesize - Size in bytes of the address range to page-lock + \param Flags - Flags for allocation request + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + + \sa + ::cuMemHostUnregister, + ::cuMemHostGetFlags, + ::cuMemHostGetDevicePointer, + ::cudaHostRegister*/ + fn cuMemHostRegister_v2( + p: *mut ::core::ffi::c_void, + bytesize: usize, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Unregisters a memory range that was registered with cuMemHostRegister. + + Unmaps the memory range whose base address is specified by \p p, and makes + it pageable again. + + The base address must be the same one specified to ::cuMemHostRegister(). + + \param p - Host pointer to memory to unregister + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED, + \notefnerr + + \sa + ::cuMemHostRegister, + ::cudaHostUnregister*/ + fn cuMemHostUnregister(p: *mut ::core::ffi::c_void) -> cuda_types::CUresult; + /** \brief Copies memory + + Copies data between two pointers. + \p dst and \p src are base pointers of the destination and source, respectively. + \p ByteCount specifies the number of bytes to copy. + Note that this function infers the type of the transfer (host to host, host to + device, device to device, or device to host) from the pointer values. This + function is only allowed in contexts which support unified addressing. + + \param dst - Destination unified virtual address space pointer + \param src - Source unified virtual address space pointer + \param ByteCount - Size of memory copy in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + \note_memcpy + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMemcpy, + ::cudaMemcpyToSymbol, + ::cudaMemcpyFromSymbol*/ + fn cuMemcpy_ptds( + dst: cuda_types::CUdeviceptr, + src: cuda_types::CUdeviceptr, + ByteCount: usize, + ) -> cuda_types::CUresult; + /** \brief Copies device memory between two contexts + + Copies from device memory in one context to device memory in another + context. \p dstDevice is the base device pointer of the destination memory + and \p dstContext is the destination context. \p srcDevice is the base + device pointer of the source memory and \p srcContext is the source pointer. + \p ByteCount specifies the number of bytes to copy. + + \param dstDevice - Destination device pointer + \param dstContext - Destination context + \param srcDevice - Source device pointer + \param srcContext - Source context + \param ByteCount - Size of memory copy in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + + \sa ::cuMemcpyDtoD, ::cuMemcpy3DPeer, ::cuMemcpyDtoDAsync, ::cuMemcpyPeerAsync, + ::cuMemcpy3DPeerAsync, + ::cudaMemcpyPeer*/ + fn cuMemcpyPeer_ptds( + dstDevice: cuda_types::CUdeviceptr, + dstContext: cuda_types::CUcontext, + srcDevice: cuda_types::CUdeviceptr, + srcContext: cuda_types::CUcontext, + ByteCount: usize, + ) -> cuda_types::CUresult; + /** \brief Copies memory from Host to Device + + Copies from host memory to device memory. \p dstDevice and \p srcHost are + the base addresses of the destination and source, respectively. \p ByteCount + specifies the number of bytes to copy. + + \param dstDevice - Destination device pointer + \param srcHost - Source host pointer + \param ByteCount - Size of memory copy in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + \note_memcpy + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMemcpy, + ::cudaMemcpyToSymbol*/ + fn cuMemcpyHtoD_v2_ptds( + dstDevice: cuda_types::CUdeviceptr, + srcHost: *const ::core::ffi::c_void, + ByteCount: usize, + ) -> cuda_types::CUresult; + /** \brief Copies memory from Device to Host + + Copies from device to host memory. \p dstHost and \p srcDevice specify the + base pointers of the destination and source, respectively. \p ByteCount + specifies the number of bytes to copy. + + \param dstHost - Destination host pointer + \param srcDevice - Source device pointer + \param ByteCount - Size of memory copy in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + \note_memcpy + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMemcpy, + ::cudaMemcpyFromSymbol*/ + fn cuMemcpyDtoH_v2_ptds( + dstHost: *mut ::core::ffi::c_void, + srcDevice: cuda_types::CUdeviceptr, + ByteCount: usize, + ) -> cuda_types::CUresult; + /** \brief Copies memory from Device to Device + + Copies from device memory to device memory. \p dstDevice and \p srcDevice + are the base pointers of the destination and source, respectively. + \p ByteCount specifies the number of bytes to copy. + + \param dstDevice - Destination device pointer + \param srcDevice - Source device pointer + \param ByteCount - Size of memory copy in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMemcpy, + ::cudaMemcpyToSymbol, + ::cudaMemcpyFromSymbol*/ + fn cuMemcpyDtoD_v2_ptds( + dstDevice: cuda_types::CUdeviceptr, + srcDevice: cuda_types::CUdeviceptr, + ByteCount: usize, + ) -> cuda_types::CUresult; + /** \brief Copies memory from Device to Array + + Copies from device memory to a 1D CUDA array. \p dstArray and \p dstOffset + specify the CUDA array handle and starting index of the destination data. + \p srcDevice specifies the base pointer of the source. \p ByteCount + specifies the number of bytes to copy. + + \param dstArray - Destination array + \param dstOffset - Offset in bytes of destination array + \param srcDevice - Source device pointer + \param ByteCount - Size of memory copy in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMemcpyToArray*/ + fn cuMemcpyDtoA_v2_ptds( + dstArray: cuda_types::CUarray, + dstOffset: usize, + srcDevice: cuda_types::CUdeviceptr, + ByteCount: usize, + ) -> cuda_types::CUresult; + /** \brief Copies memory from Array to Device + + Copies from one 1D CUDA array to device memory. \p dstDevice specifies the + base pointer of the destination and must be naturally aligned with the CUDA + array elements. \p srcArray and \p srcOffset specify the CUDA array handle + and the offset in bytes into the array where the copy is to begin. + \p ByteCount specifies the number of bytes to copy and must be evenly + divisible by the array element size. + + \param dstDevice - Destination device pointer + \param srcArray - Source array + \param srcOffset - Offset in bytes of source array + \param ByteCount - Size of memory copy in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMemcpyFromArray*/ + fn cuMemcpyAtoD_v2_ptds( + dstDevice: cuda_types::CUdeviceptr, + srcArray: cuda_types::CUarray, + srcOffset: usize, + ByteCount: usize, + ) -> cuda_types::CUresult; + /** \brief Copies memory from Host to Array + + Copies from host memory to a 1D CUDA array. \p dstArray and \p dstOffset + specify the CUDA array handle and starting offset in bytes of the destination + data. \p pSrc specifies the base address of the source. \p ByteCount specifies + the number of bytes to copy. + + \param dstArray - Destination array + \param dstOffset - Offset in bytes of destination array + \param srcHost - Source host pointer + \param ByteCount - Size of memory copy in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + \note_memcpy + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMemcpyToArray*/ + fn cuMemcpyHtoA_v2_ptds( + dstArray: cuda_types::CUarray, + dstOffset: usize, + srcHost: *const ::core::ffi::c_void, + ByteCount: usize, + ) -> cuda_types::CUresult; + /** \brief Copies memory from Array to Host + + Copies from one 1D CUDA array to host memory. \p dstHost specifies the base + pointer of the destination. \p srcArray and \p srcOffset specify the CUDA + array handle and starting offset in bytes of the source data. + \p ByteCount specifies the number of bytes to copy. + + \param dstHost - Destination device pointer + \param srcArray - Source array + \param srcOffset - Offset in bytes of source array + \param ByteCount - Size of memory copy in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + \note_memcpy + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMemcpyFromArray*/ + fn cuMemcpyAtoH_v2_ptds( + dstHost: *mut ::core::ffi::c_void, + srcArray: cuda_types::CUarray, + srcOffset: usize, + ByteCount: usize, + ) -> cuda_types::CUresult; + /** \brief Copies memory from Array to Array + + Copies from one 1D CUDA array to another. \p dstArray and \p srcArray + specify the handles of the destination and source CUDA arrays for the copy, + respectively. \p dstOffset and \p srcOffset specify the destination and + source offsets in bytes into the CUDA arrays. \p ByteCount is the number of + bytes to be copied. The size of the elements in the CUDA arrays need not be + the same format, but the elements must be the same size; and count must be + evenly divisible by that size. + + \param dstArray - Destination array + \param dstOffset - Offset in bytes of destination array + \param srcArray - Source array + \param srcOffset - Offset in bytes of source array + \param ByteCount - Size of memory copy in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMemcpyArrayToArray*/ + fn cuMemcpyAtoA_v2_ptds( + dstArray: cuda_types::CUarray, + dstOffset: usize, + srcArray: cuda_types::CUarray, + srcOffset: usize, + ByteCount: usize, + ) -> cuda_types::CUresult; + /** \brief Copies memory for 2D arrays + + Perform a 2D memory copy according to the parameters specified in \p pCopy. + The ::CUDA_MEMCPY2D structure is defined as: + + \code +typedef struct CUDA_MEMCPY2D_st { +unsigned int srcXInBytes, srcY; +CUmemorytype srcMemoryType; +const void *srcHost; +CUdeviceptr srcDevice; +CUarray srcArray; +unsigned int srcPitch; + +unsigned int dstXInBytes, dstY; +CUmemorytype dstMemoryType; +void *dstHost; +CUdeviceptr dstDevice; +CUarray dstArray; +unsigned int dstPitch; + +unsigned int WidthInBytes; +unsigned int Height; +} CUDA_MEMCPY2D; + \endcode + where: + - ::srcMemoryType and ::dstMemoryType specify the type of memory of the + source and destination, respectively; ::CUmemorytype_enum is defined as: + + \code +typedef enum CUmemorytype_enum { +CU_MEMORYTYPE_HOST = 0x01, +CU_MEMORYTYPE_DEVICE = 0x02, +CU_MEMORYTYPE_ARRAY = 0x03, +CU_MEMORYTYPE_UNIFIED = 0x04 +} CUmemorytype; + \endcode + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch + specify the (unified virtual address space) base address of the source data + and the bytes per row to apply. ::srcArray is ignored. + This value may be used only if unified addressing is supported in the calling + context. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch + specify the (host) base address of the source data and the bytes per row to + apply. ::srcArray is ignored. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch + specify the (device) base address of the source data and the bytes per row + to apply. ::srcArray is ignored. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the + handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are + ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch + specify the (host) base address of the destination data and the bytes per + row to apply. ::dstArray is ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch + specify the (unified virtual address space) base address of the source data + and the bytes per row to apply. ::dstArray is ignored. + This value may be used only if unified addressing is supported in the calling + context. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch + specify the (device) base address of the destination data and the bytes per + row to apply. ::dstArray is ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the + handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are + ignored. + + - ::srcXInBytes and ::srcY specify the base address of the source data for + the copy. + + \par + For host pointers, the starting address is + \code +void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes); + \endcode + + \par + For device pointers, the starting address is + \code +CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes; + \endcode + + \par + For CUDA arrays, ::srcXInBytes must be evenly divisible by the array + element size. + + - ::dstXInBytes and ::dstY specify the base address of the destination data + for the copy. + + \par + For host pointers, the base address is + \code +void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes); + \endcode + + \par + For device pointers, the starting address is + \code +CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes; + \endcode + + \par + For CUDA arrays, ::dstXInBytes must be evenly divisible by the array + element size. + + - ::WidthInBytes and ::Height specify the width (in bytes) and height of + the 2D copy being performed. + - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes + + ::srcXInBytes, and ::dstPitch must be greater than or equal to + ::WidthInBytes + dstXInBytes. + + \par + ::cuMemcpy2D() returns an error if any pitch is greater than the maximum + allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back + pitches that always work with ::cuMemcpy2D(). On intra-device memory copies + (device to device, CUDA array to device, CUDA array to CUDA array), + ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch(). + ::cuMemcpy2DUnaligned() does not have this restriction, but may run + significantly slower in the cases where ::cuMemcpy2D() would have returned + an error code. + + \param pCopy - Parameters for the memory copy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMemcpy2D, + ::cudaMemcpy2DToArray, + ::cudaMemcpy2DFromArray*/ + fn cuMemcpy2D_v2_ptds( + pCopy: *const cuda_types::CUDA_MEMCPY2D, + ) -> cuda_types::CUresult; + /** \brief Copies memory for 2D arrays + + Perform a 2D memory copy according to the parameters specified in \p pCopy. + The ::CUDA_MEMCPY2D structure is defined as: + + \code +typedef struct CUDA_MEMCPY2D_st { +unsigned int srcXInBytes, srcY; +CUmemorytype srcMemoryType; +const void *srcHost; +CUdeviceptr srcDevice; +CUarray srcArray; +unsigned int srcPitch; +unsigned int dstXInBytes, dstY; +CUmemorytype dstMemoryType; +void *dstHost; +CUdeviceptr dstDevice; +CUarray dstArray; +unsigned int dstPitch; +unsigned int WidthInBytes; +unsigned int Height; +} CUDA_MEMCPY2D; + \endcode + where: + - ::srcMemoryType and ::dstMemoryType specify the type of memory of the + source and destination, respectively; ::CUmemorytype_enum is defined as: + + \code +typedef enum CUmemorytype_enum { +CU_MEMORYTYPE_HOST = 0x01, +CU_MEMORYTYPE_DEVICE = 0x02, +CU_MEMORYTYPE_ARRAY = 0x03, +CU_MEMORYTYPE_UNIFIED = 0x04 +} CUmemorytype; + \endcode + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch + specify the (unified virtual address space) base address of the source data + and the bytes per row to apply. ::srcArray is ignored. + This value may be used only if unified addressing is supported in the calling + context. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch + specify the (host) base address of the source data and the bytes per row to + apply. ::srcArray is ignored. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch + specify the (device) base address of the source data and the bytes per row + to apply. ::srcArray is ignored. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the + handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are + ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch + specify the (unified virtual address space) base address of the source data + and the bytes per row to apply. ::dstArray is ignored. + This value may be used only if unified addressing is supported in the calling + context. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch + specify the (host) base address of the destination data and the bytes per + row to apply. ::dstArray is ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch + specify the (device) base address of the destination data and the bytes per + row to apply. ::dstArray is ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the + handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are + ignored. + + - ::srcXInBytes and ::srcY specify the base address of the source data for + the copy. + + \par + For host pointers, the starting address is + \code +void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes); + \endcode + + \par + For device pointers, the starting address is + \code +CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes; + \endcode + + \par + For CUDA arrays, ::srcXInBytes must be evenly divisible by the array + element size. + + - ::dstXInBytes and ::dstY specify the base address of the destination data + for the copy. + + \par + For host pointers, the base address is + \code +void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes); + \endcode + + \par + For device pointers, the starting address is + \code +CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes; + \endcode + + \par + For CUDA arrays, ::dstXInBytes must be evenly divisible by the array + element size. + + - ::WidthInBytes and ::Height specify the width (in bytes) and height of + the 2D copy being performed. + - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes + + ::srcXInBytes, and ::dstPitch must be greater than or equal to + ::WidthInBytes + dstXInBytes. + + \par + ::cuMemcpy2D() returns an error if any pitch is greater than the maximum + allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back + pitches that always work with ::cuMemcpy2D(). On intra-device memory copies + (device to device, CUDA array to device, CUDA array to CUDA array), + ::cuMemcpy2D() may fail for pitches not computed by ::cuMemAllocPitch(). + ::cuMemcpy2DUnaligned() does not have this restriction, but may run + significantly slower in the cases where ::cuMemcpy2D() would have returned + an error code. + + \param pCopy - Parameters for the memory copy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMemcpy2D, + ::cudaMemcpy2DToArray, + ::cudaMemcpy2DFromArray*/ + fn cuMemcpy2DUnaligned_v2_ptds( + pCopy: *const cuda_types::CUDA_MEMCPY2D, + ) -> cuda_types::CUresult; + /** \brief Copies memory for 3D arrays + + Perform a 3D memory copy according to the parameters specified in + \p pCopy. The ::CUDA_MEMCPY3D structure is defined as: + + \code +typedef struct CUDA_MEMCPY3D_st { + +unsigned int srcXInBytes, srcY, srcZ; +unsigned int srcLOD; +CUmemorytype srcMemoryType; +const void *srcHost; +CUdeviceptr srcDevice; +CUarray srcArray; +unsigned int srcPitch; // ignored when src is array +unsigned int srcHeight; // ignored when src is array; may be 0 if Depth==1 + +unsigned int dstXInBytes, dstY, dstZ; +unsigned int dstLOD; +CUmemorytype dstMemoryType; +void *dstHost; +CUdeviceptr dstDevice; +CUarray dstArray; +unsigned int dstPitch; // ignored when dst is array +unsigned int dstHeight; // ignored when dst is array; may be 0 if Depth==1 + +unsigned int WidthInBytes; +unsigned int Height; +unsigned int Depth; +} CUDA_MEMCPY3D; + \endcode + where: + - ::srcMemoryType and ::dstMemoryType specify the type of memory of the + source and destination, respectively; ::CUmemorytype_enum is defined as: + + \code +typedef enum CUmemorytype_enum { +CU_MEMORYTYPE_HOST = 0x01, +CU_MEMORYTYPE_DEVICE = 0x02, +CU_MEMORYTYPE_ARRAY = 0x03, +CU_MEMORYTYPE_UNIFIED = 0x04 +} CUmemorytype; + \endcode + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch + specify the (unified virtual address space) base address of the source data + and the bytes per row to apply. ::srcArray is ignored. + This value may be used only if unified addressing is supported in the calling + context. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost, ::srcPitch and + ::srcHeight specify the (host) base address of the source data, the bytes + per row, and the height of each 2D slice of the 3D array. ::srcArray is + ignored. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice, ::srcPitch and + ::srcHeight specify the (device) base address of the source data, the bytes + per row, and the height of each 2D slice of the 3D array. ::srcArray is + ignored. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the + handle of the source data. ::srcHost, ::srcDevice, ::srcPitch and + ::srcHeight are ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch + specify the (unified virtual address space) base address of the source data + and the bytes per row to apply. ::dstArray is ignored. + This value may be used only if unified addressing is supported in the calling + context. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch + specify the (host) base address of the destination data, the bytes per row, + and the height of each 2D slice of the 3D array. ::dstArray is ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch + specify the (device) base address of the destination data, the bytes per + row, and the height of each 2D slice of the 3D array. ::dstArray is ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the + handle of the destination data. ::dstHost, ::dstDevice, ::dstPitch and + ::dstHeight are ignored. + + - ::srcXInBytes, ::srcY and ::srcZ specify the base address of the source + data for the copy. + + \par + For host pointers, the starting address is + \code +void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes); + \endcode + + \par + For device pointers, the starting address is + \code +CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes; + \endcode + + \par + For CUDA arrays, ::srcXInBytes must be evenly divisible by the array + element size. + + - dstXInBytes, ::dstY and ::dstZ specify the base address of the + destination data for the copy. + + \par + For host pointers, the base address is + \code +void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes); + \endcode + + \par + For device pointers, the starting address is + \code +CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes; + \endcode + + \par + For CUDA arrays, ::dstXInBytes must be evenly divisible by the array + element size. + + - ::WidthInBytes, ::Height and ::Depth specify the width (in bytes), height + and depth of the 3D copy being performed. + - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes + + ::srcXInBytes, and ::dstPitch must be greater than or equal to + ::WidthInBytes + dstXInBytes. + - If specified, ::srcHeight must be greater than or equal to ::Height + + ::srcY, and ::dstHeight must be greater than or equal to ::Height + ::dstY. + + \par + ::cuMemcpy3D() returns an error if any pitch is greater than the maximum + allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). + + The ::srcLOD and ::dstLOD members of the ::CUDA_MEMCPY3D structure must be + set to 0. + + \param pCopy - Parameters for the memory copy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMemcpy3D*/ + fn cuMemcpy3D_v2_ptds( + pCopy: *const cuda_types::CUDA_MEMCPY3D, + ) -> cuda_types::CUresult; + /** \brief Copies memory between contexts + + Perform a 3D memory copy according to the parameters specified in + \p pCopy. See the definition of the ::CUDA_MEMCPY3D_PEER structure + for documentation of its parameters. + + \param pCopy - Parameters for the memory copy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_sync + + \sa ::cuMemcpyDtoD, ::cuMemcpyPeer, ::cuMemcpyDtoDAsync, ::cuMemcpyPeerAsync, + ::cuMemcpy3DPeerAsync, + ::cudaMemcpy3DPeer*/ + fn cuMemcpy3DPeer_ptds( + pCopy: *const cuda_types::CUDA_MEMCPY3D_PEER, + ) -> cuda_types::CUresult; + /** \brief Copies memory asynchronously + + Copies data between two pointers. + \p dst and \p src are base pointers of the destination and source, respectively. + \p ByteCount specifies the number of bytes to copy. + Note that this function infers the type of the transfer (host to host, host to + device, device to device, or device to host) from the pointer values. This + function is only allowed in contexts which support unified addressing. + + \param dst - Destination unified virtual address space pointer + \param src - Source unified virtual address space pointer + \param ByteCount - Size of memory copy in bytes + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + \note_async + \note_null_stream + \note_memcpy + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemcpyAsync, + ::cudaMemcpyToSymbolAsync, + ::cudaMemcpyFromSymbolAsync*/ + fn cuMemcpyAsync_ptsz( + dst: cuda_types::CUdeviceptr, + src: cuda_types::CUdeviceptr, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Copies device memory between two contexts asynchronously. + + Copies from device memory in one context to device memory in another + context. \p dstDevice is the base device pointer of the destination memory + and \p dstContext is the destination context. \p srcDevice is the base + device pointer of the source memory and \p srcContext is the source pointer. + \p ByteCount specifies the number of bytes to copy. + + \param dstDevice - Destination device pointer + \param dstContext - Destination context + \param srcDevice - Source device pointer + \param srcContext - Source context + \param ByteCount - Size of memory copy in bytes + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + \note_async + \note_null_stream + + \sa ::cuMemcpyDtoD, ::cuMemcpyPeer, ::cuMemcpy3DPeer, ::cuMemcpyDtoDAsync, + ::cuMemcpy3DPeerAsync, + ::cudaMemcpyPeerAsync*/ + fn cuMemcpyPeerAsync_ptsz( + dstDevice: cuda_types::CUdeviceptr, + dstContext: cuda_types::CUcontext, + srcDevice: cuda_types::CUdeviceptr, + srcContext: cuda_types::CUcontext, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Copies memory from Host to Device + + Copies from host memory to device memory. \p dstDevice and \p srcHost are + the base addresses of the destination and source, respectively. \p ByteCount + specifies the number of bytes to copy. + + \param dstDevice - Destination device pointer + \param srcHost - Source host pointer + \param ByteCount - Size of memory copy in bytes + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + \note_async + \note_null_stream + \note_memcpy + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemcpyAsync, + ::cudaMemcpyToSymbolAsync*/ + fn cuMemcpyHtoDAsync_v2_ptsz( + dstDevice: cuda_types::CUdeviceptr, + srcHost: *const ::core::ffi::c_void, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Copies memory from Device to Host + + Copies from device to host memory. \p dstHost and \p srcDevice specify the + base pointers of the destination and source, respectively. \p ByteCount + specifies the number of bytes to copy. + + \param dstHost - Destination host pointer + \param srcDevice - Source device pointer + \param ByteCount - Size of memory copy in bytes + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + \note_async + \note_null_stream + \note_memcpy + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemcpyAsync, + ::cudaMemcpyFromSymbolAsync*/ + fn cuMemcpyDtoHAsync_v2_ptsz( + dstHost: *mut ::core::ffi::c_void, + srcDevice: cuda_types::CUdeviceptr, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Copies memory from Device to Device + + Copies from device memory to device memory. \p dstDevice and \p srcDevice + are the base pointers of the destination and source, respectively. + \p ByteCount specifies the number of bytes to copy. + + \param dstDevice - Destination device pointer + \param srcDevice - Source device pointer + \param ByteCount - Size of memory copy in bytes + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + \note_async + \note_null_stream + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemcpyAsync, + ::cudaMemcpyToSymbolAsync, + ::cudaMemcpyFromSymbolAsync*/ + fn cuMemcpyDtoDAsync_v2_ptsz( + dstDevice: cuda_types::CUdeviceptr, + srcDevice: cuda_types::CUdeviceptr, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Copies memory from Host to Array + + Copies from host memory to a 1D CUDA array. \p dstArray and \p dstOffset + specify the CUDA array handle and starting offset in bytes of the + destination data. \p srcHost specifies the base address of the source. + \p ByteCount specifies the number of bytes to copy. + + \param dstArray - Destination array + \param dstOffset - Offset in bytes of destination array + \param srcHost - Source host pointer + \param ByteCount - Size of memory copy in bytes + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + \note_async + \note_null_stream + \note_memcpy + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemcpyToArrayAsync*/ + fn cuMemcpyHtoAAsync_v2_ptsz( + dstArray: cuda_types::CUarray, + dstOffset: usize, + srcHost: *const ::core::ffi::c_void, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Copies memory from Array to Host + + Copies from one 1D CUDA array to host memory. \p dstHost specifies the base + pointer of the destination. \p srcArray and \p srcOffset specify the CUDA + array handle and starting offset in bytes of the source data. + \p ByteCount specifies the number of bytes to copy. + + \param dstHost - Destination pointer + \param srcArray - Source array + \param srcOffset - Offset in bytes of source array + \param ByteCount - Size of memory copy in bytes + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + \note_async + \note_null_stream + \note_memcpy + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemcpyFromArrayAsync*/ + fn cuMemcpyAtoHAsync_v2_ptsz( + dstHost: *mut ::core::ffi::c_void, + srcArray: cuda_types::CUarray, + srcOffset: usize, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Copies memory for 2D arrays + + Perform a 2D memory copy according to the parameters specified in \p pCopy. + The ::CUDA_MEMCPY2D structure is defined as: + + \code +typedef struct CUDA_MEMCPY2D_st { +unsigned int srcXInBytes, srcY; +CUmemorytype srcMemoryType; +const void *srcHost; +CUdeviceptr srcDevice; +CUarray srcArray; +unsigned int srcPitch; +unsigned int dstXInBytes, dstY; +CUmemorytype dstMemoryType; +void *dstHost; +CUdeviceptr dstDevice; +CUarray dstArray; +unsigned int dstPitch; +unsigned int WidthInBytes; +unsigned int Height; +} CUDA_MEMCPY2D; + \endcode + where: + - ::srcMemoryType and ::dstMemoryType specify the type of memory of the + source and destination, respectively; ::CUmemorytype_enum is defined as: + + \code +typedef enum CUmemorytype_enum { +CU_MEMORYTYPE_HOST = 0x01, +CU_MEMORYTYPE_DEVICE = 0x02, +CU_MEMORYTYPE_ARRAY = 0x03, +CU_MEMORYTYPE_UNIFIED = 0x04 +} CUmemorytype; + \endcode + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost and ::srcPitch + specify the (host) base address of the source data and the bytes per row to + apply. ::srcArray is ignored. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch + specify the (unified virtual address space) base address of the source data + and the bytes per row to apply. ::srcArray is ignored. + This value may be used only if unified addressing is supported in the calling + context. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice and ::srcPitch + specify the (device) base address of the source data and the bytes per row + to apply. ::srcArray is ignored. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the + handle of the source data. ::srcHost, ::srcDevice and ::srcPitch are + ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch + specify the (unified virtual address space) base address of the source data + and the bytes per row to apply. ::dstArray is ignored. + This value may be used only if unified addressing is supported in the calling + context. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch + specify the (host) base address of the destination data and the bytes per + row to apply. ::dstArray is ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch + specify the (device) base address of the destination data and the bytes per + row to apply. ::dstArray is ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the + handle of the destination data. ::dstHost, ::dstDevice and ::dstPitch are + ignored. + + - ::srcXInBytes and ::srcY specify the base address of the source data for + the copy. + + \par + For host pointers, the starting address is + \code +void* Start = (void*)((char*)srcHost+srcY*srcPitch + srcXInBytes); + \endcode + + \par + For device pointers, the starting address is + \code +CUdeviceptr Start = srcDevice+srcY*srcPitch+srcXInBytes; + \endcode + + \par + For CUDA arrays, ::srcXInBytes must be evenly divisible by the array + element size. + + - ::dstXInBytes and ::dstY specify the base address of the destination data + for the copy. + + \par + For host pointers, the base address is + \code +void* dstStart = (void*)((char*)dstHost+dstY*dstPitch + dstXInBytes); + \endcode + + \par + For device pointers, the starting address is + \code +CUdeviceptr dstStart = dstDevice+dstY*dstPitch+dstXInBytes; + \endcode + + \par + For CUDA arrays, ::dstXInBytes must be evenly divisible by the array + element size. + + - ::WidthInBytes and ::Height specify the width (in bytes) and height of + the 2D copy being performed. + - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes + + ::srcXInBytes, and ::dstPitch must be greater than or equal to + ::WidthInBytes + dstXInBytes. + - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes + + ::srcXInBytes, and ::dstPitch must be greater than or equal to + ::WidthInBytes + dstXInBytes. + - If specified, ::srcHeight must be greater than or equal to ::Height + + ::srcY, and ::dstHeight must be greater than or equal to ::Height + ::dstY. + + \par + ::cuMemcpy2DAsync() returns an error if any pitch is greater than the maximum + allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). ::cuMemAllocPitch() passes back + pitches that always work with ::cuMemcpy2D(). On intra-device memory copies + (device to device, CUDA array to device, CUDA array to CUDA array), + ::cuMemcpy2DAsync() may fail for pitches not computed by ::cuMemAllocPitch(). + + \param pCopy - Parameters for the memory copy + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + \note_async + \note_null_stream + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemcpy2DAsync, + ::cudaMemcpy2DToArrayAsync, + ::cudaMemcpy2DFromArrayAsync*/ + fn cuMemcpy2DAsync_v2_ptsz( + pCopy: *const cuda_types::CUDA_MEMCPY2D, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Copies memory for 3D arrays + + Perform a 3D memory copy according to the parameters specified in + \p pCopy. The ::CUDA_MEMCPY3D structure is defined as: + + \code +typedef struct CUDA_MEMCPY3D_st { + +unsigned int srcXInBytes, srcY, srcZ; +unsigned int srcLOD; +CUmemorytype srcMemoryType; +const void *srcHost; +CUdeviceptr srcDevice; +CUarray srcArray; +unsigned int srcPitch; // ignored when src is array +unsigned int srcHeight; // ignored when src is array; may be 0 if Depth==1 + +unsigned int dstXInBytes, dstY, dstZ; +unsigned int dstLOD; +CUmemorytype dstMemoryType; +void *dstHost; +CUdeviceptr dstDevice; +CUarray dstArray; +unsigned int dstPitch; // ignored when dst is array +unsigned int dstHeight; // ignored when dst is array; may be 0 if Depth==1 + +unsigned int WidthInBytes; +unsigned int Height; +unsigned int Depth; +} CUDA_MEMCPY3D; + \endcode + where: + - ::srcMemoryType and ::dstMemoryType specify the type of memory of the + source and destination, respectively; ::CUmemorytype_enum is defined as: + + \code +typedef enum CUmemorytype_enum { +CU_MEMORYTYPE_HOST = 0x01, +CU_MEMORYTYPE_DEVICE = 0x02, +CU_MEMORYTYPE_ARRAY = 0x03, +CU_MEMORYTYPE_UNIFIED = 0x04 +} CUmemorytype; + \endcode + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::srcDevice and ::srcPitch + specify the (unified virtual address space) base address of the source data + and the bytes per row to apply. ::srcArray is ignored. + This value may be used only if unified addressing is supported in the calling + context. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_HOST, ::srcHost, ::srcPitch and + ::srcHeight specify the (host) base address of the source data, the bytes + per row, and the height of each 2D slice of the 3D array. ::srcArray is + ignored. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_DEVICE, ::srcDevice, ::srcPitch and + ::srcHeight specify the (device) base address of the source data, the bytes + per row, and the height of each 2D slice of the 3D array. ::srcArray is + ignored. + + \par + If ::srcMemoryType is ::CU_MEMORYTYPE_ARRAY, ::srcArray specifies the + handle of the source data. ::srcHost, ::srcDevice, ::srcPitch and + ::srcHeight are ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_UNIFIED, ::dstDevice and ::dstPitch + specify the (unified virtual address space) base address of the source data + and the bytes per row to apply. ::dstArray is ignored. + This value may be used only if unified addressing is supported in the calling + context. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_HOST, ::dstHost and ::dstPitch + specify the (host) base address of the destination data, the bytes per row, + and the height of each 2D slice of the 3D array. ::dstArray is ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_DEVICE, ::dstDevice and ::dstPitch + specify the (device) base address of the destination data, the bytes per + row, and the height of each 2D slice of the 3D array. ::dstArray is ignored. + + \par + If ::dstMemoryType is ::CU_MEMORYTYPE_ARRAY, ::dstArray specifies the + handle of the destination data. ::dstHost, ::dstDevice, ::dstPitch and + ::dstHeight are ignored. + + - ::srcXInBytes, ::srcY and ::srcZ specify the base address of the source + data for the copy. + + \par + For host pointers, the starting address is + \code +void* Start = (void*)((char*)srcHost+(srcZ*srcHeight+srcY)*srcPitch + srcXInBytes); + \endcode + + \par + For device pointers, the starting address is + \code +CUdeviceptr Start = srcDevice+(srcZ*srcHeight+srcY)*srcPitch+srcXInBytes; + \endcode + + \par + For CUDA arrays, ::srcXInBytes must be evenly divisible by the array + element size. + + - dstXInBytes, ::dstY and ::dstZ specify the base address of the + destination data for the copy. + + \par + For host pointers, the base address is + \code +void* dstStart = (void*)((char*)dstHost+(dstZ*dstHeight+dstY)*dstPitch + dstXInBytes); + \endcode + + \par + For device pointers, the starting address is + \code +CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes; + \endcode + + \par + For CUDA arrays, ::dstXInBytes must be evenly divisible by the array + element size. + + - ::WidthInBytes, ::Height and ::Depth specify the width (in bytes), height + and depth of the 3D copy being performed. + - If specified, ::srcPitch must be greater than or equal to ::WidthInBytes + + ::srcXInBytes, and ::dstPitch must be greater than or equal to + ::WidthInBytes + dstXInBytes. + - If specified, ::srcHeight must be greater than or equal to ::Height + + ::srcY, and ::dstHeight must be greater than or equal to ::Height + ::dstY. + + \par + ::cuMemcpy3DAsync() returns an error if any pitch is greater than the maximum + allowed (::CU_DEVICE_ATTRIBUTE_MAX_PITCH). + + The ::srcLOD and ::dstLOD members of the ::CUDA_MEMCPY3D structure must be + set to 0. + + \param pCopy - Parameters for the memory copy + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + \note_async + \note_null_stream + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemcpy3DAsync*/ + fn cuMemcpy3DAsync_v2_ptsz( + pCopy: *const cuda_types::CUDA_MEMCPY3D, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Copies memory between contexts asynchronously. + + Perform a 3D memory copy according to the parameters specified in + \p pCopy. See the definition of the ::CUDA_MEMCPY3D_PEER structure + for documentation of its parameters. + + \param pCopy - Parameters for the memory copy + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_async + \note_null_stream + + \sa ::cuMemcpyDtoD, ::cuMemcpyPeer, ::cuMemcpyDtoDAsync, ::cuMemcpyPeerAsync, + ::cuMemcpy3DPeerAsync, + ::cudaMemcpy3DPeerAsync*/ + fn cuMemcpy3DPeerAsync_ptsz( + pCopy: *const cuda_types::CUDA_MEMCPY3D_PEER, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Initializes device memory + + Sets the memory range of \p N 8-bit values to the specified value + \p uc. + + \param dstDevice - Destination device pointer + \param uc - Value to set + \param N - Number of elements + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_memset + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemset*/ + fn cuMemsetD8_v2_ptds( + dstDevice: cuda_types::CUdeviceptr, + uc: ::core::ffi::c_uchar, + N: usize, + ) -> cuda_types::CUresult; + /** \brief Initializes device memory + + Sets the memory range of \p N 16-bit values to the specified value + \p us. The \p dstDevice pointer must be two byte aligned. + + \param dstDevice - Destination device pointer + \param us - Value to set + \param N - Number of elements + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_memset + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemset*/ + fn cuMemsetD16_v2_ptds( + dstDevice: cuda_types::CUdeviceptr, + us: ::core::ffi::c_ushort, + N: usize, + ) -> cuda_types::CUresult; + /** \brief Initializes device memory + + Sets the memory range of \p N 32-bit values to the specified value + \p ui. The \p dstDevice pointer must be four byte aligned. + + \param dstDevice - Destination device pointer + \param ui - Value to set + \param N - Number of elements + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_memset + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32Async, + ::cudaMemset*/ + fn cuMemsetD32_v2_ptds( + dstDevice: cuda_types::CUdeviceptr, + ui: ::core::ffi::c_uint, + N: usize, + ) -> cuda_types::CUresult; + /** \brief Initializes device memory + + Sets the 2D memory range of \p Width 8-bit values to the specified value + \p uc. \p Height specifies the number of rows to set, and \p dstPitch + specifies the number of bytes between each row. This function performs + fastest when the pitch is one that has been passed back by + ::cuMemAllocPitch(). + + \param dstDevice - Destination device pointer + \param dstPitch - Pitch of destination device pointer(Unused if \p Height is 1) + \param uc - Value to set + \param Width - Width of row + \param Height - Number of rows + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_memset + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemset2D*/ + fn cuMemsetD2D8_v2_ptds( + dstDevice: cuda_types::CUdeviceptr, + dstPitch: usize, + uc: ::core::ffi::c_uchar, + Width: usize, + Height: usize, + ) -> cuda_types::CUresult; + /** \brief Initializes device memory + + Sets the 2D memory range of \p Width 16-bit values to the specified value + \p us. \p Height specifies the number of rows to set, and \p dstPitch + specifies the number of bytes between each row. The \p dstDevice pointer + and \p dstPitch offset must be two byte aligned. This function performs + fastest when the pitch is one that has been passed back by + ::cuMemAllocPitch(). + + \param dstDevice - Destination device pointer + \param dstPitch - Pitch of destination device pointer(Unused if \p Height is 1) + \param us - Value to set + \param Width - Width of row + \param Height - Number of rows + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_memset + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemset2D*/ + fn cuMemsetD2D16_v2_ptds( + dstDevice: cuda_types::CUdeviceptr, + dstPitch: usize, + us: ::core::ffi::c_ushort, + Width: usize, + Height: usize, + ) -> cuda_types::CUresult; + /** \brief Initializes device memory + + Sets the 2D memory range of \p Width 32-bit values to the specified value + \p ui. \p Height specifies the number of rows to set, and \p dstPitch + specifies the number of bytes between each row. The \p dstDevice pointer + and \p dstPitch offset must be four byte aligned. This function performs + fastest when the pitch is one that has been passed back by + ::cuMemAllocPitch(). + + \param dstDevice - Destination device pointer + \param dstPitch - Pitch of destination device pointer(Unused if \p Height is 1) + \param ui - Value to set + \param Width - Width of row + \param Height - Number of rows + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_memset + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemset2D*/ + fn cuMemsetD2D32_v2_ptds( + dstDevice: cuda_types::CUdeviceptr, + dstPitch: usize, + ui: ::core::ffi::c_uint, + Width: usize, + Height: usize, + ) -> cuda_types::CUresult; + /** \brief Sets device memory + + Sets the memory range of \p N 8-bit values to the specified value + \p uc. + + \param dstDevice - Destination device pointer + \param uc - Value to set + \param N - Number of elements + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_memset + \note_null_stream + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemsetAsync*/ + fn cuMemsetD8Async_ptsz( + dstDevice: cuda_types::CUdeviceptr, + uc: ::core::ffi::c_uchar, + N: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Sets device memory + + Sets the memory range of \p N 16-bit values to the specified value + \p us. The \p dstDevice pointer must be two byte aligned. + + \param dstDevice - Destination device pointer + \param us - Value to set + \param N - Number of elements + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_memset + \note_null_stream + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemsetAsync*/ + fn cuMemsetD16Async_ptsz( + dstDevice: cuda_types::CUdeviceptr, + us: ::core::ffi::c_ushort, + N: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Sets device memory + + Sets the memory range of \p N 32-bit values to the specified value + \p ui. The \p dstDevice pointer must be four byte aligned. + + \param dstDevice - Destination device pointer + \param ui - Value to set + \param N - Number of elements + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_memset + \note_null_stream + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, ::cuMemsetD32, + ::cudaMemsetAsync*/ + fn cuMemsetD32Async_ptsz( + dstDevice: cuda_types::CUdeviceptr, + ui: ::core::ffi::c_uint, + N: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Sets device memory + + Sets the 2D memory range of \p Width 8-bit values to the specified value + \p uc. \p Height specifies the number of rows to set, and \p dstPitch + specifies the number of bytes between each row. This function performs + fastest when the pitch is one that has been passed back by + ::cuMemAllocPitch(). + + \param dstDevice - Destination device pointer + \param dstPitch - Pitch of destination device pointer(Unused if \p Height is 1) + \param uc - Value to set + \param Width - Width of row + \param Height - Number of rows + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_memset + \note_null_stream + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemset2DAsync*/ + fn cuMemsetD2D8Async_ptsz( + dstDevice: cuda_types::CUdeviceptr, + dstPitch: usize, + uc: ::core::ffi::c_uchar, + Width: usize, + Height: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Sets device memory + + Sets the 2D memory range of \p Width 16-bit values to the specified value + \p us. \p Height specifies the number of rows to set, and \p dstPitch + specifies the number of bytes between each row. The \p dstDevice pointer + and \p dstPitch offset must be two byte aligned. This function performs + fastest when the pitch is one that has been passed back by + ::cuMemAllocPitch(). + + \param dstDevice - Destination device pointer + \param dstPitch - Pitch of destination device pointer(Unused if \p Height is 1) + \param us - Value to set + \param Width - Width of row + \param Height - Number of rows + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_memset + \note_null_stream + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D32, ::cuMemsetD2D32Async, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemset2DAsync*/ + fn cuMemsetD2D16Async_ptsz( + dstDevice: cuda_types::CUdeviceptr, + dstPitch: usize, + us: ::core::ffi::c_ushort, + Width: usize, + Height: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Sets device memory + + Sets the 2D memory range of \p Width 32-bit values to the specified value + \p ui. \p Height specifies the number of rows to set, and \p dstPitch + specifies the number of bytes between each row. The \p dstDevice pointer + and \p dstPitch offset must be four byte aligned. This function performs + fastest when the pitch is one that has been passed back by + ::cuMemAllocPitch(). + + \param dstDevice - Destination device pointer + \param dstPitch - Pitch of destination device pointer(Unused if \p Height is 1) + \param ui - Value to set + \param Width - Width of row + \param Height - Number of rows + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_memset + \note_null_stream + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D8Async, + ::cuMemsetD2D16, ::cuMemsetD2D16Async, ::cuMemsetD2D32, + ::cuMemsetD8, ::cuMemsetD8Async, ::cuMemsetD16, ::cuMemsetD16Async, + ::cuMemsetD32, ::cuMemsetD32Async, + ::cudaMemset2DAsync*/ + fn cuMemsetD2D32Async_ptsz( + dstDevice: cuda_types::CUdeviceptr, + dstPitch: usize, + ui: ::core::ffi::c_uint, + Width: usize, + Height: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Creates a 1D or 2D CUDA array + + Creates a CUDA array according to the ::CUDA_ARRAY_DESCRIPTOR structure + \p pAllocateArray and returns a handle to the new CUDA array in \p *pHandle. + The ::CUDA_ARRAY_DESCRIPTOR is defined as: + + \code +typedef struct { +unsigned int Width; +unsigned int Height; +CUarray_format Format; +unsigned int NumChannels; +} CUDA_ARRAY_DESCRIPTOR; + \endcode + where: + + - \p Width, and \p Height are the width, and height of the CUDA array (in + elements); the CUDA array is one-dimensional if height is 0, two-dimensional + otherwise; + - ::Format specifies the format of the elements; ::CUarray_format is + defined as: + \code +typedef enum CUarray_format_enum { +CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, +CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, +CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, +CU_AD_FORMAT_SIGNED_INT8 = 0x08, +CU_AD_FORMAT_SIGNED_INT16 = 0x09, +CU_AD_FORMAT_SIGNED_INT32 = 0x0a, +CU_AD_FORMAT_HALF = 0x10, +CU_AD_FORMAT_FLOAT = 0x20 +} CUarray_format; + \endcode + - \p NumChannels specifies the number of packed components per CUDA array + element; it may be 1, 2, or 4; + + Here are examples of CUDA array descriptions: + + Description for a CUDA array of 2048 floats: + \code +CUDA_ARRAY_DESCRIPTOR desc; +desc.Format = CU_AD_FORMAT_FLOAT; +desc.NumChannels = 1; +desc.Width = 2048; +desc.Height = 1; + \endcode + + Description for a 64 x 64 CUDA array of floats: + \code +CUDA_ARRAY_DESCRIPTOR desc; +desc.Format = CU_AD_FORMAT_FLOAT; +desc.NumChannels = 1; +desc.Width = 64; +desc.Height = 64; + \endcode + + Description for a \p width x \p height CUDA array of 64-bit, 4x16-bit + float16's: + \code +CUDA_ARRAY_DESCRIPTOR desc; +desc.Format = CU_AD_FORMAT_HALF; +desc.NumChannels = 4; +desc.Width = width; +desc.Height = height; + \endcode + + Description for a \p width x \p height CUDA array of 16-bit elements, each + of which is two 8-bit unsigned chars: + \code +CUDA_ARRAY_DESCRIPTOR arrayDesc; +desc.Format = CU_AD_FORMAT_UNSIGNED_INT8; +desc.NumChannels = 2; +desc.Width = width; +desc.Height = height; + \endcode + + \param pHandle - Returned array + \param pAllocateArray - Array descriptor + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMallocArray*/ + fn cuArrayCreate_v2( + pHandle: *mut cuda_types::CUarray, + pAllocateArray: *const cuda_types::CUDA_ARRAY_DESCRIPTOR, + ) -> cuda_types::CUresult; + /** \brief Get a 1D or 2D CUDA array descriptor + + Returns in \p *pArrayDescriptor a descriptor containing information on the + format and dimensions of the CUDA array \p hArray. It is useful for + subroutines that have been passed a CUDA array, but need to know the CUDA + array parameters for validation or other purposes. + + \param pArrayDescriptor - Returned array descriptor + \param hArray - Array to get descriptor of + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaArrayGetInfo*/ + fn cuArrayGetDescriptor_v2( + pArrayDescriptor: *mut cuda_types::CUDA_ARRAY_DESCRIPTOR, + hArray: cuda_types::CUarray, + ) -> cuda_types::CUresult; + /** \brief Returns the layout properties of a sparse CUDA array + + Returns the layout properties of a sparse CUDA array in \p sparseProperties + If the CUDA array is not allocated with flag ::CUDA_ARRAY3D_SPARSE + ::CUDA_ERROR_INVALID_VALUE will be returned. + + If the returned value in ::CUDA_ARRAY_SPARSE_PROPERTIES::flags contains ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL, + then ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize represents the total size of the array. Otherwise, it will be zero. + Also, the returned value in ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel is always zero. + Note that the \p array must have been allocated using ::cuArrayCreate or ::cuArray3DCreate. For CUDA arrays obtained + using ::cuMipmappedArrayGetLevel, ::CUDA_ERROR_INVALID_VALUE will be returned. Instead, ::cuMipmappedArrayGetSparseProperties + must be used to obtain the sparse properties of the entire CUDA mipmapped array to which \p array belongs to. + + \return + ::CUDA_SUCCESS + ::CUDA_ERROR_INVALID_VALUE + + \param[out] sparseProperties - Pointer to ::CUDA_ARRAY_SPARSE_PROPERTIES + \param[in] array - CUDA array to get the sparse properties of + \sa ::cuMipmappedArrayGetSparseProperties, ::cuMemMapArrayAsync*/ + fn cuArrayGetSparseProperties( + sparseProperties: *mut cuda_types::CUDA_ARRAY_SPARSE_PROPERTIES, + array: cuda_types::CUarray, + ) -> cuda_types::CUresult; + /** \brief Returns the layout properties of a sparse CUDA mipmapped array + + Returns the sparse array layout properties in \p sparseProperties + If the CUDA mipmapped array is not allocated with flag ::CUDA_ARRAY3D_SPARSE + ::CUDA_ERROR_INVALID_VALUE will be returned. + + For non-layered CUDA mipmapped arrays, ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize returns the + size of the mip tail region. The mip tail region includes all mip levels whose width, height or depth + is less than that of the tile. + For layered CUDA mipmapped arrays, if ::CUDA_ARRAY_SPARSE_PROPERTIES::flags contains ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL, + then ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize specifies the size of the mip tail of all layers combined. + Otherwise, ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize specifies mip tail size per layer. + The returned value of ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel is valid only if ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize is non-zero. + + \return + ::CUDA_SUCCESS + ::CUDA_ERROR_INVALID_VALUE + + \param[out] sparseProperties - Pointer to ::CUDA_ARRAY_SPARSE_PROPERTIES + \param[in] mipmap - CUDA mipmapped array to get the sparse properties of + \sa ::cuArrayGetSparseProperties, ::cuMemMapArrayAsync*/ + fn cuMipmappedArrayGetSparseProperties( + sparseProperties: *mut cuda_types::CUDA_ARRAY_SPARSE_PROPERTIES, + mipmap: cuda_types::CUmipmappedArray, + ) -> cuda_types::CUresult; + /** \brief Returns the memory requirements of a CUDA array + + Returns the memory requirements of a CUDA array in \p memoryRequirements + If the CUDA array is not allocated with flag ::CUDA_ARRAY3D_DEFERRED_MAPPING + ::CUDA_ERROR_INVALID_VALUE will be returned. + + The returned value in ::CUDA_ARRAY_MEMORY_REQUIREMENTS::size + represents the total size of the CUDA array. + The returned value in ::CUDA_ARRAY_MEMORY_REQUIREMENTS::alignment + represents the alignment necessary for mapping the CUDA array. + + \return + ::CUDA_SUCCESS + ::CUDA_ERROR_INVALID_VALUE + + \param[out] memoryRequirements - Pointer to ::CUDA_ARRAY_MEMORY_REQUIREMENTS + \param[in] array - CUDA array to get the memory requirements of + \param[in] device - Device to get the memory requirements for + \sa ::cuMipmappedArrayGetMemoryRequirements, ::cuMemMapArrayAsync*/ + fn cuArrayGetMemoryRequirements( + memoryRequirements: *mut cuda_types::CUDA_ARRAY_MEMORY_REQUIREMENTS, + array: cuda_types::CUarray, + device: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Returns the memory requirements of a CUDA mipmapped array + + Returns the memory requirements of a CUDA mipmapped array in \p memoryRequirements + If the CUDA mipmapped array is not allocated with flag ::CUDA_ARRAY3D_DEFERRED_MAPPING + ::CUDA_ERROR_INVALID_VALUE will be returned. + + The returned value in ::CUDA_ARRAY_MEMORY_REQUIREMENTS::size + represents the total size of the CUDA mipmapped array. + The returned value in ::CUDA_ARRAY_MEMORY_REQUIREMENTS::alignment + represents the alignment necessary for mapping the CUDA mipmapped + array. + + \return + ::CUDA_SUCCESS + ::CUDA_ERROR_INVALID_VALUE + + \param[out] memoryRequirements - Pointer to ::CUDA_ARRAY_MEMORY_REQUIREMENTS + \param[in] mipmap - CUDA mipmapped array to get the memory requirements of + \param[in] device - Device to get the memory requirements for + \sa ::cuArrayGetMemoryRequirements, ::cuMemMapArrayAsync*/ + fn cuMipmappedArrayGetMemoryRequirements( + memoryRequirements: *mut cuda_types::CUDA_ARRAY_MEMORY_REQUIREMENTS, + mipmap: cuda_types::CUmipmappedArray, + device: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Gets a CUDA array plane from a CUDA array + + Returns in \p pPlaneArray a CUDA array that represents a single format plane + of the CUDA array \p hArray. + + If \p planeIdx is greater than the maximum number of planes in this array or if the array does + not have a multi-planar format e.g: ::CU_AD_FORMAT_NV12, then ::CUDA_ERROR_INVALID_VALUE is returned. + + Note that if the \p hArray has format ::CU_AD_FORMAT_NV12, then passing in 0 for \p planeIdx returns + a CUDA array of the same size as \p hArray but with one channel and ::CU_AD_FORMAT_UNSIGNED_INT8 as its format. + If 1 is passed for \p planeIdx, then the returned CUDA array has half the height and width + of \p hArray with two channels and ::CU_AD_FORMAT_UNSIGNED_INT8 as its format. + + \param pPlaneArray - Returned CUDA array referenced by the \p planeIdx + \param hArray - Multiplanar CUDA array + \param planeIdx - Plane index + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa + ::cuArrayCreate, + ::cudaArrayGetPlane*/ + fn cuArrayGetPlane( + pPlaneArray: *mut cuda_types::CUarray, + hArray: cuda_types::CUarray, + planeIdx: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Destroys a CUDA array + + Destroys the CUDA array \p hArray. + + \param hArray - Array to destroy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_ARRAY_IS_MAPPED, + ::CUDA_ERROR_CONTEXT_IS_DESTROYED + \notefnerr + + \sa ::cuArray3DCreate, ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaFreeArray*/ + fn cuArrayDestroy(hArray: cuda_types::CUarray) -> cuda_types::CUresult; + /** \brief Creates a 3D CUDA array + + Creates a CUDA array according to the ::CUDA_ARRAY3D_DESCRIPTOR structure + \p pAllocateArray and returns a handle to the new CUDA array in \p *pHandle. + The ::CUDA_ARRAY3D_DESCRIPTOR is defined as: + + \code +typedef struct { +unsigned int Width; +unsigned int Height; +unsigned int Depth; +CUarray_format Format; +unsigned int NumChannels; +unsigned int Flags; +} CUDA_ARRAY3D_DESCRIPTOR; + \endcode + where: + + - \p Width, \p Height, and \p Depth are the width, height, and depth of the + CUDA array (in elements); the following types of CUDA arrays can be allocated: + - A 1D array is allocated if \p Height and \p Depth extents are both zero. + - A 2D array is allocated if only \p Depth extent is zero. + - A 3D array is allocated if all three extents are non-zero. + - A 1D layered CUDA array is allocated if only \p Height is zero and the + ::CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number + of layers is determined by the depth extent. + - A 2D layered CUDA array is allocated if all three extents are non-zero and + the ::CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number + of layers is determined by the depth extent. + - A cubemap CUDA array is allocated if all three extents are non-zero and the + ::CUDA_ARRAY3D_CUBEMAP flag is set. \p Width must be equal to \p Height, and + \p Depth must be six. A cubemap is a special type of 2D layered CUDA array, + where the six layers represent the six faces of a cube. The order of the six + layers in memory is the same as that listed in ::CUarray_cubemap_face. + - A cubemap layered CUDA array is allocated if all three extents are non-zero, + and both, ::CUDA_ARRAY3D_CUBEMAP and ::CUDA_ARRAY3D_LAYERED flags are set. + \p Width must be equal to \p Height, and \p Depth must be a multiple of six. + A cubemap layered CUDA array is a special type of 2D layered CUDA array that + consists of a collection of cubemaps. The first six layers represent the first + cubemap, the next six layers form the second cubemap, and so on. + + - ::Format specifies the format of the elements; ::CUarray_format is + defined as: + \code +typedef enum CUarray_format_enum { +CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, +CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, +CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, +CU_AD_FORMAT_SIGNED_INT8 = 0x08, +CU_AD_FORMAT_SIGNED_INT16 = 0x09, +CU_AD_FORMAT_SIGNED_INT32 = 0x0a, +CU_AD_FORMAT_HALF = 0x10, +CU_AD_FORMAT_FLOAT = 0x20 +} CUarray_format; + \endcode + + - \p NumChannels specifies the number of packed components per CUDA array + element; it may be 1, 2, or 4; + + - ::Flags may be set to + - ::CUDA_ARRAY3D_LAYERED to enable creation of layered CUDA arrays. If this flag is set, + \p Depth specifies the number of layers, not the depth of a 3D array. + - ::CUDA_ARRAY3D_SURFACE_LDST to enable surface references to be bound to the CUDA array. + If this flag is not set, ::cuSurfRefSetArray will fail when attempting to bind the CUDA array + to a surface reference. + - ::CUDA_ARRAY3D_CUBEMAP to enable creation of cubemaps. If this flag is set, \p Width must be + equal to \p Height, and \p Depth must be six. If the ::CUDA_ARRAY3D_LAYERED flag is also set, + then \p Depth must be a multiple of six. + - ::CUDA_ARRAY3D_TEXTURE_GATHER to indicate that the CUDA array will be used for texture gather. + Texture gather can only be performed on 2D CUDA arrays. + + \p Width, \p Height and \p Depth must meet certain size requirements as listed in the following table. + All values are specified in elements. Note that for brevity's sake, the full name of the device attribute + is not specified. For ex., TEXTURE1D_WIDTH refers to the device attribute + ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH. + + Note that 2D CUDA arrays have different size requirements if the ::CUDA_ARRAY3D_TEXTURE_GATHER flag + is set. \p Width and \p Height must not be greater than ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH + and ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT respectively, in that case. + + <table> + <tr><td><b>CUDA array type</b></td> + <td><b>Valid extents that must always be met<br>{(width range in elements), (height range), + (depth range)}</b></td> + <td><b>Valid extents with CUDA_ARRAY3D_SURFACE_LDST set<br> + {(width range in elements), (height range), (depth range)}</b></td></tr> + <tr><td>1D</td> + <td><small>{ (1,TEXTURE1D_WIDTH), 0, 0 }</small></td> + <td><small>{ (1,SURFACE1D_WIDTH), 0, 0 }</small></td></tr> + <tr><td>2D</td> + <td><small>{ (1,TEXTURE2D_WIDTH), (1,TEXTURE2D_HEIGHT), 0 }</small></td> + <td><small>{ (1,SURFACE2D_WIDTH), (1,SURFACE2D_HEIGHT), 0 }</small></td></tr> + <tr><td>3D</td> + <td><small>{ (1,TEXTURE3D_WIDTH), (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) } + <br>OR<br>{ (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE), + (1,TEXTURE3D_DEPTH_ALTERNATE) }</small></td> + <td><small>{ (1,SURFACE3D_WIDTH), (1,SURFACE3D_HEIGHT), + (1,SURFACE3D_DEPTH) }</small></td></tr> + <tr><td>1D Layered</td> + <td><small>{ (1,TEXTURE1D_LAYERED_WIDTH), 0, + (1,TEXTURE1D_LAYERED_LAYERS) }</small></td> + <td><small>{ (1,SURFACE1D_LAYERED_WIDTH), 0, + (1,SURFACE1D_LAYERED_LAYERS) }</small></td></tr> + <tr><td>2D Layered</td> + <td><small>{ (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT), + (1,TEXTURE2D_LAYERED_LAYERS) }</small></td> + <td><small>{ (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT), + (1,SURFACE2D_LAYERED_LAYERS) }</small></td></tr> + <tr><td>Cubemap</td> + <td><small>{ (1,TEXTURECUBEMAP_WIDTH), (1,TEXTURECUBEMAP_WIDTH), 6 }</small></td> + <td><small>{ (1,SURFACECUBEMAP_WIDTH), + (1,SURFACECUBEMAP_WIDTH), 6 }</small></td></tr> + <tr><td>Cubemap Layered</td> + <td><small>{ (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH), + (1,TEXTURECUBEMAP_LAYERED_LAYERS) }</small></td> + <td><small>{ (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH), + (1,SURFACECUBEMAP_LAYERED_LAYERS) }</small></td></tr> + </table> + + Here are examples of CUDA array descriptions: + + Description for a CUDA array of 2048 floats: + \code +CUDA_ARRAY3D_DESCRIPTOR desc; +desc.Format = CU_AD_FORMAT_FLOAT; +desc.NumChannels = 1; +desc.Width = 2048; +desc.Height = 0; +desc.Depth = 0; + \endcode + + Description for a 64 x 64 CUDA array of floats: + \code +CUDA_ARRAY3D_DESCRIPTOR desc; +desc.Format = CU_AD_FORMAT_FLOAT; +desc.NumChannels = 1; +desc.Width = 64; +desc.Height = 64; +desc.Depth = 0; + \endcode + + Description for a \p width x \p height x \p depth CUDA array of 64-bit, + 4x16-bit float16's: + \code +CUDA_ARRAY3D_DESCRIPTOR desc; +desc.Format = CU_AD_FORMAT_HALF; +desc.NumChannels = 4; +desc.Width = width; +desc.Height = height; +desc.Depth = depth; + \endcode + + \param pHandle - Returned array + \param pAllocateArray - 3D array descriptor + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa ::cuArray3DGetDescriptor, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaMalloc3DArray*/ + fn cuArray3DCreate_v2( + pHandle: *mut cuda_types::CUarray, + pAllocateArray: *const cuda_types::CUDA_ARRAY3D_DESCRIPTOR, + ) -> cuda_types::CUresult; + /** \brief Get a 3D CUDA array descriptor + + Returns in \p *pArrayDescriptor a descriptor containing information on the + format and dimensions of the CUDA array \p hArray. It is useful for + subroutines that have been passed a CUDA array, but need to know the CUDA + array parameters for validation or other purposes. + + This function may be called on 1D and 2D arrays, in which case the \p Height + and/or \p Depth members of the descriptor struct will be set to 0. + + \param pArrayDescriptor - Returned 3D array descriptor + \param hArray - 3D array to get descriptor of + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_CONTEXT_IS_DESTROYED + \notefnerr + + \sa ::cuArray3DCreate, ::cuArrayCreate, + ::cuArrayDestroy, ::cuArrayGetDescriptor, ::cuMemAlloc, ::cuMemAllocHost, + ::cuMemAllocPitch, ::cuMemcpy2D, ::cuMemcpy2DAsync, ::cuMemcpy2DUnaligned, + ::cuMemcpy3D, ::cuMemcpy3DAsync, ::cuMemcpyAtoA, ::cuMemcpyAtoD, + ::cuMemcpyAtoH, ::cuMemcpyAtoHAsync, ::cuMemcpyDtoA, ::cuMemcpyDtoD, ::cuMemcpyDtoDAsync, + ::cuMemcpyDtoH, ::cuMemcpyDtoHAsync, ::cuMemcpyHtoA, ::cuMemcpyHtoAAsync, + ::cuMemcpyHtoD, ::cuMemcpyHtoDAsync, ::cuMemFree, ::cuMemFreeHost, + ::cuMemGetAddressRange, ::cuMemGetInfo, ::cuMemHostAlloc, + ::cuMemHostGetDevicePointer, ::cuMemsetD2D8, ::cuMemsetD2D16, + ::cuMemsetD2D32, ::cuMemsetD8, ::cuMemsetD16, ::cuMemsetD32, + ::cudaArrayGetInfo*/ + fn cuArray3DGetDescriptor_v2( + pArrayDescriptor: *mut cuda_types::CUDA_ARRAY3D_DESCRIPTOR, + hArray: cuda_types::CUarray, + ) -> cuda_types::CUresult; + /** \brief Creates a CUDA mipmapped array + + Creates a CUDA mipmapped array according to the ::CUDA_ARRAY3D_DESCRIPTOR structure + \p pMipmappedArrayDesc and returns a handle to the new CUDA mipmapped array in \p *pHandle. + \p numMipmapLevels specifies the number of mipmap levels to be allocated. This value is + clamped to the range [1, 1 + floor(log2(max(width, height, depth)))]. + + The ::CUDA_ARRAY3D_DESCRIPTOR is defined as: + + \code +typedef struct { +unsigned int Width; +unsigned int Height; +unsigned int Depth; +CUarray_format Format; +unsigned int NumChannels; +unsigned int Flags; +} CUDA_ARRAY3D_DESCRIPTOR; + \endcode + where: + + - \p Width, \p Height, and \p Depth are the width, height, and depth of the + CUDA array (in elements); the following types of CUDA arrays can be allocated: + - A 1D mipmapped array is allocated if \p Height and \p Depth extents are both zero. + - A 2D mipmapped array is allocated if only \p Depth extent is zero. + - A 3D mipmapped array is allocated if all three extents are non-zero. + - A 1D layered CUDA mipmapped array is allocated if only \p Height is zero and the + ::CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 1D array. The number + of layers is determined by the depth extent. + - A 2D layered CUDA mipmapped array is allocated if all three extents are non-zero and + the ::CUDA_ARRAY3D_LAYERED flag is set. Each layer is a 2D array. The number + of layers is determined by the depth extent. + - A cubemap CUDA mipmapped array is allocated if all three extents are non-zero and the + ::CUDA_ARRAY3D_CUBEMAP flag is set. \p Width must be equal to \p Height, and + \p Depth must be six. A cubemap is a special type of 2D layered CUDA array, + where the six layers represent the six faces of a cube. The order of the six + layers in memory is the same as that listed in ::CUarray_cubemap_face. + - A cubemap layered CUDA mipmapped array is allocated if all three extents are non-zero, + and both, ::CUDA_ARRAY3D_CUBEMAP and ::CUDA_ARRAY3D_LAYERED flags are set. + \p Width must be equal to \p Height, and \p Depth must be a multiple of six. + A cubemap layered CUDA array is a special type of 2D layered CUDA array that + consists of a collection of cubemaps. The first six layers represent the first + cubemap, the next six layers form the second cubemap, and so on. + + - ::Format specifies the format of the elements; ::CUarray_format is + defined as: + \code +typedef enum CUarray_format_enum { +CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, +CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, +CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, +CU_AD_FORMAT_SIGNED_INT8 = 0x08, +CU_AD_FORMAT_SIGNED_INT16 = 0x09, +CU_AD_FORMAT_SIGNED_INT32 = 0x0a, +CU_AD_FORMAT_HALF = 0x10, +CU_AD_FORMAT_FLOAT = 0x20 +} CUarray_format; + \endcode + + - \p NumChannels specifies the number of packed components per CUDA array + element; it may be 1, 2, or 4; + + - ::Flags may be set to + - ::CUDA_ARRAY3D_LAYERED to enable creation of layered CUDA mipmapped arrays. If this flag is set, + \p Depth specifies the number of layers, not the depth of a 3D array. + - ::CUDA_ARRAY3D_SURFACE_LDST to enable surface references to be bound to individual mipmap levels of + the CUDA mipmapped array. If this flag is not set, ::cuSurfRefSetArray will fail when attempting to + bind a mipmap level of the CUDA mipmapped array to a surface reference. + - ::CUDA_ARRAY3D_CUBEMAP to enable creation of mipmapped cubemaps. If this flag is set, \p Width must be + equal to \p Height, and \p Depth must be six. If the ::CUDA_ARRAY3D_LAYERED flag is also set, + then \p Depth must be a multiple of six. + - ::CUDA_ARRAY3D_TEXTURE_GATHER to indicate that the CUDA mipmapped array will be used for texture gather. + Texture gather can only be performed on 2D CUDA mipmapped arrays. + + \p Width, \p Height and \p Depth must meet certain size requirements as listed in the following table. + All values are specified in elements. Note that for brevity's sake, the full name of the device attribute + is not specified. For ex., TEXTURE1D_MIPMAPPED_WIDTH refers to the device attribute + ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH. + + <table> + <tr><td><b>CUDA array type</b></td> + <td><b>Valid extents that must always be met<br>{(width range in elements), (height range), + (depth range)}</b></td> + <td><b>Valid extents with CUDA_ARRAY3D_SURFACE_LDST set<br> + {(width range in elements), (height range), (depth range)}</b></td></tr> + <tr><td>1D</td> + <td><small>{ (1,TEXTURE1D_MIPMAPPED_WIDTH), 0, 0 }</small></td> + <td><small>{ (1,SURFACE1D_WIDTH), 0, 0 }</small></td></tr> + <tr><td>2D</td> + <td><small>{ (1,TEXTURE2D_MIPMAPPED_WIDTH), (1,TEXTURE2D_MIPMAPPED_HEIGHT), 0 }</small></td> + <td><small>{ (1,SURFACE2D_WIDTH), (1,SURFACE2D_HEIGHT), 0 }</small></td></tr> + <tr><td>3D</td> + <td><small>{ (1,TEXTURE3D_WIDTH), (1,TEXTURE3D_HEIGHT), (1,TEXTURE3D_DEPTH) } + <br>OR<br>{ (1,TEXTURE3D_WIDTH_ALTERNATE), (1,TEXTURE3D_HEIGHT_ALTERNATE), + (1,TEXTURE3D_DEPTH_ALTERNATE) }</small></td> + <td><small>{ (1,SURFACE3D_WIDTH), (1,SURFACE3D_HEIGHT), + (1,SURFACE3D_DEPTH) }</small></td></tr> + <tr><td>1D Layered</td> + <td><small>{ (1,TEXTURE1D_LAYERED_WIDTH), 0, + (1,TEXTURE1D_LAYERED_LAYERS) }</small></td> + <td><small>{ (1,SURFACE1D_LAYERED_WIDTH), 0, + (1,SURFACE1D_LAYERED_LAYERS) }</small></td></tr> + <tr><td>2D Layered</td> + <td><small>{ (1,TEXTURE2D_LAYERED_WIDTH), (1,TEXTURE2D_LAYERED_HEIGHT), + (1,TEXTURE2D_LAYERED_LAYERS) }</small></td> + <td><small>{ (1,SURFACE2D_LAYERED_WIDTH), (1,SURFACE2D_LAYERED_HEIGHT), + (1,SURFACE2D_LAYERED_LAYERS) }</small></td></tr> + <tr><td>Cubemap</td> + <td><small>{ (1,TEXTURECUBEMAP_WIDTH), (1,TEXTURECUBEMAP_WIDTH), 6 }</small></td> + <td><small>{ (1,SURFACECUBEMAP_WIDTH), + (1,SURFACECUBEMAP_WIDTH), 6 }</small></td></tr> + <tr><td>Cubemap Layered</td> + <td><small>{ (1,TEXTURECUBEMAP_LAYERED_WIDTH), (1,TEXTURECUBEMAP_LAYERED_WIDTH), + (1,TEXTURECUBEMAP_LAYERED_LAYERS) }</small></td> + <td><small>{ (1,SURFACECUBEMAP_LAYERED_WIDTH), (1,SURFACECUBEMAP_LAYERED_WIDTH), + (1,SURFACECUBEMAP_LAYERED_LAYERS) }</small></td></tr> + </table> + + + \param pHandle - Returned mipmapped array + \param pMipmappedArrayDesc - mipmapped array descriptor + \param numMipmapLevels - Number of mipmap levels + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa + ::cuMipmappedArrayDestroy, + ::cuMipmappedArrayGetLevel, + ::cuArrayCreate, + ::cudaMallocMipmappedArray*/ + fn cuMipmappedArrayCreate( + pHandle: *mut cuda_types::CUmipmappedArray, + pMipmappedArrayDesc: *const cuda_types::CUDA_ARRAY3D_DESCRIPTOR, + numMipmapLevels: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Gets a mipmap level of a CUDA mipmapped array + + Returns in \p *pLevelArray a CUDA array that represents a single mipmap level + of the CUDA mipmapped array \p hMipmappedArray. + + If \p level is greater than the maximum number of levels in this mipmapped array, + ::CUDA_ERROR_INVALID_VALUE is returned. + + \param pLevelArray - Returned mipmap level CUDA array + \param hMipmappedArray - CUDA mipmapped array + \param level - Mipmap level + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa + ::cuMipmappedArrayCreate, + ::cuMipmappedArrayDestroy, + ::cuArrayCreate, + ::cudaGetMipmappedArrayLevel*/ + fn cuMipmappedArrayGetLevel( + pLevelArray: *mut cuda_types::CUarray, + hMipmappedArray: cuda_types::CUmipmappedArray, + level: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Destroys a CUDA mipmapped array + + Destroys the CUDA mipmapped array \p hMipmappedArray. + + \param hMipmappedArray - Mipmapped array to destroy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_ARRAY_IS_MAPPED, + ::CUDA_ERROR_CONTEXT_IS_DESTROYED + \notefnerr + + \sa + ::cuMipmappedArrayCreate, + ::cuMipmappedArrayGetLevel, + ::cuArrayCreate, + ::cudaFreeMipmappedArray*/ + fn cuMipmappedArrayDestroy( + hMipmappedArray: cuda_types::CUmipmappedArray, + ) -> cuda_types::CUresult; + /** \brief Retrieve handle for an address range + + Get a handle of the specified type to an address range. The address range + must have been obtained by a prior call to either ::cuMemAlloc or ::cuMemAddressReserve. + If the address range was obtained via ::cuMemAddressReserve, it must also be fully mapped via ::cuMemMap. + The address range must have been obtained by a prior call to either ::cuMemAllocHost or + ::cuMemHostAlloc on Tegra. + + Users must ensure the \p dptr and \p size are aligned to the host page size. + + When requesting CUmemRangeHandleType::CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD, + users are expected to query for dma_buf support for the platform + by using ::CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED device attribute before calling + this API. The \p handle will be interpreted as a pointer to an integer to store the dma_buf file descriptor. + Users must ensure the entire address range is backed and mapped when + the address range is allocated by ::cuMemAddressReserve. All the physical + allocations backing the address range must be resident on the same device and + have identical allocation properties. Users are also expected to retrieve a + new handle every time the underlying physical allocation(s) corresponding + to a previously queried VA range are changed. + + \param[out] handle - Pointer to the location where the returned handle will be stored. + \param[in] dptr - Pointer to a valid CUDA device allocation. Must be aligned to host page size. + \param[in] size - Length of the address range. Must be aligned to host page size. + \param[in] handleType - Type of handle requested (defines type and size of the \p handle output parameter) + \param[in] flags - Reserved, must be zero + + \return + CUDA_SUCCESS + CUDA_ERROR_INVALID_VALUE + CUDA_ERROR_NOT_SUPPORTED*/ + fn cuMemGetHandleForAddressRange( + handle: *mut ::core::ffi::c_void, + dptr: cuda_types::CUdeviceptr, + size: usize, + handleType: cuda_types::CUmemRangeHandleType, + flags: ::core::ffi::c_ulonglong, + ) -> cuda_types::CUresult; + /** \brief Allocate an address range reservation. + + Reserves a virtual address range based on the given parameters, giving + the starting address of the range in \p ptr. This API requires a system that + supports UVA. The size and address parameters must be a multiple of the + host page size and the alignment must be a power of two or zero for default + alignment. + + \param[out] ptr - Resulting pointer to start of virtual address range allocated + \param[in] size - Size of the reserved virtual address range requested + \param[in] alignment - Alignment of the reserved virtual address range requested + \param[in] addr - Fixed starting address range requested + \param[in] flags - Currently unused, must be zero + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + + \sa ::cuMemAddressFree*/ + fn cuMemAddressReserve( + ptr: *mut cuda_types::CUdeviceptr, + size: usize, + alignment: usize, + addr: cuda_types::CUdeviceptr, + flags: ::core::ffi::c_ulonglong, + ) -> cuda_types::CUresult; + /** \brief Free an address range reservation. + + Frees a virtual address range reserved by cuMemAddressReserve. The size + must match what was given to memAddressReserve and the ptr given must + match what was returned from memAddressReserve. + + \param[in] ptr - Starting address of the virtual address range to free + \param[in] size - Size of the virtual address region to free + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + + \sa ::cuMemAddressReserve*/ + fn cuMemAddressFree( + ptr: cuda_types::CUdeviceptr, + size: usize, + ) -> cuda_types::CUresult; + /** \brief Create a CUDA memory handle representing a memory allocation of a given size described by the given properties + + This creates a memory allocation on the target device specified through the + \p prop structure. The created allocation will not have any device or host + mappings. The generic memory \p handle for the allocation can be + mapped to the address space of calling process via ::cuMemMap. This handle + cannot be transmitted directly to other processes (see + ::cuMemExportToShareableHandle). On Windows, the caller must also pass + an LPSECURITYATTRIBUTE in \p prop to be associated with this handle which + limits or allows access to this handle for a recipient process (see + ::CUmemAllocationProp::win32HandleMetaData for more). The \p size of this + allocation must be a multiple of the the value given via + ::cuMemGetAllocationGranularity with the ::CU_MEM_ALLOC_GRANULARITY_MINIMUM + flag. + To create a CPU allocation targeting a specific host NUMA node, applications must + set ::CUmemAllocationProp::CUmemLocation::type to ::CU_MEM_LOCATION_TYPE_HOST_NUMA and + ::CUmemAllocationProp::CUmemLocation::id must specify the NUMA ID of the CPU. + On systems where NUMA is not available ::CUmemAllocationProp::CUmemLocation::id must be set to 0. + + Applications can set ::CUmemAllocationProp::requestedHandleTypes to + ::CU_MEM_HANDLE_TYPE_FABRIC in order to create allocations suitable for sharing + within an IMEX domain. An IMEX domain is either an OS instance or a group of securely + connected OS instances using the NVIDIA IMEX daemon. An IMEX channel is a global resource + within the IMEX domain that represents a logical entity that aims to provide fine grained + accessibility control for the participating processes. When exporter and importer CUDA processes + have been granted access to the same IMEX channel, they can securely share memory. + If the allocating process does not have access setup for an IMEX channel, attempting to create + a ::CUmemGenericAllocationHandle with ::CU_MEM_HANDLE_TYPE_FABRIC will result in ::CUDA_ERROR_NOT_PERMITTED. + The nvidia-modprobe CLI provides more information regarding setting up of IMEX channels. + + If ::CUmemAllocationProp::allocFlags::usage contains ::CU_MEM_CREATE_USAGE_TILE_POOL flag then + the memory allocation is intended only to be used as backing tile pool for sparse CUDA arrays + and sparse CUDA mipmapped arrays. + (see ::cuMemMapArrayAsync). + + \param[out] handle - Value of handle returned. All operations on this allocation are to be performed using this handle. + \param[in] size - Size of the allocation requested + \param[in] prop - Properties of the allocation to create. + \param[in] flags - flags for future use, must be zero now. + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + + \sa ::cuMemRelease, ::cuMemExportToShareableHandle, ::cuMemImportFromShareableHandle*/ + fn cuMemCreate( + handle: *mut cuda_types::CUmemGenericAllocationHandle, + size: usize, + prop: *const cuda_types::CUmemAllocationProp, + flags: ::core::ffi::c_ulonglong, + ) -> cuda_types::CUresult; + /** \brief Release a memory handle representing a memory allocation which was previously allocated through cuMemCreate. + + Frees the memory that was allocated on a device through cuMemCreate. + + The memory allocation will be freed when all outstanding mappings to the memory + are unmapped and when all outstanding references to the handle (including it's + shareable counterparts) are also released. The generic memory handle can be + freed when there are still outstanding mappings made with this handle. Each + time a recipient process imports a shareable handle, it needs to pair it with + ::cuMemRelease for the handle to be freed. If \p handle is not a valid handle + the behavior is undefined. + + \param[in] handle Value of handle which was returned previously by cuMemCreate. + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + + \sa ::cuMemCreate*/ + fn cuMemRelease( + handle: cuda_types::CUmemGenericAllocationHandle, + ) -> cuda_types::CUresult; + /** \brief Maps an allocation handle to a reserved virtual address range. + + Maps bytes of memory represented by \p handle starting from byte \p offset to + \p size to address range [\p addr, \p addr + \p size]. This range must be an + address reservation previously reserved with ::cuMemAddressReserve, and + \p offset + \p size must be less than the size of the memory allocation. + Both \p ptr, \p size, and \p offset must be a multiple of the value given via + ::cuMemGetAllocationGranularity with the ::CU_MEM_ALLOC_GRANULARITY_MINIMUM flag. + If \p handle represents a multicast object, \p ptr, \p size and \p offset must + be aligned to the value returned by ::cuMulticastGetGranularity with the flag + ::CU_MULTICAST_MINIMUM_GRANULARITY. For best performance however, it is + recommended that \p ptr, \p size and \p offset be aligned to the value + returned by ::cuMulticastGetGranularity with the flag + ::CU_MULTICAST_RECOMMENDED_GRANULARITY. + + Please note calling ::cuMemMap does not make the address accessible, + the caller needs to update accessibility of a contiguous mapped VA + range by calling ::cuMemSetAccess. + + Once a recipient process obtains a shareable memory handle + from ::cuMemImportFromShareableHandle, the process must + use ::cuMemMap to map the memory into its address ranges before + setting accessibility with ::cuMemSetAccess. + + ::cuMemMap can only create mappings on VA range reservations + that are not currently mapped. + + \param[in] ptr - Address where memory will be mapped. + \param[in] size - Size of the memory mapping. + \param[in] offset - Offset into the memory represented by + - \p handle from which to start mapping + - Note: currently must be zero. + \param[in] handle - Handle to a shareable memory + \param[in] flags - flags for future use, must be zero now. + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + + \sa ::cuMemUnmap, ::cuMemSetAccess, ::cuMemCreate, ::cuMemAddressReserve, ::cuMemImportFromShareableHandle*/ + fn cuMemMap( + ptr: cuda_types::CUdeviceptr, + size: usize, + offset: usize, + handle: cuda_types::CUmemGenericAllocationHandle, + flags: ::core::ffi::c_ulonglong, + ) -> cuda_types::CUresult; + /** \brief Maps or unmaps subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays + + Performs map or unmap operations on subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays. + Each operation is specified by a ::CUarrayMapInfo entry in the \p mapInfoList array of size \p count. + The structure ::CUarrayMapInfo is defined as follow: +\code +typedef struct CUarrayMapInfo_st { +CUresourcetype resourceType; +union { +CUmipmappedArray mipmap; +CUarray array; +} resource; + +CUarraySparseSubresourceType subresourceType; +union { +struct { +unsigned int level; +unsigned int layer; +unsigned int offsetX; +unsigned int offsetY; +unsigned int offsetZ; +unsigned int extentWidth; +unsigned int extentHeight; +unsigned int extentDepth; +} sparseLevel; +struct { +unsigned int layer; +unsigned long long offset; +unsigned long long size; +} miptail; +} subresource; + +CUmemOperationType memOperationType; + +CUmemHandleType memHandleType; +union { +CUmemGenericAllocationHandle memHandle; +} memHandle; + +unsigned long long offset; +unsigned int deviceBitMask; +unsigned int flags; +unsigned int reserved[2]; +} CUarrayMapInfo; +\endcode + + where ::CUarrayMapInfo::resourceType specifies the type of resource to be operated on. + If ::CUarrayMapInfo::resourceType is set to ::CUresourcetype::CU_RESOURCE_TYPE_ARRAY then + ::CUarrayMapInfo::resource::array must be set to a valid sparse CUDA array handle. + The CUDA array must be either a 2D, 2D layered or 3D CUDA array and must have been allocated using + ::cuArrayCreate or ::cuArray3DCreate with the flag ::CUDA_ARRAY3D_SPARSE + or ::CUDA_ARRAY3D_DEFERRED_MAPPING. + For CUDA arrays obtained using ::cuMipmappedArrayGetLevel, ::CUDA_ERROR_INVALID_VALUE will be returned. + If ::CUarrayMapInfo::resourceType is set to ::CUresourcetype::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY + then ::CUarrayMapInfo::resource::mipmap must be set to a valid sparse CUDA mipmapped array handle. + The CUDA mipmapped array must be either a 2D, 2D layered or 3D CUDA mipmapped array and must have been + allocated using ::cuMipmappedArrayCreate with the flag ::CUDA_ARRAY3D_SPARSE + or ::CUDA_ARRAY3D_DEFERRED_MAPPING. + + ::CUarrayMapInfo::subresourceType specifies the type of subresource within the resource. + ::CUarraySparseSubresourceType_enum is defined as: +\code +typedef enum CUarraySparseSubresourceType_enum { +CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = 0, +CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL = 1 +} CUarraySparseSubresourceType; +\endcode + + where ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL indicates a + sparse-miplevel which spans at least one tile in every dimension. The remaining miplevels which + are too small to span at least one tile in any dimension constitute the mip tail region as indicated by + ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL subresource type. + + If ::CUarrayMapInfo::subresourceType is set to ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL + then ::CUarrayMapInfo::subresource::sparseLevel struct must contain valid array subregion offsets and extents. + The ::CUarrayMapInfo::subresource::sparseLevel::offsetX, ::CUarrayMapInfo::subresource::sparseLevel::offsetY + and ::CUarrayMapInfo::subresource::sparseLevel::offsetZ must specify valid X, Y and Z offsets respectively. + The ::CUarrayMapInfo::subresource::sparseLevel::extentWidth, ::CUarrayMapInfo::subresource::sparseLevel::extentHeight + and ::CUarrayMapInfo::subresource::sparseLevel::extentDepth must specify valid width, height and depth extents respectively. + These offsets and extents must be aligned to the corresponding tile dimension. + For CUDA mipmapped arrays ::CUarrayMapInfo::subresource::sparseLevel::level must specify a valid mip level index. Otherwise, + must be zero. + For layered CUDA arrays and layered CUDA mipmapped arrays ::CUarrayMapInfo::subresource::sparseLevel::layer must specify a valid layer index. Otherwise, + must be zero. + ::CUarrayMapInfo::subresource::sparseLevel::offsetZ must be zero and ::CUarrayMapInfo::subresource::sparseLevel::extentDepth + must be set to 1 for 2D and 2D layered CUDA arrays and CUDA mipmapped arrays. + Tile extents can be obtained by calling ::cuArrayGetSparseProperties and ::cuMipmappedArrayGetSparseProperties + + If ::CUarrayMapInfo::subresourceType is set to ::CUarraySparseSubresourceType::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL + then ::CUarrayMapInfo::subresource::miptail struct must contain valid mip tail offset in + ::CUarrayMapInfo::subresource::miptail::offset and size in ::CUarrayMapInfo::subresource::miptail::size. + Both, mip tail offset and mip tail size must be aligned to the tile size. + For layered CUDA mipmapped arrays which don't have the flag ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL set in ::CUDA_ARRAY_SPARSE_PROPERTIES::flags + as returned by ::cuMipmappedArrayGetSparseProperties, ::CUarrayMapInfo::subresource::miptail::layer must specify a valid layer index. + Otherwise, must be zero. + + If ::CUarrayMapInfo::resource::array or ::CUarrayMapInfo::resource::mipmap was created with ::CUDA_ARRAY3D_DEFERRED_MAPPING + flag set the ::CUarrayMapInfo::subresourceType and the contents of ::CUarrayMapInfo::subresource will be ignored. + + ::CUarrayMapInfo::memOperationType specifies the type of operation. ::CUmemOperationType is defined as: +\code +typedef enum CUmemOperationType_enum { +CU_MEM_OPERATION_TYPE_MAP = 1, +CU_MEM_OPERATION_TYPE_UNMAP = 2 +} CUmemOperationType; +\endcode + If ::CUarrayMapInfo::memOperationType is set to ::CUmemOperationType::CU_MEM_OPERATION_TYPE_MAP then the subresource + will be mapped onto the tile pool memory specified by ::CUarrayMapInfo::memHandle at offset ::CUarrayMapInfo::offset. + The tile pool allocation has to be created by specifying the ::CU_MEM_CREATE_USAGE_TILE_POOL flag when calling ::cuMemCreate. Also, + ::CUarrayMapInfo::memHandleType must be set to ::CUmemHandleType::CU_MEM_HANDLE_TYPE_GENERIC. + + If ::CUarrayMapInfo::memOperationType is set to ::CUmemOperationType::CU_MEM_OPERATION_TYPE_UNMAP then an unmapping operation + is performed. ::CUarrayMapInfo::memHandle must be NULL. + + ::CUarrayMapInfo::deviceBitMask specifies the list of devices that must map or unmap physical memory. + Currently, this mask must have exactly one bit set, and the corresponding device must match the device associated with the stream. + If ::CUarrayMapInfo::memOperationType is set to ::CUmemOperationType::CU_MEM_OPERATION_TYPE_MAP, the device must also match + the device associated with the tile pool memory allocation as specified by ::CUarrayMapInfo::memHandle. + + ::CUarrayMapInfo::flags and ::CUarrayMapInfo::reserved[] are unused and must be set to zero. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + + \param[in] mapInfoList - List of ::CUarrayMapInfo + \param[in] count - Count of ::CUarrayMapInfo in \p mapInfoList + \param[in] hStream - Stream identifier for the stream to use for map or unmap operations + + \sa ::cuMipmappedArrayCreate, ::cuArrayCreate, ::cuArray3DCreate, ::cuMemCreate, ::cuArrayGetSparseProperties, ::cuMipmappedArrayGetSparseProperties*/ + fn cuMemMapArrayAsync_ptsz( + mapInfoList: *mut cuda_types::CUarrayMapInfo, + count: ::core::ffi::c_uint, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Unmap the backing memory of a given address range. + + The range must be the entire contiguous address range that was mapped to. In + other words, ::cuMemUnmap cannot unmap a sub-range of an address range mapped + by ::cuMemCreate / ::cuMemMap. Any backing memory allocations will be freed + if there are no existing mappings and there are no unreleased memory handles. + + When ::cuMemUnmap returns successfully the address range is converted to an + address reservation and can be used for a future calls to ::cuMemMap. Any new + mapping to this virtual address will need to have access granted through + ::cuMemSetAccess, as all mappings start with no accessibility setup. + + \param[in] ptr - Starting address for the virtual address range to unmap + \param[in] size - Size of the virtual address range to unmap + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + \note_sync + + \sa ::cuMemCreate, ::cuMemAddressReserve*/ + fn cuMemUnmap(ptr: cuda_types::CUdeviceptr, size: usize) -> cuda_types::CUresult; + /** \brief Set the access flags for each location specified in \p desc for the given virtual address range + + Given the virtual address range via \p ptr and \p size, and the locations + in the array given by \p desc and \p count, set the access flags for the + target locations. The range must be a fully mapped address range + containing all allocations created by ::cuMemMap / ::cuMemCreate. + Users cannot specify ::CU_MEM_LOCATION_TYPE_HOST_NUMA accessibility for allocations created on with other location types. + Note: When ::CUmemAccessDesc::CUmemLocation::type is ::CU_MEM_LOCATION_TYPE_HOST_NUMA, ::CUmemAccessDesc::CUmemLocation::id + is ignored. + When setting the access flags for a virtual address range mapping a multicast + object, \p ptr and \p size must be aligned to the value returned by + ::cuMulticastGetGranularity with the flag ::CU_MULTICAST_MINIMUM_GRANULARITY. + For best performance however, it is recommended that \p ptr and \p size be + aligned to the value returned by ::cuMulticastGetGranularity with the flag + ::CU_MULTICAST_RECOMMENDED_GRANULARITY. + + \param[in] ptr - Starting address for the virtual address range + \param[in] size - Length of the virtual address range + \param[in] desc - Array of ::CUmemAccessDesc that describe how to change the + - mapping for each location specified + \param[in] count - Number of ::CUmemAccessDesc in \p desc + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + \note_sync + + \sa ::cuMemSetAccess, ::cuMemCreate, :cuMemMap*/ + fn cuMemSetAccess( + ptr: cuda_types::CUdeviceptr, + size: usize, + desc: *const cuda_types::CUmemAccessDesc, + count: usize, + ) -> cuda_types::CUresult; + /** \brief Get the access \p flags set for the given \p location and \p ptr + + \param[out] flags - Flags set for this location + \param[in] location - Location in which to check the flags for + \param[in] ptr - Address in which to check the access flags for + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + + \sa ::cuMemSetAccess*/ + fn cuMemGetAccess( + flags: *mut ::core::ffi::c_ulonglong, + location: *const cuda_types::CUmemLocation, + ptr: cuda_types::CUdeviceptr, + ) -> cuda_types::CUresult; + /** \brief Exports an allocation to a requested shareable handle type + + Given a CUDA memory handle, create a shareable memory + allocation handle that can be used to share the memory with other + processes. The recipient process can convert the shareable handle back into a + CUDA memory handle using ::cuMemImportFromShareableHandle and map + it with ::cuMemMap. The implementation of what this handle is and how it + can be transferred is defined by the requested handle type in \p handleType + + Once all shareable handles are closed and the allocation is released, the allocated + memory referenced will be released back to the OS and uses of the CUDA handle afterward + will lead to undefined behavior. + + This API can also be used in conjunction with other APIs (e.g. Vulkan, OpenGL) + that support importing memory from the shareable type + + \param[out] shareableHandle - Pointer to the location in which to store the requested handle type + \param[in] handle - CUDA handle for the memory allocation + \param[in] handleType - Type of shareable handle requested (defines type and size of the \p shareableHandle output parameter) + \param[in] flags - Reserved, must be zero + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + + \sa ::cuMemImportFromShareableHandle*/ + fn cuMemExportToShareableHandle( + shareableHandle: *mut ::core::ffi::c_void, + handle: cuda_types::CUmemGenericAllocationHandle, + handleType: cuda_types::CUmemAllocationHandleType, + flags: ::core::ffi::c_ulonglong, + ) -> cuda_types::CUresult; + /** \brief Imports an allocation from a requested shareable handle type. + + If the current process cannot support the memory described by this shareable + handle, this API will error as ::CUDA_ERROR_NOT_SUPPORTED. + + If \p shHandleType is ::CU_MEM_HANDLE_TYPE_FABRIC and the importer process has not been + granted access to the same IMEX channel as the exporter process, this API will error + as ::CUDA_ERROR_NOT_PERMITTED. + + \note Importing shareable handles exported from some graphics APIs(VUlkan, OpenGL, etc) + created on devices under an SLI group may not be supported, and thus this API will + return CUDA_ERROR_NOT_SUPPORTED. + There is no guarantee that the contents of \p handle will be the same CUDA memory handle + for the same given OS shareable handle, or the same underlying allocation. + + \param[out] handle - CUDA Memory handle for the memory allocation. + \param[in] osHandle - Shareable Handle representing the memory allocation that is to be imported. + \param[in] shHandleType - handle type of the exported handle ::CUmemAllocationHandleType. + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + + \sa ::cuMemExportToShareableHandle, ::cuMemMap, ::cuMemRelease*/ + fn cuMemImportFromShareableHandle( + handle: *mut cuda_types::CUmemGenericAllocationHandle, + osHandle: *mut ::core::ffi::c_void, + shHandleType: cuda_types::CUmemAllocationHandleType, + ) -> cuda_types::CUresult; + /** \brief Calculates either the minimal or recommended granularity + + Calculates either the minimal or recommended granularity + for a given allocation specification and returns it in granularity. This + granularity can be used as a multiple for alignment, size, or address mapping. + + \param[out] granularity Returned granularity. + \param[in] prop Property for which to determine the granularity for + \param[in] option Determines which granularity to return + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + + \sa ::cuMemCreate, ::cuMemMap*/ + fn cuMemGetAllocationGranularity( + granularity: *mut usize, + prop: *const cuda_types::CUmemAllocationProp, + option: cuda_types::CUmemAllocationGranularity_flags, + ) -> cuda_types::CUresult; + /** \brief Retrieve the contents of the property structure defining properties for this handle + + \param[out] prop - Pointer to a properties structure which will hold the information about this handle + \param[in] handle - Handle which to perform the query on + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + + \sa ::cuMemCreate, ::cuMemImportFromShareableHandle*/ + fn cuMemGetAllocationPropertiesFromHandle( + prop: *mut cuda_types::CUmemAllocationProp, + handle: cuda_types::CUmemGenericAllocationHandle, + ) -> cuda_types::CUresult; + /** \brief Given an address \p addr, returns the allocation handle of the backing memory allocation. + + The handle is guaranteed to be the same handle value used to map the memory. If the address + requested is not mapped, the function will fail. The returned handle must be released with + corresponding number of calls to ::cuMemRelease. + + \note The address \p addr, can be any address in a range previously mapped + by ::cuMemMap, and not necessarily the start address. + + \param[out] handle CUDA Memory handle for the backing memory allocation. + \param[in] addr Memory address to query, that has been mapped previously. + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + + \sa ::cuMemCreate, ::cuMemRelease, ::cuMemMap*/ + fn cuMemRetainAllocationHandle( + handle: *mut cuda_types::CUmemGenericAllocationHandle, + addr: *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Frees memory with stream ordered semantics + + Inserts a free operation into \p hStream. + The allocation must not be accessed after stream execution reaches the free. + After this API returns, accessing the memory from any subsequent work launched on the GPU + or querying its pointer attributes results in undefined behavior. + + \note During stream capture, this function results in the creation of a free node and + must therefore be passed the address of a graph allocation. + + \param dptr - memory to free + \param hStream - The stream establishing the stream ordering contract. + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT (default stream specified with no current context), + ::CUDA_ERROR_NOT_SUPPORTED*/ + fn cuMemFreeAsync_ptsz( + dptr: cuda_types::CUdeviceptr, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Allocates memory with stream ordered semantics + + Inserts an allocation operation into \p hStream. + A pointer to the allocated memory is returned immediately in *dptr. + The allocation must not be accessed until the the allocation operation completes. + The allocation comes from the memory pool current to the stream's device. + + \note The default memory pool of a device contains device memory from that device. + \note Basic stream ordering allows future work submitted into the same stream to use the allocation. + Stream query, stream synchronize, and CUDA events can be used to guarantee that the allocation + operation completes before work submitted in a separate stream runs. + \note During stream capture, this function results in the creation of an allocation node. In this case, + the allocation is owned by the graph instead of the memory pool. The memory pool's properties + are used to set the node's creation parameters. + + \param[out] dptr - Returned device pointer + \param[in] bytesize - Number of bytes to allocate + \param[in] hStream - The stream establishing the stream ordering contract and the memory pool to allocate from + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT (default stream specified with no current context), + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_OUT_OF_MEMORY + + \sa ::cuMemAllocFromPoolAsync, ::cuMemFreeAsync, ::cuDeviceSetMemPool, + ::cuDeviceGetDefaultMemPool, ::cuDeviceGetMemPool, ::cuMemPoolCreate, + ::cuMemPoolSetAccess, ::cuMemPoolSetAttribute*/ + fn cuMemAllocAsync_ptsz( + dptr: *mut cuda_types::CUdeviceptr, + bytesize: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Tries to release memory back to the OS + + Releases memory back to the OS until the pool contains fewer than minBytesToKeep + reserved bytes, or there is no more memory that the allocator can safely release. + The allocator cannot release OS allocations that back outstanding asynchronous allocations. + The OS allocations may happen at different granularity from the user allocations. + + \note: Allocations that have not been freed count as outstanding. + \note: Allocations that have been asynchronously freed but whose completion has + not been observed on the host (eg. by a synchronize) can count as outstanding. + + \param[in] pool - The memory pool to trim + \param[in] minBytesToKeep - If the pool has less than minBytesToKeep reserved, + the TrimTo operation is a no-op. Otherwise the pool will be guaranteed to have + at least minBytesToKeep bytes reserved after the operation. + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuMemAllocAsync, ::cuMemFreeAsync, ::cuDeviceGetDefaultMemPool, + ::cuDeviceGetMemPool, ::cuMemPoolCreate*/ + fn cuMemPoolTrimTo( + pool: cuda_types::CUmemoryPool, + minBytesToKeep: usize, + ) -> cuda_types::CUresult; + /** \brief Sets attributes of a memory pool + + Supported attributes are: + - ::CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: (value type = cuuint64_t) + Amount of reserved memory in bytes to hold onto before trying + to release memory back to the OS. When more than the release + threshold bytes of memory are held by the memory pool, the + allocator will try to release memory back to the OS on the + next call to stream, event or context synchronize. (default 0) + - ::CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: (value type = int) + Allow ::cuMemAllocAsync to use memory asynchronously freed + in another stream as long as a stream ordering dependency + of the allocating stream on the free action exists. + Cuda events and null stream interactions can create the required + stream ordered dependencies. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: (value type = int) + Allow reuse of already completed frees when there is no dependency + between the free and allocation. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: (value type = int) + Allow ::cuMemAllocAsync to insert new stream dependencies + in order to establish the stream ordering required to reuse + a piece of memory released by ::cuMemFreeAsync (default enabled). + - ::CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH: (value type = cuuint64_t) + Reset the high watermark that tracks the amount of backing memory that was + allocated for the memory pool. It is illegal to set this attribute to a non-zero value. + - ::CU_MEMPOOL_ATTR_USED_MEM_HIGH: (value type = cuuint64_t) + Reset the high watermark that tracks the amount of used memory that was + allocated for the memory pool. + + \param[in] pool - The memory pool to modify + \param[in] attr - The attribute to modify + \param[in] value - Pointer to the value to assign + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuMemAllocAsync, ::cuMemFreeAsync, ::cuDeviceGetDefaultMemPool, + ::cuDeviceGetMemPool, ::cuMemPoolCreate*/ + fn cuMemPoolSetAttribute( + pool: cuda_types::CUmemoryPool, + attr: cuda_types::CUmemPool_attribute, + value: *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Gets attributes of a memory pool + + Supported attributes are: + - ::CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: (value type = cuuint64_t) + Amount of reserved memory in bytes to hold onto before trying + to release memory back to the OS. When more than the release + threshold bytes of memory are held by the memory pool, the + allocator will try to release memory back to the OS on the + next call to stream, event or context synchronize. (default 0) + - ::CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: (value type = int) + Allow ::cuMemAllocAsync to use memory asynchronously freed + in another stream as long as a stream ordering dependency + of the allocating stream on the free action exists. + Cuda events and null stream interactions can create the required + stream ordered dependencies. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: (value type = int) + Allow reuse of already completed frees when there is no dependency + between the free and allocation. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: (value type = int) + Allow ::cuMemAllocAsync to insert new stream dependencies + in order to establish the stream ordering required to reuse + a piece of memory released by ::cuMemFreeAsync (default enabled). + - ::CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT: (value type = cuuint64_t) + Amount of backing memory currently allocated for the mempool + - ::CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH: (value type = cuuint64_t) + High watermark of backing memory allocated for the mempool since the + last time it was reset. + - ::CU_MEMPOOL_ATTR_USED_MEM_CURRENT: (value type = cuuint64_t) + Amount of memory from the pool that is currently in use by the application. + - ::CU_MEMPOOL_ATTR_USED_MEM_HIGH: (value type = cuuint64_t) + High watermark of the amount of memory from the pool that was in use by the application. + + \param[in] pool - The memory pool to get attributes of + \param[in] attr - The attribute to get + \param[out] value - Retrieved value + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuMemAllocAsync, ::cuMemFreeAsync, ::cuDeviceGetDefaultMemPool, + ::cuDeviceGetMemPool, ::cuMemPoolCreate*/ + fn cuMemPoolGetAttribute( + pool: cuda_types::CUmemoryPool, + attr: cuda_types::CUmemPool_attribute, + value: *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Controls visibility of pools between devices + + \param[in] pool - The pool being modified + \param[in] map - Array of access descriptors. Each descriptor instructs the access to enable for a single gpu. + \param[in] count - Number of descriptors in the map array. + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuMemAllocAsync, ::cuMemFreeAsync, ::cuDeviceGetDefaultMemPool, + ::cuDeviceGetMemPool, ::cuMemPoolCreate*/ + fn cuMemPoolSetAccess( + pool: cuda_types::CUmemoryPool, + map: *const cuda_types::CUmemAccessDesc, + count: usize, + ) -> cuda_types::CUresult; + /** \brief Returns the accessibility of a pool from a device + + Returns the accessibility of the pool's memory from the specified location. + + \param[out] flags - the accessibility of the pool from the specified location + \param[in] memPool - the pool being queried + \param[in] location - the location accessing the pool + + \sa ::cuMemAllocAsync, ::cuMemFreeAsync, ::cuDeviceGetDefaultMemPool, + ::cuDeviceGetMemPool, ::cuMemPoolCreate*/ + fn cuMemPoolGetAccess( + flags: *mut cuda_types::CUmemAccess_flags, + memPool: cuda_types::CUmemoryPool, + location: *mut cuda_types::CUmemLocation, + ) -> cuda_types::CUresult; + /** \brief Creates a memory pool + + Creates a CUDA memory pool and returns the handle in \p pool. The \p poolProps determines + the properties of the pool such as the backing device and IPC capabilities. + + To create a memory pool targeting a specific host NUMA node, applications must + set ::CUmemPoolProps::CUmemLocation::type to ::CU_MEM_LOCATION_TYPE_HOST_NUMA and + ::CUmemPoolProps::CUmemLocation::id must specify the NUMA ID of the host memory node. + By default, the pool's memory will be accessible from the device it is allocated on. + In the case of pools created with ::CU_MEM_LOCATION_TYPE_HOST_NUMA, their default accessibility + will be from the host CPU. + Applications can control the maximum size of the pool by specifying a non-zero value for ::CUmemPoolProps::maxSize. + If set to 0, the maximum size of the pool will default to a system dependent value. + + Applications can set ::CUmemPoolProps::handleTypes to ::CU_MEM_HANDLE_TYPE_FABRIC + in order to create ::CUmemoryPool suitable for sharing within an IMEX domain. + An IMEX domain is either an OS instance or a group of securely connected OS instances + using the NVIDIA IMEX daemon. An IMEX channel is a global resource within the IMEX domain + that represents a logical entity that aims to provide fine grained accessibility control + for the participating processes. When exporter and importer CUDA processes have been + granted access to the same IMEX channel, they can securely share memory. + If the allocating process does not have access setup for an IMEX channel, attempting to export + a ::CUmemoryPool with ::CU_MEM_HANDLE_TYPE_FABRIC will result in ::CUDA_ERROR_NOT_PERMITTED. + The nvidia-modprobe CLI provides more information regarding setting up of IMEX channels. + + \note Specifying CU_MEM_HANDLE_TYPE_NONE creates a memory pool that will not support IPC. + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_NOT_PERMITTED + ::CUDA_ERROR_NOT_SUPPORTED + + \sa ::cuDeviceSetMemPool, ::cuDeviceGetMemPool, ::cuDeviceGetDefaultMemPool, + ::cuMemAllocFromPoolAsync, ::cuMemPoolExportToShareableHandle*/ + fn cuMemPoolCreate( + pool: *mut cuda_types::CUmemoryPool, + poolProps: *const cuda_types::CUmemPoolProps, + ) -> cuda_types::CUresult; + /** \brief Destroys the specified memory pool + + If any pointers obtained from this pool haven't been freed or + the pool has free operations that haven't completed + when ::cuMemPoolDestroy is invoked, the function will return immediately and the + resources associated with the pool will be released automatically + once there are no more outstanding allocations. + + Destroying the current mempool of a device sets the default mempool of + that device as the current mempool for that device. + + \note A device's default memory pool cannot be destroyed. + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuMemFreeAsync, ::cuDeviceSetMemPool, ::cuDeviceGetMemPool, + ::cuDeviceGetDefaultMemPool, ::cuMemPoolCreate*/ + fn cuMemPoolDestroy(pool: cuda_types::CUmemoryPool) -> cuda_types::CUresult; + /** \brief Allocates memory from a specified pool with stream ordered semantics. + + Inserts an allocation operation into \p hStream. + A pointer to the allocated memory is returned immediately in *dptr. + The allocation must not be accessed until the the allocation operation completes. + The allocation comes from the specified memory pool. + + \note + - The specified memory pool may be from a device different than that of the specified \p hStream. + + - Basic stream ordering allows future work submitted into the same stream to use the allocation. + Stream query, stream synchronize, and CUDA events can be used to guarantee that the allocation + operation completes before work submitted in a separate stream runs. + + \note During stream capture, this function results in the creation of an allocation node. In this case, + the allocation is owned by the graph instead of the memory pool. The memory pool's properties + are used to set the node's creation parameters. + + \param[out] dptr - Returned device pointer + \param[in] bytesize - Number of bytes to allocate + \param[in] pool - The pool to allocate from + \param[in] hStream - The stream establishing the stream ordering semantic + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT (default stream specified with no current context), + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_OUT_OF_MEMORY + + \sa ::cuMemAllocAsync, ::cuMemFreeAsync, ::cuDeviceGetDefaultMemPool, + ::cuDeviceGetMemPool, ::cuMemPoolCreate, ::cuMemPoolSetAccess, + ::cuMemPoolSetAttribute*/ + fn cuMemAllocFromPoolAsync_ptsz( + dptr: *mut cuda_types::CUdeviceptr, + bytesize: usize, + pool: cuda_types::CUmemoryPool, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Exports a memory pool to the requested handle type. + + Given an IPC capable mempool, create an OS handle to share the pool with another process. + A recipient process can convert the shareable handle into a mempool with ::cuMemPoolImportFromShareableHandle. + Individual pointers can then be shared with the ::cuMemPoolExportPointer and ::cuMemPoolImportPointer APIs. + The implementation of what the shareable handle is and how it can be transferred is defined by the requested + handle type. + + \note: To create an IPC capable mempool, create a mempool with a CUmemAllocationHandleType other than CU_MEM_HANDLE_TYPE_NONE. + + \param[out] handle_out - Returned OS handle + \param[in] pool - pool to export + \param[in] handleType - the type of handle to create + \param[in] flags - must be 0 + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_OUT_OF_MEMORY + + \sa ::cuMemPoolImportFromShareableHandle, ::cuMemPoolExportPointer, + ::cuMemPoolImportPointer, ::cuMemAllocAsync, ::cuMemFreeAsync, + ::cuDeviceGetDefaultMemPool, ::cuDeviceGetMemPool, ::cuMemPoolCreate, + ::cuMemPoolSetAccess, ::cuMemPoolSetAttribute*/ + fn cuMemPoolExportToShareableHandle( + handle_out: *mut ::core::ffi::c_void, + pool: cuda_types::CUmemoryPool, + handleType: cuda_types::CUmemAllocationHandleType, + flags: ::core::ffi::c_ulonglong, + ) -> cuda_types::CUresult; + /** \brief imports a memory pool from a shared handle. + + Specific allocations can be imported from the imported pool with cuMemPoolImportPointer. + + If \p handleType is ::CU_MEM_HANDLE_TYPE_FABRIC and the importer process has not been + granted access to the same IMEX channel as the exporter process, this API will error + as ::CUDA_ERROR_NOT_PERMITTED. + + + \note Imported memory pools do not support creating new allocations. + As such imported memory pools may not be used in cuDeviceSetMemPool + or ::cuMemAllocFromPoolAsync calls. + + \param[out] pool_out - Returned memory pool + \param[in] handle - OS handle of the pool to open + \param[in] handleType - The type of handle being imported + \param[in] flags - must be 0 + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_OUT_OF_MEMORY + + \sa ::cuMemPoolExportToShareableHandle, ::cuMemPoolExportPointer, ::cuMemPoolImportPointer*/ + fn cuMemPoolImportFromShareableHandle( + pool_out: *mut cuda_types::CUmemoryPool, + handle: *mut ::core::ffi::c_void, + handleType: cuda_types::CUmemAllocationHandleType, + flags: ::core::ffi::c_ulonglong, + ) -> cuda_types::CUresult; + /** \brief Export data to share a memory pool allocation between processes. + + Constructs \p shareData_out for sharing a specific allocation from an already shared memory pool. + The recipient process can import the allocation with the ::cuMemPoolImportPointer api. + The data is not a handle and may be shared through any IPC mechanism. + + \param[out] shareData_out - Returned export data + \param[in] ptr - pointer to memory being exported + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_OUT_OF_MEMORY + + \sa ::cuMemPoolExportToShareableHandle, ::cuMemPoolImportFromShareableHandle, ::cuMemPoolImportPointer*/ + fn cuMemPoolExportPointer( + shareData_out: *mut cuda_types::CUmemPoolPtrExportData, + ptr: cuda_types::CUdeviceptr, + ) -> cuda_types::CUresult; + /** \brief Import a memory pool allocation from another process. + + Returns in \p ptr_out a pointer to the imported memory. + The imported memory must not be accessed before the allocation operation completes + in the exporting process. The imported memory must be freed from all importing processes before + being freed in the exporting process. The pointer may be freed with cuMemFree + or cuMemFreeAsync. If cuMemFreeAsync is used, the free must be completed + on the importing process before the free operation on the exporting process. + + \note The cuMemFreeAsync api may be used in the exporting process before + the cuMemFreeAsync operation completes in its stream as long as the + cuMemFreeAsync in the exporting process specifies a stream with + a stream dependency on the importing process's cuMemFreeAsync. + + \param[out] ptr_out - pointer to imported memory + \param[in] pool - pool from which to import + \param[in] shareData - data specifying the memory to import + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_OUT_OF_MEMORY + + \sa ::cuMemPoolExportToShareableHandle, ::cuMemPoolImportFromShareableHandle, ::cuMemPoolExportPointer*/ + fn cuMemPoolImportPointer( + ptr_out: *mut cuda_types::CUdeviceptr, + pool: cuda_types::CUmemoryPool, + shareData: *mut cuda_types::CUmemPoolPtrExportData, + ) -> cuda_types::CUresult; + /** \brief Create a generic allocation handle representing a multicast object described by the given properties. + + This creates a multicast object as described by \p prop. The number of + participating devices is specified by ::CUmulticastObjectProp::numDevices. + Devices can be added to the multicast object via ::cuMulticastAddDevice. + All participating devices must be added to the multicast object before memory + can be bound to it. Memory is bound to the multicast object via either + ::cuMulticastBindMem or ::cuMulticastBindAddr, and can be unbound via + ::cuMulticastUnbind. The total amount of memory that can be bound per device + is specified by :CUmulticastObjectProp::size. This size must be a multiple of + the value returned by ::cuMulticastGetGranularity with the flag + ::CU_MULTICAST_GRANULARITY_MINIMUM. For best performance however, the size + should be aligned to the value returned by ::cuMulticastGetGranularity with + the flag ::CU_MULTICAST_GRANULARITY_RECOMMENDED. + + After all participating devices have been added, multicast objects can also + be mapped to a device's virtual address space using the virtual memory + management APIs (see ::cuMemMap and ::cuMemSetAccess). Multicast objects can + also be shared with other processes by requesting a shareable handle via + ::cuMemExportToShareableHandle. Note that the desired types of shareable + handles must be specified in the bitmask ::CUmulticastObjectProp::handleTypes. + Multicast objects can be released using the virtual memory management API + ::cuMemRelease. + + \param[out] mcHandle Value of handle returned. + \param[in] prop Properties of the multicast object to create. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + + \sa ::cuMulticastAddDevice, ::cuMulticastBindMem, ::cuMulticastBindAddr, ::cuMulticastUnbind + \sa ::cuMemCreate, ::cuMemRelease, ::cuMemExportToShareableHandle, ::cuMemImportFromShareableHandle*/ + fn cuMulticastCreate( + mcHandle: *mut cuda_types::CUmemGenericAllocationHandle, + prop: *const cuda_types::CUmulticastObjectProp, + ) -> cuda_types::CUresult; + /** \brief Associate a device to a multicast object. + + Associates a device to a multicast object. The added device will be a part of + the multicast team of size specified by CUmulticastObjectProp::numDevices + during ::cuMulticastCreate. + The association of the device to the multicast object is permanent during + the life time of the multicast object. + All devices must be added to the multicast team before any memory can be + bound to any device in the team. Any calls to ::cuMulticastBindMem or + ::cuMulticastBindAddr will block until all devices have been added. + Similarly all devices must be added to the multicast team before a virtual + address range can be mapped to the multicast object. A call to ::cuMemMap + will block until all devices have been added. + + \param[in] mcHandle Handle representing a multicast object. + \param[in] dev Device that will be associated to the multicast + object. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + + \sa ::cuMulticastCreate, ::cuMulticastBindMem, ::cuMulticastBindAddr*/ + fn cuMulticastAddDevice( + mcHandle: cuda_types::CUmemGenericAllocationHandle, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Bind a memory allocation represented by a handle to a multicast object. + + Binds a memory allocation specified by \p memHandle and created via + ::cuMemCreate to a multicast object represented by \p mcHandle and created + via ::cuMulticastCreate. The intended \p size of the bind, the offset in the + multicast range \p mcOffset as well as the offset in the memory \p memOffset + must be a multiple of the value returned by ::cuMulticastGetGranularity with + the flag ::CU_MULTICAST_GRANULARITY_MINIMUM. For best performance however, + \p size, \p mcOffset and \p memOffset should be aligned to the granularity of + the memory allocation(see ::cuMemGetAllocationGranularity) or to the value + returned by ::cuMulticastGetGranularity with the flag + ::CU_MULTICAST_GRANULARITY_RECOMMENDED. + + The \p size + \p memOffset must be smaller than the size of the allocated + memory. Similarly the \p size + \p mcOffset must be smaller than the size + of the multicast object. + The memory allocation must have beeen created on one of the devices + that was added to the multicast team via ::cuMulticastAddDevice. + Externally shareable as well as imported multicast objects can be bound only + to externally shareable memory. + Note that this call will return CUDA_ERROR_OUT_OF_MEMORY if there are + insufficient resources required to perform the bind. This call may also + return CUDA_ERROR_SYSTEM_NOT_READY if the necessary system software is not + initialized or running. + + \param[in] mcHandle Handle representing a multicast object. + \param[in] mcOffset Offset into the multicast object for attachment. + \param[in] memHandle Handle representing a memory allocation. + \param[in] memOffset Offset into the memory for attachment. + \param[in] size Size of the memory that will be bound to the + multicast object. + \param[in] flags Flags for future use, must be zero for now. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_SYSTEM_NOT_READY + + \sa ::cuMulticastCreate, ::cuMulticastAddDevice, ::cuMemCreate*/ + fn cuMulticastBindMem( + mcHandle: cuda_types::CUmemGenericAllocationHandle, + mcOffset: usize, + memHandle: cuda_types::CUmemGenericAllocationHandle, + memOffset: usize, + size: usize, + flags: ::core::ffi::c_ulonglong, + ) -> cuda_types::CUresult; + /** \brief Bind a memory allocation represented by a virtual address to a multicast object. + + Binds a memory allocation specified by its mapped address \p memptr to a + multicast object represented by \p mcHandle. + The memory must have been allocated via ::cuMemCreate or ::cudaMallocAsync. + The intended \p size of the bind, the offset in the multicast range + \p mcOffset and \p memptr must be a multiple of the value returned by + ::cuMulticastGetGranularity with the flag ::CU_MULTICAST_GRANULARITY_MINIMUM. + For best performance however, \p size, \p mcOffset and \p memptr should be + aligned to the value returned by ::cuMulticastGetGranularity with the flag + ::CU_MULTICAST_GRANULARITY_RECOMMENDED. + + The \p size must be smaller than the size of the allocated memory. + Similarly the \p size + \p mcOffset must be smaller than the total size + of the multicast object. + The memory allocation must have beeen created on one of the devices + that was added to the multicast team via ::cuMulticastAddDevice. + Externally shareable as well as imported multicast objects can be bound only + to externally shareable memory. + Note that this call will return CUDA_ERROR_OUT_OF_MEMORY if there are + insufficient resources required to perform the bind. This call may also + return CUDA_ERROR_SYSTEM_NOT_READY if the necessary system software is not + initialized or running. + + \param[in] mcHandle Handle representing a multicast object. + \param[in] mcOffset Offset into multicast va range for attachment. + \param[in] memptr Virtual address of the memory allocation. + \param[in] size Size of memory that will be bound to the + multicast object. + \param[in] flags Flags for future use, must be zero now. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_SYSTEM_NOT_READY + + \sa ::cuMulticastCreate, ::cuMulticastAddDevice, ::cuMemCreate*/ + fn cuMulticastBindAddr( + mcHandle: cuda_types::CUmemGenericAllocationHandle, + mcOffset: usize, + memptr: cuda_types::CUdeviceptr, + size: usize, + flags: ::core::ffi::c_ulonglong, + ) -> cuda_types::CUresult; + /** \brief Unbind any memory allocations bound to a multicast object at a given offset and upto a given size. + + Unbinds any memory allocations hosted on \p dev and bound to a multicast + object at \p mcOffset and upto a given \p size. + The intended \p size of the unbind and the offset in the multicast range + ( \p mcOffset ) must be a multiple of the value returned by + ::cuMulticastGetGranularity flag ::CU_MULTICAST_GRANULARITY_MINIMUM. + The \p size + \p mcOffset must be smaller than the total size of the + multicast object. + + \note + Warning: + The \p mcOffset and the \p size must match the corresponding values specified + during the bind call. Any other values may result in undefined behavior. + + \param[in] mcHandle Handle representing a multicast object. + \param[in] dev Device that hosts the memory allocation. + \param[in] mcOffset Offset into the multicast object. + \param[in] size Desired size to unbind. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + + \sa ::cuMulticastBindMem, ::cuMulticastBindAddr*/ + fn cuMulticastUnbind( + mcHandle: cuda_types::CUmemGenericAllocationHandle, + dev: cuda_types::CUdevice, + mcOffset: usize, + size: usize, + ) -> cuda_types::CUresult; + /** \brief Calculates either the minimal or recommended granularity for multicast object + + Calculates either the minimal or recommended granularity for a given set of + multicast object properties and returns it in granularity. This granularity + can be used as a multiple for size, bind offsets and address mappings of the + multicast object. + + \param[out] granularity Returned granularity. + \param[in] prop Properties of the multicast object. + \param[in] option Determines which granularity to return. + + \returns + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_NOT_SUPPORTED + + \sa ::cuMulticastCreate, ::cuMulticastBindMem, ::cuMulticastBindAddr, ::cuMulticastUnbind*/ + fn cuMulticastGetGranularity( + granularity: *mut usize, + prop: *const cuda_types::CUmulticastObjectProp, + option: cuda_types::CUmulticastGranularity_flags, + ) -> cuda_types::CUresult; + /** \brief Returns information about a pointer + + The supported attributes are: + + - ::CU_POINTER_ATTRIBUTE_CONTEXT: + + Returns in \p *data the ::CUcontext in which \p ptr was allocated or + registered. + The type of \p data must be ::CUcontext *. + + If \p ptr was not allocated by, mapped by, or registered with + a ::CUcontext which uses unified virtual addressing then + ::CUDA_ERROR_INVALID_VALUE is returned. + + - ::CU_POINTER_ATTRIBUTE_MEMORY_TYPE: + + Returns in \p *data the physical memory type of the memory that + \p ptr addresses as a ::CUmemorytype enumerated value. + The type of \p data must be unsigned int. + + If \p ptr addresses device memory then \p *data is set to + ::CU_MEMORYTYPE_DEVICE. The particular ::CUdevice on which the + memory resides is the ::CUdevice of the ::CUcontext returned by the + ::CU_POINTER_ATTRIBUTE_CONTEXT attribute of \p ptr. + + If \p ptr addresses host memory then \p *data is set to + ::CU_MEMORYTYPE_HOST. + + If \p ptr was not allocated by, mapped by, or registered with + a ::CUcontext which uses unified virtual addressing then + ::CUDA_ERROR_INVALID_VALUE is returned. + + If the current ::CUcontext does not support unified virtual + addressing then ::CUDA_ERROR_INVALID_CONTEXT is returned. + + - ::CU_POINTER_ATTRIBUTE_DEVICE_POINTER: + + Returns in \p *data the device pointer value through which + \p ptr may be accessed by kernels running in the current + ::CUcontext. + The type of \p data must be CUdeviceptr *. + + If there exists no device pointer value through which + kernels running in the current ::CUcontext may access + \p ptr then ::CUDA_ERROR_INVALID_VALUE is returned. + + If there is no current ::CUcontext then + ::CUDA_ERROR_INVALID_CONTEXT is returned. + + Except in the exceptional disjoint addressing cases discussed + below, the value returned in \p *data will equal the input + value \p ptr. + + - ::CU_POINTER_ATTRIBUTE_HOST_POINTER: + + Returns in \p *data the host pointer value through which + \p ptr may be accessed by by the host program. + The type of \p data must be void **. + If there exists no host pointer value through which + the host program may directly access \p ptr then + ::CUDA_ERROR_INVALID_VALUE is returned. + + Except in the exceptional disjoint addressing cases discussed + below, the value returned in \p *data will equal the input + value \p ptr. + + - ::CU_POINTER_ATTRIBUTE_P2P_TOKENS: + + Returns in \p *data two tokens for use with the nv-p2p.h Linux + kernel interface. \p data must be a struct of type + CUDA_POINTER_ATTRIBUTE_P2P_TOKENS. + + \p ptr must be a pointer to memory obtained from :cuMemAlloc(). + Note that p2pToken and vaSpaceToken are only valid for the + lifetime of the source allocation. A subsequent allocation at + the same address may return completely different tokens. + Querying this attribute has a side effect of setting the attribute + ::CU_POINTER_ATTRIBUTE_SYNC_MEMOPS for the region of memory that + \p ptr points to. + + - ::CU_POINTER_ATTRIBUTE_SYNC_MEMOPS: + + A boolean attribute which when set, ensures that synchronous memory operations + initiated on the region of memory that \p ptr points to will always synchronize. + See further documentation in the section titled "API synchronization behavior" + to learn more about cases when synchronous memory operations can + exhibit asynchronous behavior. + + - ::CU_POINTER_ATTRIBUTE_BUFFER_ID: + + Returns in \p *data a buffer ID which is guaranteed to be unique within the process. + \p data must point to an unsigned long long. + + \p ptr must be a pointer to memory obtained from a CUDA memory allocation API. + Every memory allocation from any of the CUDA memory allocation APIs will + have a unique ID over a process lifetime. Subsequent allocations do not reuse IDs + from previous freed allocations. IDs are only unique within a single process. + + + - ::CU_POINTER_ATTRIBUTE_IS_MANAGED: + + Returns in \p *data a boolean that indicates whether the pointer points to + managed memory or not. + + If \p ptr is not a valid CUDA pointer then ::CUDA_ERROR_INVALID_VALUE is returned. + + - ::CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL: + + Returns in \p *data an integer representing a device ordinal of a device against + which the memory was allocated or registered. + + - ::CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE: + + Returns in \p *data a boolean that indicates if this pointer maps to + an allocation that is suitable for ::cudaIpcGetMemHandle. + + - ::CU_POINTER_ATTRIBUTE_RANGE_START_ADDR: + + Returns in \p *data the starting address for the allocation referenced + by the device pointer \p ptr. Note that this is not necessarily the + address of the mapped region, but the address of the mappable address + range \p ptr references (e.g. from ::cuMemAddressReserve). + + - ::CU_POINTER_ATTRIBUTE_RANGE_SIZE: + + Returns in \p *data the size for the allocation referenced by the device + pointer \p ptr. Note that this is not necessarily the size of the mapped + region, but the size of the mappable address range \p ptr references + (e.g. from ::cuMemAddressReserve). To retrieve the size of the mapped + region, see ::cuMemGetAddressRange + + - ::CU_POINTER_ATTRIBUTE_MAPPED: + + Returns in \p *data a boolean that indicates if this pointer is in a + valid address range that is mapped to a backing allocation. + + - ::CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES: + + Returns a bitmask of the allowed handle types for an allocation that may + be passed to ::cuMemExportToShareableHandle. + + - ::CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE: + + Returns in \p *data the handle to the mempool that the allocation was obtained from. + + \par + + Note that for most allocations in the unified virtual address space + the host and device pointer for accessing the allocation will be the + same. The exceptions to this are + - user memory registered using ::cuMemHostRegister + - host memory allocated using ::cuMemHostAlloc with the + ::CU_MEMHOSTALLOC_WRITECOMBINED flag + For these types of allocation there will exist separate, disjoint host + and device addresses for accessing the allocation. In particular + - The host address will correspond to an invalid unmapped device address + (which will result in an exception if accessed from the device) + - The device address will correspond to an invalid unmapped host address + (which will result in an exception if accessed from the host). + For these types of allocations, querying ::CU_POINTER_ATTRIBUTE_HOST_POINTER + and ::CU_POINTER_ATTRIBUTE_DEVICE_POINTER may be used to retrieve the host + and device addresses from either address. + + \param data - Returned pointer attribute value + \param attribute - Pointer attribute to query + \param ptr - Pointer + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuPointerSetAttribute, + ::cuMemAlloc, + ::cuMemFree, + ::cuMemAllocHost, + ::cuMemFreeHost, + ::cuMemHostAlloc, + ::cuMemHostRegister, + ::cuMemHostUnregister, + ::cudaPointerGetAttributes*/ + fn cuPointerGetAttribute( + data: *mut ::core::ffi::c_void, + attribute: cuda_types::CUpointer_attribute, + ptr: cuda_types::CUdeviceptr, + ) -> cuda_types::CUresult; + /** \brief Prefetches memory to the specified destination device + + Note there is a later version of this API, ::cuMemPrefetchAsync_v2. It will + supplant this version in 13.0, which is retained for minor version compatibility. + + Prefetches memory to the specified destination device. \p devPtr is the + base device pointer of the memory to be prefetched and \p dstDevice is the + destination device. \p count specifies the number of bytes to copy. \p hStream + is the stream in which the operation is enqueued. The memory range must refer + to managed memory allocated via ::cuMemAllocManaged or declared via __managed__ variables. + + Passing in CU_DEVICE_CPU for \p dstDevice will prefetch the data to host memory. If + \p dstDevice is a GPU, then the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS + must be non-zero. Additionally, \p hStream must be associated with a device that has a + non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. + + The start address and end address of the memory range will be rounded down and rounded up + respectively to be aligned to CPU page size before the prefetch operation is enqueued + in the stream. + + If no physical memory has been allocated for this region, then this memory region + will be populated and mapped on the destination device. If there's insufficient + memory to prefetch the desired region, the Unified Memory driver may evict pages from other + ::cuMemAllocManaged allocations to host memory in order to make room. Device memory + allocated using ::cuMemAlloc or ::cuArrayCreate will not be evicted. + + By default, any mappings to the previous location of the migrated pages are removed and + mappings for the new location are only setup on \p dstDevice. The exact behavior however + also depends on the settings applied to this memory range via ::cuMemAdvise as described + below: + + If ::CU_MEM_ADVISE_SET_READ_MOSTLY was set on any subset of this memory range, + then that subset will create a read-only copy of the pages on \p dstDevice. + + If ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION was called on any subset of this memory + range, then the pages will be migrated to \p dstDevice even if \p dstDevice is not the + preferred location of any pages in the memory range. + + If ::CU_MEM_ADVISE_SET_ACCESSED_BY was called on any subset of this memory range, + then mappings to those pages from all the appropriate processors are updated to + refer to the new location if establishing such a mapping is possible. Otherwise, + those mappings are cleared. + + Note that this API is not required for functionality and only serves to improve performance + by allowing the application to migrate data to a suitable location before it is accessed. + Memory accesses to this range are always coherent and are allowed even when the data is + actively being migrated. + + Note that this function is asynchronous with respect to the host and all work + on other devices. + + \param devPtr - Pointer to be prefetched + \param count - Size in bytes + \param dstDevice - Destination device to prefetch to + \param hStream - Stream to enqueue prefetch operation + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + \note_async + \note_null_stream + + \sa ::cuMemcpy, ::cuMemcpyPeer, ::cuMemcpyAsync, + ::cuMemcpy3DPeerAsync, ::cuMemAdvise, ::cuMemPrefetchAsync + ::cudaMemPrefetchAsync_v2*/ + fn cuMemPrefetchAsync_ptsz( + devPtr: cuda_types::CUdeviceptr, + count: usize, + dstDevice: cuda_types::CUdevice, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Prefetches memory to the specified destination location + + Prefetches memory to the specified destination location. \p devPtr is the + base device pointer of the memory to be prefetched and \p location specifies the + destination location. \p count specifies the number of bytes to copy. \p hStream + is the stream in which the operation is enqueued. The memory range must refer + to managed memory allocated via ::cuMemAllocManaged or declared via __managed__ variables. + + Specifying ::CU_MEM_LOCATION_TYPE_DEVICE for ::CUmemLocation::type will prefetch memory to GPU + specified by device ordinal ::CUmemLocation::id which must have non-zero value for the device attribute + ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. Additionally, \p hStream must be associated with a device + that has a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. + Specifying ::CU_MEM_LOCATION_TYPE_HOST as ::CUmemLocation::type will prefetch data to host memory. + Applications can request prefetching memory to a specific host NUMA node by specifying + ::CU_MEM_LOCATION_TYPE_HOST_NUMA for ::CUmemLocation::type and a valid host NUMA node id in ::CUmemLocation::id + Users can also request prefetching memory to the host NUMA node closest to the current thread's CPU by specifying + ::CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT for ::CUmemLocation::type. Note when ::CUmemLocation::type is etiher + ::CU_MEM_LOCATION_TYPE_HOST OR ::CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT, ::CUmemLocation::id will be ignored. + + The start address and end address of the memory range will be rounded down and rounded up + respectively to be aligned to CPU page size before the prefetch operation is enqueued + in the stream. + + If no physical memory has been allocated for this region, then this memory region + will be populated and mapped on the destination device. If there's insufficient + memory to prefetch the desired region, the Unified Memory driver may evict pages from other + ::cuMemAllocManaged allocations to host memory in order to make room. Device memory + allocated using ::cuMemAlloc or ::cuArrayCreate will not be evicted. + + By default, any mappings to the previous location of the migrated pages are removed and + mappings for the new location are only setup on the destination location. The exact behavior however + also depends on the settings applied to this memory range via ::cuMemAdvise as described + below: + + If ::CU_MEM_ADVISE_SET_READ_MOSTLY was set on any subset of this memory range, + then that subset will create a read-only copy of the pages on destination location. + If however the destination location is a host NUMA node, then any pages of that subset + that are already in another host NUMA node will be transferred to the destination. + + If ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION was called on any subset of this memory + range, then the pages will be migrated to \p location even if \p location is not the + preferred location of any pages in the memory range. + + If ::CU_MEM_ADVISE_SET_ACCESSED_BY was called on any subset of this memory range, + then mappings to those pages from all the appropriate processors are updated to + refer to the new location if establishing such a mapping is possible. Otherwise, + those mappings are cleared. + + Note that this API is not required for functionality and only serves to improve performance + by allowing the application to migrate data to a suitable location before it is accessed. + Memory accesses to this range are always coherent and are allowed even when the data is + actively being migrated. + + Note that this function is asynchronous with respect to the host and all work + on other devices. + + \param devPtr - Pointer to be prefetched + \param count - Size in bytes + \param dstDevice - Destination device to prefetch to + \param flags - flags for future use, must be zero now. + \param hStream - Stream to enqueue prefetch operation + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + \note_async + \note_null_stream + + \sa ::cuMemcpy, ::cuMemcpyPeer, ::cuMemcpyAsync, + ::cuMemcpy3DPeerAsync, ::cuMemAdvise, ::cuMemPrefetchAsync + ::cudaMemPrefetchAsync_v2*/ + fn cuMemPrefetchAsync_v2_ptsz( + devPtr: cuda_types::CUdeviceptr, + count: usize, + location: cuda_types::CUmemLocation, + flags: ::core::ffi::c_uint, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Advise about the usage of a given memory range + + Note there is a later version of this API, ::cuMemAdvise_v2. It will + supplant this version in 13.0, which is retained for minor version compatibility. + + Advise the Unified Memory subsystem about the usage pattern for the memory range + starting at \p devPtr with a size of \p count bytes. The start address and end address of the memory + range will be rounded down and rounded up respectively to be aligned to CPU page size before the + advice is applied. The memory range must refer to managed memory allocated via ::cuMemAllocManaged + or declared via __managed__ variables. The memory range could also refer to system-allocated pageable + memory provided it represents a valid, host-accessible region of memory and all additional constraints + imposed by \p advice as outlined below are also satisfied. Specifying an invalid system-allocated pageable + memory range results in an error being returned. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. Any read accesses from any processor to this region will create a + read-only copy of at least the accessed pages in that processor's memory. Additionally, if ::cuMemPrefetchAsync + is called on this region, it will create a read-only copy of the data on the destination processor. + If any processor writes to this region, all copies of the corresponding page will be invalidated + except for the one where the write occurred. The \p device argument is ignored for this advice. + Note that for a page to be read-duplicated, the accessing processor must either be the CPU or a GPU + that has a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. + Also, if a context is created on a device that does not have the device attribute + ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS set, then read-duplication will not occur until + all such contexts are destroyed. + If the memory region refers to valid system-allocated pageable memory, then the accessing device must + have a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS for a read-only + copy to be created on that device. Note however that if the accessing device also has a non-zero value for the + device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, then setting this advice + will not create a read-only copy when that device accesses this memory region. + + - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY and also prevents the + Unified Memory driver from attempting heuristic read-duplication on the memory range. Any read-duplicated + copies of the data will be collapsed into a single copy. The location for the collapsed + copy will be the preferred location if the page has a preferred location and one of the read-duplicated + copies was resident at that location. Otherwise, the location chosen is arbitrary. + + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as host memory. If \p device is a GPU, then it must have a non-zero value for the + device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. Setting the preferred location + does not cause data to migrate to that location immediately. Instead, it guides the migration policy + when a fault occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + data migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the page thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between for example host and device + memory, the page may eventually be pinned to host memory by the Unified Memory driver. But + if the preferred location is set as device memory, then the page will continue to thrash indefinitely. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, then the + policies associated with that advice will override the policies of this advice, unless read accesses from + \p device will not result in a read-only copy being created on that device as outlined in description for + the advice ::CU_MEM_ADVISE_SET_READ_MOSTLY. + If the memory region refers to valid system-allocated pageable memory, then \p device must have a non-zero + value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS. + + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. + + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. If \p device is a GPU, then + the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS must be non-zero. + This advice does not cause data migration and has no impact on the location of the data per se. Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is recommended in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by peer GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to host memory because the CPU typically cannot access device memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in host memory. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, then the + policies associated with that advice will override the policies of this advice. Additionally, if the + preferred location of this memory region or any subset of it is also \p device, then the policies + associated with ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION will override the policies of this advice. + If the memory region refers to valid system-allocated pageable memory, then \p device must have a non-zero + value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS. Additionally, if \p device has + a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, + then this call has no effect. + + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of ::CU_MEM_ADVISE_SET_ACCESSED_BY. Any mappings to + the data from \p device may be removed at any time causing accesses to result in non-fatal page faults. + If the memory region refers to valid system-allocated pageable memory, then \p device must have a non-zero + value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS. Additionally, if \p device has + a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, + then this call has no effect. + + \param devPtr - Pointer to memory to set the advice for + \param count - Size in bytes of the memory range + \param advice - Advice to be applied for the specified memory range + \param device - Device to apply the advice for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + \note_async + \note_null_stream + + \sa ::cuMemcpy, ::cuMemcpyPeer, ::cuMemcpyAsync, + ::cuMemcpy3DPeerAsync, ::cuMemPrefetchAsync, ::cuMemAdvise_v2 + ::cudaMemAdvise*/ + fn cuMemAdvise( + devPtr: cuda_types::CUdeviceptr, + count: usize, + advice: cuda_types::CUmem_advise, + device: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Advise about the usage of a given memory range + + Advise the Unified Memory subsystem about the usage pattern for the memory range + starting at \p devPtr with a size of \p count bytes. The start address and end address of the memory + range will be rounded down and rounded up respectively to be aligned to CPU page size before the + advice is applied. The memory range must refer to managed memory allocated via ::cuMemAllocManaged + or declared via __managed__ variables. The memory range could also refer to system-allocated pageable + memory provided it represents a valid, host-accessible region of memory and all additional constraints + imposed by \p advice as outlined below are also satisfied. Specifying an invalid system-allocated pageable + memory range results in an error being returned. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. Any read accesses from any processor to this region will create a + read-only copy of at least the accessed pages in that processor's memory. Additionally, if ::cuMemPrefetchAsync + or ::cuMemPrefetchAsync_v2 is called on this region, it will create a read-only copy of the data on the destination processor. + If the target location for ::cuMemPrefetchAsync_v2 is a host NUMA node and a read-only copy already exists on + another host NUMA node, that copy will be migrated to the targeted host NUMA node. + If any processor writes to this region, all copies of the corresponding page will be invalidated + except for the one where the write occurred. If the writing processor is the CPU and the preferred location of + the page is a host NUMA node, then the page will also be migrated to that host NUMA node. The \p location argument is ignored for this advice. + Note that for a page to be read-duplicated, the accessing processor must either be the CPU or a GPU + that has a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. + Also, if a context is created on a device that does not have the device attribute + ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS set, then read-duplication will not occur until + all such contexts are destroyed. + If the memory region refers to valid system-allocated pageable memory, then the accessing device must + have a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS for a read-only + copy to be created on that device. Note however that if the accessing device also has a non-zero value for the + device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, then setting this advice + will not create a read-only copy when that device accesses this memory region. + + - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY and also prevents the + Unified Memory driver from attempting heuristic read-duplication on the memory range. Any read-duplicated + copies of the data will be collapsed into a single copy. The location for the collapsed + copy will be the preferred location if the page has a preferred location and one of the read-duplicated + copies was resident at that location. Otherwise, the location chosen is arbitrary. + Note: The \p location argument is ignored for this advice. + + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p location. When ::CUmemLocation::type is ::CU_MEM_LOCATION_TYPE_HOST, + ::CUmemLocation::id is ignored and the preferred location is set to be host memory. To set the preferred location + to a specific host NUMA node, applications must set ::CUmemLocation::type to ::CU_MEM_LOCATION_TYPE_HOST_NUMA and + ::CUmemLocation::id must specify the NUMA ID of the host NUMA node. If ::CUmemLocation::type is set to ::CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT, + ::CUmemLocation::id will be ignored and the the host NUMA node closest to the calling thread's CPU will be used as the preferred location. + If ::CUmemLocation::type is a ::CU_MEM_LOCATION_TYPE_DEVICE, then ::CUmemLocation::id must be a valid device ordinal + and the device must have a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. + Setting the preferred location does not cause data to migrate to that location immediately. Instead, it guides the migration policy + when a fault occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + data migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the page thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between for example host and device + memory, the page may eventually be pinned to host memory by the Unified Memory driver. But + if the preferred location is set as device memory, then the page will continue to thrash indefinitely. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, then the + policies associated with that advice will override the policies of this advice, unless read accesses from + \p location will not result in a read-only copy being created on that procesor as outlined in description for + the advice ::CU_MEM_ADVISE_SET_READ_MOSTLY. + If the memory region refers to valid system-allocated pageable memory, and ::CUmemLocation::type is CU_MEM_LOCATION_TYPE_DEVICE + then ::CUmemLocation::id must be a valid device that has a non-zero alue for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS. + + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. The \p location argument is ignored for this advice. + + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by processor \p location. + The ::CUmemLocation::type must be either ::CU_MEM_LOCATION_TYPE_DEVICE with ::CUmemLocation::id representing a valid device + ordinal or ::CU_MEM_LOCATION_TYPE_HOST and ::CUmemLocation::id will be ignored. All other location types are invalid. + If ::CUmemLocation::id is a GPU, then the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS must be non-zero. + This advice does not cause data migration and has no impact on the location of the data per se. Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is recommended in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by peer GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to host memory because the CPU typically cannot access device memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in host memory. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, then the + policies associated with that advice will override the policies of this advice. Additionally, if the + preferred location of this memory region or any subset of it is also \p location, then the policies + associated with ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION will override the policies of this advice. + If the memory region refers to valid system-allocated pageable memory, and ::CUmemLocation::type is ::CU_MEM_LOCATION_TYPE_DEVICE + then device in ::CUmemLocation::id must have a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS. + Additionally, if ::CUmemLocation::id has a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, + then this call has no effect. + + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of ::CU_MEM_ADVISE_SET_ACCESSED_BY. Any mappings to + the data from \p location may be removed at any time causing accesses to result in non-fatal page faults. + If the memory region refers to valid system-allocated pageable memory, and ::CUmemLocation::type is ::CU_MEM_LOCATION_TYPE_DEVICE + then device in ::CUmemLocation::id must have a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS. + Additionally, if ::CUmemLocation::id has a non-zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, + then this call has no effect. + + \param devPtr - Pointer to memory to set the advice for + \param count - Size in bytes of the memory range + \param advice - Advice to be applied for the specified memory range + \param location - location to apply the advice for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + \note_async + \note_null_stream + + \sa ::cuMemcpy, ::cuMemcpyPeer, ::cuMemcpyAsync, + ::cuMemcpy3DPeerAsync, ::cuMemPrefetchAsync, ::cuMemAdvise + ::cudaMemAdvise*/ + fn cuMemAdvise_v2( + devPtr: cuda_types::CUdeviceptr, + count: usize, + advice: cuda_types::CUmem_advise, + location: cuda_types::CUmemLocation, + ) -> cuda_types::CUresult; + /** \brief Query an attribute of a given memory range + + Query an attribute about the memory range starting at \p devPtr with a size of \p count bytes. The + memory range must refer to managed memory allocated via ::cuMemAllocManaged or declared via + __managed__ variables. + + The \p attribute parameter can take the following values: + - ::CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY: If this attribute is specified, \p data will be interpreted + as a 32-bit integer, and \p dataSize must be 4. The result returned will be 1 if all pages in the given + memory range have read-duplication enabled, or 0 otherwise. + - ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION: If this attribute is specified, \p data will be + interpreted as a 32-bit integer, and \p dataSize must be 4. The result returned will be a GPU device + id if all pages in the memory range have that GPU as their preferred location, or it will be CU_DEVICE_CPU + if all pages in the memory range have the CPU as their preferred location, or it will be CU_DEVICE_INVALID + if either all the pages don't have the same preferred location or some of the pages don't have a + preferred location at all. Note that the actual location of the pages in the memory range at the time of + the query may be different from the preferred location. + - ::CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY: If this attribute is specified, \p data will be interpreted + as an array of 32-bit integers, and \p dataSize must be a non-zero multiple of 4. The result returned + will be a list of device ids that had ::CU_MEM_ADVISE_SET_ACCESSED_BY set for that entire memory range. + If any device does not have that advice set for the entire memory range, that device will not be included. + If \p data is larger than the number of devices that have that advice set for that memory range, + CU_DEVICE_INVALID will be returned in all the extra space provided. For ex., if \p dataSize is 12 + (i.e. \p data has 3 elements) and only device 0 has the advice set, then the result returned will be + { 0, CU_DEVICE_INVALID, CU_DEVICE_INVALID }. If \p data is smaller than the number of devices that have + that advice set, then only as many devices will be returned as can fit in the array. There is no + guarantee on which specific devices will be returned, however. + - ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION: If this attribute is specified, \p data will be + interpreted as a 32-bit integer, and \p dataSize must be 4. The result returned will be the last location + to which all pages in the memory range were prefetched explicitly via ::cuMemPrefetchAsync. This will either be + a GPU id or CU_DEVICE_CPU depending on whether the last location for prefetch was a GPU or the CPU + respectively. If any page in the memory range was never explicitly prefetched or if all pages were not + prefetched to the same location, CU_DEVICE_INVALID will be returned. Note that this simply returns the + last location that the application requested to prefetch the memory range to. It gives no indication as to + whether the prefetch operation to that location has completed or even begun. + - ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE: If this attribute is specified, \p data will be + interpreted as a ::CUmemLocationType, and \p dataSize must be sizeof(CUmemLocationType). The ::CUmemLocationType returned will be + ::CU_MEM_LOCATION_TYPE_DEVICE if all pages in the memory range have the same GPU as their preferred location, or ::CUmemLocationType + will be ::CU_MEM_LOCATION_TYPE_HOST if all pages in the memory range have the CPU as their preferred location, or it will be ::CU_MEM_LOCATION_TYPE_HOST_NUMA + if all the pages in the memory range have the same host NUMA node ID as their preferred location or it will be ::CU_MEM_LOCATION_TYPE_INVALID + if either all the pages don't have the same preferred location or some of the pages don't have a preferred location at all. + Note that the actual location type of the pages in the memory range at the time of the query may be different from the preferred location type. + - ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID: If this attribute is specified, \p data will be + interpreted as a 32-bit integer, and \p dataSize must be 4. If the ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE query for the same address range + returns ::CU_MEM_LOCATION_TYPE_DEVICE, it will be a valid device ordinal or if it returns ::CU_MEM_LOCATION_TYPE_HOST_NUMA, it will be a valid host NUMA node ID + or if it returns any other location type, the id should be ignored. + - ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE: If this attribute is specified, \p data will be + interpreted as a ::CUmemLocationType, and \p dataSize must be sizeof(CUmemLocationType). The result returned will be the last location + to which all pages in the memory range were prefetched explicitly via ::cuMemPrefetchAsync. The ::CUmemLocationType returned + will be ::CU_MEM_LOCATION_TYPE_DEVICE if the last prefetch location was a GPU or ::CU_MEM_LOCATION_TYPE_HOST if it was the CPU or ::CU_MEM_LOCATION_TYPE_HOST_NUMA if + the last prefetch location was a specific host NUMA node. If any page in the memory range was never explicitly prefetched or if all pages were not + prefetched to the same location, ::CUmemLocationType will be ::CU_MEM_LOCATION_TYPE_INVALID. + Note that this simply returns the last location type that the application requested to prefetch the memory range to. It gives no indication as to + whether the prefetch operation to that location has completed or even begun. + - ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID: If this attribute is specified, \p data will be + interpreted as a 32-bit integer, and \p dataSize must be 4. If the ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE query for the same address range + returns ::CU_MEM_LOCATION_TYPE_DEVICE, it will be a valid device ordinal or if it returns ::CU_MEM_LOCATION_TYPE_HOST_NUMA, it will be a valid host NUMA node ID + or if it returns any other location type, the id should be ignored. + + \param data - A pointers to a memory location where the result + of each attribute query will be written to. + \param dataSize - Array containing the size of data + \param attribute - The attribute to query + \param devPtr - Start of the range to query + \param count - Size of the range to query + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + \note_async + \note_null_stream + + \sa ::cuMemRangeGetAttributes, ::cuMemPrefetchAsync, + ::cuMemAdvise, + ::cudaMemRangeGetAttribute*/ + fn cuMemRangeGetAttribute( + data: *mut ::core::ffi::c_void, + dataSize: usize, + attribute: cuda_types::CUmem_range_attribute, + devPtr: cuda_types::CUdeviceptr, + count: usize, + ) -> cuda_types::CUresult; + /** \brief Query attributes of a given memory range. + + Query attributes of the memory range starting at \p devPtr with a size of \p count bytes. The + memory range must refer to managed memory allocated via ::cuMemAllocManaged or declared via + __managed__ variables. The \p attributes array will be interpreted to have \p numAttributes + entries. The \p dataSizes array will also be interpreted to have \p numAttributes entries. + The results of the query will be stored in \p data. + + The list of supported attributes are given below. Please refer to ::cuMemRangeGetAttribute for + attribute descriptions and restrictions. + + - ::CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY + - ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION + - ::CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY + - ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION + - ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE + - ::CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID + - ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE + - ::CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID + + \param data - A two-dimensional array containing pointers to memory + locations where the result of each attribute query will be written to. + \param dataSizes - Array containing the sizes of each result + \param attributes - An array of attributes to query + (numAttributes and the number of attributes in this array should match) + \param numAttributes - Number of attributes to query + \param devPtr - Start of the range to query + \param count - Size of the range to query + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa ::cuMemRangeGetAttribute, ::cuMemAdvise, + ::cuMemPrefetchAsync, + ::cudaMemRangeGetAttributes*/ + fn cuMemRangeGetAttributes( + data: *mut *mut ::core::ffi::c_void, + dataSizes: *mut usize, + attributes: *mut cuda_types::CUmem_range_attribute, + numAttributes: usize, + devPtr: cuda_types::CUdeviceptr, + count: usize, + ) -> cuda_types::CUresult; + /** \brief Set attributes on a previously allocated memory region + + The supported attributes are: + + - ::CU_POINTER_ATTRIBUTE_SYNC_MEMOPS: + + A boolean attribute that can either be set (1) or unset (0). When set, + the region of memory that \p ptr points to is guaranteed to always synchronize + memory operations that are synchronous. If there are some previously initiated + synchronous memory operations that are pending when this attribute is set, the + function does not return until those memory operations are complete. + See further documentation in the section titled "API synchronization behavior" + to learn more about cases when synchronous memory operations can + exhibit asynchronous behavior. + \p value will be considered as a pointer to an unsigned integer to which this attribute is to be set. + + \param value - Pointer to memory containing the value to be set + \param attribute - Pointer attribute to set + \param ptr - Pointer to a memory region allocated using CUDA memory allocation APIs + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa ::cuPointerGetAttribute, + ::cuPointerGetAttributes, + ::cuMemAlloc, + ::cuMemFree, + ::cuMemAllocHost, + ::cuMemFreeHost, + ::cuMemHostAlloc, + ::cuMemHostRegister, + ::cuMemHostUnregister*/ + fn cuPointerSetAttribute( + value: *const ::core::ffi::c_void, + attribute: cuda_types::CUpointer_attribute, + ptr: cuda_types::CUdeviceptr, + ) -> cuda_types::CUresult; + /** \brief Returns information about a pointer. + + The supported attributes are (refer to ::cuPointerGetAttribute for attribute descriptions and restrictions): + + - ::CU_POINTER_ATTRIBUTE_CONTEXT + - ::CU_POINTER_ATTRIBUTE_MEMORY_TYPE + - ::CU_POINTER_ATTRIBUTE_DEVICE_POINTER + - ::CU_POINTER_ATTRIBUTE_HOST_POINTER + - ::CU_POINTER_ATTRIBUTE_SYNC_MEMOPS + - ::CU_POINTER_ATTRIBUTE_BUFFER_ID + - ::CU_POINTER_ATTRIBUTE_IS_MANAGED + - ::CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL + - ::CU_POINTER_ATTRIBUTE_RANGE_START_ADDR + - ::CU_POINTER_ATTRIBUTE_RANGE_SIZE + - ::CU_POINTER_ATTRIBUTE_MAPPED + - ::CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE + - ::CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES + - ::CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE + + \param numAttributes - Number of attributes to query + \param attributes - An array of attributes to query + (numAttributes and the number of attributes in this array should match) + \param data - A two-dimensional array containing pointers to memory + locations where the result of each attribute query will be written to. + \param ptr - Pointer to query + + Unlike ::cuPointerGetAttribute, this function will not return an error when the \p ptr + encountered is not a valid CUDA pointer. Instead, the attributes are assigned default NULL values + and CUDA_SUCCESS is returned. + + If \p ptr was not allocated by, mapped by, or registered with a ::CUcontext which uses UVA + (Unified Virtual Addressing), ::CUDA_ERROR_INVALID_CONTEXT is returned. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuPointerGetAttribute, + ::cuPointerSetAttribute, + ::cudaPointerGetAttributes*/ + fn cuPointerGetAttributes( + numAttributes: ::core::ffi::c_uint, + attributes: *mut cuda_types::CUpointer_attribute, + data: *mut *mut ::core::ffi::c_void, + ptr: cuda_types::CUdeviceptr, + ) -> cuda_types::CUresult; + /** \brief Create a stream + + Creates a stream and returns a handle in \p phStream. The \p Flags argument + determines behaviors of the stream. + + Valid values for \p Flags are: + - ::CU_STREAM_DEFAULT: Default stream creation flag. + - ::CU_STREAM_NON_BLOCKING: Specifies that work running in the created + stream may run concurrently with work in stream 0 (the NULL stream), and that + the created stream should perform no implicit synchronization with stream 0. + + \param phStream - Returned newly created stream + \param Flags - Parameters for stream creation + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY + \notefnerr + + \sa ::cuStreamDestroy, + ::cuStreamCreateWithPriority, + ::cuStreamGetPriority, + ::cuStreamGetFlags, + ::cuStreamWaitEvent, + ::cuStreamQuery, + ::cuStreamSynchronize, + ::cuStreamAddCallback, + ::cudaStreamCreate, + ::cudaStreamCreateWithFlags*/ + fn cuStreamCreate( + phStream: *mut cuda_types::CUstream, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Create a stream with the given priority + + Creates a stream with the specified priority and returns a handle in \p phStream. + This affects the scheduling priority of work in the stream. Priorities provide a + hint to preferentially run work with higher priority when possible, but do + not preempt already-running work or provide any other functional guarantee on + execution order. + + \p priority follows a convention where lower numbers represent higher priorities. + '0' represents default priority. The range of meaningful numerical priorities can + be queried using ::cuCtxGetStreamPriorityRange. If the specified priority is + outside the numerical range returned by ::cuCtxGetStreamPriorityRange, + it will automatically be clamped to the lowest or the highest number in the range. + + \param phStream - Returned newly created stream + \param flags - Flags for stream creation. See ::cuStreamCreate for a list of + valid flags + \param priority - Stream priority. Lower numbers represent higher priorities. + See ::cuCtxGetStreamPriorityRange for more information about + meaningful stream priorities that can be passed. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY + \notefnerr + + \note Stream priorities are supported only on GPUs + with compute capability 3.5 or higher. + + \note In the current implementation, only compute kernels launched in + priority streams are affected by the stream's priority. Stream priorities have + no effect on host-to-device and device-to-host memory operations. + + \sa ::cuStreamDestroy, + ::cuStreamCreate, + ::cuStreamGetPriority, + ::cuCtxGetStreamPriorityRange, + ::cuStreamGetFlags, + ::cuStreamWaitEvent, + ::cuStreamQuery, + ::cuStreamSynchronize, + ::cuStreamAddCallback, + ::cudaStreamCreateWithPriority*/ + fn cuStreamCreateWithPriority( + phStream: *mut cuda_types::CUstream, + flags: ::core::ffi::c_uint, + priority: ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Query the priority of a given stream + + Query the priority of a stream created using ::cuStreamCreate or ::cuStreamCreateWithPriority + and return the priority in \p priority. Note that if the stream was created with a + priority outside the numerical range returned by ::cuCtxGetStreamPriorityRange, + this function returns the clamped priority. + See ::cuStreamCreateWithPriority for details about priority clamping. + + \param hStream - Handle to the stream to be queried + \param priority - Pointer to a signed integer in which the stream's priority is returned + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_OUT_OF_MEMORY + \notefnerr + + \sa ::cuStreamDestroy, + ::cuStreamCreate, + ::cuStreamCreateWithPriority, + ::cuCtxGetStreamPriorityRange, + ::cuStreamGetFlags, + ::cudaStreamGetPriority*/ + fn cuStreamGetPriority_ptsz( + hStream: cuda_types::CUstream, + priority: *mut ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Query the flags of a given stream + + Query the flags of a stream created using ::cuStreamCreate or ::cuStreamCreateWithPriority + and return the flags in \p flags. + + \param hStream - Handle to the stream to be queried + \param flags - Pointer to an unsigned integer in which the stream's flags are returned + The value returned in \p flags is a logical 'OR' of all flags that + were used while creating this stream. See ::cuStreamCreate for the list + of valid flags + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_OUT_OF_MEMORY + \notefnerr + + \sa ::cuStreamDestroy, + ::cuStreamCreate, + ::cuStreamGetPriority, + ::cudaStreamGetFlags*/ + fn cuStreamGetFlags_ptsz( + hStream: cuda_types::CUstream, + flags: *mut ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Returns the unique Id associated with the stream handle supplied + + Returns in \p streamId the unique Id which is associated with the given stream handle. + The Id is unique for the life of the program. + + The stream handle \p hStream can refer to any of the following: + <ul> + <li>a stream created via any of the CUDA driver APIs such as ::cuStreamCreate + and ::cuStreamCreateWithPriority, or their runtime API equivalents such as + ::cudaStreamCreate, ::cudaStreamCreateWithFlags and ::cudaStreamCreateWithPriority. + Passing an invalid handle will result in undefined behavior.</li> + <li>any of the special streams such as the NULL stream, ::CU_STREAM_LEGACY and + ::CU_STREAM_PER_THREAD. The runtime API equivalents of these are also accepted, + which are NULL, ::cudaStreamLegacy and ::cudaStreamPerThread respectively.</li> + </ul> + + \param hStream - Handle to the stream to be queried + \param streamId - Pointer to store the Id of the stream + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa ::cuStreamDestroy, + ::cuStreamCreate, + ::cuStreamGetPriority, + ::cudaStreamGetId*/ + fn cuStreamGetId_ptsz( + hStream: cuda_types::CUstream, + streamId: *mut ::core::ffi::c_ulonglong, + ) -> cuda_types::CUresult; + /** \brief Query the context associated with a stream + + Returns the CUDA context that the stream is associated with. + + The stream handle \p hStream can refer to any of the following: + <ul> + <li>a stream created via any of the CUDA driver APIs such as ::cuStreamCreate + and ::cuStreamCreateWithPriority, or their runtime API equivalents such as + ::cudaStreamCreate, ::cudaStreamCreateWithFlags and ::cudaStreamCreateWithPriority. + The returned context is the context that was active in the calling thread when the + stream was created. Passing an invalid handle will result in undefined behavior.</li> + <li>any of the special streams such as the NULL stream, ::CU_STREAM_LEGACY and + ::CU_STREAM_PER_THREAD. The runtime API equivalents of these are also accepted, + which are NULL, ::cudaStreamLegacy and ::cudaStreamPerThread respectively. + Specifying any of the special handles will return the context current to the + calling thread. If no context is current to the calling thread, + ::CUDA_ERROR_INVALID_CONTEXT is returned.</li> + </ul> + + \param hStream - Handle to the stream to be queried + \param pctx - Returned context associated with the stream + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + \notefnerr + + \sa ::cuStreamDestroy, + ::cuStreamCreateWithPriority, + ::cuStreamGetPriority, + ::cuStreamGetFlags, + ::cuStreamWaitEvent, + ::cuStreamQuery, + ::cuStreamSynchronize, + ::cuStreamAddCallback, + ::cudaStreamCreate, + ::cudaStreamCreateWithFlags*/ + fn cuStreamGetCtx_ptsz( + hStream: cuda_types::CUstream, + pctx: *mut cuda_types::CUcontext, + ) -> cuda_types::CUresult; + /** \brief Make a compute stream wait on an event + + Makes all future work submitted to \p hStream wait for all work captured in + \p hEvent. See ::cuEventRecord() for details on what is captured by an event. + The synchronization will be performed efficiently on the device when applicable. + \p hEvent may be from a different context or device than \p hStream. + + flags include: + - ::CU_EVENT_WAIT_DEFAULT: Default event creation flag. + - ::CU_EVENT_WAIT_EXTERNAL: Event is captured in the graph as an external + event node when performing stream capture. This flag is invalid outside + of stream capture. + + \param hStream - Stream to wait + \param hEvent - Event to wait on (may not be NULL) + \param Flags - See ::CUevent_capture_flags + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + \note_null_stream + \notefnerr + + \sa ::cuStreamCreate, + ::cuEventRecord, + ::cuStreamQuery, + ::cuStreamSynchronize, + ::cuStreamAddCallback, + ::cuStreamDestroy, + ::cudaStreamWaitEvent*/ + fn cuStreamWaitEvent_ptsz( + hStream: cuda_types::CUstream, + hEvent: cuda_types::CUevent, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Add a callback to a compute stream + + \note This function is slated for eventual deprecation and removal. If + you do not require the callback to execute in case of a device error, + consider using ::cuLaunchHostFunc. Additionally, this function is not + supported with ::cuStreamBeginCapture and ::cuStreamEndCapture, unlike + ::cuLaunchHostFunc. + + Adds a callback to be called on the host after all currently enqueued + items in the stream have completed. For each + cuStreamAddCallback call, the callback will be executed exactly once. + The callback will block later work in the stream until it is finished. + + The callback may be passed ::CUDA_SUCCESS or an error code. In the event + of a device error, all subsequently executed callbacks will receive an + appropriate ::CUresult. + + Callbacks must not make any CUDA API calls. Attempting to use a CUDA API + will result in ::CUDA_ERROR_NOT_PERMITTED. Callbacks must not perform any + synchronization that may depend on outstanding device work or other callbacks + that are not mandated to run earlier. Callbacks without a mandated order + (in independent streams) execute in undefined order and may be serialized. + + For the purposes of Unified Memory, callback execution makes a number of + guarantees: + <ul> + <li>The callback stream is considered idle for the duration of the + callback. Thus, for example, a callback may always use memory attached + to the callback stream.</li> + <li>The start of execution of a callback has the same effect as + synchronizing an event recorded in the same stream immediately prior to + the callback. It thus synchronizes streams which have been "joined" + prior to the callback.</li> + <li>Adding device work to any stream does not have the effect of making + the stream active until all preceding host functions and stream callbacks + have executed. Thus, for + example, a callback might use global attached memory even if work has + been added to another stream, if the work has been ordered behind the + callback with an event.</li> + <li>Completion of a callback does not cause a stream to become + active except as described above. The callback stream will remain idle + if no device work follows the callback, and will remain idle across + consecutive callbacks without device work in between. Thus, for example, + stream synchronization can be done by signaling from a callback at the + end of the stream.</li> + </ul> + + \param hStream - Stream to add callback to + \param callback - The function to call once preceding stream operations are complete + \param userData - User specified data to be passed to the callback function + \param flags - Reserved for future use, must be 0 + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_SUPPORTED + \note_null_stream + \notefnerr + + \sa ::cuStreamCreate, + ::cuStreamQuery, + ::cuStreamSynchronize, + ::cuStreamWaitEvent, + ::cuStreamDestroy, + ::cuMemAllocManaged, + ::cuStreamAttachMemAsync, + ::cuLaunchHostFunc, + ::cudaStreamAddCallback*/ + fn cuStreamAddCallback_ptsz( + hStream: cuda_types::CUstream, + callback: cuda_types::CUstreamCallback, + userData: *mut ::core::ffi::c_void, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Begins graph capture on a stream + + Begin graph capture on \p hStream. When a stream is in capture mode, all operations + pushed into the stream will not be executed, but will instead be captured into + a graph, which will be returned via ::cuStreamEndCapture. Capture may not be initiated + if \p stream is CU_STREAM_LEGACY. Capture must be ended on the same stream in which + it was initiated, and it may only be initiated if the stream is not already in capture + mode. The capture mode may be queried via ::cuStreamIsCapturing. A unique id + representing the capture sequence may be queried via ::cuStreamGetCaptureInfo. + + If \p mode is not ::CU_STREAM_CAPTURE_MODE_RELAXED, ::cuStreamEndCapture must be + called on this stream from the same thread. + + \param hStream - Stream in which to initiate capture + \param mode - Controls the interaction of this capture sequence with other API + calls that are potentially unsafe. For more details see + ::cuThreadExchangeStreamCaptureMode. + + \note Kernels captured using this API must not use texture and surface references. + Reading or writing through any texture or surface reference is undefined + behavior. This restriction does not apply to texture and surface objects. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa + ::cuStreamCreate, + ::cuStreamIsCapturing, + ::cuStreamEndCapture, + ::cuThreadExchangeStreamCaptureMode*/ + fn cuStreamBeginCapture_v2_ptsz( + hStream: cuda_types::CUstream, + mode: cuda_types::CUstreamCaptureMode, + ) -> cuda_types::CUresult; + /** \brief Begins graph capture on a stream to an existing graph + + Begin graph capture on \p hStream, placing new nodes into an existing graph. When a stream is + in capture mode, all operations pushed into the stream will not be executed, but will instead + be captured into \p hGraph. The graph will not be instantiable until the user calls + ::cuStreamEndCapture. + + Capture may not be initiated if \p stream is CU_STREAM_LEGACY. Capture must be ended on the + same stream in which it was initiated, and it may only be initiated if the stream is not + already in capture mode. The capture mode may be queried via ::cuStreamIsCapturing. A unique id + representing the capture sequence may be queried via ::cuStreamGetCaptureInfo. + + If \p mode is not ::CU_STREAM_CAPTURE_MODE_RELAXED, ::cuStreamEndCapture must be + called on this stream from the same thread. + + \param hStream - Stream in which to initiate capture. + \param hGraph - Graph to capture into. + \param dependencies - Dependencies of the first node captured in the stream. Can be NULL if numDependencies is 0. + \param dependencyData - Optional array of data associated with each dependency. + \param numDependencies - Number of dependencies. + \param mode - Controls the interaction of this capture sequence with other API + calls that are potentially unsafe. For more details see + ::cuThreadExchangeStreamCaptureMode. + + \note Kernels captured using this API must not use texture and surface references. + Reading or writing through any texture or surface reference is undefined + behavior. This restriction does not apply to texture and surface objects. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa + ::cuStreamBeginCapture, + ::cuStreamCreate, + ::cuStreamIsCapturing, + ::cuStreamEndCapture, + ::cuThreadExchangeStreamCaptureMode, + ::cuGraphAddNode,*/ + fn cuStreamBeginCaptureToGraph_ptsz( + hStream: cuda_types::CUstream, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + dependencyData: *const cuda_types::CUgraphEdgeData, + numDependencies: usize, + mode: cuda_types::CUstreamCaptureMode, + ) -> cuda_types::CUresult; + /** \brief Swaps the stream capture interaction mode for a thread + + Sets the calling thread's stream capture interaction mode to the value contained + in \p *mode, and overwrites \p *mode with the previous mode for the thread. To + facilitate deterministic behavior across function or module boundaries, callers + are encouraged to use this API in a push-pop fashion: \code +CUstreamCaptureMode mode = desiredMode; +cuThreadExchangeStreamCaptureMode(&mode); +... +cuThreadExchangeStreamCaptureMode(&mode); // restore previous mode + \endcode + + During stream capture (see ::cuStreamBeginCapture), some actions, such as a call + to ::cudaMalloc, may be unsafe. In the case of ::cudaMalloc, the operation is + not enqueued asynchronously to a stream, and is not observed by stream capture. + Therefore, if the sequence of operations captured via ::cuStreamBeginCapture + depended on the allocation being replayed whenever the graph is launched, the + captured graph would be invalid. + + Therefore, stream capture places restrictions on API calls that can be made within + or concurrently to a ::cuStreamBeginCapture-::cuStreamEndCapture sequence. This + behavior can be controlled via this API and flags to ::cuStreamBeginCapture. + + A thread's mode is one of the following: + - \p CU_STREAM_CAPTURE_MODE_GLOBAL: This is the default mode. If the local thread has + an ongoing capture sequence that was not initiated with + \p CU_STREAM_CAPTURE_MODE_RELAXED at \p cuStreamBeginCapture, or if any other thread + has a concurrent capture sequence initiated with \p CU_STREAM_CAPTURE_MODE_GLOBAL, + this thread is prohibited from potentially unsafe API calls. + - \p CU_STREAM_CAPTURE_MODE_THREAD_LOCAL: If the local thread has an ongoing capture + sequence not initiated with \p CU_STREAM_CAPTURE_MODE_RELAXED, it is prohibited + from potentially unsafe API calls. Concurrent capture sequences in other threads + are ignored. + - \p CU_STREAM_CAPTURE_MODE_RELAXED: The local thread is not prohibited from potentially + unsafe API calls. Note that the thread is still prohibited from API calls which + necessarily conflict with stream capture, for example, attempting ::cuEventQuery + on an event that was last recorded inside a capture sequence. + + \param mode - Pointer to mode value to swap with the current mode + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa + ::cuStreamBeginCapture*/ + fn cuThreadExchangeStreamCaptureMode( + mode: *mut cuda_types::CUstreamCaptureMode, + ) -> cuda_types::CUresult; + /** \brief Ends capture on a stream, returning the captured graph + + End capture on \p hStream, returning the captured graph via \p phGraph. + Capture must have been initiated on \p hStream via a call to ::cuStreamBeginCapture. + If capture was invalidated, due to a violation of the rules of stream capture, then + a NULL graph will be returned. + + If the \p mode argument to ::cuStreamBeginCapture was not + ::CU_STREAM_CAPTURE_MODE_RELAXED, this call must be from the same thread as + ::cuStreamBeginCapture. + + \param hStream - Stream to query + \param phGraph - The captured graph + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD + \notefnerr + + \sa + ::cuStreamCreate, + ::cuStreamBeginCapture, + ::cuStreamIsCapturing, + ::cuGraphDestroy*/ + fn cuStreamEndCapture_ptsz( + hStream: cuda_types::CUstream, + phGraph: *mut cuda_types::CUgraph, + ) -> cuda_types::CUresult; + /** \brief Returns a stream's capture status + + Return the capture status of \p hStream via \p captureStatus. After a successful + call, \p *captureStatus will contain one of the following: + - ::CU_STREAM_CAPTURE_STATUS_NONE: The stream is not capturing. + - ::CU_STREAM_CAPTURE_STATUS_ACTIVE: The stream is capturing. + - ::CU_STREAM_CAPTURE_STATUS_INVALIDATED: The stream was capturing but an error + has invalidated the capture sequence. The capture sequence must be terminated + with ::cuStreamEndCapture on the stream where it was initiated in order to + continue using \p hStream. + + Note that, if this is called on ::CU_STREAM_LEGACY (the "null stream") while + a blocking stream in the same context is capturing, it will return + ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT and \p *captureStatus is unspecified + after the call. The blocking stream capture is not invalidated. + + When a blocking stream is capturing, the legacy stream is in an + unusable state until the blocking stream capture is terminated. The legacy + stream is not supported for stream capture, but attempted use would have an + implicit dependency on the capturing stream(s). + + \param hStream - Stream to query + \param captureStatus - Returns the stream's capture status + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT + \notefnerr + + \sa + ::cuStreamCreate, + ::cuStreamBeginCapture, + ::cuStreamEndCapture*/ + fn cuStreamIsCapturing_ptsz( + hStream: cuda_types::CUstream, + captureStatus: *mut cuda_types::CUstreamCaptureStatus, + ) -> cuda_types::CUresult; + /** \brief Query a stream's capture state + + Query stream state related to stream capture. + + If called on ::CU_STREAM_LEGACY (the "null stream") while a stream not created + with ::CU_STREAM_NON_BLOCKING is capturing, returns ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT. + + Valid data (other than capture status) is returned only if both of the following are true: + - the call returns CUDA_SUCCESS + - the returned capture status is ::CU_STREAM_CAPTURE_STATUS_ACTIVE + + \param hStream - The stream to query + \param captureStatus_out - Location to return the capture status of the stream; required + \param id_out - Optional location to return an id for the capture sequence, which is + unique over the lifetime of the process + \param graph_out - Optional location to return the graph being captured into. All + operations other than destroy and node removal are permitted on the graph + while the capture sequence is in progress. This API does not transfer + ownership of the graph, which is transferred or destroyed at + ::cuStreamEndCapture. Note that the graph handle may be invalidated before + end of capture for certain errors. Nodes that are or become + unreachable from the original stream at ::cuStreamEndCapture due to direct + actions on the graph do not trigger ::CUDA_ERROR_STREAM_CAPTURE_UNJOINED. + \param dependencies_out - Optional location to store a pointer to an array of nodes. + The next node to be captured in the stream will depend on this set of nodes, + absent operations such as event wait which modify this set. The array pointer + is valid until the next API call which operates on the stream or until the + capture is terminated. The node handles may be copied out and are valid until + they or the graph is destroyed. The driver-owned array may also be passed + directly to APIs that operate on the graph (not the stream) without copying. + \param numDependencies_out - Optional location to store the size of the array + returned in dependencies_out. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT + \note_graph_thread_safety + \notefnerr + + \sa + ::cuStreamGetCaptureInfo_v3 + ::cuStreamBeginCapture, + ::cuStreamIsCapturing, + ::cuStreamUpdateCaptureDependencies*/ + fn cuStreamGetCaptureInfo_v2_ptsz( + hStream: cuda_types::CUstream, + captureStatus_out: *mut cuda_types::CUstreamCaptureStatus, + id_out: *mut cuda_types::cuuint64_t, + graph_out: *mut cuda_types::CUgraph, + dependencies_out: *mut *const cuda_types::CUgraphNode, + numDependencies_out: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Query a stream's capture state (12.3+) + + Query stream state related to stream capture. + + If called on ::CU_STREAM_LEGACY (the "null stream") while a stream not created + with ::CU_STREAM_NON_BLOCKING is capturing, returns ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT. + + Valid data (other than capture status) is returned only if both of the following are true: + - the call returns CUDA_SUCCESS + - the returned capture status is ::CU_STREAM_CAPTURE_STATUS_ACTIVE + + If \p edgeData_out is non-NULL then \p dependencies_out must be as well. If + \p dependencies_out is non-NULL and \p edgeData_out is NULL, but there is non-zero edge + data for one or more of the current stream dependencies, the call will return + ::CUDA_ERROR_LOSSY_QUERY. + + \param hStream - The stream to query + \param captureStatus_out - Location to return the capture status of the stream; required + \param id_out - Optional location to return an id for the capture sequence, which is + unique over the lifetime of the process + \param graph_out - Optional location to return the graph being captured into. All + operations other than destroy and node removal are permitted on the graph + while the capture sequence is in progress. This API does not transfer + ownership of the graph, which is transferred or destroyed at + ::cuStreamEndCapture. Note that the graph handle may be invalidated before + end of capture for certain errors. Nodes that are or become + unreachable from the original stream at ::cuStreamEndCapture due to direct + actions on the graph do not trigger ::CUDA_ERROR_STREAM_CAPTURE_UNJOINED. + \param dependencies_out - Optional location to store a pointer to an array of nodes. + The next node to be captured in the stream will depend on this set of nodes, + absent operations such as event wait which modify this set. The array pointer + is valid until the next API call which operates on the stream or until the + capture is terminated. The node handles may be copied out and are valid until + they or the graph is destroyed. The driver-owned array may also be passed + directly to APIs that operate on the graph (not the stream) without copying. + \param edgeData_out - Optional location to store a pointer to an array of graph edge + data. This array parallels \c dependencies_out; the next node to be added + has an edge to \c dependencies_out[i] with annotation \c edgeData_out[i] for + each \c i. The array pointer is valid until the next API call which operates + on the stream or until the capture is terminated. + \param numDependencies_out - Optional location to store the size of the array + returned in dependencies_out. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT, + ::CUDA_ERROR_LOSSY_QUERY + \note_graph_thread_safety + \notefnerr + + \sa + ::cuStreamGetCaptureInfo + ::cuStreamBeginCapture, + ::cuStreamIsCapturing, + ::cuStreamUpdateCaptureDependencies*/ + fn cuStreamGetCaptureInfo_v3_ptsz( + hStream: cuda_types::CUstream, + captureStatus_out: *mut cuda_types::CUstreamCaptureStatus, + id_out: *mut cuda_types::cuuint64_t, + graph_out: *mut cuda_types::CUgraph, + dependencies_out: *mut *const cuda_types::CUgraphNode, + edgeData_out: *mut *const cuda_types::CUgraphEdgeData, + numDependencies_out: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Update the set of dependencies in a capturing stream (11.3+) + + Modifies the dependency set of a capturing stream. The dependency set is the set + of nodes that the next captured node in the stream will depend on. + + Valid flags are ::CU_STREAM_ADD_CAPTURE_DEPENDENCIES and + ::CU_STREAM_SET_CAPTURE_DEPENDENCIES. These control whether the set passed to + the API is added to the existing set or replaces it. A flags value of 0 defaults + to ::CU_STREAM_ADD_CAPTURE_DEPENDENCIES. + + Nodes that are removed from the dependency set via this API do not result in + ::CUDA_ERROR_STREAM_CAPTURE_UNJOINED if they are unreachable from the stream at + ::cuStreamEndCapture. + + Returns ::CUDA_ERROR_ILLEGAL_STATE if the stream is not capturing. + + This API is new in CUDA 11.3. Developers requiring compatibility across minor + versions to CUDA 11.0 should not use this API or provide a fallback. + + \param hStream - The stream to update + \param dependencies - The set of dependencies to add + \param numDependencies - The size of the dependencies array + \param flags - See above + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_ILLEGAL_STATE + + \sa + ::cuStreamBeginCapture, + ::cuStreamGetCaptureInfo,*/ + fn cuStreamUpdateCaptureDependencies_ptsz( + hStream: cuda_types::CUstream, + dependencies: *mut cuda_types::CUgraphNode, + numDependencies: usize, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Update the set of dependencies in a capturing stream (12.3+) + + Modifies the dependency set of a capturing stream. The dependency set is the set + of nodes that the next captured node in the stream will depend on along with the + edge data for those dependencies. + + Valid flags are ::CU_STREAM_ADD_CAPTURE_DEPENDENCIES and + ::CU_STREAM_SET_CAPTURE_DEPENDENCIES. These control whether the set passed to + the API is added to the existing set or replaces it. A flags value of 0 defaults + to ::CU_STREAM_ADD_CAPTURE_DEPENDENCIES. + + Nodes that are removed from the dependency set via this API do not result in + ::CUDA_ERROR_STREAM_CAPTURE_UNJOINED if they are unreachable from the stream at + ::cuStreamEndCapture. + + Returns ::CUDA_ERROR_ILLEGAL_STATE if the stream is not capturing. + + \param hStream - The stream to update + \param dependencies - The set of dependencies to add + \param dependencyData - Optional array of data associated with each dependency. + \param numDependencies - The size of the dependencies array + \param flags - See above + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_ILLEGAL_STATE + + \sa + ::cuStreamBeginCapture, + ::cuStreamGetCaptureInfo,*/ + fn cuStreamUpdateCaptureDependencies_v2_ptsz( + hStream: cuda_types::CUstream, + dependencies: *mut cuda_types::CUgraphNode, + dependencyData: *const cuda_types::CUgraphEdgeData, + numDependencies: usize, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Attach memory to a stream asynchronously + + Enqueues an operation in \p hStream to specify stream association of + \p length bytes of memory starting from \p dptr. This function is a + stream-ordered operation, meaning that it is dependent on, and will + only take effect when, previous work in stream has completed. Any + previous association is automatically replaced. + + \p dptr must point to one of the following types of memories: + - managed memory declared using the __managed__ keyword or allocated with + ::cuMemAllocManaged. + - a valid host-accessible region of system-allocated pageable memory. This + type of memory may only be specified if the device associated with the + stream reports a non-zero value for the device attribute + ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS. + + For managed allocations, \p length must be either zero or the entire + allocation's size. Both indicate that the entire allocation's stream + association is being changed. Currently, it is not possible to change stream + association for a portion of a managed allocation. + + For pageable host allocations, \p length must be non-zero. + + The stream association is specified using \p flags which must be + one of ::CUmemAttach_flags. + If the ::CU_MEM_ATTACH_GLOBAL flag is specified, the memory can be accessed + by any stream on any device. + If the ::CU_MEM_ATTACH_HOST flag is specified, the program makes a guarantee + that it won't access the memory on the device from any stream on a device that + has a zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. + If the ::CU_MEM_ATTACH_SINGLE flag is specified and \p hStream is associated with + a device that has a zero value for the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, + the program makes a guarantee that it will only access the memory on the device + from \p hStream. It is illegal to attach singly to the NULL stream, because the + NULL stream is a virtual global stream and not a specific stream. An error will + be returned in this case. + + When memory is associated with a single stream, the Unified Memory system will + allow CPU access to this memory region so long as all operations in \p hStream + have completed, regardless of whether other streams are active. In effect, + this constrains exclusive ownership of the managed memory region by + an active GPU to per-stream activity instead of whole-GPU activity. + + Accessing memory on the device from streams that are not associated with + it will produce undefined results. No error checking is performed by the + Unified Memory system to ensure that kernels launched into other streams + do not access this region. + + It is a program's responsibility to order calls to ::cuStreamAttachMemAsync + via events, synchronization or other means to ensure legal access to memory + at all times. Data visibility and coherency will be changed appropriately + for all kernels which follow a stream-association change. + + If \p hStream is destroyed while data is associated with it, the association is + removed and the association reverts to the default visibility of the allocation + as specified at ::cuMemAllocManaged. For __managed__ variables, the default + association is always ::CU_MEM_ATTACH_GLOBAL. Note that destroying a stream is an + asynchronous operation, and as a result, the change to default association won't + happen until all work in the stream has completed. + + \param hStream - Stream in which to enqueue the attach operation + \param dptr - Pointer to memory (must be a pointer to managed memory or + to a valid host-accessible region of system-allocated + pageable memory) + \param length - Length of memory + \param flags - Must be one of ::CUmemAttach_flags + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_SUPPORTED + \note_null_stream + \notefnerr + + \sa ::cuStreamCreate, + ::cuStreamQuery, + ::cuStreamSynchronize, + ::cuStreamWaitEvent, + ::cuStreamDestroy, + ::cuMemAllocManaged, + ::cudaStreamAttachMemAsync*/ + fn cuStreamAttachMemAsync_ptsz( + hStream: cuda_types::CUstream, + dptr: cuda_types::CUdeviceptr, + length: usize, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Determine status of a compute stream + + Returns ::CUDA_SUCCESS if all operations in the stream specified by + \p hStream have completed, or ::CUDA_ERROR_NOT_READY if not. + + For the purposes of Unified Memory, a return value of ::CUDA_SUCCESS + is equivalent to having called ::cuStreamSynchronize(). + + \param hStream - Stream to query status of + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_READY + \note_null_stream + \notefnerr + + \sa ::cuStreamCreate, + ::cuStreamWaitEvent, + ::cuStreamDestroy, + ::cuStreamSynchronize, + ::cuStreamAddCallback, + ::cudaStreamQuery*/ + fn cuStreamQuery_ptsz(hStream: cuda_types::CUstream) -> cuda_types::CUresult; + /** \brief Wait until a stream's tasks are completed + + Waits until the device has completed all operations in the stream specified + by \p hStream. If the context was created with the + ::CU_CTX_SCHED_BLOCKING_SYNC flag, the CPU thread will block until the + stream is finished with all of its tasks. + + \param hStream - Stream to wait for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE + + \note_null_stream + \notefnerr + + \sa ::cuStreamCreate, + ::cuStreamDestroy, + ::cuStreamWaitEvent, + ::cuStreamQuery, + ::cuStreamAddCallback, + ::cudaStreamSynchronize*/ + fn cuStreamSynchronize_ptsz(hStream: cuda_types::CUstream) -> cuda_types::CUresult; + /** \brief Destroys a stream + + Destroys the stream specified by \p hStream. + + In case the device is still doing work in the stream \p hStream + when ::cuStreamDestroy() is called, the function will return immediately + and the resources associated with \p hStream will be released automatically + once the device has completed all work in \p hStream. + + \param hStream - Stream to destroy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa ::cuStreamCreate, + ::cuStreamWaitEvent, + ::cuStreamQuery, + ::cuStreamSynchronize, + ::cuStreamAddCallback, + ::cudaStreamDestroy*/ + fn cuStreamDestroy_v2(hStream: cuda_types::CUstream) -> cuda_types::CUresult; + /** \brief Copies attributes from source stream to destination stream. + + Copies attributes from source stream \p src to destination stream \p dst. + Both streams must have the same context. + + \param[out] dst Destination stream + \param[in] src Source stream + For list of attributes see ::CUstreamAttrID + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa + ::CUaccessPolicyWindow*/ + fn cuStreamCopyAttributes_ptsz( + dst: cuda_types::CUstream, + src: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Queries stream attribute. + + Queries attribute \p attr from \p hStream and stores it in corresponding + member of \p value_out. + + \param[in] hStream + \param[in] attr + \param[out] value_out + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa + ::CUaccessPolicyWindow*/ + fn cuStreamGetAttribute_ptsz( + hStream: cuda_types::CUstream, + attr: cuda_types::CUstreamAttrID, + value_out: *mut cuda_types::CUstreamAttrValue, + ) -> cuda_types::CUresult; + /** \brief Sets stream attribute. + + Sets attribute \p attr on \p hStream from corresponding attribute of + \p value. The updated attribute will be applied to subsequent work + submitted to the stream. It will not affect previously submitted work. + + \param[out] hStream + \param[in] attr + \param[in] value + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa + ::CUaccessPolicyWindow*/ + fn cuStreamSetAttribute_ptsz( + hStream: cuda_types::CUstream, + attr: cuda_types::CUstreamAttrID, + value: *const cuda_types::CUstreamAttrValue, + ) -> cuda_types::CUresult; + /** \brief Creates an event + + Creates an event *phEvent for the current context with the flags specified via + \p Flags. Valid flags include: + - ::CU_EVENT_DEFAULT: Default event creation flag. + - ::CU_EVENT_BLOCKING_SYNC: Specifies that the created event should use blocking + synchronization. A CPU thread that uses ::cuEventSynchronize() to wait on + an event created with this flag will block until the event has actually + been recorded. + - ::CU_EVENT_DISABLE_TIMING: Specifies that the created event does not need + to record timing data. Events created with this flag specified and + the ::CU_EVENT_BLOCKING_SYNC flag not specified will provide the best + performance when used with ::cuStreamWaitEvent() and ::cuEventQuery(). + - ::CU_EVENT_INTERPROCESS: Specifies that the created event may be used as an + interprocess event by ::cuIpcGetEventHandle(). ::CU_EVENT_INTERPROCESS must + be specified along with ::CU_EVENT_DISABLE_TIMING. + + \param phEvent - Returns newly created event + \param Flags - Event creation flags + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY + \notefnerr + + \sa + ::cuEventRecord, + ::cuEventQuery, + ::cuEventSynchronize, + ::cuEventDestroy, + ::cuEventElapsedTime, + ::cudaEventCreate, + ::cudaEventCreateWithFlags*/ + fn cuEventCreate( + phEvent: *mut cuda_types::CUevent, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Records an event + + Captures in \p hEvent the contents of \p hStream at the time of this call. + \p hEvent and \p hStream must be from the same context. + Calls such as ::cuEventQuery() or ::cuStreamWaitEvent() will then + examine or wait for completion of the work that was captured. Uses of + \p hStream after this call do not modify \p hEvent. See note on default + stream behavior for what is captured in the default case. + + ::cuEventRecord() can be called multiple times on the same event and + will overwrite the previously captured state. Other APIs such as + ::cuStreamWaitEvent() use the most recently captured state at the time + of the API call, and are not affected by later calls to + ::cuEventRecord(). Before the first call to ::cuEventRecord(), an + event represents an empty set of work, so for example ::cuEventQuery() + would return ::CUDA_SUCCESS. + + \param hEvent - Event to record + \param hStream - Stream to record event for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE + \note_null_stream + \notefnerr + + \sa ::cuEventCreate, + ::cuEventQuery, + ::cuEventSynchronize, + ::cuStreamWaitEvent, + ::cuEventDestroy, + ::cuEventElapsedTime, + ::cudaEventRecord, + ::cuEventRecordWithFlags*/ + fn cuEventRecord_ptsz( + hEvent: cuda_types::CUevent, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Records an event + + Captures in \p hEvent the contents of \p hStream at the time of this call. + \p hEvent and \p hStream must be from the same context. + Calls such as ::cuEventQuery() or ::cuStreamWaitEvent() will then + examine or wait for completion of the work that was captured. Uses of + \p hStream after this call do not modify \p hEvent. See note on default + stream behavior for what is captured in the default case. + + ::cuEventRecordWithFlags() can be called multiple times on the same event and + will overwrite the previously captured state. Other APIs such as + ::cuStreamWaitEvent() use the most recently captured state at the time + of the API call, and are not affected by later calls to + ::cuEventRecordWithFlags(). Before the first call to ::cuEventRecordWithFlags(), an + event represents an empty set of work, so for example ::cuEventQuery() + would return ::CUDA_SUCCESS. + + flags include: + - ::CU_EVENT_RECORD_DEFAULT: Default event creation flag. + - ::CU_EVENT_RECORD_EXTERNAL: Event is captured in the graph as an external + event node when performing stream capture. This flag is invalid outside + of stream capture. + + \param hEvent - Event to record + \param hStream - Stream to record event for + \param flags - See ::CUevent_capture_flags + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE + \note_null_stream + \notefnerr + + \sa ::cuEventCreate, + ::cuEventQuery, + ::cuEventSynchronize, + ::cuStreamWaitEvent, + ::cuEventDestroy, + ::cuEventElapsedTime, + ::cuEventRecord, + ::cudaEventRecord*/ + fn cuEventRecordWithFlags_ptsz( + hEvent: cuda_types::CUevent, + hStream: cuda_types::CUstream, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Queries an event's status + + Queries the status of all work currently captured by \p hEvent. See + ::cuEventRecord() for details on what is captured by an event. + + Returns ::CUDA_SUCCESS if all captured work has been completed, or + ::CUDA_ERROR_NOT_READY if any captured work is incomplete. + + For the purposes of Unified Memory, a return value of ::CUDA_SUCCESS + is equivalent to having called ::cuEventSynchronize(). + + \param hEvent - Event to query + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_READY + \notefnerr + + \sa ::cuEventCreate, + ::cuEventRecord, + ::cuEventSynchronize, + ::cuEventDestroy, + ::cuEventElapsedTime, + ::cudaEventQuery*/ + fn cuEventQuery(hEvent: cuda_types::CUevent) -> cuda_types::CUresult; + /** \brief Waits for an event to complete + + Waits until the completion of all work currently captured in \p hEvent. + See ::cuEventRecord() for details on what is captured by an event. + + Waiting for an event that was created with the ::CU_EVENT_BLOCKING_SYNC + flag will cause the calling CPU thread to block until the event has + been completed by the device. If the ::CU_EVENT_BLOCKING_SYNC flag has + not been set, then the CPU thread will busy-wait until the event has + been completed by the device. + + \param hEvent - Event to wait for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa ::cuEventCreate, + ::cuEventRecord, + ::cuEventQuery, + ::cuEventDestroy, + ::cuEventElapsedTime, + ::cudaEventSynchronize*/ + fn cuEventSynchronize(hEvent: cuda_types::CUevent) -> cuda_types::CUresult; + /** \brief Destroys an event + + Destroys the event specified by \p hEvent. + + An event may be destroyed before it is complete (i.e., while + ::cuEventQuery() would return ::CUDA_ERROR_NOT_READY). In this case, the + call does not block on completion of the event, and any associated + resources will automatically be released asynchronously at completion. + + \param hEvent - Event to destroy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa ::cuEventCreate, + ::cuEventRecord, + ::cuEventQuery, + ::cuEventSynchronize, + ::cuEventElapsedTime, + ::cudaEventDestroy*/ + fn cuEventDestroy_v2(hEvent: cuda_types::CUevent) -> cuda_types::CUresult; + /** \brief Computes the elapsed time between two events + + Computes the elapsed time between two events (in milliseconds with a + resolution of around 0.5 microseconds). + + If either event was last recorded in a non-NULL stream, the resulting time + may be greater than expected (even if both used the same stream handle). This + happens because the ::cuEventRecord() operation takes place asynchronously + and there is no guarantee that the measured latency is actually just between + the two events. Any number of other different stream operations could execute + in between the two measured events, thus altering the timing in a significant + way. + + If ::cuEventRecord() has not been called on either event then + ::CUDA_ERROR_INVALID_HANDLE is returned. If ::cuEventRecord() has been called + on both events but one or both of them has not yet been completed (that is, + ::cuEventQuery() would return ::CUDA_ERROR_NOT_READY on at least one of the + events), ::CUDA_ERROR_NOT_READY is returned. If either event was created with + the ::CU_EVENT_DISABLE_TIMING flag, then this function will return + ::CUDA_ERROR_INVALID_HANDLE. + + \param pMilliseconds - Time between \p hStart and \p hEnd in ms + \param hStart - Starting event + \param hEnd - Ending event + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_READY, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa ::cuEventCreate, + ::cuEventRecord, + ::cuEventQuery, + ::cuEventSynchronize, + ::cuEventDestroy, + ::cudaEventElapsedTime*/ + fn cuEventElapsedTime( + pMilliseconds: *mut f32, + hStart: cuda_types::CUevent, + hEnd: cuda_types::CUevent, + ) -> cuda_types::CUresult; + /** \brief Imports an external memory object + + Imports an externally allocated memory object and returns + a handle to that in \p extMem_out. + + The properties of the handle being imported must be described in + \p memHandleDesc. The ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC structure + is defined as follows: + + \code +typedef struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st { +CUexternalMemoryHandleType type; +union { +int fd; +struct { +void *handle; +const void *name; +} win32; +const void *nvSciBufObject; +} handle; +unsigned long long size; +unsigned int flags; +} CUDA_EXTERNAL_MEMORY_HANDLE_DESC; + \endcode + + where ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type specifies the type + of handle being imported. ::CUexternalMemoryHandleType is + defined as: + + \code +typedef enum CUexternalMemoryHandleType_enum { +CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1, +CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 2, +CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3, +CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 4, +CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 5, +CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 6, +CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7, +CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 8 +} CUexternalMemoryHandleType; + \endcode + + If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is + ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, then + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::fd must be a valid + file descriptor referencing a memory object. Ownership of + the file descriptor is transferred to the CUDA driver when the + handle is imported successfully. Performing any operations on the + file descriptor after it is imported results in undefined behavior. + + If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is + ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32, then exactly one + of ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle and + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name must not be + NULL. If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle + is not NULL, then it must represent a valid shared NT handle that + references a memory object. Ownership of this handle is + not transferred to CUDA after the import operation, so the + application must release the handle using the appropriate system + call. If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name + is not NULL, then it must point to a NULL-terminated array of + UTF-16 characters that refers to a memory object. + + If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is + ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT, then + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle must + be non-NULL and + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name + must be NULL. The handle specified must be a globally shared KMT + handle. This handle does not hold a reference to the underlying + object, and thus will be invalid when all references to the + memory object are destroyed. + + If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is + ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP, then exactly one + of ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle and + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name must not be + NULL. If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle + is not NULL, then it must represent a valid shared NT handle that + is returned by ID3D12Device::CreateSharedHandle when referring to a + ID3D12Heap object. This handle holds a reference to the underlying + object. If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name + is not NULL, then it must point to a NULL-terminated array of + UTF-16 characters that refers to a ID3D12Heap object. + + If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is + ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE, then exactly one + of ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle and + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name must not be + NULL. If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle + is not NULL, then it must represent a valid shared NT handle that + is returned by ID3D12Device::CreateSharedHandle when referring to a + ID3D12Resource object. This handle holds a reference to the + underlying object. If + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name + is not NULL, then it must point to a NULL-terminated array of + UTF-16 characters that refers to a ID3D12Resource object. + + If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is + ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE, then + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle must + represent a valid shared NT handle that is returned by + IDXGIResource1::CreateSharedHandle when referring to a + ID3D11Resource object. If + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name + is not NULL, then it must point to a NULL-terminated array of + UTF-16 characters that refers to a ID3D11Resource object. + + If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is + ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT, then + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::handle must + represent a valid shared KMT handle that is returned by + IDXGIResource::GetSharedHandle when referring to a + ID3D11Resource object and + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::win32::name + must be NULL. + + If ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type is + ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF, then + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::handle::nvSciBufObject must be non-NULL + and reference a valid NvSciBuf object. + If the NvSciBuf object imported into CUDA is also mapped by other drivers, then the + application must use ::cuWaitExternalSemaphoresAsync or ::cuSignalExternalSemaphoresAsync + as appropriate barriers to maintain coherence between CUDA and the other drivers. + See ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC and ::CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC + for memory synchronization. + + + The size of the memory object must be specified in + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::size. + + Specifying the flag ::CUDA_EXTERNAL_MEMORY_DEDICATED in + ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::flags indicates that the + resource is a dedicated resource. The definition of what a + dedicated resource is outside the scope of this extension. + This flag must be set if ::CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type + is one of the following: + ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE + ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE + ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT + + \param extMem_out - Returned handle to an external memory object + \param memHandleDesc - Memory import handle descriptor + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_OPERATING_SYSTEM + \notefnerr + + \note If the Vulkan memory imported into CUDA is mapped on the CPU then the + application must use vkInvalidateMappedMemoryRanges/vkFlushMappedMemoryRanges + as well as appropriate Vulkan pipeline barriers to maintain coherence between + CPU and GPU. For more information on these APIs, please refer to "Synchronization + and Cache Control" chapter from Vulkan specification. + + \sa ::cuDestroyExternalMemory, + ::cuExternalMemoryGetMappedBuffer, + ::cuExternalMemoryGetMappedMipmappedArray*/ + fn cuImportExternalMemory( + extMem_out: *mut cuda_types::CUexternalMemory, + memHandleDesc: *const cuda_types::CUDA_EXTERNAL_MEMORY_HANDLE_DESC, + ) -> cuda_types::CUresult; + /** \brief Maps a buffer onto an imported memory object + + Maps a buffer onto an imported memory object and returns a device + pointer in \p devPtr. + + The properties of the buffer being mapped must be described in + \p bufferDesc. The ::CUDA_EXTERNAL_MEMORY_BUFFER_DESC structure is + defined as follows: + + \code +typedef struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st { +unsigned long long offset; +unsigned long long size; +unsigned int flags; +} CUDA_EXTERNAL_MEMORY_BUFFER_DESC; + \endcode + + where ::CUDA_EXTERNAL_MEMORY_BUFFER_DESC::offset is the offset in + the memory object where the buffer's base address is. + ::CUDA_EXTERNAL_MEMORY_BUFFER_DESC::size is the size of the buffer. + ::CUDA_EXTERNAL_MEMORY_BUFFER_DESC::flags must be zero. + + The offset and size have to be suitably aligned to match the + requirements of the external API. Mapping two buffers whose ranges + overlap may or may not result in the same virtual address being + returned for the overlapped portion. In such cases, the application + must ensure that all accesses to that region from the GPU are + volatile. Otherwise writes made via one address are not guaranteed + to be visible via the other address, even if they're issued by the + same thread. It is recommended that applications map the combined + range instead of mapping separate buffers and then apply the + appropriate offsets to the returned pointer to derive the + individual buffers. + + The returned pointer \p devPtr must be freed using ::cuMemFree. + + \param devPtr - Returned device pointer to buffer + \param extMem - Handle to external memory object + \param bufferDesc - Buffer descriptor + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa ::cuImportExternalMemory, + ::cuDestroyExternalMemory, + ::cuExternalMemoryGetMappedMipmappedArray*/ + fn cuExternalMemoryGetMappedBuffer( + devPtr: *mut cuda_types::CUdeviceptr, + extMem: cuda_types::CUexternalMemory, + bufferDesc: *const cuda_types::CUDA_EXTERNAL_MEMORY_BUFFER_DESC, + ) -> cuda_types::CUresult; + /** \brief Maps a CUDA mipmapped array onto an external memory object + + Maps a CUDA mipmapped array onto an external object and returns a + handle to it in \p mipmap. + + The properties of the CUDA mipmapped array being mapped must be + described in \p mipmapDesc. The structure + ::CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC is defined as follows: + + \code +typedef struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st { +unsigned long long offset; +CUDA_ARRAY3D_DESCRIPTOR arrayDesc; +unsigned int numLevels; +} CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC; + \endcode + + where ::CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC::offset is the + offset in the memory object where the base level of the mipmap + chain is. + ::CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC::arrayDesc describes + the format, dimensions and type of the base level of the mipmap + chain. For further details on these parameters, please refer to the + documentation for ::cuMipmappedArrayCreate. Note that if the mipmapped + array is bound as a color target in the graphics API, then the flag + ::CUDA_ARRAY3D_COLOR_ATTACHMENT must be specified in + ::CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC::arrayDesc::Flags. + ::CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC::numLevels specifies + the total number of levels in the mipmap chain. + + If \p extMem was imported from a handle of type ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF, then + ::CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC::numLevels must be equal to 1. + + The returned CUDA mipmapped array must be freed using ::cuMipmappedArrayDestroy. + + \param mipmap - Returned CUDA mipmapped array + \param extMem - Handle to external memory object + \param mipmapDesc - CUDA array descriptor + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa ::cuImportExternalMemory, + ::cuDestroyExternalMemory, + ::cuExternalMemoryGetMappedBuffer*/ + fn cuExternalMemoryGetMappedMipmappedArray( + mipmap: *mut cuda_types::CUmipmappedArray, + extMem: cuda_types::CUexternalMemory, + mipmapDesc: *const cuda_types::CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC, + ) -> cuda_types::CUresult; + /** \brief Destroys an external memory object. + + Destroys the specified external memory object. Any existing buffers + and CUDA mipmapped arrays mapped onto this object must no longer be + used and must be explicitly freed using ::cuMemFree and + ::cuMipmappedArrayDestroy respectively. + + \param extMem - External memory object to be destroyed + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa ::cuImportExternalMemory, + ::cuExternalMemoryGetMappedBuffer, + ::cuExternalMemoryGetMappedMipmappedArray*/ + fn cuDestroyExternalMemory( + extMem: cuda_types::CUexternalMemory, + ) -> cuda_types::CUresult; + /** \brief Imports an external semaphore + + Imports an externally allocated synchronization object and returns + a handle to that in \p extSem_out. + + The properties of the handle being imported must be described in + \p semHandleDesc. The ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC is + defined as follows: + + \code +typedef struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st { +CUexternalSemaphoreHandleType type; +union { +int fd; +struct { +void *handle; +const void *name; +} win32; +const void* NvSciSyncObj; +} handle; +unsigned int flags; +} CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC; + \endcode + + where ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type specifies the type of + handle being imported. ::CUexternalSemaphoreHandleType is defined + as: + + \code +typedef enum CUexternalSemaphoreHandleType_enum { +CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = 1, +CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 = 2, +CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3, +CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE = 4, +CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE = 5, +CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC = 6, +CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX = 7, +CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT = 8, +CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD = 9, +CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = 10 +} CUexternalSemaphoreHandleType; + \endcode + + If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD, then + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::fd must be a valid + file descriptor referencing a synchronization object. Ownership of + the file descriptor is transferred to the CUDA driver when the + handle is imported successfully. Performing any operations on the + file descriptor after it is imported results in undefined behavior. + + If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32, then exactly one + of ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle and + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name must not be + NULL. If + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle + is not NULL, then it must represent a valid shared NT handle that + references a synchronization object. Ownership of this handle is + not transferred to CUDA after the import operation, so the + application must release the handle using the appropriate system + call. If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name + is not NULL, then it must name a valid synchronization object. + + If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT, then + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle must + be non-NULL and + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name + must be NULL. The handle specified must be a globally shared KMT + handle. This handle does not hold a reference to the underlying + object, and thus will be invalid when all references to the + synchronization object are destroyed. + + If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE, then exactly one + of ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle and + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name must not be + NULL. If + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle + is not NULL, then it must represent a valid shared NT handle that + is returned by ID3D12Device::CreateSharedHandle when referring to a + ID3D12Fence object. This handle holds a reference to the underlying + object. If + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name + is not NULL, then it must name a valid synchronization object that + refers to a valid ID3D12Fence object. + + If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE, then + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle + represents a valid shared NT handle that is returned by + ID3D11Fence::CreateSharedHandle. If + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name + is not NULL, then it must name a valid synchronization object that + refers to a valid ID3D11Fence object. + + If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC, then + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::nvSciSyncObj + represents a valid NvSciSyncObj. + + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX, then + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle + represents a valid shared NT handle that + is returned by IDXGIResource1::CreateSharedHandle when referring to + a IDXGIKeyedMutex object. If + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name + is not NULL, then it must name a valid synchronization object that + refers to a valid IDXGIKeyedMutex object. + + If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT, then + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle + represents a valid shared KMT handle that + is returned by IDXGIResource::GetSharedHandle when referring to + a IDXGIKeyedMutex object and + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name must be NULL. + + If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD, then + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::fd must be a valid + file descriptor referencing a synchronization object. Ownership of + the file descriptor is transferred to the CUDA driver when the + handle is imported successfully. Performing any operations on the + file descriptor after it is imported results in undefined behavior. + + If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type is + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32, then exactly one + of ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle and + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name must not be + NULL. If + ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::handle + is not NULL, then it must represent a valid shared NT handle that + references a synchronization object. Ownership of this handle is + not transferred to CUDA after the import operation, so the + application must release the handle using the appropriate system + call. If ::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::handle::win32::name + is not NULL, then it must name a valid synchronization object. + + \param extSem_out - Returned handle to an external semaphore + \param semHandleDesc - Semaphore import handle descriptor + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_OPERATING_SYSTEM + \notefnerr + + \sa ::cuDestroyExternalSemaphore, + ::cuSignalExternalSemaphoresAsync, + ::cuWaitExternalSemaphoresAsync*/ + fn cuImportExternalSemaphore( + extSem_out: *mut cuda_types::CUexternalSemaphore, + semHandleDesc: *const cuda_types::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC, + ) -> cuda_types::CUresult; + /** \brief Signals a set of external semaphore objects + + Enqueues a signal operation on a set of externally allocated + semaphore object in the specified stream. The operations will be + executed when all prior operations in the stream complete. + + The exact semantics of signaling a semaphore depends on the type of + the object. + + If the semaphore object is any one of the following types: + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD, + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32, + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT + then signaling the semaphore will set it to the signaled state. + + If the semaphore object is any one of the following types: + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE, + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE, + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD, + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 + then the semaphore will be set to the value specified in + ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS::params::fence::value. + + If the semaphore object is of the type ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC + this API sets ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS::params::nvSciSync::fence + to a value that can be used by subsequent waiters of the same NvSciSync object + to order operations with those currently submitted in \p stream. Such an update + will overwrite previous contents of + ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS::params::nvSciSync::fence. By default, + signaling such an external semaphore object causes appropriate memory synchronization + operations to be performed over all external memory objects that are imported as + ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF. This ensures that any subsequent accesses + made by other importers of the same set of NvSciBuf memory object(s) are coherent. + These operations can be skipped by specifying the flag + ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC, which can be used as a + performance optimization when data coherency is not required. But specifying this + flag in scenarios where data coherency is required results in undefined behavior. + Also, for semaphore object of the type ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC, + if the NvSciSyncAttrList used to create the NvSciSyncObj had not set the flags in + ::cuDeviceGetNvSciSyncAttributes to CUDA_NVSCISYNC_ATTR_SIGNAL, this API will return + CUDA_ERROR_NOT_SUPPORTED. + NvSciSyncFence associated with semaphore object of the type + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC can be deterministic. For this the + NvSciSyncAttrList used to create the semaphore object must have value of + NvSciSyncAttrKey_RequireDeterministicFences key set to true. Deterministic fences + allow users to enqueue a wait over the semaphore object even before corresponding + signal is enqueued. For such a semaphore object, CUDA guarantees that each signal + operation will increment the fence value by '1'. Users are expected to track count + of signals enqueued on the semaphore object and insert waits accordingly. When such + a semaphore object is signaled from multiple streams, due to concurrent stream + execution, it is possible that the order in which the semaphore gets signaled is + indeterministic. This could lead to waiters of the semaphore getting unblocked + incorrectly. Users are expected to handle such situations, either by not using the + same semaphore object with deterministic fence support enabled in different streams + or by adding explicit dependency amongst such streams so that the semaphore is + signaled in order. + + If the semaphore object is any one of the following types: + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX, + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT + then the keyed mutex will be released with the key specified in + ::CUDA_EXTERNAL_SEMAPHORE_PARAMS::params::keyedmutex::key. + + \param extSemArray - Set of external semaphores to be signaled + \param paramsArray - Array of semaphore parameters + \param numExtSems - Number of semaphores to signal + \param stream - Stream to enqueue the signal operations in + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + + \sa ::cuImportExternalSemaphore, + ::cuDestroyExternalSemaphore, + ::cuWaitExternalSemaphoresAsync*/ + fn cuSignalExternalSemaphoresAsync_ptsz( + extSemArray: *const cuda_types::CUexternalSemaphore, + paramsArray: *const cuda_types::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS, + numExtSems: ::core::ffi::c_uint, + stream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Waits on a set of external semaphore objects + + Enqueues a wait operation on a set of externally allocated + semaphore object in the specified stream. The operations will be + executed when all prior operations in the stream complete. + + The exact semantics of waiting on a semaphore depends on the type + of the object. + + If the semaphore object is any one of the following types: + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD, + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32, + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT + then waiting on the semaphore will wait until the semaphore reaches + the signaled state. The semaphore will then be reset to the + unsignaled state. Therefore for every signal operation, there can + only be one wait operation. + + If the semaphore object is any one of the following types: + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE, + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE, + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD, + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 + then waiting on the semaphore will wait until the value of the + semaphore is greater than or equal to + ::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS::params::fence::value. + + If the semaphore object is of the type ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC + then, waiting on the semaphore will wait until the + ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS::params::nvSciSync::fence is signaled by the + signaler of the NvSciSyncObj that was associated with this semaphore object. + By default, waiting on such an external semaphore object causes appropriate + memory synchronization operations to be performed over all external memory objects + that are imported as ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF. This ensures that + any subsequent accesses made by other importers of the same set of NvSciBuf memory + object(s) are coherent. These operations can be skipped by specifying the flag + ::CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC, which can be used as a + performance optimization when data coherency is not required. But specifying this + flag in scenarios where data coherency is required results in undefined behavior. + Also, for semaphore object of the type ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC, + if the NvSciSyncAttrList used to create the NvSciSyncObj had not set the flags in + ::cuDeviceGetNvSciSyncAttributes to CUDA_NVSCISYNC_ATTR_WAIT, this API will return + CUDA_ERROR_NOT_SUPPORTED. + + If the semaphore object is any one of the following types: + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX, + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT + then the keyed mutex will be acquired when it is released with the key + specified in ::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS::params::keyedmutex::key + or until the timeout specified by + ::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS::params::keyedmutex::timeoutMs + has lapsed. The timeout interval can either be a finite value + specified in milliseconds or an infinite value. In case an infinite + value is specified the timeout never elapses. The windows INFINITE + macro must be used to specify infinite timeout. + + \param extSemArray - External semaphores to be waited on + \param paramsArray - Array of semaphore parameters + \param numExtSems - Number of semaphores to wait on + \param stream - Stream to enqueue the wait operations in + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_TIMEOUT + \notefnerr + + \sa ::cuImportExternalSemaphore, + ::cuDestroyExternalSemaphore, + ::cuSignalExternalSemaphoresAsync*/ + fn cuWaitExternalSemaphoresAsync_ptsz( + extSemArray: *const cuda_types::CUexternalSemaphore, + paramsArray: *const cuda_types::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS, + numExtSems: ::core::ffi::c_uint, + stream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Destroys an external semaphore + + Destroys an external semaphore object and releases any references + to the underlying resource. Any outstanding signals or waits must + have completed before the semaphore is destroyed. + + \param extSem - External semaphore to be destroyed + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa ::cuImportExternalSemaphore, + ::cuSignalExternalSemaphoresAsync, + ::cuWaitExternalSemaphoresAsync*/ + fn cuDestroyExternalSemaphore( + extSem: cuda_types::CUexternalSemaphore, + ) -> cuda_types::CUresult; + /** \brief Wait on a memory location + + Enqueues a synchronization of the stream on the given memory location. Work + ordered after the operation will block until the given condition on the + memory is satisfied. By default, the condition is to wait for + (int32_t)(*addr - value) >= 0, a cyclic greater-or-equal. + Other condition types can be specified via \p flags. + + If the memory was registered via ::cuMemHostRegister(), the device pointer + should be obtained with ::cuMemHostGetDevicePointer(). This function cannot + be used with managed memory (::cuMemAllocManaged). + + Support for CU_STREAM_WAIT_VALUE_NOR can be queried with ::cuDeviceGetAttribute() and + ::CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2. + + \note + Warning: + Improper use of this API may deadlock the application. Synchronization + ordering established through this API is not visible to CUDA. CUDA tasks + that are (even indirectly) ordered by this API should also have that order + expressed with CUDA-visible dependencies such as events. This ensures that + the scheduler does not serialize them in an improper order. + + \param stream The stream to synchronize on the memory location. + \param addr The memory location to wait on. + \param value The value to compare with the memory location. + \param flags See ::CUstreamWaitValue_flags. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + + \sa ::cuStreamWaitValue64, + ::cuStreamWriteValue32, + ::cuStreamWriteValue64, + ::cuStreamBatchMemOp, + ::cuMemHostRegister, + ::cuStreamWaitEvent*/ + fn cuStreamWaitValue32_v2_ptsz( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint32_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Wait on a memory location + + Enqueues a synchronization of the stream on the given memory location. Work + ordered after the operation will block until the given condition on the + memory is satisfied. By default, the condition is to wait for + (int64_t)(*addr - value) >= 0, a cyclic greater-or-equal. + Other condition types can be specified via \p flags. + + If the memory was registered via ::cuMemHostRegister(), the device pointer + should be obtained with ::cuMemHostGetDevicePointer(). + + Support for this can be queried with ::cuDeviceGetAttribute() and + ::CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS. + + \note + Warning: + Improper use of this API may deadlock the application. Synchronization + ordering established through this API is not visible to CUDA. CUDA tasks + that are (even indirectly) ordered by this API should also have that order + expressed with CUDA-visible dependencies such as events. This ensures that + the scheduler does not serialize them in an improper order. + + \param stream The stream to synchronize on the memory location. + \param addr The memory location to wait on. + \param value The value to compare with the memory location. + \param flags See ::CUstreamWaitValue_flags. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + + \sa ::cuStreamWaitValue32, + ::cuStreamWriteValue32, + ::cuStreamWriteValue64, + ::cuStreamBatchMemOp, + ::cuMemHostRegister, + ::cuStreamWaitEvent*/ + fn cuStreamWaitValue64_v2_ptsz( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint64_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Write a value to memory + + Write a value to memory. + + If the memory was registered via ::cuMemHostRegister(), the device pointer + should be obtained with ::cuMemHostGetDevicePointer(). This function cannot + be used with managed memory (::cuMemAllocManaged). + + \param stream The stream to do the write in. + \param addr The device address to write to. + \param value The value to write. + \param flags See ::CUstreamWriteValue_flags. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + + \sa ::cuStreamWriteValue64, + ::cuStreamWaitValue32, + ::cuStreamWaitValue64, + ::cuStreamBatchMemOp, + ::cuMemHostRegister, + ::cuEventRecord*/ + fn cuStreamWriteValue32_v2_ptsz( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint32_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Write a value to memory + + Write a value to memory. + + If the memory was registered via ::cuMemHostRegister(), the device pointer + should be obtained with ::cuMemHostGetDevicePointer(). + + Support for this can be queried with ::cuDeviceGetAttribute() and + ::CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS. + + \param stream The stream to do the write in. + \param addr The device address to write to. + \param value The value to write. + \param flags See ::CUstreamWriteValue_flags. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + + \sa ::cuStreamWriteValue32, + ::cuStreamWaitValue32, + ::cuStreamWaitValue64, + ::cuStreamBatchMemOp, + ::cuMemHostRegister, + ::cuEventRecord*/ + fn cuStreamWriteValue64_v2_ptsz( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint64_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Batch operations to synchronize the stream via memory operations + + This is a batch version of ::cuStreamWaitValue32() and ::cuStreamWriteValue32(). + Batching operations may avoid some performance overhead in both the API call + and the device execution versus adding them to the stream in separate API + calls. The operations are enqueued in the order they appear in the array. + + See ::CUstreamBatchMemOpType for the full set of supported operations, and + ::cuStreamWaitValue32(), ::cuStreamWaitValue64(), ::cuStreamWriteValue32(), + and ::cuStreamWriteValue64() for details of specific operations. + + See related APIs for details on querying support for specific operations. + + \note + Warning: + Improper use of this API may deadlock the application. Synchronization + ordering established through this API is not visible to CUDA. CUDA tasks + that are (even indirectly) ordered by this API should also have that order + expressed with CUDA-visible dependencies such as events. This ensures that + the scheduler does not serialize them in an improper order. For more + information, see the Stream Memory Operations section in the programming + guide(https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html). + + \param stream The stream to enqueue the operations in. + \param count The number of operations in the array. Must be less than 256. + \param paramArray The types and parameters of the individual operations. + \param flags Reserved for future expansion; must be 0. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + + \sa ::cuStreamWaitValue32, + ::cuStreamWaitValue64, + ::cuStreamWriteValue32, + ::cuStreamWriteValue64, + ::cuMemHostRegister*/ + fn cuStreamBatchMemOp_v2_ptsz( + stream: cuda_types::CUstream, + count: ::core::ffi::c_uint, + paramArray: *mut cuda_types::CUstreamBatchMemOpParams, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Returns information about a function + + Returns in \p *pi the integer value of the attribute \p attrib on the kernel + given by \p hfunc. The supported attributes are: + - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: The maximum number of threads + per block, beyond which a launch of the function would fail. This number + depends on both the function and the device on which the function is + currently loaded. + - ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: The size in bytes of + statically-allocated shared memory per block required by this function. + This does not include dynamically-allocated shared memory requested by + the user at runtime. + - ::CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: The size in bytes of user-allocated + constant memory required by this function. + - ::CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: The size in bytes of local memory + used by each thread of this function. + - ::CU_FUNC_ATTRIBUTE_NUM_REGS: The number of registers used by each thread + of this function. + - ::CU_FUNC_ATTRIBUTE_PTX_VERSION: The PTX virtual architecture version for + which the function was compiled. This value is the major PTX version * 10 + + the minor PTX version, so a PTX version 1.3 function would return the + value 13. Note that this may return the undefined value of 0 for cubins + compiled prior to CUDA 3.0. + - ::CU_FUNC_ATTRIBUTE_BINARY_VERSION: The binary architecture version for + which the function was compiled. This value is the major binary + version * 10 + the minor binary version, so a binary version 1.3 function + would return the value 13. Note that this will return a value of 10 for + legacy cubins that do not have a properly-encoded binary architecture + version. + - ::CU_FUNC_CACHE_MODE_CA: The attribute to indicate whether the function has + been compiled with user specified option "-Xptxas --dlcm=ca" set . + - ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: The maximum size in bytes of + dynamically-allocated shared memory. + - ::CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: Preferred shared memory-L1 + cache split ratio in percent of total shared memory. + - ::CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET: If this attribute is set, the + kernel must launch with a valid cluster size specified. + - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH: The required cluster width in + blocks. + - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT: The required cluster height in + blocks. + - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH: The required cluster depth in + blocks. + - ::CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED: Indicates whether + the function can be launched with non-portable cluster size. 1 is allowed, + 0 is disallowed. A non-portable cluster size may only function on the + specific SKUs the program is tested on. The launch might fail if the + program is run on a different hardware platform. CUDA API provides + cudaOccupancyMaxActiveClusters to assist with checking whether the desired + size can be launched on the current device. A portable cluster size is + guaranteed to be functional on all compute capabilities higher than the + target compute capability. The portable cluster size for sm_90 is 8 blocks + per cluster. This value may increase for future compute capabilities. The + specific hardware unit may support higher cluster sizes that’s not + guaranteed to be portable. + - ::CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE: The block + scheduling policy of a function. The value type is CUclusterSchedulingPolicy. + + With a few execeptions, function attributes may also be queried on unloaded + function handles returned from ::cuModuleEnumerateFunctions. + ::CUDA_ERROR_FUNCTION_NOT_LOADED is returned if the attribute requires a fully + loaded function but the function is not loaded. The loading state of a function + may be queried using ::cuFuncIsloaded. ::cuFuncLoad may be called to explicitly + load a function before querying the following attributes that require the function + to be loaded: + - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK + - ::CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES + - ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES + + \param pi - Returned attribute value + \param attrib - Attribute requested + \param hfunc - Function to query attribute of + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_FUNCTION_NOT_LOADED + \notefnerr + + \sa ::cuCtxGetCacheConfig, + ::cuCtxSetCacheConfig, + ::cuFuncSetCacheConfig, + ::cuLaunchKernel, + ::cudaFuncGetAttributes, + ::cudaFuncSetAttribute, + ::cuFuncIsLoaded, + ::cuFuncLoad, + ::cuKernelGetAttribute*/ + fn cuFuncGetAttribute( + pi: *mut ::core::ffi::c_int, + attrib: cuda_types::CUfunction_attribute, + hfunc: cuda_types::CUfunction, + ) -> cuda_types::CUresult; + /** \brief Sets information about a function + + This call sets the value of a specified attribute \p attrib on the kernel given + by \p hfunc to an integer value specified by \p val + This function returns CUDA_SUCCESS if the new value of the attribute could be + successfully set. If the set fails, this call will return an error. + Not all attributes can have values set. Attempting to set a value on a read-only + attribute will result in an error (CUDA_ERROR_INVALID_VALUE) + + Supported attributes for the cuFuncSetAttribute call are: + - ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: This maximum size in bytes of + dynamically-allocated shared memory. The value should contain the requested + maximum size of dynamically-allocated shared memory. The sum of this value and + the function attribute ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES cannot exceed the + device attribute ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN. + The maximal size of requestable dynamic shared memory may differ by GPU + architecture. + - ::CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: On devices where the L1 + cache and shared memory use the same hardware resources, this sets the shared memory + carveout preference, in percent of the total shared memory. + See ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR + This is only a hint, and the driver can choose a different ratio if required to execute the function. + - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH: The required cluster width in + blocks. The width, height, and depth values must either all be 0 or all be + positive. The validity of the cluster dimensions is checked at launch time. + If the value is set during compile time, it cannot be set at runtime. + Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED. + - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT: The required cluster height in + blocks. The width, height, and depth values must either all be 0 or all be + positive. The validity of the cluster dimensions is checked at launch time. + If the value is set during compile time, it cannot be set at runtime. + Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED. + - ::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH: The required cluster depth in + blocks. The width, height, and depth values must either all be 0 or all be + positive. The validity of the cluster dimensions is checked at launch time. + If the value is set during compile time, it cannot be set at runtime. + Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED. + - ::CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE: The block + scheduling policy of a function. The value type is CUclusterSchedulingPolicy. + + \param hfunc - Function to query attribute of + \param attrib - Attribute requested + \param value - The value to set + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuCtxGetCacheConfig, + ::cuCtxSetCacheConfig, + ::cuFuncSetCacheConfig, + ::cuLaunchKernel, + ::cudaFuncGetAttributes, + ::cudaFuncSetAttribute, + ::cuKernelSetAttribute*/ + fn cuFuncSetAttribute( + hfunc: cuda_types::CUfunction, + attrib: cuda_types::CUfunction_attribute, + value: ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Sets the preferred cache configuration for a device function + + On devices where the L1 cache and shared memory use the same hardware + resources, this sets through \p config the preferred cache configuration for + the device function \p hfunc. This is only a preference. The driver will use + the requested configuration if possible, but it is free to choose a different + configuration if required to execute \p hfunc. Any context-wide preference + set via ::cuCtxSetCacheConfig() will be overridden by this per-function + setting unless the per-function setting is ::CU_FUNC_CACHE_PREFER_NONE. In + that case, the current context-wide setting will be used. + + This setting does nothing on devices where the size of the L1 cache and + shared memory are fixed. + + Launching a kernel with a different preference than the most recent + preference setting may insert a device-side synchronization point. + + + The supported cache configurations are: + - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default) + - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache + - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory + - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory + + \param hfunc - Kernel to configure cache for + \param config - Requested cache configuration + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT + \notefnerr + + \sa ::cuCtxGetCacheConfig, + ::cuCtxSetCacheConfig, + ::cuFuncGetAttribute, + ::cuLaunchKernel, + ::cudaFuncSetCacheConfig, + ::cuKernelSetCacheConfig*/ + fn cuFuncSetCacheConfig( + hfunc: cuda_types::CUfunction, + config: cuda_types::CUfunc_cache, + ) -> cuda_types::CUresult; + /** \brief Returns a module handle + + Returns in \p *hmod the handle of the module that function \p hfunc + is located in. The lifetime of the module corresponds to the lifetime of + the context it was loaded in or until the module is explicitly unloaded. + + The CUDA runtime manages its own modules loaded into the primary context. + If the handle returned by this API refers to a module loaded by the CUDA runtime, + calling ::cuModuleUnload() on that module will result in undefined behavior. + + \param hmod - Returned module handle + \param hfunc - Function to retrieve module for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_FOUND + \notefnerr +*/ + fn cuFuncGetModule( + hmod: *mut cuda_types::CUmodule, + hfunc: cuda_types::CUfunction, + ) -> cuda_types::CUresult; + /** \brief Returns the function name for a ::CUfunction handle + + Returns in \p **name the function name associated with the function handle \p hfunc . + The function name is returned as a null-terminated string. The returned name is only + valid when the function handle is valid. If the module is unloaded or reloaded, one + must call the API again to get the updated name. This API may return a mangled name if + the function is not declared as having C linkage. If either \p **name or \p hfunc + is NULL, ::CUDA_ERROR_INVALID_VALUE is returned. + + \param name - The returned name of the function + \param hfunc - The function handle to retrieve the name for + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \notefnerr +*/ + fn cuFuncGetName( + name: *mut *const ::core::ffi::c_char, + hfunc: cuda_types::CUfunction, + ) -> cuda_types::CUresult; + /** \brief Returns the offset and size of a kernel parameter in the device-side parameter layout + + Queries the kernel parameter at \p paramIndex into \p func's list of parameters, and returns + in \p paramOffset and \p paramSize the offset and size, respectively, where the parameter + will reside in the device-side parameter layout. This information can be used to update kernel + node parameters from the device via ::cudaGraphKernelNodeSetParam() and + ::cudaGraphKernelNodeUpdatesApply(). \p paramIndex must be less than the number of parameters + that \p func takes. \p paramSize can be set to NULL if only the parameter offset is desired. + + \param func - The function to query + \param paramIndex - The parameter index to query + \param paramOffset - Returns the offset into the device-side parameter layout at which the parameter resides + \param paramSize - Optionally returns the size of the parameter in the device-side parameter layout + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \notefnerr + + \sa ::cuKernelGetParamInfo*/ + fn cuFuncGetParamInfo( + func: cuda_types::CUfunction, + paramIndex: usize, + paramOffset: *mut usize, + paramSize: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Returns if the function is loaded + + Returns in \p state the loading state of \p function. + + \param state - returned loading state + \param function - the function to check + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuFuncLoad, + ::cuModuleEnumerateFunctions*/ + fn cuFuncIsLoaded( + state: *mut cuda_types::CUfunctionLoadingState, + function: cuda_types::CUfunction, + ) -> cuda_types::CUresult; + /** \brief Loads a function + + Finalizes function loading for \p function. Calling this API with a + fully loaded function has no effect. + + \param function - the function to load + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuModuleEnumerateFunctions, + ::cuFuncIsLoaded*/ + fn cuFuncLoad(function: cuda_types::CUfunction) -> cuda_types::CUresult; + /** \brief Launches a CUDA function ::CUfunction or a CUDA kernel ::CUkernel + + Invokes the function ::CUfunction or the kernel ::CUkernel \p f + on a \p gridDimX x \p gridDimY x \p gridDimZ grid of blocks. + Each block contains \p blockDimX x \p blockDimY x + \p blockDimZ threads. + + \p sharedMemBytes sets the amount of dynamic shared memory that will be + available to each thread block. + + Kernel parameters to \p f can be specified in one of two ways: + + 1) Kernel parameters can be specified via \p kernelParams. If \p f + has N parameters, then \p kernelParams needs to be an array of N + pointers. Each of \p kernelParams[0] through \p kernelParams[N-1] + must point to a region of memory from which the actual kernel + parameter will be copied. The number of kernel parameters and their + offsets and sizes do not need to be specified as that information is + retrieved directly from the kernel's image. + + 2) Kernel parameters can also be packaged by the application into + a single buffer that is passed in via the \p extra parameter. + This places the burden on the application of knowing each kernel + parameter's size and alignment/padding within the buffer. Here is + an example of using the \p extra parameter in this manner: + \code +size_t argBufferSize; +char argBuffer[256]; + +// populate argBuffer and argBufferSize + +void *config[] = { +CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer, +CU_LAUNCH_PARAM_BUFFER_SIZE, &argBufferSize, +CU_LAUNCH_PARAM_END +}; +status = cuLaunchKernel(f, gx, gy, gz, bx, by, bz, sh, s, NULL, config); + \endcode + + The \p extra parameter exists to allow ::cuLaunchKernel to take + additional less commonly used arguments. \p extra specifies a list of + names of extra settings and their corresponding values. Each extra + setting name is immediately followed by the corresponding value. The + list must be terminated with either NULL or ::CU_LAUNCH_PARAM_END. + + - ::CU_LAUNCH_PARAM_END, which indicates the end of the \p extra + array; + - ::CU_LAUNCH_PARAM_BUFFER_POINTER, which specifies that the next + value in \p extra will be a pointer to a buffer containing all + the kernel parameters for launching kernel \p f; + - ::CU_LAUNCH_PARAM_BUFFER_SIZE, which specifies that the next + value in \p extra will be a pointer to a size_t containing the + size of the buffer specified with ::CU_LAUNCH_PARAM_BUFFER_POINTER; + + The error ::CUDA_ERROR_INVALID_VALUE will be returned if kernel + parameters are specified with both \p kernelParams and \p extra + (i.e. both \p kernelParams and \p extra are non-NULL). + + Calling ::cuLaunchKernel() invalidates the persistent function state + set through the following deprecated APIs: + ::cuFuncSetBlockShape(), + ::cuFuncSetSharedSize(), + ::cuParamSetSize(), + ::cuParamSeti(), + ::cuParamSetf(), + ::cuParamSetv(). + + Note that to use ::cuLaunchKernel(), the kernel \p f must either have + been compiled with toolchain version 3.2 or later so that it will + contain kernel parameter information, or have no kernel parameters. + If either of these conditions is not met, then ::cuLaunchKernel() will + return ::CUDA_ERROR_INVALID_IMAGE. + + Note that the API can also be used to launch context-less kernel ::CUkernel + by querying the handle using ::cuLibraryGetKernel() and then passing it + to the API by casting to ::CUfunction. Here, the context to launch + the kernel on will either be taken from the specified stream \p hStream + or the current context in case of NULL stream. + + \param f - Function ::CUfunction or Kernel ::CUkernel to launch + \param gridDimX - Width of grid in blocks + \param gridDimY - Height of grid in blocks + \param gridDimZ - Depth of grid in blocks + \param blockDimX - X dimension of each thread block + \param blockDimY - Y dimension of each thread block + \param blockDimZ - Z dimension of each thread block + \param sharedMemBytes - Dynamic shared-memory size per thread block in bytes + \param hStream - Stream identifier + \param kernelParams - Array of pointers to kernel parameters + \param extra - Extra options + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_IMAGE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_LAUNCH_FAILED, + ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, + ::CUDA_ERROR_LAUNCH_TIMEOUT, + ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, + ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, + ::CUDA_ERROR_NOT_FOUND + \note_null_stream + \notefnerr + + \sa ::cuCtxGetCacheConfig, + ::cuCtxSetCacheConfig, + ::cuFuncSetCacheConfig, + ::cuFuncGetAttribute, + ::cudaLaunchKernel, + ::cuLibraryGetKernel, + ::cuKernelSetCacheConfig, + ::cuKernelGetAttribute, + ::cuKernelSetAttribute*/ + fn cuLaunchKernel_ptsz( + f: cuda_types::CUfunction, + gridDimX: ::core::ffi::c_uint, + gridDimY: ::core::ffi::c_uint, + gridDimZ: ::core::ffi::c_uint, + blockDimX: ::core::ffi::c_uint, + blockDimY: ::core::ffi::c_uint, + blockDimZ: ::core::ffi::c_uint, + sharedMemBytes: ::core::ffi::c_uint, + hStream: cuda_types::CUstream, + kernelParams: *mut *mut ::core::ffi::c_void, + extra: *mut *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Launches a CUDA function ::CUfunction or a CUDA kernel ::CUkernel with launch-time configuration + + Invokes the function ::CUfunction or the kernel ::CUkernel \p f with the specified launch-time configuration + \p config. + + The ::CUlaunchConfig structure is defined as: + + \code + typedef struct CUlaunchConfig_st { + unsigned int gridDimX; + unsigned int gridDimY; + unsigned int gridDimZ; + unsigned int blockDimX; + unsigned int blockDimY; + unsigned int blockDimZ; + unsigned int sharedMemBytes; + CUstream hStream; + CUlaunchAttribute *attrs; + unsigned int numAttrs; + } CUlaunchConfig; + \endcode + + where: + - ::CUlaunchConfig::gridDimX is the width of the grid in blocks. + - ::CUlaunchConfig::gridDimY is the height of the grid in blocks. + - ::CUlaunchConfig::gridDimZ is the depth of the grid in blocks. + - ::CUlaunchConfig::blockDimX is the X dimension of each thread block. + - ::CUlaunchConfig::blockDimX is the Y dimension of each thread block. + - ::CUlaunchConfig::blockDimZ is the Z dimension of each thread block. + - ::CUlaunchConfig::sharedMemBytes is the dynamic shared-memory size per + thread block in bytes. + - ::CUlaunchConfig::hStream is the handle to the stream to perform the launch + in. The CUDA context associated with this stream must match that associated + with function f. + - ::CUlaunchConfig::attrs is an array of ::CUlaunchConfig::numAttrs + continguous ::CUlaunchAttribute elements. The value of this pointer is not + considered if ::CUlaunchConfig::numAttrs is zero. However, in that case, it + is recommended to set the pointer to NULL. + - ::CUlaunchConfig::numAttrs is the number of attributes populating the + first ::CUlaunchConfig::numAttrs positions of the ::CUlaunchConfig::attrs + array. + + Launch-time configuration is specified by adding entries to + ::CUlaunchConfig::attrs. Each entry is an attribute ID and a corresponding + attribute value. + + The ::CUlaunchAttribute structure is defined as: + \code + typedef struct CUlaunchAttribute_st { + CUlaunchAttributeID id; + CUlaunchAttributeValue value; + } CUlaunchAttribute; + \endcode + where: + - ::CUlaunchAttribute::id is a unique enum identifying the attribute. + - ::CUlaunchAttribute::value is a union that hold the attribute value. + + An example of using the \p config parameter: + \code + CUlaunchAttribute coopAttr = {.id = CU_LAUNCH_ATTRIBUTE_COOPERATIVE, + .value = 1}; + CUlaunchConfig config = {... // set block and grid dimensions + .attrs = &coopAttr, + .numAttrs = 1}; + + cuLaunchKernelEx(&config, kernel, NULL, NULL); + \endcode + + The ::CUlaunchAttributeID enum is defined as: + \code + typedef enum CUlaunchAttributeID_enum { + CU_LAUNCH_ATTRIBUTE_IGNORE = 0, + CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1, + CU_LAUNCH_ATTRIBUTE_COOPERATIVE = 2, + CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3, + CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION = 4, + CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = 5, + CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION = 6, + CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT = 7, + CU_LAUNCH_ATTRIBUTE_PRIORITY = 8, + CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = 9, + CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN = 10, + CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT = 12, + CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = 13, + } CUlaunchAttributeID; + \endcode + + and the corresponding ::CUlaunchAttributeValue union as : + \code + typedef union CUlaunchAttributeValue_union { + CUaccessPolicyWindow accessPolicyWindow; + int cooperative; + CUsynchronizationPolicy syncPolicy; + struct { + unsigned int x; + unsigned int y; + unsigned int z; + } clusterDim; + CUclusterSchedulingPolicy clusterSchedulingPolicyPreference; + int programmaticStreamSerializationAllowed; + struct { + CUevent event; + int flags; + int triggerAtBlockStart; + } programmaticEvent; + int priority; + CUlaunchMemSyncDomainMap memSyncDomainMap; + CUlaunchMemSyncDomain memSyncDomain; + struct { + CUevent event; + int flags; + } launchCompletionEvent; + struct { + int deviceUpdatable; + CUgraphDeviceNode devNode; + } deviceUpdatableKernelNode; + } CUlaunchAttributeValue; + \endcode + + Setting ::CU_LAUNCH_ATTRIBUTE_COOPERATIVE to a non-zero value causes the + kernel launch to be a cooperative launch, with exactly the same usage and + semantics of ::cuLaunchCooperativeKernel. + + Setting ::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION to a non-zero + values causes the kernel to use programmatic means to resolve its stream + dependency -- enabling the CUDA runtime to opportunistically allow the grid's + execution to overlap with the previous kernel in the stream, if that kernel + requests the overlap. + + ::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT records an event along with the + kernel launch. Event recorded through this launch attribute is guaranteed to + only trigger after all block in the associated kernel trigger the event. A + block can trigger the event through PTX launchdep.release or CUDA builtin + function cudaTriggerProgrammaticLaunchCompletion(). A trigger can also be + inserted at the beginning of each block's execution if triggerAtBlockStart is + set to non-0. Note that dependents (including the CPU thread calling + cuEventSynchronize()) are not guaranteed to observe the release precisely + when it is released. For example, cuEventSynchronize() may only observe the + event trigger long after the associated kernel has completed. This recording + type is primarily meant for establishing programmatic dependency between + device tasks. The event supplied must not be an interprocess or interop + event. The event must disable timing (i.e. created with + ::CU_EVENT_DISABLE_TIMING flag set). + + ::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT records an event along with + the kernel launch. Nominally, the event is triggered once all blocks of the + kernel have begun execution. Currently this is a best effort. If a kernel B + has a launch completion dependency on a kernel A, B may wait until A is + complete. Alternatively, blocks of B may begin before all blocks of A have + begun, for example: + + - If B can claim execution resources unavaiable to A, for example if they + run on different GPUs. + - If B is a higher priority than A. + + Exercise caution if such an ordering inversion could lead to deadlock. The + event supplied must not be an interprocess or interop event. The event must + disable timing (i.e. must be created with the ::CU_EVENT_DISABLE_TIMING flag + set). + + Setting ::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE to 1 + on a captured launch causes the resulting kernel node to be device-updatable. + This attribute is specific to graphs, and passing it to a launch in a + non-capturing stream results in an error. Passing a value other than 0 or 1 is + not allowed. + + On success, a handle will be returned via + ::CUlaunchAttributeValue::deviceUpdatableKernelNode::devNode which can be passed + to the various device-side update functions to update the node's kernel parameters + from within another kernel. For more information on the types of device updates + that can be made, as well as the relevant limitations thereof, see + ::cudaGraphKernelNodeUpdatesApply. + + Kernel nodes which are device-updatable have additional restrictions compared to regular + kernel nodes. Firstly, device-updatable nodes cannot be removed from their graph via + ::cuGraphDestroyNode. Additionally, once opted-in to this functionality, a node cannot + opt out, and any attempt to set the attribute to 0 will result in an error. Graphs + containing one or more device-updatable node also do not allow multiple instantiation. + + + The effect of other attributes is consistent with their effect when set via + persistent APIs. + + See ::cuStreamSetAttribute for + - ::CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW + - ::CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY + + See ::cuFuncSetAttribute for + - ::CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION + - ::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE + + Kernel parameters to \p f can be specified in the same ways that they can be + using ::cuLaunchKernel. + + Note that the API can also be used to launch context-less kernel ::CUkernel + by querying the handle using ::cuLibraryGetKernel() and then passing it + to the API by casting to ::CUfunction. Here, the context to launch + the kernel on will either be taken from the specified stream ::CUlaunchConfig::hStream + or the current context in case of NULL stream. + + \param config - Config to launch + \param f - Function ::CUfunction or Kernel ::CUkernel to launch + \param kernelParams - Array of pointers to kernel parameters + \param extra - Extra options + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_IMAGE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_LAUNCH_FAILED, + ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, + ::CUDA_ERROR_LAUNCH_TIMEOUT, + ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, + ::CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE, + ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, + ::CUDA_ERROR_NOT_FOUND + \note_null_stream + \notefnerr + + \sa ::cuCtxGetCacheConfig, + ::cuCtxSetCacheConfig, + ::cuFuncSetCacheConfig, + ::cuFuncGetAttribute, + ::cudaLaunchKernel, + ::cudaLaunchKernelEx, + ::cuLibraryGetKernel, + ::cuKernelSetCacheConfig, + ::cuKernelGetAttribute, + ::cuKernelSetAttribute*/ + fn cuLaunchKernelEx_ptsz( + config: *const cuda_types::CUlaunchConfig, + f: cuda_types::CUfunction, + kernelParams: *mut *mut ::core::ffi::c_void, + extra: *mut *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Launches a CUDA function ::CUfunction or a CUDA kernel ::CUkernel where thread blocks + can cooperate and synchronize as they execute + + Invokes the function ::CUfunction or the kernel ::CUkernel \p f on a \p gridDimX x \p gridDimY x \p gridDimZ + grid of blocks. Each block contains \p blockDimX x \p blockDimY x + \p blockDimZ threads. + + Note that the API can also be used to launch context-less kernel ::CUkernel + by querying the handle using ::cuLibraryGetKernel() and then passing it + to the API by casting to ::CUfunction. Here, the context to launch + the kernel on will either be taken from the specified stream \p hStream + or the current context in case of NULL stream. + + \p sharedMemBytes sets the amount of dynamic shared memory that will be + available to each thread block. + + The device on which this kernel is invoked must have a non-zero value for + the device attribute ::CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH. + + The total number of blocks launched cannot exceed the maximum number of blocks per + multiprocessor as returned by ::cuOccupancyMaxActiveBlocksPerMultiprocessor (or + ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags) times the number of multiprocessors + as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT. + + The kernel cannot make use of CUDA dynamic parallelism. + + Kernel parameters must be specified via \p kernelParams. If \p f + has N parameters, then \p kernelParams needs to be an array of N + pointers. Each of \p kernelParams[0] through \p kernelParams[N-1] + must point to a region of memory from which the actual kernel + parameter will be copied. The number of kernel parameters and their + offsets and sizes do not need to be specified as that information is + retrieved directly from the kernel's image. + + Calling ::cuLaunchCooperativeKernel() sets persistent function state that is + the same as function state set through ::cuLaunchKernel API + + When the kernel \p f is launched via ::cuLaunchCooperativeKernel(), the previous + block shape, shared size and parameter info associated with \p f + is overwritten. + + Note that to use ::cuLaunchCooperativeKernel(), the kernel \p f must either have + been compiled with toolchain version 3.2 or later so that it will + contain kernel parameter information, or have no kernel parameters. + If either of these conditions is not met, then ::cuLaunchCooperativeKernel() will + return ::CUDA_ERROR_INVALID_IMAGE. + + Note that the API can also be used to launch context-less kernel ::CUkernel + by querying the handle using ::cuLibraryGetKernel() and then passing it + to the API by casting to ::CUfunction. Here, the context to launch + the kernel on will either be taken from the specified stream \p hStream + or the current context in case of NULL stream. + + \param f - Function ::CUfunction or Kernel ::CUkernel to launch + \param gridDimX - Width of grid in blocks + \param gridDimY - Height of grid in blocks + \param gridDimZ - Depth of grid in blocks + \param blockDimX - X dimension of each thread block + \param blockDimY - Y dimension of each thread block + \param blockDimZ - Z dimension of each thread block + \param sharedMemBytes - Dynamic shared-memory size per thread block in bytes + \param hStream - Stream identifier + \param kernelParams - Array of pointers to kernel parameters + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_IMAGE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_LAUNCH_FAILED, + ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, + ::CUDA_ERROR_LAUNCH_TIMEOUT, + ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, + ::CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE, + ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, + ::CUDA_ERROR_NOT_FOUND + \note_null_stream + \notefnerr + + \sa ::cuCtxGetCacheConfig, + ::cuCtxSetCacheConfig, + ::cuFuncSetCacheConfig, + ::cuFuncGetAttribute, + ::cuLaunchCooperativeKernelMultiDevice, + ::cudaLaunchCooperativeKernel, + ::cuLibraryGetKernel, + ::cuKernelSetCacheConfig, + ::cuKernelGetAttribute, + ::cuKernelSetAttribute*/ + fn cuLaunchCooperativeKernel_ptsz( + f: cuda_types::CUfunction, + gridDimX: ::core::ffi::c_uint, + gridDimY: ::core::ffi::c_uint, + gridDimZ: ::core::ffi::c_uint, + blockDimX: ::core::ffi::c_uint, + blockDimY: ::core::ffi::c_uint, + blockDimZ: ::core::ffi::c_uint, + sharedMemBytes: ::core::ffi::c_uint, + hStream: cuda_types::CUstream, + kernelParams: *mut *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Launches CUDA functions on multiple devices where thread blocks can cooperate and synchronize as they execute + + \deprecated This function is deprecated as of CUDA 11.3. + + Invokes kernels as specified in the \p launchParamsList array where each element + of the array specifies all the parameters required to perform a single kernel launch. + These kernels can cooperate and synchronize as they execute. The size of the array is + specified by \p numDevices. + + No two kernels can be launched on the same device. All the devices targeted by this + multi-device launch must be identical. All devices must have a non-zero value for the + device attribute ::CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH. + + All kernels launched must be identical with respect to the compiled code. Note that + any __device__, __constant__ or __managed__ variables present in the module that owns + the kernel launched on each device, are independently instantiated on every device. + It is the application's responsibility to ensure these variables are initialized and + used appropriately. + + The size of the grids as specified in blocks, the size of the blocks themselves + and the amount of shared memory used by each thread block must also match across + all launched kernels. + + The streams used to launch these kernels must have been created via either ::cuStreamCreate + or ::cuStreamCreateWithPriority. The NULL stream or ::CU_STREAM_LEGACY or ::CU_STREAM_PER_THREAD + cannot be used. + + The total number of blocks launched per kernel cannot exceed the maximum number of blocks + per multiprocessor as returned by ::cuOccupancyMaxActiveBlocksPerMultiprocessor (or + ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags) times the number of multiprocessors + as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT. Since the + total number of blocks launched per device has to match across all devices, the maximum + number of blocks that can be launched per device will be limited by the device with the + least number of multiprocessors. + + The kernels cannot make use of CUDA dynamic parallelism. + + The ::CUDA_LAUNCH_PARAMS structure is defined as: + \code +typedef struct CUDA_LAUNCH_PARAMS_st +{ +CUfunction function; +unsigned int gridDimX; +unsigned int gridDimY; +unsigned int gridDimZ; +unsigned int blockDimX; +unsigned int blockDimY; +unsigned int blockDimZ; +unsigned int sharedMemBytes; +CUstream hStream; +void **kernelParams; +} CUDA_LAUNCH_PARAMS; + \endcode + where: + - ::CUDA_LAUNCH_PARAMS::function specifies the kernel to be launched. All functions must + be identical with respect to the compiled code. + Note that you can also specify context-less kernel ::CUkernel by querying the handle + using ::cuLibraryGetKernel() and then casting to ::CUfunction. In this case, the context to + launch the kernel on be taken from the specified stream ::CUDA_LAUNCH_PARAMS::hStream. + - ::CUDA_LAUNCH_PARAMS::gridDimX is the width of the grid in blocks. This must match across + all kernels launched. + - ::CUDA_LAUNCH_PARAMS::gridDimY is the height of the grid in blocks. This must match across + all kernels launched. + - ::CUDA_LAUNCH_PARAMS::gridDimZ is the depth of the grid in blocks. This must match across + all kernels launched. + - ::CUDA_LAUNCH_PARAMS::blockDimX is the X dimension of each thread block. This must match across + all kernels launched. + - ::CUDA_LAUNCH_PARAMS::blockDimX is the Y dimension of each thread block. This must match across + all kernels launched. + - ::CUDA_LAUNCH_PARAMS::blockDimZ is the Z dimension of each thread block. This must match across + all kernels launched. + - ::CUDA_LAUNCH_PARAMS::sharedMemBytes is the dynamic shared-memory size per thread block in bytes. + This must match across all kernels launched. + - ::CUDA_LAUNCH_PARAMS::hStream is the handle to the stream to perform the launch in. This cannot + be the NULL stream or ::CU_STREAM_LEGACY or ::CU_STREAM_PER_THREAD. The CUDA context associated + with this stream must match that associated with ::CUDA_LAUNCH_PARAMS::function. + - ::CUDA_LAUNCH_PARAMS::kernelParams is an array of pointers to kernel parameters. If + ::CUDA_LAUNCH_PARAMS::function has N parameters, then ::CUDA_LAUNCH_PARAMS::kernelParams + needs to be an array of N pointers. Each of ::CUDA_LAUNCH_PARAMS::kernelParams[0] through + ::CUDA_LAUNCH_PARAMS::kernelParams[N-1] must point to a region of memory from which the actual + kernel parameter will be copied. The number of kernel parameters and their offsets and sizes + do not need to be specified as that information is retrieved directly from the kernel's image. + + By default, the kernel won't begin execution on any GPU until all prior work in all the specified + streams has completed. This behavior can be overridden by specifying the flag + ::CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC. When this flag is specified, each kernel + will only wait for prior work in the stream corresponding to that GPU to complete before it begins + execution. + + Similarly, by default, any subsequent work pushed in any of the specified streams will not begin + execution until the kernels on all GPUs have completed. This behavior can be overridden by specifying + the flag ::CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC. When this flag is specified, + any subsequent work pushed in any of the specified streams will only wait for the kernel launched + on the GPU corresponding to that stream to complete before it begins execution. + + Calling ::cuLaunchCooperativeKernelMultiDevice() sets persistent function state that is + the same as function state set through ::cuLaunchKernel API when called individually for each + element in \p launchParamsList. + + When kernels are launched via ::cuLaunchCooperativeKernelMultiDevice(), the previous + block shape, shared size and parameter info associated with each ::CUDA_LAUNCH_PARAMS::function + in \p launchParamsList is overwritten. + + Note that to use ::cuLaunchCooperativeKernelMultiDevice(), the kernels must either have + been compiled with toolchain version 3.2 or later so that it will + contain kernel parameter information, or have no kernel parameters. + If either of these conditions is not met, then ::cuLaunchCooperativeKernelMultiDevice() will + return ::CUDA_ERROR_INVALID_IMAGE. + + \param launchParamsList - List of launch parameters, one per device + \param numDevices - Size of the \p launchParamsList array + \param flags - Flags to control launch behavior + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_IMAGE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_LAUNCH_FAILED, + ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, + ::CUDA_ERROR_LAUNCH_TIMEOUT, + ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, + ::CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE, + ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED + \note_null_stream + \notefnerr + + \sa ::cuCtxGetCacheConfig, + ::cuCtxSetCacheConfig, + ::cuFuncSetCacheConfig, + ::cuFuncGetAttribute, + ::cuLaunchCooperativeKernel, + ::cudaLaunchCooperativeKernelMultiDevice*/ + fn cuLaunchCooperativeKernelMultiDevice( + launchParamsList: *mut cuda_types::CUDA_LAUNCH_PARAMS, + numDevices: ::core::ffi::c_uint, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Enqueues a host function call in a stream + + Enqueues a host function to run in a stream. The function will be called + after currently enqueued work and will block work added after it. + + The host function must not make any CUDA API calls. Attempting to use a + CUDA API may result in ::CUDA_ERROR_NOT_PERMITTED, but this is not required. + The host function must not perform any synchronization that may depend on + outstanding CUDA work not mandated to run earlier. Host functions without a + mandated order (such as in independent streams) execute in undefined order + and may be serialized. + + For the purposes of Unified Memory, execution makes a number of guarantees: + <ul> + <li>The stream is considered idle for the duration of the function's + execution. Thus, for example, the function may always use memory attached + to the stream it was enqueued in.</li> + <li>The start of execution of the function has the same effect as + synchronizing an event recorded in the same stream immediately prior to + the function. It thus synchronizes streams which have been "joined" + prior to the function.</li> + <li>Adding device work to any stream does not have the effect of making + the stream active until all preceding host functions and stream callbacks + have executed. Thus, for + example, a function might use global attached memory even if work has + been added to another stream, if the work has been ordered behind the + function call with an event.</li> + <li>Completion of the function does not cause a stream to become + active except as described above. The stream will remain idle + if no device work follows the function, and will remain idle across + consecutive host functions or stream callbacks without device work in + between. Thus, for example, + stream synchronization can be done by signaling from a host function at the + end of the stream.</li> + </ul> + + Note that, in contrast to ::cuStreamAddCallback, the function will not be + called in the event of an error in the CUDA context. + + \param hStream - Stream to enqueue function call in + \param fn - The function to call once preceding stream operations are complete + \param userData - User-specified data to be passed to the function + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_SUPPORTED + \note_null_stream + \notefnerr + + \sa ::cuStreamCreate, + ::cuStreamQuery, + ::cuStreamSynchronize, + ::cuStreamWaitEvent, + ::cuStreamDestroy, + ::cuMemAllocManaged, + ::cuStreamAttachMemAsync, + ::cuStreamAddCallback*/ + fn cuLaunchHostFunc_ptsz( + hStream: cuda_types::CUstream, + fn_: cuda_types::CUhostFn, + userData: *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Sets the block-dimensions for the function + + \deprecated + + Specifies the \p x, \p y, and \p z dimensions of the thread blocks that are + created when the kernel given by \p hfunc is launched. + + \param hfunc - Kernel to specify dimensions of + \param x - X dimension + \param y - Y dimension + \param z - Z dimension + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuFuncSetSharedSize, + ::cuFuncSetCacheConfig, + ::cuFuncGetAttribute, + ::cuParamSetSize, + ::cuParamSeti, + ::cuParamSetf, + ::cuParamSetv, + ::cuLaunch, + ::cuLaunchGrid, + ::cuLaunchGridAsync, + ::cuLaunchKernel*/ + fn cuFuncSetBlockShape( + hfunc: cuda_types::CUfunction, + x: ::core::ffi::c_int, + y: ::core::ffi::c_int, + z: ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Sets the dynamic shared-memory size for the function + + \deprecated + + Sets through \p bytes the amount of dynamic shared memory that will be + available to each thread block when the kernel given by \p hfunc is launched. + + \param hfunc - Kernel to specify dynamic shared-memory size for + \param bytes - Dynamic shared-memory size per thread in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuFuncSetBlockShape, + ::cuFuncSetCacheConfig, + ::cuFuncGetAttribute, + ::cuParamSetSize, + ::cuParamSeti, + ::cuParamSetf, + ::cuParamSetv, + ::cuLaunch, + ::cuLaunchGrid, + ::cuLaunchGridAsync, + ::cuLaunchKernel*/ + fn cuFuncSetSharedSize( + hfunc: cuda_types::CUfunction, + bytes: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Sets the parameter size for the function + + \deprecated + + Sets through \p numbytes the total size in bytes needed by the function + parameters of the kernel corresponding to \p hfunc. + + \param hfunc - Kernel to set parameter size for + \param numbytes - Size of parameter list in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuFuncSetBlockShape, + ::cuFuncSetSharedSize, + ::cuFuncGetAttribute, + ::cuParamSetf, + ::cuParamSeti, + ::cuParamSetv, + ::cuLaunch, + ::cuLaunchGrid, + ::cuLaunchGridAsync, + ::cuLaunchKernel*/ + fn cuParamSetSize( + hfunc: cuda_types::CUfunction, + numbytes: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Adds an integer parameter to the function's argument list + + \deprecated + + Sets an integer parameter that will be specified the next time the + kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset. + + \param hfunc - Kernel to add parameter to + \param offset - Offset to add parameter to argument list + \param value - Value of parameter + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuFuncSetBlockShape, + ::cuFuncSetSharedSize, + ::cuFuncGetAttribute, + ::cuParamSetSize, + ::cuParamSetf, + ::cuParamSetv, + ::cuLaunch, + ::cuLaunchGrid, + ::cuLaunchGridAsync, + ::cuLaunchKernel*/ + fn cuParamSeti( + hfunc: cuda_types::CUfunction, + offset: ::core::ffi::c_int, + value: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Adds a floating-point parameter to the function's argument list + + \deprecated + + Sets a floating-point parameter that will be specified the next time the + kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset. + + \param hfunc - Kernel to add parameter to + \param offset - Offset to add parameter to argument list + \param value - Value of parameter + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuFuncSetBlockShape, + ::cuFuncSetSharedSize, + ::cuFuncGetAttribute, + ::cuParamSetSize, + ::cuParamSeti, + ::cuParamSetv, + ::cuLaunch, + ::cuLaunchGrid, + ::cuLaunchGridAsync, + ::cuLaunchKernel*/ + fn cuParamSetf( + hfunc: cuda_types::CUfunction, + offset: ::core::ffi::c_int, + value: f32, + ) -> cuda_types::CUresult; + /** \brief Adds arbitrary data to the function's argument list + + \deprecated + + Copies an arbitrary amount of data (specified in \p numbytes) from \p ptr + into the parameter space of the kernel corresponding to \p hfunc. \p offset + is a byte offset. + + \param hfunc - Kernel to add data to + \param offset - Offset to add data to argument list + \param ptr - Pointer to arbitrary data + \param numbytes - Size of data to copy in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuFuncSetBlockShape, + ::cuFuncSetSharedSize, + ::cuFuncGetAttribute, + ::cuParamSetSize, + ::cuParamSetf, + ::cuParamSeti, + ::cuLaunch, + ::cuLaunchGrid, + ::cuLaunchGridAsync, + ::cuLaunchKernel*/ + fn cuParamSetv( + hfunc: cuda_types::CUfunction, + offset: ::core::ffi::c_int, + ptr: *mut ::core::ffi::c_void, + numbytes: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Launches a CUDA function + + \deprecated + + Invokes the kernel \p f on a 1 x 1 x 1 grid of blocks. The block + contains the number of threads specified by a previous call to + ::cuFuncSetBlockShape(). + + The block shape, dynamic shared memory size, and parameter information + must be set using + ::cuFuncSetBlockShape(), + ::cuFuncSetSharedSize(), + ::cuParamSetSize(), + ::cuParamSeti(), + ::cuParamSetf(), and + ::cuParamSetv() + prior to calling this function. + + Launching a function via ::cuLaunchKernel() invalidates the function's + block shape, dynamic shared memory size, and parameter information. After + launching via cuLaunchKernel, this state must be re-initialized prior to + calling this function. Failure to do so results in undefined behavior. + + \param f - Kernel to launch + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_LAUNCH_FAILED, + ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, + ::CUDA_ERROR_LAUNCH_TIMEOUT, + ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, + ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED + \notefnerr + + \sa ::cuFuncSetBlockShape, + ::cuFuncSetSharedSize, + ::cuFuncGetAttribute, + ::cuParamSetSize, + ::cuParamSetf, + ::cuParamSeti, + ::cuParamSetv, + ::cuLaunchGrid, + ::cuLaunchGridAsync, + ::cuLaunchKernel*/ + fn cuLaunch(f: cuda_types::CUfunction) -> cuda_types::CUresult; + /** \brief Launches a CUDA function + + \deprecated + + Invokes the kernel \p f on a \p grid_width x \p grid_height grid of + blocks. Each block contains the number of threads specified by a previous + call to ::cuFuncSetBlockShape(). + + The block shape, dynamic shared memory size, and parameter information + must be set using + ::cuFuncSetBlockShape(), + ::cuFuncSetSharedSize(), + ::cuParamSetSize(), + ::cuParamSeti(), + ::cuParamSetf(), and + ::cuParamSetv() + prior to calling this function. + + Launching a function via ::cuLaunchKernel() invalidates the function's + block shape, dynamic shared memory size, and parameter information. After + launching via cuLaunchKernel, this state must be re-initialized prior to + calling this function. Failure to do so results in undefined behavior. + + \param f - Kernel to launch + \param grid_width - Width of grid in blocks + \param grid_height - Height of grid in blocks + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_LAUNCH_FAILED, + ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, + ::CUDA_ERROR_LAUNCH_TIMEOUT, + ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, + ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED + \notefnerr + + \sa ::cuFuncSetBlockShape, + ::cuFuncSetSharedSize, + ::cuFuncGetAttribute, + ::cuParamSetSize, + ::cuParamSetf, + ::cuParamSeti, + ::cuParamSetv, + ::cuLaunch, + ::cuLaunchGridAsync, + ::cuLaunchKernel*/ + fn cuLaunchGrid( + f: cuda_types::CUfunction, + grid_width: ::core::ffi::c_int, + grid_height: ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Launches a CUDA function + + \deprecated + + Invokes the kernel \p f on a \p grid_width x \p grid_height grid of + blocks. Each block contains the number of threads specified by a previous + call to ::cuFuncSetBlockShape(). + + The block shape, dynamic shared memory size, and parameter information + must be set using + ::cuFuncSetBlockShape(), + ::cuFuncSetSharedSize(), + ::cuParamSetSize(), + ::cuParamSeti(), + ::cuParamSetf(), and + ::cuParamSetv() + prior to calling this function. + + Launching a function via ::cuLaunchKernel() invalidates the function's + block shape, dynamic shared memory size, and parameter information. After + launching via cuLaunchKernel, this state must be re-initialized prior to + calling this function. Failure to do so results in undefined behavior. + + \param f - Kernel to launch + \param grid_width - Width of grid in blocks + \param grid_height - Height of grid in blocks + \param hStream - Stream identifier + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_LAUNCH_FAILED, + ::CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, + ::CUDA_ERROR_LAUNCH_TIMEOUT, + ::CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, + ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED + + \note In certain cases where cubins are created with no ABI (i.e., using \p ptxas \p --abi-compile \p no), + this function may serialize kernel launches. The CUDA driver retains asynchronous behavior by + growing the per-thread stack as needed per launch and not shrinking it afterwards. + + \note_null_stream + \notefnerr + + \sa ::cuFuncSetBlockShape, + ::cuFuncSetSharedSize, + ::cuFuncGetAttribute, + ::cuParamSetSize, + ::cuParamSetf, + ::cuParamSeti, + ::cuParamSetv, + ::cuLaunch, + ::cuLaunchGrid, + ::cuLaunchKernel*/ + fn cuLaunchGridAsync( + f: cuda_types::CUfunction, + grid_width: ::core::ffi::c_int, + grid_height: ::core::ffi::c_int, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Adds a texture-reference to the function's argument list + + \deprecated + + Makes the CUDA array or linear memory bound to the texture reference + \p hTexRef available to a device program as a texture. In this version of + CUDA, the texture-reference must be obtained via ::cuModuleGetTexRef() and + the \p texunit parameter must be set to ::CU_PARAM_TR_DEFAULT. + + \param hfunc - Kernel to add texture-reference to + \param texunit - Texture unit (must be ::CU_PARAM_TR_DEFAULT) + \param hTexRef - Texture-reference to add to argument list + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr*/ + fn cuParamSetTexRef( + hfunc: cuda_types::CUfunction, + texunit: ::core::ffi::c_int, + hTexRef: cuda_types::CUtexref, + ) -> cuda_types::CUresult; + /** \brief Sets the shared memory configuration for a device function. + + \deprecated + + On devices with configurable shared memory banks, this function will + force all subsequent launches of the specified device function to have + the given shared memory bank size configuration. On any given launch of the + function, the shared memory configuration of the device will be temporarily + changed if needed to suit the function's preferred configuration. Changes in + shared memory configuration between subsequent launches of functions, + may introduce a device side synchronization point. + + Any per-function setting of shared memory bank size set via + ::cuFuncSetSharedMemConfig will override the context wide setting set with + ::cuCtxSetSharedMemConfig. + + Changing the shared memory bank size will not increase shared memory usage + or affect occupancy of kernels, but may have major effects on performance. + Larger bank sizes will allow for greater potential bandwidth to shared memory, + but will change what kinds of accesses to shared memory will result in bank + conflicts. + + This function will do nothing on devices with fixed shared memory bank size. + + The supported bank configurations are: + - ::CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE: use the context's shared memory + configuration when launching this function. + - ::CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE: set shared memory bank width to + be natively four bytes when launching this function. + - ::CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE: set shared memory bank width to + be natively eight bytes when launching this function. + + \param hfunc - kernel to be given a shared memory config + \param config - requested shared memory configuration + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT + \notefnerr + + \sa ::cuCtxGetCacheConfig, + ::cuCtxSetCacheConfig, + ::cuCtxGetSharedMemConfig, + ::cuCtxSetSharedMemConfig, + ::cuFuncGetAttribute, + ::cuLaunchKernel, + ::cudaFuncSetSharedMemConfig*/ + fn cuFuncSetSharedMemConfig( + hfunc: cuda_types::CUfunction, + config: cuda_types::CUsharedconfig, + ) -> cuda_types::CUresult; + /** \brief Creates a graph + + Creates an empty graph, which is returned via \p phGraph. + + \param phGraph - Returns newly created graph + \param flags - Graph creation flags, must be 0 + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddChildGraphNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddKernelNode, + ::cuGraphAddHostNode, + ::cuGraphAddMemcpyNode, + ::cuGraphAddMemsetNode, + ::cuGraphInstantiate, + ::cuGraphDestroy, + ::cuGraphGetNodes, + ::cuGraphGetRootNodes, + ::cuGraphGetEdges, + ::cuGraphClone*/ + fn cuGraphCreate( + phGraph: *mut cuda_types::CUgraph, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Creates a kernel execution node and adds it to a graph + + Creates a new kernel execution node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p nodeParams. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. + A handle to the new node will be returned in \p phGraphNode. + + The CUDA_KERNEL_NODE_PARAMS structure is defined as: + + \code + typedef struct CUDA_KERNEL_NODE_PARAMS_st { + CUfunction func; + unsigned int gridDimX; + unsigned int gridDimY; + unsigned int gridDimZ; + unsigned int blockDimX; + unsigned int blockDimY; + unsigned int blockDimZ; + unsigned int sharedMemBytes; + void **kernelParams; + void **extra; + CUkernel kern; + CUcontext ctx; + } CUDA_KERNEL_NODE_PARAMS; + \endcode + + When the graph is launched, the node will invoke kernel \p func on a (\p gridDimX x + \p gridDimY x \p gridDimZ) grid of blocks. Each block contains + (\p blockDimX x \p blockDimY x \p blockDimZ) threads. + + \p sharedMemBytes sets the amount of dynamic shared memory that will be + available to each thread block. + + Kernel parameters to \p func can be specified in one of two ways: + + 1) Kernel parameters can be specified via \p kernelParams. If the kernel has N + parameters, then \p kernelParams needs to be an array of N pointers. Each pointer, + from \p kernelParams[0] to \p kernelParams[N-1], points to the region of memory from which the actual + parameter will be copied. The number of kernel parameters and their offsets and sizes do not need + to be specified as that information is retrieved directly from the kernel's image. + + 2) Kernel parameters for non-cooperative kernels can also be packaged by the application into a single + buffer that is passed in via \p extra. This places the burden on the application of knowing each + kernel parameter's size and alignment/padding within the buffer. The \p extra parameter exists + to allow this function to take additional less commonly used arguments. \p extra specifies + a list of names of extra settings and their corresponding values. Each extra setting name is + immediately followed by the corresponding value. The list must be terminated with either NULL or + CU_LAUNCH_PARAM_END. + + - ::CU_LAUNCH_PARAM_END, which indicates the end of the \p extra + array; + - ::CU_LAUNCH_PARAM_BUFFER_POINTER, which specifies that the next + value in \p extra will be a pointer to a buffer + containing all the kernel parameters for launching kernel + \p func; + - ::CU_LAUNCH_PARAM_BUFFER_SIZE, which specifies that the next + value in \p extra will be a pointer to a size_t + containing the size of the buffer specified with + ::CU_LAUNCH_PARAM_BUFFER_POINTER; + + The error ::CUDA_ERROR_INVALID_VALUE will be returned if kernel parameters are specified with both + \p kernelParams and \p extra (i.e. both \p kernelParams and \p extra are non-NULL). + ::CUDA_ERROR_INVALID_VALUE will be returned if \p extra is used for a cooperative kernel. + + The \p kernelParams or \p extra array, as well as the argument values it points to, + are copied during this call. + + \note Kernels launched using graphs must not use texture and surface references. Reading or + writing through any texture or surface reference is undefined behavior. + This restriction does not apply to texture and surface objects. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + \param nodeParams - Parameters for the GPU execution node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuLaunchKernel, + ::cuLaunchCooperativeKernel, + ::cuGraphKernelNodeGetParams, + ::cuGraphKernelNodeSetParams, + ::cuGraphCreate, + ::cuGraphDestroyNode, + ::cuGraphAddChildGraphNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddHostNode, + ::cuGraphAddMemcpyNode, + ::cuGraphAddMemsetNode*/ + fn cuGraphAddKernelNode_v2( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + nodeParams: *const cuda_types::CUDA_KERNEL_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Returns a kernel node's parameters + + Returns the parameters of kernel node \p hNode in \p nodeParams. + The \p kernelParams or \p extra array returned in \p nodeParams, + as well as the argument values it points to, are owned by the node. + This memory remains valid until the node is destroyed or its + parameters are modified, and should not be modified + directly. Use ::cuGraphKernelNodeSetParams to update the + parameters of this node. + + The params will contain either \p kernelParams or \p extra, + according to which of these was most recently set on the node. + + \param hNode - Node to get the parameters for + \param nodeParams - Pointer to return the parameters + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuLaunchKernel, + ::cuGraphAddKernelNode, + ::cuGraphKernelNodeSetParams*/ + fn cuGraphKernelNodeGetParams_v2( + hNode: cuda_types::CUgraphNode, + nodeParams: *mut cuda_types::CUDA_KERNEL_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Sets a kernel node's parameters + + Sets the parameters of kernel node \p hNode to \p nodeParams. + + \param hNode - Node to set the parameters for + \param nodeParams - Parameters to copy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_OUT_OF_MEMORY + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeSetParams, + ::cuLaunchKernel, + ::cuGraphAddKernelNode, + ::cuGraphKernelNodeGetParams*/ + fn cuGraphKernelNodeSetParams_v2( + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_KERNEL_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Creates a memcpy node and adds it to a graph + + Creates a new memcpy node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. + A handle to the new node will be returned in \p phGraphNode. + + When the graph is launched, the node will perform the memcpy described by \p copyParams. + See ::cuMemcpy3D() for a description of the structure and its restrictions. + + Memcpy nodes have some additional restrictions with regards to managed memory, if the + system contains at least one device which has a zero value for the device attribute + ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. If one or more of the operands refer + to managed memory, then using the memory type ::CU_MEMORYTYPE_UNIFIED is disallowed + for those operand(s). The managed memory will be treated as residing on either the + host or the device, depending on which memory type is specified. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + \param copyParams - Parameters for the memory copy + \param ctx - Context on which to run the node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuMemcpy3D, + ::cuGraphMemcpyNodeGetParams, + ::cuGraphMemcpyNodeSetParams, + ::cuGraphCreate, + ::cuGraphDestroyNode, + ::cuGraphAddChildGraphNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddKernelNode, + ::cuGraphAddHostNode, + ::cuGraphAddMemsetNode*/ + fn cuGraphAddMemcpyNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + copyParams: *const cuda_types::CUDA_MEMCPY3D, + ctx: cuda_types::CUcontext, + ) -> cuda_types::CUresult; + /** \brief Returns a memcpy node's parameters + + Returns the parameters of memcpy node \p hNode in \p nodeParams. + + \param hNode - Node to get the parameters for + \param nodeParams - Pointer to return the parameters + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuMemcpy3D, + ::cuGraphAddMemcpyNode, + ::cuGraphMemcpyNodeSetParams*/ + fn cuGraphMemcpyNodeGetParams( + hNode: cuda_types::CUgraphNode, + nodeParams: *mut cuda_types::CUDA_MEMCPY3D, + ) -> cuda_types::CUresult; + /** \brief Sets a memcpy node's parameters + + Sets the parameters of memcpy node \p hNode to \p nodeParams. + + \param hNode - Node to set the parameters for + \param nodeParams - Parameters to copy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeSetParams, + ::cuMemcpy3D, + ::cuGraphAddMemcpyNode, + ::cuGraphMemcpyNodeGetParams*/ + fn cuGraphMemcpyNodeSetParams( + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_MEMCPY3D, + ) -> cuda_types::CUresult; + /** \brief Creates a memset node and adds it to a graph + + Creates a new memset node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. + A handle to the new node will be returned in \p phGraphNode. + + The element size must be 1, 2, or 4 bytes. + When the graph is launched, the node will perform the memset described by \p memsetParams. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + \param memsetParams - Parameters for the memory set + \param ctx - Context on which to run the node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_CONTEXT + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuMemsetD2D32, + ::cuGraphMemsetNodeGetParams, + ::cuGraphMemsetNodeSetParams, + ::cuGraphCreate, + ::cuGraphDestroyNode, + ::cuGraphAddChildGraphNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddKernelNode, + ::cuGraphAddHostNode, + ::cuGraphAddMemcpyNode*/ + fn cuGraphAddMemsetNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + memsetParams: *const cuda_types::CUDA_MEMSET_NODE_PARAMS, + ctx: cuda_types::CUcontext, + ) -> cuda_types::CUresult; + /** \brief Returns a memset node's parameters + + Returns the parameters of memset node \p hNode in \p nodeParams. + + \param hNode - Node to get the parameters for + \param nodeParams - Pointer to return the parameters + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuMemsetD2D32, + ::cuGraphAddMemsetNode, + ::cuGraphMemsetNodeSetParams*/ + fn cuGraphMemsetNodeGetParams( + hNode: cuda_types::CUgraphNode, + nodeParams: *mut cuda_types::CUDA_MEMSET_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Sets a memset node's parameters + + Sets the parameters of memset node \p hNode to \p nodeParams. + + \param hNode - Node to set the parameters for + \param nodeParams - Parameters to copy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeSetParams, + ::cuMemsetD2D32, + ::cuGraphAddMemsetNode, + ::cuGraphMemsetNodeGetParams*/ + fn cuGraphMemsetNodeSetParams( + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_MEMSET_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Creates a host execution node and adds it to a graph + + Creates a new CPU execution node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p nodeParams. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. + A handle to the new node will be returned in \p phGraphNode. + + When the graph is launched, the node will invoke the specified CPU function. + Host nodes are not supported under MPS with pre-Volta GPUs. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + \param nodeParams - Parameters for the host node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuLaunchHostFunc, + ::cuGraphHostNodeGetParams, + ::cuGraphHostNodeSetParams, + ::cuGraphCreate, + ::cuGraphDestroyNode, + ::cuGraphAddChildGraphNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddKernelNode, + ::cuGraphAddMemcpyNode, + ::cuGraphAddMemsetNode*/ + fn cuGraphAddHostNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + nodeParams: *const cuda_types::CUDA_HOST_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Returns a host node's parameters + + Returns the parameters of host node \p hNode in \p nodeParams. + + \param hNode - Node to get the parameters for + \param nodeParams - Pointer to return the parameters + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuLaunchHostFunc, + ::cuGraphAddHostNode, + ::cuGraphHostNodeSetParams*/ + fn cuGraphHostNodeGetParams( + hNode: cuda_types::CUgraphNode, + nodeParams: *mut cuda_types::CUDA_HOST_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Sets a host node's parameters + + Sets the parameters of host node \p hNode to \p nodeParams. + + \param hNode - Node to set the parameters for + \param nodeParams - Parameters to copy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeSetParams, + ::cuLaunchHostFunc, + ::cuGraphAddHostNode, + ::cuGraphHostNodeGetParams*/ + fn cuGraphHostNodeSetParams( + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_HOST_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Creates a child graph node and adds it to a graph + + Creates a new node which executes an embedded graph, and adds it to \p hGraph with + \p numDependencies dependencies specified via \p dependencies. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. + A handle to the new node will be returned in \p phGraphNode. + + If \p hGraph contains allocation or free nodes, this call will return an error. + + The node executes an embedded child graph. The child graph is cloned in this call. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + \param childGraph - The graph to clone into this node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuGraphChildGraphNodeGetGraph, + ::cuGraphCreate, + ::cuGraphDestroyNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddKernelNode, + ::cuGraphAddHostNode, + ::cuGraphAddMemcpyNode, + ::cuGraphAddMemsetNode, + ::cuGraphClone*/ + fn cuGraphAddChildGraphNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + childGraph: cuda_types::CUgraph, + ) -> cuda_types::CUresult; + /** \brief Gets a handle to the embedded graph of a child graph node + + Gets a handle to the embedded graph in a child graph node. This call + does not clone the graph. Changes to the graph will be reflected in + the node, and the node retains ownership of the graph. + + Allocation and free nodes cannot be added to the returned graph. + Attempting to do so will return an error. + + \param hNode - Node to get the embedded graph for + \param phGraph - Location to store a handle to the graph + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddChildGraphNode, + ::cuGraphNodeFindInClone*/ + fn cuGraphChildGraphNodeGetGraph( + hNode: cuda_types::CUgraphNode, + phGraph: *mut cuda_types::CUgraph, + ) -> cuda_types::CUresult; + /** \brief Creates an empty node and adds it to a graph + + Creates a new node which performs no operation, and adds it to \p hGraph with + \p numDependencies dependencies specified via \p dependencies. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. + A handle to the new node will be returned in \p phGraphNode. + + An empty node performs no operation during execution, but can be used for + transitive ordering. For example, a phased execution graph with 2 groups of n + nodes with a barrier between them can be represented using an empty node and + 2*n dependency edges, rather than no empty node and n^2 dependency edges. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuGraphCreate, + ::cuGraphDestroyNode, + ::cuGraphAddChildGraphNode, + ::cuGraphAddKernelNode, + ::cuGraphAddHostNode, + ::cuGraphAddMemcpyNode, + ::cuGraphAddMemsetNode*/ + fn cuGraphAddEmptyNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + ) -> cuda_types::CUresult; + /** \brief Creates an event record node and adds it to a graph + + Creates a new event record node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and event specified in \p event. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. + A handle to the new node will be returned in \p phGraphNode. + + Each launch of the graph will record \p event to capture execution of the + node's dependencies. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + \param event - Event for the node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuGraphAddEventWaitNode, + ::cuEventRecordWithFlags, + ::cuStreamWaitEvent, + ::cuGraphCreate, + ::cuGraphDestroyNode, + ::cuGraphAddChildGraphNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddKernelNode, + ::cuGraphAddMemcpyNode, + ::cuGraphAddMemsetNode*/ + fn cuGraphAddEventRecordNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + event: cuda_types::CUevent, + ) -> cuda_types::CUresult; + /** \brief Returns the event associated with an event record node + + Returns the event of event record node \p hNode in \p event_out. + + \param hNode - Node to get the event for + \param event_out - Pointer to return the event + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddEventRecordNode, + ::cuGraphEventRecordNodeSetEvent, + ::cuGraphEventWaitNodeGetEvent, + ::cuEventRecordWithFlags, + ::cuStreamWaitEvent*/ + fn cuGraphEventRecordNodeGetEvent( + hNode: cuda_types::CUgraphNode, + event_out: *mut cuda_types::CUevent, + ) -> cuda_types::CUresult; + /** \brief Sets an event record node's event + + Sets the event of event record node \p hNode to \p event. + + \param hNode - Node to set the event for + \param event - Event to use + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_OUT_OF_MEMORY + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeSetParams, + ::cuGraphAddEventRecordNode, + ::cuGraphEventRecordNodeGetEvent, + ::cuGraphEventWaitNodeSetEvent, + ::cuEventRecordWithFlags, + ::cuStreamWaitEvent*/ + fn cuGraphEventRecordNodeSetEvent( + hNode: cuda_types::CUgraphNode, + event: cuda_types::CUevent, + ) -> cuda_types::CUresult; + /** \brief Creates an event wait node and adds it to a graph + + Creates a new event wait node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and event specified in \p event. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. + A handle to the new node will be returned in \p phGraphNode. + + The graph node will wait for all work captured in \p event. See ::cuEventRecord() + for details on what is captured by an event. \p event may be from a different context + or device than the launch stream. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + \param event - Event for the node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuGraphAddEventRecordNode, + ::cuEventRecordWithFlags, + ::cuStreamWaitEvent, + ::cuGraphCreate, + ::cuGraphDestroyNode, + ::cuGraphAddChildGraphNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddKernelNode, + ::cuGraphAddMemcpyNode, + ::cuGraphAddMemsetNode*/ + fn cuGraphAddEventWaitNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + event: cuda_types::CUevent, + ) -> cuda_types::CUresult; + /** \brief Returns the event associated with an event wait node + + Returns the event of event wait node \p hNode in \p event_out. + + \param hNode - Node to get the event for + \param event_out - Pointer to return the event + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddEventWaitNode, + ::cuGraphEventWaitNodeSetEvent, + ::cuGraphEventRecordNodeGetEvent, + ::cuEventRecordWithFlags, + ::cuStreamWaitEvent*/ + fn cuGraphEventWaitNodeGetEvent( + hNode: cuda_types::CUgraphNode, + event_out: *mut cuda_types::CUevent, + ) -> cuda_types::CUresult; + /** \brief Sets an event wait node's event + + Sets the event of event wait node \p hNode to \p event. + + \param hNode - Node to set the event for + \param event - Event to use + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_OUT_OF_MEMORY + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeSetParams, + ::cuGraphAddEventWaitNode, + ::cuGraphEventWaitNodeGetEvent, + ::cuGraphEventRecordNodeSetEvent, + ::cuEventRecordWithFlags, + ::cuStreamWaitEvent*/ + fn cuGraphEventWaitNodeSetEvent( + hNode: cuda_types::CUgraphNode, + event: cuda_types::CUevent, + ) -> cuda_types::CUresult; + /** \brief Creates an external semaphore signal node and adds it to a graph + + Creates a new external semaphore signal node and adds it to \p hGraph with \p + numDependencies dependencies specified via \p dependencies and arguments specified + in \p nodeParams. It is possible for \p numDependencies to be 0, in which case the + node will be placed at the root of the graph. \p dependencies may not have any + duplicate entries. A handle to the new node will be returned in \p phGraphNode. + + Performs a signal operation on a set of externally allocated semaphore objects + when the node is launched. The operation(s) will occur after all of the node's + dependencies have completed. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + \param nodeParams - Parameters for the node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuGraphExternalSemaphoresSignalNodeGetParams, + ::cuGraphExternalSemaphoresSignalNodeSetParams, + ::cuGraphExecExternalSemaphoresSignalNodeSetParams, + ::cuGraphAddExternalSemaphoresWaitNode, + ::cuImportExternalSemaphore, + ::cuSignalExternalSemaphoresAsync, + ::cuWaitExternalSemaphoresAsync, + ::cuGraphCreate, + ::cuGraphDestroyNode, + ::cuGraphAddEventRecordNode, + ::cuGraphAddEventWaitNode, + ::cuGraphAddChildGraphNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddKernelNode, + ::cuGraphAddMemcpyNode, + ::cuGraphAddMemsetNode*/ + fn cuGraphAddExternalSemaphoresSignalNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + nodeParams: *const cuda_types::CUDA_EXT_SEM_SIGNAL_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Returns an external semaphore signal node's parameters + + Returns the parameters of an external semaphore signal node \p hNode in \p params_out. + The \p extSemArray and \p paramsArray returned in \p params_out, + are owned by the node. This memory remains valid until the node is destroyed or its + parameters are modified, and should not be modified + directly. Use ::cuGraphExternalSemaphoresSignalNodeSetParams to update the + parameters of this node. + + \param hNode - Node to get the parameters for + \param params_out - Pointer to return the parameters + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuLaunchKernel, + ::cuGraphAddExternalSemaphoresSignalNode, + ::cuGraphExternalSemaphoresSignalNodeSetParams, + ::cuGraphAddExternalSemaphoresWaitNode, + ::cuSignalExternalSemaphoresAsync, + ::cuWaitExternalSemaphoresAsync*/ + fn cuGraphExternalSemaphoresSignalNodeGetParams( + hNode: cuda_types::CUgraphNode, + params_out: *mut cuda_types::CUDA_EXT_SEM_SIGNAL_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Sets an external semaphore signal node's parameters + + Sets the parameters of an external semaphore signal node \p hNode to \p nodeParams. + + \param hNode - Node to set the parameters for + \param nodeParams - Parameters to copy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_OUT_OF_MEMORY + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeSetParams, + ::cuGraphAddExternalSemaphoresSignalNode, + ::cuGraphExternalSemaphoresSignalNodeSetParams, + ::cuGraphAddExternalSemaphoresWaitNode, + ::cuSignalExternalSemaphoresAsync, + ::cuWaitExternalSemaphoresAsync*/ + fn cuGraphExternalSemaphoresSignalNodeSetParams( + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_EXT_SEM_SIGNAL_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Creates an external semaphore wait node and adds it to a graph + + Creates a new external semaphore wait node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p nodeParams. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. A handle + to the new node will be returned in \p phGraphNode. + + Performs a wait operation on a set of externally allocated semaphore objects + when the node is launched. The node's dependencies will not be launched until + the wait operation has completed. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + \param nodeParams - Parameters for the node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuGraphExternalSemaphoresWaitNodeGetParams, + ::cuGraphExternalSemaphoresWaitNodeSetParams, + ::cuGraphExecExternalSemaphoresWaitNodeSetParams, + ::cuGraphAddExternalSemaphoresSignalNode, + ::cuImportExternalSemaphore, + ::cuSignalExternalSemaphoresAsync, + ::cuWaitExternalSemaphoresAsync, + ::cuGraphCreate, + ::cuGraphDestroyNode, + ::cuGraphAddEventRecordNode, + ::cuGraphAddEventWaitNode, + ::cuGraphAddChildGraphNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddKernelNode, + ::cuGraphAddMemcpyNode, + ::cuGraphAddMemsetNode*/ + fn cuGraphAddExternalSemaphoresWaitNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + nodeParams: *const cuda_types::CUDA_EXT_SEM_WAIT_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Returns an external semaphore wait node's parameters + + Returns the parameters of an external semaphore wait node \p hNode in \p params_out. + The \p extSemArray and \p paramsArray returned in \p params_out, + are owned by the node. This memory remains valid until the node is destroyed or its + parameters are modified, and should not be modified + directly. Use ::cuGraphExternalSemaphoresSignalNodeSetParams to update the + parameters of this node. + + \param hNode - Node to get the parameters for + \param params_out - Pointer to return the parameters + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuLaunchKernel, + ::cuGraphAddExternalSemaphoresWaitNode, + ::cuGraphExternalSemaphoresWaitNodeSetParams, + ::cuGraphAddExternalSemaphoresWaitNode, + ::cuSignalExternalSemaphoresAsync, + ::cuWaitExternalSemaphoresAsync*/ + fn cuGraphExternalSemaphoresWaitNodeGetParams( + hNode: cuda_types::CUgraphNode, + params_out: *mut cuda_types::CUDA_EXT_SEM_WAIT_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Sets an external semaphore wait node's parameters + + Sets the parameters of an external semaphore wait node \p hNode to \p nodeParams. + + \param hNode - Node to set the parameters for + \param nodeParams - Parameters to copy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_OUT_OF_MEMORY + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeSetParams, + ::cuGraphAddExternalSemaphoresWaitNode, + ::cuGraphExternalSemaphoresWaitNodeSetParams, + ::cuGraphAddExternalSemaphoresWaitNode, + ::cuSignalExternalSemaphoresAsync, + ::cuWaitExternalSemaphoresAsync*/ + fn cuGraphExternalSemaphoresWaitNodeSetParams( + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_EXT_SEM_WAIT_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Creates a batch memory operation node and adds it to a graph + + Creates a new batch memory operation node and adds it to \p hGraph with \p + numDependencies dependencies specified via \p dependencies and arguments specified in \p nodeParams. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. + A handle to the new node will be returned in \p phGraphNode. + + When the node is added, the paramArray inside \p nodeParams is copied and therefore it can be + freed after the call returns. + + \note + Warning: + Improper use of this API may deadlock the application. Synchronization + ordering established through this API is not visible to CUDA. CUDA tasks + that are (even indirectly) ordered by this API should also have that order + expressed with CUDA-visible dependencies such as events. This ensures that + the scheduler does not serialize them in an improper order. For more + information, see the Stream Memory Operations section in the programming + guide(https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html). + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + \param nodeParams - Parameters for the node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuStreamBatchMemOp, + ::cuStreamWaitValue32, + ::cuStreamWriteValue32, + ::cuStreamWaitValue64, + ::cuStreamWriteValue64, + ::cuGraphBatchMemOpNodeGetParams, + ::cuGraphBatchMemOpNodeSetParams, + ::cuGraphCreate, + ::cuGraphDestroyNode, + ::cuGraphAddChildGraphNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddKernelNode, + ::cuGraphAddMemcpyNode, + ::cuGraphAddMemsetNode*/ + fn cuGraphAddBatchMemOpNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + nodeParams: *const cuda_types::CUDA_BATCH_MEM_OP_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Returns a batch mem op node's parameters + + Returns the parameters of batch mem op node \p hNode in \p nodeParams_out. + The \p paramArray returned in \p nodeParams_out is owned by the node. + This memory remains valid until the node is destroyed or its + parameters are modified, and should not be modified + directly. Use ::cuGraphBatchMemOpNodeSetParams to update the + parameters of this node. + + \param hNode - Node to get the parameters for + \param nodeParams_out - Pointer to return the parameters + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuStreamBatchMemOp, + ::cuGraphAddBatchMemOpNode, + ::cuGraphBatchMemOpNodeSetParams*/ + fn cuGraphBatchMemOpNodeGetParams( + hNode: cuda_types::CUgraphNode, + nodeParams_out: *mut cuda_types::CUDA_BATCH_MEM_OP_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Sets a batch mem op node's parameters + + Sets the parameters of batch mem op node \p hNode to \p nodeParams. + + The paramArray inside \p nodeParams is copied and therefore it can be + freed after the call returns. + + \param hNode - Node to set the parameters for + \param nodeParams - Parameters to copy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_OUT_OF_MEMORY + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeSetParams, + ::cuStreamBatchMemOp, + ::cuGraphAddBatchMemOpNode, + ::cuGraphBatchMemOpNodeGetParams*/ + fn cuGraphBatchMemOpNodeSetParams( + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_BATCH_MEM_OP_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Sets the parameters for a batch mem op node in the given graphExec + + Sets the parameters of a batch mem op node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + + The following fields on operations may be modified on an executable graph: + + op.waitValue.address + op.waitValue.value[64] + op.waitValue.flags bits corresponding to wait type (i.e. CU_STREAM_WAIT_VALUE_FLUSH bit cannot be modified) + op.writeValue.address + op.writeValue.value[64] + + Other fields, such as the context, count or type of operations, and other types of operations such as membars, + may not be modified. + + \p hNode must not have been removed from the original graph. + + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. + + The paramArray inside \p nodeParams is copied and therefore it can be + freed after the call returns. + + \param hGraphExec - The executable graph in which to set the specified node + \param hNode - Batch mem op node from the graph from which graphExec was instantiated + \param nodeParams - Updated Parameters to set + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphExecNodeSetParams, + ::cuStreamBatchMemOp, + ::cuGraphAddBatchMemOpNode, + ::cuGraphBatchMemOpNodeGetParams, + ::cuGraphBatchMemOpNodeSetParams, + ::cuGraphInstantiate*/ + fn cuGraphExecBatchMemOpNodeSetParams( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_BATCH_MEM_OP_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Creates an allocation node and adds it to a graph + + Creates a new allocation node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p nodeParams. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. A handle + to the new node will be returned in \p phGraphNode. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + \param nodeParams - Parameters for the node + + When ::cuGraphAddMemAllocNode creates an allocation node, it returns the address of the allocation in + \p nodeParams.dptr. The allocation's address remains fixed across instantiations and launches. + + If the allocation is freed in the same graph, by creating a free node using ::cuGraphAddMemFreeNode, + the allocation can be accessed by nodes ordered after the allocation node but before the free node. + These allocations cannot be freed outside the owning graph, and they can only be freed once in the + owning graph. + + If the allocation is not freed in the same graph, then it can be accessed not only by nodes in the + graph which are ordered after the allocation node, but also by stream operations ordered after the + graph's execution but before the allocation is freed. + + Allocations which are not freed in the same graph can be freed by: + - passing the allocation to ::cuMemFreeAsync or ::cuMemFree; + - launching a graph with a free node for that allocation; or + - specifying ::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH during instantiation, which makes + each launch behave as though it called ::cuMemFreeAsync for every unfreed allocation. + + It is not possible to free an allocation in both the owning graph and another graph. If the allocation + is freed in the same graph, a free node cannot be added to another graph. If the allocation is freed + in another graph, a free node can no longer be added to the owning graph. + + The following restrictions apply to graphs which contain allocation and/or memory free nodes: + - Nodes and edges of the graph cannot be deleted. + - The graph cannot be used in a child node. + - Only one instantiation of the graph may exist at any point in time. + - The graph cannot be cloned. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuGraphAddMemFreeNode, + ::cuGraphMemAllocNodeGetParams, + ::cuDeviceGraphMemTrim, + ::cuDeviceGetGraphMemAttribute, + ::cuDeviceSetGraphMemAttribute, + ::cuMemAllocAsync, + ::cuMemFreeAsync, + ::cuGraphCreate, + ::cuGraphDestroyNode, + ::cuGraphAddChildGraphNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddEventRecordNode, + ::cuGraphAddEventWaitNode, + ::cuGraphAddExternalSemaphoresSignalNode, + ::cuGraphAddExternalSemaphoresWaitNode, + ::cuGraphAddKernelNode, + ::cuGraphAddMemcpyNode, + ::cuGraphAddMemsetNode*/ + fn cuGraphAddMemAllocNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + nodeParams: *mut cuda_types::CUDA_MEM_ALLOC_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Returns a memory alloc node's parameters + + Returns the parameters of a memory alloc node \p hNode in \p params_out. + The \p poolProps and \p accessDescs returned in \p params_out, are owned by the + node. This memory remains valid until the node is destroyed. The returned + parameters must not be modified. + + \param hNode - Node to get the parameters for + \param params_out - Pointer to return the parameters + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddMemAllocNode, + ::cuGraphMemFreeNodeGetParams*/ + fn cuGraphMemAllocNodeGetParams( + hNode: cuda_types::CUgraphNode, + params_out: *mut cuda_types::CUDA_MEM_ALLOC_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Creates a memory free node and adds it to a graph + + Creates a new memory free node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p nodeParams. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. A handle + to the new node will be returned in \p phGraphNode. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + \param dptr - Address of memory to free + + ::cuGraphAddMemFreeNode will return ::CUDA_ERROR_INVALID_VALUE if the user attempts to free: + - an allocation twice in the same graph. + - an address that was not returned by an allocation node. + - an invalid address. + + The following restrictions apply to graphs which contain allocation and/or memory free nodes: + - Nodes and edges of the graph cannot be deleted. + - The graph cannot be used in a child node. + - Only one instantiation of the graph may exist at any point in time. + - The graph cannot be cloned. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuGraphAddMemAllocNode, + ::cuGraphMemFreeNodeGetParams, + ::cuDeviceGraphMemTrim, + ::cuDeviceGetGraphMemAttribute, + ::cuDeviceSetGraphMemAttribute, + ::cuMemAllocAsync, + ::cuMemFreeAsync, + ::cuGraphCreate, + ::cuGraphDestroyNode, + ::cuGraphAddChildGraphNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddEventRecordNode, + ::cuGraphAddEventWaitNode, + ::cuGraphAddExternalSemaphoresSignalNode, + ::cuGraphAddExternalSemaphoresWaitNode, + ::cuGraphAddKernelNode, + ::cuGraphAddMemcpyNode, + ::cuGraphAddMemsetNode*/ + fn cuGraphAddMemFreeNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + dptr: cuda_types::CUdeviceptr, + ) -> cuda_types::CUresult; + /** \brief Returns a memory free node's parameters + + Returns the address of a memory free node \p hNode in \p dptr_out. + + \param hNode - Node to get the parameters for + \param dptr_out - Pointer to return the device address + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddMemFreeNode, + ::cuGraphMemAllocNodeGetParams*/ + fn cuGraphMemFreeNodeGetParams( + hNode: cuda_types::CUgraphNode, + dptr_out: *mut cuda_types::CUdeviceptr, + ) -> cuda_types::CUresult; + /** \brief Free unused memory that was cached on the specified device for use with graphs back to the OS. + + Blocks which are not in use by a graph that is either currently executing or scheduled to execute are + freed back to the operating system. + + \param device - The device for which cached memory should be freed. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_DEVICE + + \sa + ::cuGraphAddMemAllocNode, + ::cuGraphAddMemFreeNode, + ::cuDeviceSetGraphMemAttribute, + ::cuDeviceGetGraphMemAttribute*/ + fn cuDeviceGraphMemTrim(device: cuda_types::CUdevice) -> cuda_types::CUresult; + /** \brief Query asynchronous allocation attributes related to graphs + + Valid attributes are: + + - ::CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT: Amount of memory, in bytes, currently associated with graphs + - ::CU_GRAPH_MEM_ATTR_USED_MEM_HIGH: High watermark of memory, in bytes, associated with graphs since the + last time it was reset. High watermark can only be reset to zero. + - ::CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT: Amount of memory, in bytes, currently allocated for use by + the CUDA graphs asynchronous allocator. + - ::CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH: High watermark of memory, in bytes, currently allocated for use by + the CUDA graphs asynchronous allocator. + + \param device - Specifies the scope of the query + \param attr - attribute to get + \param value - retrieved value + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_DEVICE + + \sa + ::cuDeviceSetGraphMemAttribute, + ::cuGraphAddMemAllocNode, + ::cuGraphAddMemFreeNode*/ + fn cuDeviceGetGraphMemAttribute( + device: cuda_types::CUdevice, + attr: cuda_types::CUgraphMem_attribute, + value: *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Set asynchronous allocation attributes related to graphs + + Valid attributes are: + + - ::CU_GRAPH_MEM_ATTR_USED_MEM_HIGH: High watermark of memory, in bytes, associated with graphs since the + last time it was reset. High watermark can only be reset to zero. + - ::CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH: High watermark of memory, in bytes, currently allocated for use by + the CUDA graphs asynchronous allocator. + + \param device - Specifies the scope of the query + \param attr - attribute to get + \param value - pointer to value to set + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_DEVICE + + \sa + ::cuDeviceGetGraphMemAttribute, + ::cuGraphAddMemAllocNode, + ::cuGraphAddMemFreeNode*/ + fn cuDeviceSetGraphMemAttribute( + device: cuda_types::CUdevice, + attr: cuda_types::CUgraphMem_attribute, + value: *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Clones a graph + + This function creates a copy of \p originalGraph and returns it in \p phGraphClone. + All parameters are copied into the cloned graph. The original graph may be modified + after this call without affecting the clone. + + Child graph nodes in the original graph are recursively copied into the clone. + + \param phGraphClone - Returns newly created cloned graph + \param originalGraph - Graph to clone + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphCreate, + ::cuGraphNodeFindInClone*/ + fn cuGraphClone( + phGraphClone: *mut cuda_types::CUgraph, + originalGraph: cuda_types::CUgraph, + ) -> cuda_types::CUresult; + /** \brief Finds a cloned version of a node + + This function returns the node in \p hClonedGraph corresponding to \p hOriginalNode + in the original graph. + + \p hClonedGraph must have been cloned from \p hOriginalGraph via ::cuGraphClone. + \p hOriginalNode must have been in \p hOriginalGraph at the time of the call to + ::cuGraphClone, and the corresponding cloned node in \p hClonedGraph must not have + been removed. The cloned node is then returned via \p phClonedNode. + + \param phNode - Returns handle to the cloned node + \param hOriginalNode - Handle to the original node + \param hClonedGraph - Cloned graph to query + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphClone*/ + fn cuGraphNodeFindInClone( + phNode: *mut cuda_types::CUgraphNode, + hOriginalNode: cuda_types::CUgraphNode, + hClonedGraph: cuda_types::CUgraph, + ) -> cuda_types::CUresult; + /** \brief Returns a node's type + + Returns the node type of \p hNode in \p type. + + \param hNode - Node to query + \param type - Pointer to return the node type + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphGetNodes, + ::cuGraphGetRootNodes, + ::cuGraphChildGraphNodeGetGraph, + ::cuGraphKernelNodeGetParams, + ::cuGraphKernelNodeSetParams, + ::cuGraphHostNodeGetParams, + ::cuGraphHostNodeSetParams, + ::cuGraphMemcpyNodeGetParams, + ::cuGraphMemcpyNodeSetParams, + ::cuGraphMemsetNodeGetParams, + ::cuGraphMemsetNodeSetParams*/ + fn cuGraphNodeGetType( + hNode: cuda_types::CUgraphNode, + type_: *mut cuda_types::CUgraphNodeType, + ) -> cuda_types::CUresult; + /** \brief Returns a graph's nodes + + Returns a list of \p hGraph's nodes. \p nodes may be NULL, in which case this + function will return the number of nodes in \p numNodes. Otherwise, + \p numNodes entries will be filled in. If \p numNodes is higher than the actual + number of nodes, the remaining entries in \p nodes will be set to NULL, and the + number of nodes actually obtained will be returned in \p numNodes. + + \param hGraph - Graph to query + \param nodes - Pointer to return the nodes + \param numNodes - See description + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphCreate, + ::cuGraphGetRootNodes, + ::cuGraphGetEdges, + ::cuGraphNodeGetType, + ::cuGraphNodeGetDependencies, + ::cuGraphNodeGetDependentNodes*/ + fn cuGraphGetNodes( + hGraph: cuda_types::CUgraph, + nodes: *mut cuda_types::CUgraphNode, + numNodes: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Returns a graph's root nodes + + Returns a list of \p hGraph's root nodes. \p rootNodes may be NULL, in which case this + function will return the number of root nodes in \p numRootNodes. Otherwise, + \p numRootNodes entries will be filled in. If \p numRootNodes is higher than the actual + number of root nodes, the remaining entries in \p rootNodes will be set to NULL, and the + number of nodes actually obtained will be returned in \p numRootNodes. + + \param hGraph - Graph to query + \param rootNodes - Pointer to return the root nodes + \param numRootNodes - See description + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphCreate, + ::cuGraphGetNodes, + ::cuGraphGetEdges, + ::cuGraphNodeGetType, + ::cuGraphNodeGetDependencies, + ::cuGraphNodeGetDependentNodes*/ + fn cuGraphGetRootNodes( + hGraph: cuda_types::CUgraph, + rootNodes: *mut cuda_types::CUgraphNode, + numRootNodes: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Returns a graph's dependency edges + + Returns a list of \p hGraph's dependency edges. Edges are returned via corresponding + indices in \p from and \p to; that is, the node in \p to[i] has a dependency on the + node in \p from[i]. \p from and \p to may both be NULL, in which + case this function only returns the number of edges in \p numEdges. Otherwise, + \p numEdges entries will be filled in. If \p numEdges is higher than the actual + number of edges, the remaining entries in \p from and \p to will be set to NULL, and + the number of edges actually returned will be written to \p numEdges. + + \param hGraph - Graph to get the edges from + \param from - Location to return edge endpoints + \param to - Location to return edge endpoints + \param numEdges - See description + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphGetNodes, + ::cuGraphGetRootNodes, + ::cuGraphAddDependencies, + ::cuGraphRemoveDependencies, + ::cuGraphNodeGetDependencies, + ::cuGraphNodeGetDependentNodes*/ + fn cuGraphGetEdges( + hGraph: cuda_types::CUgraph, + from: *mut cuda_types::CUgraphNode, + to: *mut cuda_types::CUgraphNode, + numEdges: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Returns a graph's dependency edges (12.3+) + + Returns a list of \p hGraph's dependency edges. Edges are returned via corresponding + indices in \p from, \p to and \p edgeData; that is, the node in \p to[i] has a + dependency on the node in \p from[i] with data \p edgeData[i]. \p from and \p to may + both be NULL, in which case this function only returns the number of edges in + \p numEdges. Otherwise, \p numEdges entries will be filled in. If \p numEdges is higher + than the actual number of edges, the remaining entries in \p from and \p to will be + set to NULL, and the number of edges actually returned will be written to \p numEdges. + \p edgeData may alone be NULL, in which case the edges must all have default (zeroed) + edge data. Attempting a lossy query via NULL \p edgeData will result in + ::CUDA_ERROR_LOSSY_QUERY. If \p edgeData is non-NULL then \p from and \p to must be + as well. + + \param hGraph - Graph to get the edges from + \param from - Location to return edge endpoints + \param to - Location to return edge endpoints + \param edgeData - Optional location to return edge data + \param numEdges - See description + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_LOSSY_QUERY, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphGetNodes, + ::cuGraphGetRootNodes, + ::cuGraphAddDependencies, + ::cuGraphRemoveDependencies, + ::cuGraphNodeGetDependencies, + ::cuGraphNodeGetDependentNodes*/ + fn cuGraphGetEdges_v2( + hGraph: cuda_types::CUgraph, + from: *mut cuda_types::CUgraphNode, + to: *mut cuda_types::CUgraphNode, + edgeData: *mut cuda_types::CUgraphEdgeData, + numEdges: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Returns a node's dependencies + + Returns a list of \p node's dependencies. \p dependencies may be NULL, in which case this + function will return the number of dependencies in \p numDependencies. Otherwise, + \p numDependencies entries will be filled in. If \p numDependencies is higher than the actual + number of dependencies, the remaining entries in \p dependencies will be set to NULL, and the + number of nodes actually obtained will be returned in \p numDependencies. + + \param hNode - Node to query + \param dependencies - Pointer to return the dependencies + \param numDependencies - See description + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeGetDependentNodes, + ::cuGraphGetNodes, + ::cuGraphGetRootNodes, + ::cuGraphGetEdges, + ::cuGraphAddDependencies, + ::cuGraphRemoveDependencies*/ + fn cuGraphNodeGetDependencies( + hNode: cuda_types::CUgraphNode, + dependencies: *mut cuda_types::CUgraphNode, + numDependencies: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Returns a node's dependencies (12.3+) + + Returns a list of \p node's dependencies. \p dependencies may be NULL, in which case this + function will return the number of dependencies in \p numDependencies. Otherwise, + \p numDependencies entries will be filled in. If \p numDependencies is higher than the actual + number of dependencies, the remaining entries in \p dependencies will be set to NULL, and the + number of nodes actually obtained will be returned in \p numDependencies. + + Note that if an edge has non-zero (non-default) edge data and \p edgeData is NULL, + this API will return ::CUDA_ERROR_LOSSY_QUERY. If \p edgeData is non-NULL, then + \p dependencies must be as well. + + \param hNode - Node to query + \param dependencies - Pointer to return the dependencies + \param edgeData - Optional array to return edge data for each dependency + \param numDependencies - See description + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_LOSSY_QUERY, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeGetDependentNodes, + ::cuGraphGetNodes, + ::cuGraphGetRootNodes, + ::cuGraphGetEdges, + ::cuGraphAddDependencies, + ::cuGraphRemoveDependencies*/ + fn cuGraphNodeGetDependencies_v2( + hNode: cuda_types::CUgraphNode, + dependencies: *mut cuda_types::CUgraphNode, + edgeData: *mut cuda_types::CUgraphEdgeData, + numDependencies: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Returns a node's dependent nodes + + Returns a list of \p node's dependent nodes. \p dependentNodes may be NULL, in which + case this function will return the number of dependent nodes in \p numDependentNodes. + Otherwise, \p numDependentNodes entries will be filled in. If \p numDependentNodes is + higher than the actual number of dependent nodes, the remaining entries in + \p dependentNodes will be set to NULL, and the number of nodes actually obtained will + be returned in \p numDependentNodes. + + \param hNode - Node to query + \param dependentNodes - Pointer to return the dependent nodes + \param numDependentNodes - See description + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeGetDependencies, + ::cuGraphGetNodes, + ::cuGraphGetRootNodes, + ::cuGraphGetEdges, + ::cuGraphAddDependencies, + ::cuGraphRemoveDependencies*/ + fn cuGraphNodeGetDependentNodes( + hNode: cuda_types::CUgraphNode, + dependentNodes: *mut cuda_types::CUgraphNode, + numDependentNodes: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Returns a node's dependent nodes (12.3+) + + Returns a list of \p node's dependent nodes. \p dependentNodes may be NULL, in which + case this function will return the number of dependent nodes in \p numDependentNodes. + Otherwise, \p numDependentNodes entries will be filled in. If \p numDependentNodes is + higher than the actual number of dependent nodes, the remaining entries in + \p dependentNodes will be set to NULL, and the number of nodes actually obtained will + be returned in \p numDependentNodes. + + Note that if an edge has non-zero (non-default) edge data and \p edgeData is NULL, + this API will return ::CUDA_ERROR_LOSSY_QUERY. If \p edgeData is non-NULL, then + \p dependentNodes must be as well. + + \param hNode - Node to query + \param dependentNodes - Pointer to return the dependent nodes + \param edgeData - Optional pointer to return edge data for dependent nodes + \param numDependentNodes - See description + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_LOSSY_QUERY, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeGetDependencies, + ::cuGraphGetNodes, + ::cuGraphGetRootNodes, + ::cuGraphGetEdges, + ::cuGraphAddDependencies, + ::cuGraphRemoveDependencies*/ + fn cuGraphNodeGetDependentNodes_v2( + hNode: cuda_types::CUgraphNode, + dependentNodes: *mut cuda_types::CUgraphNode, + edgeData: *mut cuda_types::CUgraphEdgeData, + numDependentNodes: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Adds dependency edges to a graph + + The number of dependencies to be added is defined by \p numDependencies + Elements in \p from and \p to at corresponding indices define a dependency. + Each node in \p from and \p to must belong to \p hGraph. + + If \p numDependencies is 0, elements in \p from and \p to will be ignored. + Specifying an existing dependency will return an error. + + \param hGraph - Graph to which dependencies are added + \param from - Array of nodes that provide the dependencies + \param to - Array of dependent nodes + \param numDependencies - Number of dependencies to be added + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphRemoveDependencies, + ::cuGraphGetEdges, + ::cuGraphNodeGetDependencies, + ::cuGraphNodeGetDependentNodes*/ + fn cuGraphAddDependencies( + hGraph: cuda_types::CUgraph, + from: *const cuda_types::CUgraphNode, + to: *const cuda_types::CUgraphNode, + numDependencies: usize, + ) -> cuda_types::CUresult; + /** \brief Adds dependency edges to a graph (12.3+) + + The number of dependencies to be added is defined by \p numDependencies + Elements in \p from and \p to at corresponding indices define a dependency. + Each node in \p from and \p to must belong to \p hGraph. + + If \p numDependencies is 0, elements in \p from and \p to will be ignored. + Specifying an existing dependency will return an error. + + \param hGraph - Graph to which dependencies are added + \param from - Array of nodes that provide the dependencies + \param to - Array of dependent nodes + \param edgeData - Optional array of edge data. If NULL, default (zeroed) edge data is assumed. + \param numDependencies - Number of dependencies to be added + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphRemoveDependencies, + ::cuGraphGetEdges, + ::cuGraphNodeGetDependencies, + ::cuGraphNodeGetDependentNodes*/ + fn cuGraphAddDependencies_v2( + hGraph: cuda_types::CUgraph, + from: *const cuda_types::CUgraphNode, + to: *const cuda_types::CUgraphNode, + edgeData: *const cuda_types::CUgraphEdgeData, + numDependencies: usize, + ) -> cuda_types::CUresult; + /** \brief Removes dependency edges from a graph + + The number of \p dependencies to be removed is defined by \p numDependencies. + Elements in \p from and \p to at corresponding indices define a dependency. + Each node in \p from and \p to must belong to \p hGraph. + + If \p numDependencies is 0, elements in \p from and \p to will be ignored. + Specifying a non-existing dependency will return an error. + + Dependencies cannot be removed from graphs which contain allocation or free nodes. + Any attempt to do so will return an error. + + \param hGraph - Graph from which to remove dependencies + \param from - Array of nodes that provide the dependencies + \param to - Array of dependent nodes + \param numDependencies - Number of dependencies to be removed + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddDependencies, + ::cuGraphGetEdges, + ::cuGraphNodeGetDependencies, + ::cuGraphNodeGetDependentNodes*/ + fn cuGraphRemoveDependencies( + hGraph: cuda_types::CUgraph, + from: *const cuda_types::CUgraphNode, + to: *const cuda_types::CUgraphNode, + numDependencies: usize, + ) -> cuda_types::CUresult; + /** \brief Removes dependency edges from a graph (12.3+) + + The number of \p dependencies to be removed is defined by \p numDependencies. + Elements in \p from and \p to at corresponding indices define a dependency. + Each node in \p from and \p to must belong to \p hGraph. + + If \p numDependencies is 0, elements in \p from and \p to will be ignored. + Specifying an edge that does not exist in the graph, with data matching + \p edgeData, results in an error. \p edgeData is nullable, which is equivalent + to passing default (zeroed) data for each edge. + + Dependencies cannot be removed from graphs which contain allocation or free nodes. + Any attempt to do so will return an error. + + \param hGraph - Graph from which to remove dependencies + \param from - Array of nodes that provide the dependencies + \param to - Array of dependent nodes + \param edgeData - Optional array of edge data. If NULL, edge data is assumed to + be default (zeroed). + \param numDependencies - Number of dependencies to be removed + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddDependencies, + ::cuGraphGetEdges, + ::cuGraphNodeGetDependencies, + ::cuGraphNodeGetDependentNodes*/ + fn cuGraphRemoveDependencies_v2( + hGraph: cuda_types::CUgraph, + from: *const cuda_types::CUgraphNode, + to: *const cuda_types::CUgraphNode, + edgeData: *const cuda_types::CUgraphEdgeData, + numDependencies: usize, + ) -> cuda_types::CUresult; + /** \brief Remove a node from the graph + + Removes \p hNode from its graph. This operation also severs any dependencies of other nodes + on \p hNode and vice versa. + + Nodes which belong to a graph which contains allocation or free nodes cannot be destroyed. + Any attempt to do so will return an error. + + \param hNode - Node to remove + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddChildGraphNode, + ::cuGraphAddEmptyNode, + ::cuGraphAddKernelNode, + ::cuGraphAddHostNode, + ::cuGraphAddMemcpyNode, + ::cuGraphAddMemsetNode*/ + fn cuGraphDestroyNode(hNode: cuda_types::CUgraphNode) -> cuda_types::CUresult; + /** \brief Creates an executable graph from a graph + + Instantiates \p hGraph as an executable graph. The graph is validated for any + structural constraints or intra-node constraints which were not previously + validated. If instantiation is successful, a handle to the instantiated graph + is returned in \p phGraphExec. + + The \p flags parameter controls the behavior of instantiation and subsequent + graph launches. Valid flags are: + + - ::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH, which configures a + graph containing memory allocation nodes to automatically free any + unfreed memory allocations before the graph is relaunched. + + - ::CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH, which configures the graph for launch + from the device. If this flag is passed, the executable graph handle returned can be + used to launch the graph from both the host and device. This flag can only be used + on platforms which support unified addressing. This flag cannot be used in + conjunction with ::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH. + + - ::CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY, which causes the graph + to use the priorities from the per-node attributes rather than the priority + of the launch stream during execution. Note that priorities are only available + on kernel nodes, and are copied from stream priority during stream capture. + + If \p hGraph contains any allocation or free nodes, there can be at most one + executable graph in existence for that graph at a time. An attempt to instantiate + a second executable graph before destroying the first with ::cuGraphExecDestroy + will result in an error. + The same also applies if \p hGraph contains any device-updatable kernel nodes. + + If \p hGraph contains kernels which call device-side cudaGraphLaunch() from multiple + contexts, this will result in an error. + + Graphs instantiated for launch on the device have additional restrictions which do not + apply to host graphs: + + - The graph's nodes must reside on a single context. + - The graph can only contain kernel nodes, memcpy nodes, memset nodes, and child graph nodes. + - The graph cannot be empty and must contain at least one kernel, memcpy, or memset node. + Operation-specific restrictions are outlined below. + - Kernel nodes: + - Use of CUDA Dynamic Parallelism is not permitted. + - Cooperative launches are permitted as long as MPS is not in use. + - Memcpy nodes: + - Only copies involving device memory and/or pinned device-mapped host memory are permitted. + - Copies involving CUDA arrays are not permitted. + - Both operands must be accessible from the current context, and the current context must + match the context of other nodes in the graph. + + \param phGraphExec - Returns instantiated graph + \param hGraph - Graph to instantiate + \param flags - Flags to control instantiation. See ::CUgraphInstantiate_flags. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphInstantiate, + ::cuGraphCreate, + ::cuGraphUpload, + ::cuGraphLaunch, + ::cuGraphExecDestroy*/ + fn cuGraphInstantiateWithFlags( + phGraphExec: *mut cuda_types::CUgraphExec, + hGraph: cuda_types::CUgraph, + flags: ::core::ffi::c_ulonglong, + ) -> cuda_types::CUresult; + /** \brief Creates an executable graph from a graph + + Instantiates \p hGraph as an executable graph according to the \p instantiateParams structure. + The graph is validated for any structural constraints or intra-node constraints + which were not previously validated. If instantiation is successful, a handle to + the instantiated graph is returned in \p phGraphExec. + + \p instantiateParams controls the behavior of instantiation and subsequent + graph launches, as well as returning more detailed information in the event of an error. + ::CUDA_GRAPH_INSTANTIATE_PARAMS is defined as: + + \code +typedef struct { +cuuint64_t flags; +CUstream hUploadStream; +CUgraphNode hErrNode_out; +CUgraphInstantiateResult result_out; +} CUDA_GRAPH_INSTANTIATE_PARAMS; + \endcode + + The \p flags field controls the behavior of instantiation and subsequent + graph launches. Valid flags are: + + - ::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH, which configures a + graph containing memory allocation nodes to automatically free any + unfreed memory allocations before the graph is relaunched. + + - ::CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD, which will perform an upload of the graph + into \p hUploadStream once the graph has been instantiated. + + - ::CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH, which configures the graph for launch + from the device. If this flag is passed, the executable graph handle returned can be + used to launch the graph from both the host and device. This flag can only be used + on platforms which support unified addressing. This flag cannot be used in + conjunction with ::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH. + + - ::CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY, which causes the graph + to use the priorities from the per-node attributes rather than the priority + of the launch stream during execution. Note that priorities are only available + on kernel nodes, and are copied from stream priority during stream capture. + + If \p hGraph contains any allocation or free nodes, there can be at most one + executable graph in existence for that graph at a time. An attempt to instantiate a + second executable graph before destroying the first with ::cuGraphExecDestroy will + result in an error. + The same also applies if \p hGraph contains any device-updatable kernel nodes. + + If \p hGraph contains kernels which call device-side cudaGraphLaunch() from multiple + contexts, this will result in an error. + + Graphs instantiated for launch on the device have additional restrictions which do not + apply to host graphs: + + - The graph's nodes must reside on a single context. + - The graph can only contain kernel nodes, memcpy nodes, memset nodes, and child graph nodes. + - The graph cannot be empty and must contain at least one kernel, memcpy, or memset node. + Operation-specific restrictions are outlined below. + - Kernel nodes: + - Use of CUDA Dynamic Parallelism is not permitted. + - Cooperative launches are permitted as long as MPS is not in use. + - Memcpy nodes: + - Only copies involving device memory and/or pinned device-mapped host memory are permitted. + - Copies involving CUDA arrays are not permitted. + - Both operands must be accessible from the current context, and the current context must + match the context of other nodes in the graph. + + In the event of an error, the \p result_out and \p hErrNode_out fields will contain more + information about the nature of the error. Possible error reporting includes: + + - ::CUDA_GRAPH_INSTANTIATE_ERROR, if passed an invalid value or if an unexpected error occurred + which is described by the return value of the function. \p hErrNode_out will be set to NULL. + - ::CUDA_GRAPH_INSTANTIATE_INVALID_STRUCTURE, if the graph structure is invalid. \p hErrNode_out + will be set to one of the offending nodes. + - ::CUDA_GRAPH_INSTANTIATE_NODE_OPERATION_NOT_SUPPORTED, if the graph is instantiated for device + launch but contains a node of an unsupported node type, or a node which performs unsupported + operations, such as use of CUDA dynamic parallelism within a kernel node. \p hErrNode_out will + be set to this node. + - ::CUDA_GRAPH_INSTANTIATE_MULTIPLE_CTXS_NOT_SUPPORTED, if the graph is instantiated for device + launch but a node’s context differs from that of another node. This error can also be returned + if a graph is not instantiated for device launch and it contains kernels which call device-side + cudaGraphLaunch() from multiple contexts. \p hErrNode_out will be set to this node. + + If instantiation is successful, \p result_out will be set to ::CUDA_GRAPH_INSTANTIATE_SUCCESS, + and \p hErrNode_out will be set to NULL. + + \param phGraphExec - Returns instantiated graph + \param hGraph - Graph to instantiate + \param instantiateParams - Instantiation parameters + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphCreate, + ::cuGraphInstantiate, + ::cuGraphExecDestroy*/ + fn cuGraphInstantiateWithParams_ptsz( + phGraphExec: *mut cuda_types::CUgraphExec, + hGraph: cuda_types::CUgraph, + instantiateParams: *mut cuda_types::CUDA_GRAPH_INSTANTIATE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Query the instantiation flags of an executable graph + + Returns the flags that were passed to instantiation for the given executable graph. + ::CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD will not be returned by this API as it does + not affect the resulting executable graph. + + \param hGraphExec - The executable graph to query + \param flags - Returns the instantiation flags + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphInstantiate, + ::cuGraphInstantiateWithParams*/ + fn cuGraphExecGetFlags( + hGraphExec: cuda_types::CUgraphExec, + flags: *mut cuda_types::cuuint64_t, + ) -> cuda_types::CUresult; + /** \brief Sets the parameters for a kernel node in the given graphExec + + Sets the parameters of a kernel node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + + \p hNode must not have been removed from the original graph. All \p nodeParams + fields may change, but the following restrictions apply to \p func updates: + + - The owning context of the function cannot change. + - A node whose function originally did not use CUDA dynamic parallelism cannot be updated + to a function which uses CDP + - A node whose function originally did not make device-side update calls cannot be updated + to a function which makes device-side update calls. + - If \p hGraphExec was not instantiated for device launch, a node whose function originally + did not use device-side cudaGraphLaunch() cannot be updated to a function which uses + device-side cudaGraphLaunch() unless the node resides on the same context as nodes which + contained such calls at instantiate-time. If no such calls were present at instantiation, + these updates cannot be performed at all. + + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. + + If \p hNode is a device-updatable kernel node, the next upload/launch of \p hGraphExec + will overwrite any previous device-side updates. Additionally, applying host updates to a + device-updatable kernel node while it is being updated from the device will result in + undefined behavior. + + \param hGraphExec - The executable graph in which to set the specified node + \param hNode - kernel node from the graph from which graphExec was instantiated + \param nodeParams - Updated Parameters to set + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphExecNodeSetParams, + ::cuGraphAddKernelNode, + ::cuGraphKernelNodeSetParams, + ::cuGraphExecMemcpyNodeSetParams, + ::cuGraphExecMemsetNodeSetParams, + ::cuGraphExecHostNodeSetParams, + ::cuGraphExecChildGraphNodeSetParams, + ::cuGraphExecEventRecordNodeSetEvent, + ::cuGraphExecEventWaitNodeSetEvent, + ::cuGraphExecExternalSemaphoresSignalNodeSetParams, + ::cuGraphExecExternalSemaphoresWaitNodeSetParams, + ::cuGraphExecUpdate, + ::cuGraphInstantiate*/ + fn cuGraphExecKernelNodeSetParams_v2( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_KERNEL_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Sets the parameters for a memcpy node in the given graphExec. + + Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had + contained \p copyParams at instantiation. hNode must remain in the graph which was + used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored. + + The source and destination memory in \p copyParams must be allocated from the same + contexts as the original source and destination memory. Both the instantiation-time + memory operands and the memory operands in \p copyParams must be 1-dimensional. + Zero-length operations are not supported. + + The modifications only affect future launches of \p hGraphExec. Already enqueued + or running launches of \p hGraphExec are not affected by this call. hNode is also + not modified by this call. + + Returns CUDA_ERROR_INVALID_VALUE if the memory operands' mappings changed or + either the original or new memory operands are multidimensional. + + \param hGraphExec - The executable graph in which to set the specified node + \param hNode - Memcpy node from the graph which was used to instantiate graphExec + \param copyParams - The updated parameters to set + \param ctx - Context on which to run the node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphExecNodeSetParams, + ::cuGraphAddMemcpyNode, + ::cuGraphMemcpyNodeSetParams, + ::cuGraphExecKernelNodeSetParams, + ::cuGraphExecMemsetNodeSetParams, + ::cuGraphExecHostNodeSetParams, + ::cuGraphExecChildGraphNodeSetParams, + ::cuGraphExecEventRecordNodeSetEvent, + ::cuGraphExecEventWaitNodeSetEvent, + ::cuGraphExecExternalSemaphoresSignalNodeSetParams, + ::cuGraphExecExternalSemaphoresWaitNodeSetParams, + ::cuGraphExecUpdate, + ::cuGraphInstantiate*/ + fn cuGraphExecMemcpyNodeSetParams( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + copyParams: *const cuda_types::CUDA_MEMCPY3D, + ctx: cuda_types::CUcontext, + ) -> cuda_types::CUresult; + /** \brief Sets the parameters for a memset node in the given graphExec. + + Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had + contained \p memsetParams at instantiation. hNode must remain in the graph which was + used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored. + + The destination memory in \p memsetParams must be allocated from the same + contexts as the original destination memory. Both the instantiation-time + memory operand and the memory operand in \p memsetParams must be 1-dimensional. + Zero-length operations are not supported. + + The modifications only affect future launches of \p hGraphExec. Already enqueued + or running launches of \p hGraphExec are not affected by this call. hNode is also + not modified by this call. + + Returns CUDA_ERROR_INVALID_VALUE if the memory operand's mappings changed or + either the original or new memory operand are multidimensional. + + \param hGraphExec - The executable graph in which to set the specified node + \param hNode - Memset node from the graph which was used to instantiate graphExec + \param memsetParams - The updated parameters to set + \param ctx - Context on which to run the node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphExecNodeSetParams, + ::cuGraphAddMemsetNode, + ::cuGraphMemsetNodeSetParams, + ::cuGraphExecKernelNodeSetParams, + ::cuGraphExecMemcpyNodeSetParams, + ::cuGraphExecHostNodeSetParams, + ::cuGraphExecChildGraphNodeSetParams, + ::cuGraphExecEventRecordNodeSetEvent, + ::cuGraphExecEventWaitNodeSetEvent, + ::cuGraphExecExternalSemaphoresSignalNodeSetParams, + ::cuGraphExecExternalSemaphoresWaitNodeSetParams, + ::cuGraphExecUpdate, + ::cuGraphInstantiate*/ + fn cuGraphExecMemsetNodeSetParams( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + memsetParams: *const cuda_types::CUDA_MEMSET_NODE_PARAMS, + ctx: cuda_types::CUcontext, + ) -> cuda_types::CUresult; + /** \brief Sets the parameters for a host node in the given graphExec. + + Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had + contained \p nodeParams at instantiation. hNode must remain in the graph which was + used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored. + + The modifications only affect future launches of \p hGraphExec. Already enqueued + or running launches of \p hGraphExec are not affected by this call. hNode is also + not modified by this call. + + \param hGraphExec - The executable graph in which to set the specified node + \param hNode - Host node from the graph which was used to instantiate graphExec + \param nodeParams - The updated parameters to set + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphExecNodeSetParams, + ::cuGraphAddHostNode, + ::cuGraphHostNodeSetParams, + ::cuGraphExecKernelNodeSetParams, + ::cuGraphExecMemcpyNodeSetParams, + ::cuGraphExecMemsetNodeSetParams, + ::cuGraphExecChildGraphNodeSetParams, + ::cuGraphExecEventRecordNodeSetEvent, + ::cuGraphExecEventWaitNodeSetEvent, + ::cuGraphExecExternalSemaphoresSignalNodeSetParams, + ::cuGraphExecExternalSemaphoresWaitNodeSetParams, + ::cuGraphExecUpdate, + ::cuGraphInstantiate*/ + fn cuGraphExecHostNodeSetParams( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_HOST_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Updates node parameters in the child graph node in the given graphExec. + + Updates the work represented by \p hNode in \p hGraphExec as though the nodes contained + in \p hNode's graph had the parameters contained in \p childGraph's nodes at instantiation. + \p hNode must remain in the graph which was used to instantiate \p hGraphExec. + Changed edges to and from \p hNode are ignored. + + The modifications only affect future launches of \p hGraphExec. Already enqueued + or running launches of \p hGraphExec are not affected by this call. \p hNode is also + not modified by this call. + + The topology of \p childGraph, as well as the node insertion order, must match that + of the graph contained in \p hNode. See ::cuGraphExecUpdate() for a list of restrictions + on what can be updated in an instantiated graph. The update is recursive, so child graph + nodes contained within the top level child graph will also be updated. + + \param hGraphExec - The executable graph in which to set the specified node + \param hNode - Host node from the graph which was used to instantiate graphExec + \param childGraph - The graph supplying the updated parameters + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphExecNodeSetParams, + ::cuGraphAddChildGraphNode, + ::cuGraphChildGraphNodeGetGraph, + ::cuGraphExecKernelNodeSetParams, + ::cuGraphExecMemcpyNodeSetParams, + ::cuGraphExecMemsetNodeSetParams, + ::cuGraphExecHostNodeSetParams, + ::cuGraphExecEventRecordNodeSetEvent, + ::cuGraphExecEventWaitNodeSetEvent, + ::cuGraphExecExternalSemaphoresSignalNodeSetParams, + ::cuGraphExecExternalSemaphoresWaitNodeSetParams, + ::cuGraphExecUpdate, + ::cuGraphInstantiate*/ + fn cuGraphExecChildGraphNodeSetParams( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + childGraph: cuda_types::CUgraph, + ) -> cuda_types::CUresult; + /** \brief Sets the event for an event record node in the given graphExec + + Sets the event of an event record node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. + + \param hGraphExec - The executable graph in which to set the specified node + \param hNode - event record node from the graph from which graphExec was instantiated + \param event - Updated event to use + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphExecNodeSetParams, + ::cuGraphAddEventRecordNode, + ::cuGraphEventRecordNodeGetEvent, + ::cuGraphEventWaitNodeSetEvent, + ::cuEventRecordWithFlags, + ::cuStreamWaitEvent, + ::cuGraphExecKernelNodeSetParams, + ::cuGraphExecMemcpyNodeSetParams, + ::cuGraphExecMemsetNodeSetParams, + ::cuGraphExecHostNodeSetParams, + ::cuGraphExecChildGraphNodeSetParams, + ::cuGraphExecEventWaitNodeSetEvent, + ::cuGraphExecExternalSemaphoresSignalNodeSetParams, + ::cuGraphExecExternalSemaphoresWaitNodeSetParams, + ::cuGraphExecUpdate, + ::cuGraphInstantiate*/ + fn cuGraphExecEventRecordNodeSetEvent( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + event: cuda_types::CUevent, + ) -> cuda_types::CUresult; + /** \brief Sets the event for an event wait node in the given graphExec + + Sets the event of an event wait node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. + + \param hGraphExec - The executable graph in which to set the specified node + \param hNode - event wait node from the graph from which graphExec was instantiated + \param event - Updated event to use + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphExecNodeSetParams, + ::cuGraphAddEventWaitNode, + ::cuGraphEventWaitNodeGetEvent, + ::cuGraphEventRecordNodeSetEvent, + ::cuEventRecordWithFlags, + ::cuStreamWaitEvent, + ::cuGraphExecKernelNodeSetParams, + ::cuGraphExecMemcpyNodeSetParams, + ::cuGraphExecMemsetNodeSetParams, + ::cuGraphExecHostNodeSetParams, + ::cuGraphExecChildGraphNodeSetParams, + ::cuGraphExecEventRecordNodeSetEvent, + ::cuGraphExecExternalSemaphoresSignalNodeSetParams, + ::cuGraphExecExternalSemaphoresWaitNodeSetParams, + ::cuGraphExecUpdate, + ::cuGraphInstantiate*/ + fn cuGraphExecEventWaitNodeSetEvent( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + event: cuda_types::CUevent, + ) -> cuda_types::CUresult; + /** \brief Sets the parameters for an external semaphore signal node in the given graphExec + + Sets the parameters of an external semaphore signal node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + + \p hNode must not have been removed from the original graph. + + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. + + Changing \p nodeParams->numExtSems is not supported. + + \param hGraphExec - The executable graph in which to set the specified node + \param hNode - semaphore signal node from the graph from which graphExec was instantiated + \param nodeParams - Updated Parameters to set + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphExecNodeSetParams, + ::cuGraphAddExternalSemaphoresSignalNode, + ::cuImportExternalSemaphore, + ::cuSignalExternalSemaphoresAsync, + ::cuWaitExternalSemaphoresAsync, + ::cuGraphExecKernelNodeSetParams, + ::cuGraphExecMemcpyNodeSetParams, + ::cuGraphExecMemsetNodeSetParams, + ::cuGraphExecHostNodeSetParams, + ::cuGraphExecChildGraphNodeSetParams, + ::cuGraphExecEventRecordNodeSetEvent, + ::cuGraphExecEventWaitNodeSetEvent, + ::cuGraphExecExternalSemaphoresWaitNodeSetParams, + ::cuGraphExecUpdate, + ::cuGraphInstantiate*/ + fn cuGraphExecExternalSemaphoresSignalNodeSetParams( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_EXT_SEM_SIGNAL_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Sets the parameters for an external semaphore wait node in the given graphExec + + Sets the parameters of an external semaphore wait node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + + \p hNode must not have been removed from the original graph. + + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. + + Changing \p nodeParams->numExtSems is not supported. + + \param hGraphExec - The executable graph in which to set the specified node + \param hNode - semaphore wait node from the graph from which graphExec was instantiated + \param nodeParams - Updated Parameters to set + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphExecNodeSetParams, + ::cuGraphAddExternalSemaphoresWaitNode, + ::cuImportExternalSemaphore, + ::cuSignalExternalSemaphoresAsync, + ::cuWaitExternalSemaphoresAsync, + ::cuGraphExecKernelNodeSetParams, + ::cuGraphExecMemcpyNodeSetParams, + ::cuGraphExecMemsetNodeSetParams, + ::cuGraphExecHostNodeSetParams, + ::cuGraphExecChildGraphNodeSetParams, + ::cuGraphExecEventRecordNodeSetEvent, + ::cuGraphExecEventWaitNodeSetEvent, + ::cuGraphExecExternalSemaphoresSignalNodeSetParams, + ::cuGraphExecUpdate, + ::cuGraphInstantiate*/ + fn cuGraphExecExternalSemaphoresWaitNodeSetParams( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_EXT_SEM_WAIT_NODE_PARAMS, + ) -> cuda_types::CUresult; + /** \brief Enables or disables the specified node in the given graphExec + + Sets \p hNode to be either enabled or disabled. Disabled nodes are functionally equivalent + to empty nodes until they are reenabled. Existing node parameters are not affected by + disabling/enabling the node. + + The node is identified by the corresponding node \p hNode in the non-executable + graph, from which the executable graph was instantiated. + + \p hNode must not have been removed from the original graph. + + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. + + If \p hNode is a device-updatable kernel node, the next upload/launch of \p hGraphExec + will overwrite any previous device-side updates. Additionally, applying host updates to a + device-updatable kernel node while it is being updated from the device will result in + undefined behavior. + + \note Currently only kernel, memset and memcpy nodes are supported. + + \param hGraphExec - The executable graph in which to set the specified node + \param hNode - Node from the graph from which graphExec was instantiated + \param isEnabled - Node is enabled if != 0, otherwise the node is disabled + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeGetEnabled, + ::cuGraphExecUpdate, + ::cuGraphInstantiate + ::cuGraphLaunch*/ + fn cuGraphNodeSetEnabled( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + isEnabled: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Query whether a node in the given graphExec is enabled + + Sets isEnabled to 1 if \p hNode is enabled, or 0 if \p hNode is disabled. + + The node is identified by the corresponding node \p hNode in the non-executable + graph, from which the executable graph was instantiated. + + \p hNode must not have been removed from the original graph. + + \note Currently only kernel, memset and memcpy nodes are supported. + \note This function will not reflect device-side updates for device-updatable kernel nodes. + + \param hGraphExec - The executable graph in which to set the specified node + \param hNode - Node from the graph from which graphExec was instantiated + \param isEnabled - Location to return the enabled status of the node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphNodeSetEnabled, + ::cuGraphExecUpdate, + ::cuGraphInstantiate + ::cuGraphLaunch*/ + fn cuGraphNodeGetEnabled( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + isEnabled: *mut ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Uploads an executable graph in a stream + + Uploads \p hGraphExec to the device in \p hStream without executing it. Uploads of + the same \p hGraphExec will be serialized. Each upload is ordered behind both any + previous work in \p hStream and any previous launches of \p hGraphExec. + Uses memory cached by \p stream to back the allocations owned by \p hGraphExec. + + \param hGraphExec - Executable graph to upload + \param hStream - Stream in which to upload the graph + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphInstantiate, + ::cuGraphLaunch, + ::cuGraphExecDestroy*/ + fn cuGraphUpload_ptsz( + hGraphExec: cuda_types::CUgraphExec, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Launches an executable graph in a stream + + Executes \p hGraphExec in \p hStream. Only one instance of \p hGraphExec may be executing + at a time. Each launch is ordered behind both any previous work in \p hStream + and any previous launches of \p hGraphExec. To execute a graph concurrently, it must be + instantiated multiple times into multiple executable graphs. + + If any allocations created by \p hGraphExec remain unfreed (from a previous launch) and + \p hGraphExec was not instantiated with ::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH, + the launch will fail with ::CUDA_ERROR_INVALID_VALUE. + + \param hGraphExec - Executable graph to launch + \param hStream - Stream in which to launch the graph + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphInstantiate, + ::cuGraphUpload, + ::cuGraphExecDestroy*/ + fn cuGraphLaunch_ptsz( + hGraphExec: cuda_types::CUgraphExec, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Destroys an executable graph + + Destroys the executable graph specified by \p hGraphExec, as well + as all of its executable nodes. If the executable graph is + in-flight, it will not be terminated, but rather freed + asynchronously on completion. + + \param hGraphExec - Executable graph to destroy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphInstantiate, + ::cuGraphUpload, + ::cuGraphLaunch*/ + fn cuGraphExecDestroy(hGraphExec: cuda_types::CUgraphExec) -> cuda_types::CUresult; + /** \brief Destroys a graph + + Destroys the graph specified by \p hGraph, as well as all of its nodes. + + \param hGraph - Graph to destroy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphCreate*/ + fn cuGraphDestroy(hGraph: cuda_types::CUgraph) -> cuda_types::CUresult; + /** \brief Check whether an executable graph can be updated with a graph and perform the update if possible + + Updates the node parameters in the instantiated graph specified by \p hGraphExec with the + node parameters in a topologically identical graph specified by \p hGraph. + + Limitations: + + - Kernel nodes: + - The owning context of the function cannot change. + - A node whose function originally did not use CUDA dynamic parallelism cannot be updated + to a function which uses CDP. + - A node whose function originally did not make device-side update calls cannot be updated + to a function which makes device-side update calls. + - A cooperative node cannot be updated to a non-cooperative node, and vice-versa. + - If the graph was instantiated with CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY, the + priority attribute cannot change. Equality is checked on the originally requested + priority values, before they are clamped to the device's supported range. + - If \p hGraphExec was not instantiated for device launch, a node whose function originally + did not use device-side cudaGraphLaunch() cannot be updated to a function which uses + device-side cudaGraphLaunch() unless the node resides on the same context as nodes which + contained such calls at instantiate-time. If no such calls were present at instantiation, + these updates cannot be performed at all. + - Neither \p hGraph nor \p hGraphExec may contain device-updatable kernel nodes. + - Memset and memcpy nodes: + - The CUDA device(s) to which the operand(s) was allocated/mapped cannot change. + - The source/destination memory must be allocated from the same contexts as the original + source/destination memory. + - Only 1D memsets can be changed. + - Additional memcpy node restrictions: + - Changing either the source or destination memory type(i.e. CU_MEMORYTYPE_DEVICE, + CU_MEMORYTYPE_ARRAY, etc.) is not supported. + - External semaphore wait nodes and record nodes: + - Changing the number of semaphores is not supported. + - Conditional nodes: + - Changing node parameters is not supported. + - Changeing parameters of nodes within the conditional body graph is subject to the rules above. + - Conditional handle flags and default values are updated as part of the graph update. + + Note: The API may add further restrictions in future releases. The return code should always be checked. + + cuGraphExecUpdate sets the result member of \p resultInfo to CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED + under the following conditions: + - The count of nodes directly in \p hGraphExec and \p hGraph differ, in which case resultInfo->errorNode + is set to NULL. + - \p hGraph has more exit nodes than \p hGraph, in which case resultInfo->errorNode is set to one of + the exit nodes in hGraph. + - A node in \p hGraph has a different number of dependencies than the node from \p hGraphExec it is paired with, + in which case resultInfo->errorNode is set to the node from \p hGraph. + - A node in \p hGraph has a dependency that does not match with the corresponding dependency of the paired node + from \p hGraphExec. resultInfo->errorNode will be set to the node from \p hGraph. resultInfo->errorFromNode + will be set to the mismatched dependency. The dependencies are paired based on edge order and a dependency + does not match when the nodes are already paired based on other edges examined in the graph. + + cuGraphExecUpdate sets the result member of \p resultInfo to: + - CU_GRAPH_EXEC_UPDATE_ERROR if passed an invalid value. + - CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED if the graph topology changed + - CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED if the type of a node changed, in which case + \p hErrorNode_out is set to the node from \p hGraph. + - CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE if the function changed in an unsupported + way(see note above), in which case \p hErrorNode_out is set to the node from \p hGraph + - CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED if any parameters to a node changed in a way + that is not supported, in which case \p hErrorNode_out is set to the node from \p hGraph. + - CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED if any attributes of a node changed in a way + that is not supported, in which case \p hErrorNode_out is set to the node from \p hGraph. + - CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED if something about a node is unsupported, like + the node's type or configuration, in which case \p hErrorNode_out is set to the node from \p hGraph + + If the update fails for a reason not listed above, the result member of \p resultInfo will be set + to CU_GRAPH_EXEC_UPDATE_ERROR. If the update succeeds, the result member will be set to CU_GRAPH_EXEC_UPDATE_SUCCESS. + + cuGraphExecUpdate returns CUDA_SUCCESS when the updated was performed successfully. It returns + CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE if the graph update was not performed because it included + changes which violated constraints specific to instantiated graph update. + + \param hGraphExec The instantiated graph to be updated + \param hGraph The graph containing the updated parameters + \param resultInfo the error info structure + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE, + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphInstantiate*/ + fn cuGraphExecUpdate_v2( + hGraphExec: cuda_types::CUgraphExec, + hGraph: cuda_types::CUgraph, + resultInfo: *mut cuda_types::CUgraphExecUpdateResultInfo, + ) -> cuda_types::CUresult; + /** \brief Copies attributes from source node to destination node. + + Copies attributes from source node \p src to destination node \p dst. + Both node must have the same context. + + \param[out] dst Destination node + \param[in] src Source node + For list of attributes see ::CUkernelNodeAttrID + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa + ::CUaccessPolicyWindow*/ + fn cuGraphKernelNodeCopyAttributes( + dst: cuda_types::CUgraphNode, + src: cuda_types::CUgraphNode, + ) -> cuda_types::CUresult; + /** \brief Queries node attribute. + + Queries attribute \p attr from node \p hNode and stores it in corresponding + member of \p value_out. + + \param[in] hNode + \param[in] attr + \param[out] value_out + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa + ::CUaccessPolicyWindow*/ + fn cuGraphKernelNodeGetAttribute( + hNode: cuda_types::CUgraphNode, + attr: cuda_types::CUkernelNodeAttrID, + value_out: *mut cuda_types::CUkernelNodeAttrValue, + ) -> cuda_types::CUresult; + /** \brief Sets node attribute. + + Sets attribute \p attr on node \p hNode from corresponding attribute of + \p value. + + \param[out] hNode + \param[in] attr + \param[out] value + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + + \sa + ::CUaccessPolicyWindow*/ + fn cuGraphKernelNodeSetAttribute( + hNode: cuda_types::CUgraphNode, + attr: cuda_types::CUkernelNodeAttrID, + value: *const cuda_types::CUkernelNodeAttrValue, + ) -> cuda_types::CUresult; + /** \brief Write a DOT file describing graph structure + + Using the provided \p hGraph, write to \p path a DOT formatted description of the graph. + By default this includes the graph topology, node types, node id, kernel names and memcpy direction. + \p flags can be specified to write more detailed information about each node type such as + parameter values, kernel attributes, node and function handles. + + \param hGraph - The graph to create a DOT file from + \param path - The path to write the DOT file to + \param flags - Flags from CUgraphDebugDot_flags for specifying which additional node information to write + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OPERATING_SYSTEM*/ + fn cuGraphDebugDotPrint( + hGraph: cuda_types::CUgraph, + path: *const ::core::ffi::c_char, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Create a user object + + Create a user object with the specified destructor callback and initial reference count. The + initial references are owned by the caller. + + Destructor callbacks cannot make CUDA API calls and should avoid blocking behavior, as they + are executed by a shared internal thread. Another thread may be signaled to perform such + actions, if it does not block forward progress of tasks scheduled through CUDA. + + See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects. + + \param object_out - Location to return the user object handle + \param ptr - The pointer to pass to the destroy function + \param destroy - Callback to free the user object when it is no longer in use + \param initialRefcount - The initial refcount to create the object with, typically 1. The + initial references are owned by the calling thread. + \param flags - Currently it is required to pass ::CU_USER_OBJECT_NO_DESTRUCTOR_SYNC, + which is the only defined flag. This indicates that the destroy + callback cannot be waited on by any CUDA API. Users requiring + synchronization of the callback should signal its completion + manually. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuUserObjectRetain, + ::cuUserObjectRelease, + ::cuGraphRetainUserObject, + ::cuGraphReleaseUserObject, + ::cuGraphCreate*/ + fn cuUserObjectCreate( + object_out: *mut cuda_types::CUuserObject, + ptr: *mut ::core::ffi::c_void, + destroy: cuda_types::CUhostFn, + initialRefcount: ::core::ffi::c_uint, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Retain a reference to a user object + + Retains new references to a user object. The new references are owned by the caller. + + See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects. + + \param object - The object to retain + \param count - The number of references to retain, typically 1. Must be nonzero + and not larger than INT_MAX. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuUserObjectCreate, + ::cuUserObjectRelease, + ::cuGraphRetainUserObject, + ::cuGraphReleaseUserObject, + ::cuGraphCreate*/ + fn cuUserObjectRetain( + object: cuda_types::CUuserObject, + count: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Release a reference to a user object + + Releases user object references owned by the caller. The object's destructor is invoked if + the reference count reaches zero. + + It is undefined behavior to release references not owned by the caller, or to use a user + object handle after all references are released. + + See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects. + + \param object - The object to release + \param count - The number of references to release, typically 1. Must be nonzero + and not larger than INT_MAX. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuUserObjectCreate, + ::cuUserObjectRetain, + ::cuGraphRetainUserObject, + ::cuGraphReleaseUserObject, + ::cuGraphCreate*/ + fn cuUserObjectRelease( + object: cuda_types::CUuserObject, + count: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Retain a reference to a user object from a graph + + Creates or moves user object references that will be owned by a CUDA graph. + + See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects. + + \param graph - The graph to associate the reference with + \param object - The user object to retain a reference for + \param count - The number of references to add to the graph, typically 1. Must be + nonzero and not larger than INT_MAX. + \param flags - The optional flag ::CU_GRAPH_USER_OBJECT_MOVE transfers references + from the calling thread, rather than create new references. Pass 0 + to create new references. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuUserObjectCreate, + ::cuUserObjectRetain, + ::cuUserObjectRelease, + ::cuGraphReleaseUserObject, + ::cuGraphCreate*/ + fn cuGraphRetainUserObject( + graph: cuda_types::CUgraph, + object: cuda_types::CUuserObject, + count: ::core::ffi::c_uint, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Release a user object reference from a graph + + Releases user object references owned by a graph. + + See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects. + + \param graph - The graph that will release the reference + \param object - The user object to release a reference for + \param count - The number of references to release, typically 1. Must be nonzero + and not larger than INT_MAX. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuUserObjectCreate, + ::cuUserObjectRetain, + ::cuUserObjectRelease, + ::cuGraphRetainUserObject, + ::cuGraphCreate*/ + fn cuGraphReleaseUserObject( + graph: cuda_types::CUgraph, + object: cuda_types::CUuserObject, + count: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Adds a node of arbitrary type to a graph + + Creates a new node in \p hGraph described by \p nodeParams with \p numDependencies + dependencies specified via \p dependencies. \p numDependencies may be 0. + \p dependencies may be null if \p numDependencies is 0. \p dependencies may not have + any duplicate entries. + + \p nodeParams is a tagged union. The node type should be specified in the \p type field, + and type-specific parameters in the corresponding union member. All unused bytes - that + is, \p reserved0 and all bytes past the utilized union member - must be set to zero. + It is recommended to use brace initialization or memset to ensure all bytes are + initialized. + + Note that for some node types, \p nodeParams may contain "out parameters" which are + modified during the call, such as \p nodeParams->alloc.dptr. + + A handle to the new node will be returned in \p phGraphNode. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param numDependencies - Number of dependencies + \param nodeParams - Specification of the node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_NOT_SUPPORTED + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphCreate, + ::cuGraphNodeSetParams, + ::cuGraphExecNodeSetParams*/ + fn cuGraphAddNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + nodeParams: *mut cuda_types::CUgraphNodeParams, + ) -> cuda_types::CUresult; + /** \brief Adds a node of arbitrary type to a graph (12.3+) + + Creates a new node in \p hGraph described by \p nodeParams with \p numDependencies + dependencies specified via \p dependencies. \p numDependencies may be 0. + \p dependencies may be null if \p numDependencies is 0. \p dependencies may not have + any duplicate entries. + + \p nodeParams is a tagged union. The node type should be specified in the \p type field, + and type-specific parameters in the corresponding union member. All unused bytes - that + is, \p reserved0 and all bytes past the utilized union member - must be set to zero. + It is recommended to use brace initialization or memset to ensure all bytes are + initialized. + + Note that for some node types, \p nodeParams may contain "out parameters" which are + modified during the call, such as \p nodeParams->alloc.dptr. + + A handle to the new node will be returned in \p phGraphNode. + + \param phGraphNode - Returns newly created node + \param hGraph - Graph to which to add the node + \param dependencies - Dependencies of the node + \param dependencyData - Optional edge data for the dependencies. If NULL, the data is + assumed to be default (zeroed) for all dependencies. + \param numDependencies - Number of dependencies + \param nodeParams - Specification of the node + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_NOT_SUPPORTED + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphCreate, + ::cuGraphNodeSetParams, + ::cuGraphExecNodeSetParams*/ + fn cuGraphAddNode_v2( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + dependencyData: *const cuda_types::CUgraphEdgeData, + numDependencies: usize, + nodeParams: *mut cuda_types::CUgraphNodeParams, + ) -> cuda_types::CUresult; + /** \brief Update's a graph node's parameters + + Sets the parameters of graph node \p hNode to \p nodeParams. The node type specified by + \p nodeParams->type must match the type of \p hNode. \p nodeParams must be fully + initialized and all unused bytes (reserved, padding) zeroed. + + Modifying parameters is not supported for node types CU_GRAPH_NODE_TYPE_MEM_ALLOC and + CU_GRAPH_NODE_TYPE_MEM_FREE. + + \param hNode - Node to set the parameters for + \param nodeParams - Parameters to copy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_SUPPORTED + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuGraphExecNodeSetParams*/ + fn cuGraphNodeSetParams( + hNode: cuda_types::CUgraphNode, + nodeParams: *mut cuda_types::CUgraphNodeParams, + ) -> cuda_types::CUresult; + /** \brief Update's a graph node's parameters in an instantiated graph + + Sets the parameters of a node in an executable graph \p hGraphExec. The node is identified + by the corresponding node \p hNode in the non-executable graph from which the executable + graph was instantiated. \p hNode must not have been removed from the original graph. + + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. + + Allowed changes to parameters on executable graphs are as follows: + <table> + <tr><th>Node type<th>Allowed changes + <tr><td>kernel<td>See ::cuGraphExecKernelNodeSetParams + <tr><td>memcpy<td>Addresses for 1-dimensional copies if allocated in same context; see ::cuGraphExecMemcpyNodeSetParams + <tr><td>memset<td>Addresses for 1-dimensional memsets if allocated in same context; see ::cuGraphExecMemsetNodeSetParams + <tr><td>host<td>Unrestricted + <tr><td>child graph<td>Topology must match and restrictions apply recursively; see ::cuGraphExecUpdate + <tr><td>event wait<td>Unrestricted + <tr><td>event record<td>Unrestricted + <tr><td>external semaphore signal<td>Number of semaphore operations cannot change + <tr><td>external semaphore wait<td>Number of semaphore operations cannot change + <tr><td>memory allocation<td>API unsupported + <tr><td>memory free<td>API unsupported + <tr><td>batch memops<td>Addresses, values, and operation type for wait operations; see ::cuGraphExecBatchMemOpNodeSetParams + </table> + + \param hGraphExec - The executable graph in which to update the specified node + \param hNode - Corresponding node from the graph from which graphExec was instantiated + \param nodeParams - Updated Parameters to set + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_SUPPORTED + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode, + ::cuGraphNodeSetParams + ::cuGraphExecUpdate, + ::cuGraphInstantiate*/ + fn cuGraphExecNodeSetParams( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + nodeParams: *mut cuda_types::CUgraphNodeParams, + ) -> cuda_types::CUresult; + /** \brief Create a conditional handle + + Creates a conditional handle associated with \p hGraph. + + The conditional handle must be associated with a conditional node in this graph or one of its children. + + Handles not associated with a conditional node may cause graph instantiation to fail. + + Handles can only be set from the context with which they are associated. + + \param pHandle_out - Pointer used to return the handle to the caller. + \param hGraph - Graph which will contain the conditional node using this handle. + \param ctx - Context for the handle and associated conditional node. + \param defaultLaunchValue - Optional initial value for the conditional variable. + \param flags - Currently must be CU_GRAPH_COND_ASSIGN_DEFAULT or 0. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_SUPPORTED + \note_graph_thread_safety + \notefnerr + + \sa + ::cuGraphAddNode*/ + fn cuGraphConditionalHandleCreate( + pHandle_out: *mut cuda_types::CUgraphConditionalHandle, + hGraph: cuda_types::CUgraph, + ctx: cuda_types::CUcontext, + defaultLaunchValue: ::core::ffi::c_uint, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Returns occupancy of a function + + Returns in \p *numBlocks the number of the maximum active blocks per + streaming multiprocessor. + + \param numBlocks - Returned occupancy + \param func - Kernel for which occupancy is calculated + \param blockSize - Block size the kernel is intended to be launched with + \param dynamicSMemSize - Per-block dynamic shared memory usage intended, in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa + ::cudaOccupancyMaxActiveBlocksPerMultiprocessor*/ + fn cuOccupancyMaxActiveBlocksPerMultiprocessor( + numBlocks: *mut ::core::ffi::c_int, + func: cuda_types::CUfunction, + blockSize: ::core::ffi::c_int, + dynamicSMemSize: usize, + ) -> cuda_types::CUresult; + /** \brief Returns occupancy of a function + + Returns in \p *numBlocks the number of the maximum active blocks per + streaming multiprocessor. + + The \p Flags parameter controls how special cases are handled. The + valid flags are: + + - ::CU_OCCUPANCY_DEFAULT, which maintains the default behavior as + ::cuOccupancyMaxActiveBlocksPerMultiprocessor; + + - ::CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE, which suppresses the + default behavior on platform where global caching affects + occupancy. On such platforms, if caching is enabled, but + per-block SM resource usage would result in zero occupancy, the + occupancy calculator will calculate the occupancy as if caching + is disabled. Setting ::CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE makes + the occupancy calculator to return 0 in such cases. More information + can be found about this feature in the "Unified L1/Texture Cache" + section of the Maxwell tuning guide. + + \param numBlocks - Returned occupancy + \param func - Kernel for which occupancy is calculated + \param blockSize - Block size the kernel is intended to be launched with + \param dynamicSMemSize - Per-block dynamic shared memory usage intended, in bytes + \param flags - Requested behavior for the occupancy calculator + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa + ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags*/ + fn cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + numBlocks: *mut ::core::ffi::c_int, + func: cuda_types::CUfunction, + blockSize: ::core::ffi::c_int, + dynamicSMemSize: usize, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Suggest a launch configuration with reasonable occupancy + + Returns in \p *blockSize a reasonable block size that can achieve + the maximum occupancy (or, the maximum number of active warps with + the fewest blocks per multiprocessor), and in \p *minGridSize the + minimum grid size to achieve the maximum occupancy. + + If \p blockSizeLimit is 0, the configurator will use the maximum + block size permitted by the device / function instead. + + If per-block dynamic shared memory allocation is not needed, the + user should leave both \p blockSizeToDynamicSMemSize and \p + dynamicSMemSize as 0. + + If per-block dynamic shared memory allocation is needed, then if + the dynamic shared memory size is constant regardless of block + size, the size should be passed through \p dynamicSMemSize, and \p + blockSizeToDynamicSMemSize should be NULL. + + Otherwise, if the per-block dynamic shared memory size varies with + different block sizes, the user needs to provide a unary function + through \p blockSizeToDynamicSMemSize that computes the dynamic + shared memory needed by \p func for any given block size. \p + dynamicSMemSize is ignored. An example signature is: + + \code + // Take block size, returns dynamic shared memory needed + size_t blockToSmem(int blockSize); + \endcode + + \param minGridSize - Returned minimum grid size needed to achieve the maximum occupancy + \param blockSize - Returned maximum block size that can achieve the maximum occupancy + \param func - Kernel for which launch configuration is calculated + \param blockSizeToDynamicSMemSize - A function that calculates how much per-block dynamic shared memory \p func uses based on the block size + \param dynamicSMemSize - Dynamic shared memory usage intended, in bytes + \param blockSizeLimit - The maximum block size \p func is designed to handle + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa + ::cudaOccupancyMaxPotentialBlockSize*/ + fn cuOccupancyMaxPotentialBlockSize( + minGridSize: *mut ::core::ffi::c_int, + blockSize: *mut ::core::ffi::c_int, + func: cuda_types::CUfunction, + blockSizeToDynamicSMemSize: cuda_types::CUoccupancyB2DSize, + dynamicSMemSize: usize, + blockSizeLimit: ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Suggest a launch configuration with reasonable occupancy + + An extended version of ::cuOccupancyMaxPotentialBlockSize. In + addition to arguments passed to ::cuOccupancyMaxPotentialBlockSize, + ::cuOccupancyMaxPotentialBlockSizeWithFlags also takes a \p Flags + parameter. + + The \p Flags parameter controls how special cases are handled. The + valid flags are: + + - ::CU_OCCUPANCY_DEFAULT, which maintains the default behavior as + ::cuOccupancyMaxPotentialBlockSize; + + - ::CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE, which suppresses the + default behavior on platform where global caching affects + occupancy. On such platforms, the launch configurations that + produces maximal occupancy might not support global + caching. Setting ::CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE + guarantees that the the produced launch configuration is global + caching compatible at a potential cost of occupancy. More information + can be found about this feature in the "Unified L1/Texture Cache" + section of the Maxwell tuning guide. + + \param minGridSize - Returned minimum grid size needed to achieve the maximum occupancy + \param blockSize - Returned maximum block size that can achieve the maximum occupancy + \param func - Kernel for which launch configuration is calculated + \param blockSizeToDynamicSMemSize - A function that calculates how much per-block dynamic shared memory \p func uses based on the block size + \param dynamicSMemSize - Dynamic shared memory usage intended, in bytes + \param blockSizeLimit - The maximum block size \p func is designed to handle + \param flags - Options + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa + ::cudaOccupancyMaxPotentialBlockSizeWithFlags*/ + fn cuOccupancyMaxPotentialBlockSizeWithFlags( + minGridSize: *mut ::core::ffi::c_int, + blockSize: *mut ::core::ffi::c_int, + func: cuda_types::CUfunction, + blockSizeToDynamicSMemSize: cuda_types::CUoccupancyB2DSize, + dynamicSMemSize: usize, + blockSizeLimit: ::core::ffi::c_int, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Returns dynamic shared memory available per block when launching \p numBlocks blocks on SM + + Returns in \p *dynamicSmemSize the maximum size of dynamic shared memory to allow \p numBlocks blocks per SM. + + \param dynamicSmemSize - Returned maximum dynamic shared memory + \param func - Kernel function for which occupancy is calculated + \param numBlocks - Number of blocks to fit on SM + \param blockSize - Size of the blocks + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_UNKNOWN + \notefnerr*/ + fn cuOccupancyAvailableDynamicSMemPerBlock( + dynamicSmemSize: *mut usize, + func: cuda_types::CUfunction, + numBlocks: ::core::ffi::c_int, + blockSize: ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Given the kernel function (\p func) and launch configuration + (\p config), return the maximum cluster size in \p *clusterSize. + + The cluster dimensions in \p config are ignored. If func has a required + cluster size set (see ::cudaFuncGetAttributes / ::cuFuncGetAttribute),\p + *clusterSize will reflect the required cluster size. + + By default this function will always return a value that's portable on + future hardware. A higher value may be returned if the kernel function + allows non-portable cluster sizes. + + This function will respect the compile time launch bounds. + + \param clusterSize - Returned maximum cluster size that can be launched + for the given kernel function and launch configuration + \param func - Kernel function for which maximum cluster + size is calculated + \param config - Launch configuration for the given kernel function + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa + ::cudaFuncGetAttributes, + ::cuFuncGetAttribute*/ + fn cuOccupancyMaxPotentialClusterSize( + clusterSize: *mut ::core::ffi::c_int, + func: cuda_types::CUfunction, + config: *const cuda_types::CUlaunchConfig, + ) -> cuda_types::CUresult; + /** \brief Given the kernel function (\p func) and launch configuration + (\p config), return the maximum number of clusters that could co-exist + on the target device in \p *numClusters. + + If the function has required cluster size already set (see + ::cudaFuncGetAttributes / ::cuFuncGetAttribute), the cluster size + from config must either be unspecified or match the required size. + Without required sizes, the cluster size must be specified in config, + else the function will return an error. + + Note that various attributes of the kernel function may affect occupancy + calculation. Runtime environment may affect how the hardware schedules + the clusters, so the calculated occupancy is not guaranteed to be achievable. + + \param numClusters - Returned maximum number of clusters that + could co-exist on the target device + \param func - Kernel function for which maximum number + of clusters are calculated + \param config - Launch configuration for the given kernel function + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_CLUSTER_SIZE, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa + ::cudaFuncGetAttributes, + ::cuFuncGetAttribute*/ + fn cuOccupancyMaxActiveClusters( + numClusters: *mut ::core::ffi::c_int, + func: cuda_types::CUfunction, + config: *const cuda_types::CUlaunchConfig, + ) -> cuda_types::CUresult; + /** \brief Binds an array as a texture reference + + \deprecated + + Binds the CUDA array \p hArray to the texture reference \p hTexRef. Any + previous address or CUDA array state associated with the texture reference + is superseded by this function. \p Flags must be set to + ::CU_TRSA_OVERRIDE_FORMAT. Any CUDA array previously bound to \p hTexRef is + unbound. + + \param hTexRef - Texture reference to bind + \param hArray - Array to bind + \param Flags - Options (must be ::CU_TRSA_OVERRIDE_FORMAT) + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, + ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefSetArray( + hTexRef: cuda_types::CUtexref, + hArray: cuda_types::CUarray, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Binds a mipmapped array to a texture reference + + \deprecated + + Binds the CUDA mipmapped array \p hMipmappedArray to the texture reference \p hTexRef. + Any previous address or CUDA array state associated with the texture reference + is superseded by this function. \p Flags must be set to ::CU_TRSA_OVERRIDE_FORMAT. + Any CUDA array previously bound to \p hTexRef is unbound. + + \param hTexRef - Texture reference to bind + \param hMipmappedArray - Mipmapped array to bind + \param Flags - Options (must be ::CU_TRSA_OVERRIDE_FORMAT) + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, + ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefSetMipmappedArray( + hTexRef: cuda_types::CUtexref, + hMipmappedArray: cuda_types::CUmipmappedArray, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Binds an address as a texture reference + + \deprecated + + Binds a linear address range to the texture reference \p hTexRef. Any + previous address or CUDA array state associated with the texture reference + is superseded by this function. Any memory previously bound to \p hTexRef + is unbound. + + Since the hardware enforces an alignment requirement on texture base + addresses, ::cuTexRefSetAddress() passes back a byte offset in + \p *ByteOffset that must be applied to texture fetches in order to read from + the desired memory. This offset must be divided by the texel size and + passed to kernels that read from the texture so they can be applied to the + ::tex1Dfetch() function. + + If the device memory pointer was returned from ::cuMemAlloc(), the offset + is guaranteed to be 0 and NULL may be passed as the \p ByteOffset parameter. + + The total number of elements (or texels) in the linear address range + cannot exceed ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. + The number of elements is computed as (\p bytes / bytesPerElement), + where bytesPerElement is determined from the data format and number of + components set using ::cuTexRefSetFormat(). + + \param ByteOffset - Returned byte offset + \param hTexRef - Texture reference to bind + \param dptr - Device pointer to bind + \param bytes - Size of memory to bind in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefSetAddress_v2( + ByteOffset: *mut usize, + hTexRef: cuda_types::CUtexref, + dptr: cuda_types::CUdeviceptr, + bytes: usize, + ) -> cuda_types::CUresult; + /** \brief Binds an address as a 2D texture reference + + \deprecated + + Binds a linear address range to the texture reference \p hTexRef. Any + previous address or CUDA array state associated with the texture reference + is superseded by this function. Any memory previously bound to \p hTexRef + is unbound. + + Using a ::tex2D() function inside a kernel requires a call to either + ::cuTexRefSetArray() to bind the corresponding texture reference to an + array, or ::cuTexRefSetAddress2D() to bind the texture reference to linear + memory. + + Function calls to ::cuTexRefSetFormat() cannot follow calls to + ::cuTexRefSetAddress2D() for the same texture reference. + + It is required that \p dptr be aligned to the appropriate hardware-specific + texture alignment. You can query this value using the device attribute + ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. If an unaligned \p dptr is + supplied, ::CUDA_ERROR_INVALID_VALUE is returned. + + \p Pitch has to be aligned to the hardware-specific texture pitch alignment. + This value can be queried using the device attribute + ::CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT. If an unaligned \p Pitch is + supplied, ::CUDA_ERROR_INVALID_VALUE is returned. + + Width and Height, which are specified in elements (or texels), cannot exceed + ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH and + ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively. + \p Pitch, which is specified in bytes, cannot exceed + ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH. + + \param hTexRef - Texture reference to bind + \param desc - Descriptor of CUDA array + \param dptr - Device pointer to bind + \param Pitch - Line pitch in bytes + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexRefSetAddress, + ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefSetAddress2D_v3( + hTexRef: cuda_types::CUtexref, + desc: *const cuda_types::CUDA_ARRAY_DESCRIPTOR, + dptr: cuda_types::CUdeviceptr, + Pitch: usize, + ) -> cuda_types::CUresult; + /** \brief Sets the format for a texture reference + + \deprecated + + Specifies the format of the data to be read by the texture reference + \p hTexRef. \p fmt and \p NumPackedComponents are exactly analogous to the + ::Format and ::NumChannels members of the ::CUDA_ARRAY_DESCRIPTOR structure: + They specify the format of each component and the number of components per + array element. + + \param hTexRef - Texture reference + \param fmt - Format to set + \param NumPackedComponents - Number of components per array element + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat, + ::cudaCreateChannelDesc*/ + fn cuTexRefSetFormat( + hTexRef: cuda_types::CUtexref, + fmt: cuda_types::CUarray_format, + NumPackedComponents: ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Sets the addressing mode for a texture reference + + \deprecated + + Specifies the addressing mode \p am for the given dimension \p dim of the + texture reference \p hTexRef. If \p dim is zero, the addressing mode is + applied to the first parameter of the functions used to fetch from the + texture; if \p dim is 1, the second, and so on. ::CUaddress_mode is defined + as: + \code +typedef enum CUaddress_mode_enum { +CU_TR_ADDRESS_MODE_WRAP = 0, +CU_TR_ADDRESS_MODE_CLAMP = 1, +CU_TR_ADDRESS_MODE_MIRROR = 2, +CU_TR_ADDRESS_MODE_BORDER = 3 +} CUaddress_mode; + \endcode + + Note that this call has no effect if \p hTexRef is bound to linear memory. + Also, if the flag, ::CU_TRSF_NORMALIZED_COORDINATES, is not set, the only + supported address mode is ::CU_TR_ADDRESS_MODE_CLAMP. + + \param hTexRef - Texture reference + \param dim - Dimension + \param am - Addressing mode to set + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetArray, + ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefSetAddressMode( + hTexRef: cuda_types::CUtexref, + dim: ::core::ffi::c_int, + am: cuda_types::CUaddress_mode, + ) -> cuda_types::CUresult; + /** \brief Sets the filtering mode for a texture reference + + \deprecated + + Specifies the filtering mode \p fm to be used when reading memory through + the texture reference \p hTexRef. ::CUfilter_mode_enum is defined as: + + \code +typedef enum CUfilter_mode_enum { +CU_TR_FILTER_MODE_POINT = 0, +CU_TR_FILTER_MODE_LINEAR = 1 +} CUfilter_mode; + \endcode + + Note that this call has no effect if \p hTexRef is bound to linear memory. + + \param hTexRef - Texture reference + \param fm - Filtering mode to set + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefSetFilterMode( + hTexRef: cuda_types::CUtexref, + fm: cuda_types::CUfilter_mode, + ) -> cuda_types::CUresult; + /** \brief Sets the mipmap filtering mode for a texture reference + + \deprecated + + Specifies the mipmap filtering mode \p fm to be used when reading memory through + the texture reference \p hTexRef. ::CUfilter_mode_enum is defined as: + + \code +typedef enum CUfilter_mode_enum { +CU_TR_FILTER_MODE_POINT = 0, +CU_TR_FILTER_MODE_LINEAR = 1 +} CUfilter_mode; + \endcode + + Note that this call has no effect if \p hTexRef is not bound to a mipmapped array. + + \param hTexRef - Texture reference + \param fm - Filtering mode to set + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefSetMipmapFilterMode( + hTexRef: cuda_types::CUtexref, + fm: cuda_types::CUfilter_mode, + ) -> cuda_types::CUresult; + /** \brief Sets the mipmap level bias for a texture reference + + \deprecated + + Specifies the mipmap level bias \p bias to be added to the specified mipmap level when + reading memory through the texture reference \p hTexRef. + + Note that this call has no effect if \p hTexRef is not bound to a mipmapped array. + + \param hTexRef - Texture reference + \param bias - Mipmap level bias + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefSetMipmapLevelBias( + hTexRef: cuda_types::CUtexref, + bias: f32, + ) -> cuda_types::CUresult; + /** \brief Sets the mipmap min/max mipmap level clamps for a texture reference + + \deprecated + + Specifies the min/max mipmap level clamps, \p minMipmapLevelClamp and \p maxMipmapLevelClamp + respectively, to be used when reading memory through the texture reference + \p hTexRef. + + Note that this call has no effect if \p hTexRef is not bound to a mipmapped array. + + \param hTexRef - Texture reference + \param minMipmapLevelClamp - Mipmap min level clamp + \param maxMipmapLevelClamp - Mipmap max level clamp + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefSetMipmapLevelClamp( + hTexRef: cuda_types::CUtexref, + minMipmapLevelClamp: f32, + maxMipmapLevelClamp: f32, + ) -> cuda_types::CUresult; + /** \brief Sets the maximum anisotropy for a texture reference + + \deprecated + + Specifies the maximum anisotropy \p maxAniso to be used when reading memory through + the texture reference \p hTexRef. + + Note that this call has no effect if \p hTexRef is bound to linear memory. + + \param hTexRef - Texture reference + \param maxAniso - Maximum anisotropy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefSetMaxAnisotropy( + hTexRef: cuda_types::CUtexref, + maxAniso: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Sets the border color for a texture reference + + \deprecated + + Specifies the value of the RGBA color via the \p pBorderColor to the texture reference + \p hTexRef. The color value supports only float type and holds color components in + the following sequence: + pBorderColor[0] holds 'R' component + pBorderColor[1] holds 'G' component + pBorderColor[2] holds 'B' component + pBorderColor[3] holds 'A' component + + Note that the color values can be set only when the Address mode is set to + CU_TR_ADDRESS_MODE_BORDER using ::cuTexRefSetAddressMode. + Applications using integer border color values have to "reinterpret_cast" their values to float. + + \param hTexRef - Texture reference + \param pBorderColor - RGBA color + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexRefSetAddressMode, + ::cuTexRefGetAddressMode, ::cuTexRefGetBorderColor*/ + fn cuTexRefSetBorderColor( + hTexRef: cuda_types::CUtexref, + pBorderColor: *mut f32, + ) -> cuda_types::CUresult; + /** \brief Sets the flags for a texture reference + + \deprecated + + Specifies optional flags via \p Flags to specify the behavior of data + returned through the texture reference \p hTexRef. The valid flags are: + + - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of + having the texture promote integer data to floating point data in the + range [0, 1]. Note that texture with 32-bit integer format + would not be promoted, regardless of whether or not this + flag is specified; + - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the + default behavior of having the texture coordinates range + from [0, Dim) where Dim is the width or height of the CUDA + array. Instead, the texture coordinates [0, 1.0) reference + the entire breadth of the array dimension; + - ::CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION, which disables any trilinear + filtering optimizations. Trilinear optimizations improve texture filtering + performance by allowing bilinear filtering on textures in scenarios where + it can closely approximate the expected results. + + \param hTexRef - Texture reference + \param Flags - Optional flags to set + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFilterMode, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefSetFlags( + hTexRef: cuda_types::CUtexref, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Gets the address associated with a texture reference + + \deprecated + + Returns in \p *pdptr the base address bound to the texture reference + \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture reference + is not bound to any device memory range. + + \param pdptr - Returned device address + \param hTexRef - Texture reference + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefGetAddress_v2( + pdptr: *mut cuda_types::CUdeviceptr, + hTexRef: cuda_types::CUtexref, + ) -> cuda_types::CUresult; + /** \brief Gets the array bound to a texture reference + + \deprecated + + Returns in \p *phArray the CUDA array bound to the texture reference + \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture reference + is not bound to any CUDA array. + + \param phArray - Returned array + \param hTexRef - Texture reference + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefGetArray( + phArray: *mut cuda_types::CUarray, + hTexRef: cuda_types::CUtexref, + ) -> cuda_types::CUresult; + /** \brief Gets the mipmapped array bound to a texture reference + + \deprecated + + Returns in \p *phMipmappedArray the CUDA mipmapped array bound to the texture + reference \p hTexRef, or returns ::CUDA_ERROR_INVALID_VALUE if the texture reference + is not bound to any CUDA mipmapped array. + + \param phMipmappedArray - Returned mipmapped array + \param hTexRef - Texture reference + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefGetMipmappedArray( + phMipmappedArray: *mut cuda_types::CUmipmappedArray, + hTexRef: cuda_types::CUtexref, + ) -> cuda_types::CUresult; + /** \brief Gets the addressing mode used by a texture reference + + \deprecated + + Returns in \p *pam the addressing mode corresponding to the + dimension \p dim of the texture reference \p hTexRef. Currently, the only + valid value for \p dim are 0 and 1. + + \param pam - Returned addressing mode + \param hTexRef - Texture reference + \param dim - Dimension + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefGetAddressMode( + pam: *mut cuda_types::CUaddress_mode, + hTexRef: cuda_types::CUtexref, + dim: ::core::ffi::c_int, + ) -> cuda_types::CUresult; + /** \brief Gets the filter-mode used by a texture reference + + \deprecated + + Returns in \p *pfm the filtering mode of the texture reference + \p hTexRef. + + \param pfm - Returned filtering mode + \param hTexRef - Texture reference + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefGetFilterMode( + pfm: *mut cuda_types::CUfilter_mode, + hTexRef: cuda_types::CUtexref, + ) -> cuda_types::CUresult; + /** \brief Gets the format used by a texture reference + + \deprecated + + Returns in \p *pFormat and \p *pNumChannels the format and number + of components of the CUDA array bound to the texture reference \p hTexRef. + If \p pFormat or \p pNumChannels is NULL, it will be ignored. + + \param pFormat - Returned format + \param pNumChannels - Returned number of components + \param hTexRef - Texture reference + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags*/ + fn cuTexRefGetFormat( + pFormat: *mut cuda_types::CUarray_format, + pNumChannels: *mut ::core::ffi::c_int, + hTexRef: cuda_types::CUtexref, + ) -> cuda_types::CUresult; + /** \brief Gets the mipmap filtering mode for a texture reference + + \deprecated + + Returns the mipmap filtering mode in \p pfm that's used when reading memory through + the texture reference \p hTexRef. + + \param pfm - Returned mipmap filtering mode + \param hTexRef - Texture reference + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefGetMipmapFilterMode( + pfm: *mut cuda_types::CUfilter_mode, + hTexRef: cuda_types::CUtexref, + ) -> cuda_types::CUresult; + /** \brief Gets the mipmap level bias for a texture reference + + \deprecated + + Returns the mipmap level bias in \p pBias that's added to the specified mipmap + level when reading memory through the texture reference \p hTexRef. + + \param pbias - Returned mipmap level bias + \param hTexRef - Texture reference + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefGetMipmapLevelBias( + pbias: *mut f32, + hTexRef: cuda_types::CUtexref, + ) -> cuda_types::CUresult; + /** \brief Gets the min/max mipmap level clamps for a texture reference + + \deprecated + + Returns the min/max mipmap level clamps in \p pminMipmapLevelClamp and \p pmaxMipmapLevelClamp + that's used when reading memory through the texture reference \p hTexRef. + + \param pminMipmapLevelClamp - Returned mipmap min level clamp + \param pmaxMipmapLevelClamp - Returned mipmap max level clamp + \param hTexRef - Texture reference + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefGetMipmapLevelClamp( + pminMipmapLevelClamp: *mut f32, + pmaxMipmapLevelClamp: *mut f32, + hTexRef: cuda_types::CUtexref, + ) -> cuda_types::CUresult; + /** \brief Gets the maximum anisotropy for a texture reference + + \deprecated + + Returns the maximum anisotropy in \p pmaxAniso that's used when reading memory through + the texture reference \p hTexRef. + + \param pmaxAniso - Returned maximum anisotropy + \param hTexRef - Texture reference + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFlags, ::cuTexRefGetFormat*/ + fn cuTexRefGetMaxAnisotropy( + pmaxAniso: *mut ::core::ffi::c_int, + hTexRef: cuda_types::CUtexref, + ) -> cuda_types::CUresult; + /** \brief Gets the border color used by a texture reference + + \deprecated + + Returns in \p pBorderColor, values of the RGBA color used by + the texture reference \p hTexRef. + The color value is of type float and holds color components in + the following sequence: + pBorderColor[0] holds 'R' component + pBorderColor[1] holds 'G' component + pBorderColor[2] holds 'B' component + pBorderColor[3] holds 'A' component + + \param hTexRef - Texture reference + \param pBorderColor - Returned Type and Value of RGBA color + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefSetAddressMode, + ::cuTexRefSetAddressMode, ::cuTexRefSetBorderColor*/ + fn cuTexRefGetBorderColor( + pBorderColor: *mut f32, + hTexRef: cuda_types::CUtexref, + ) -> cuda_types::CUresult; + /** \brief Gets the flags used by a texture reference + + \deprecated + + Returns in \p *pFlags the flags of the texture reference \p hTexRef. + + \param pFlags - Returned flags + \param hTexRef - Texture reference + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefSetAddress, + ::cuTexRefSetAddress2D, ::cuTexRefSetAddressMode, ::cuTexRefSetArray, + ::cuTexRefSetFilterMode, ::cuTexRefSetFlags, ::cuTexRefSetFormat, + ::cuTexRefGetAddress, ::cuTexRefGetAddressMode, ::cuTexRefGetArray, + ::cuTexRefGetFilterMode, ::cuTexRefGetFormat*/ + fn cuTexRefGetFlags( + pFlags: *mut ::core::ffi::c_uint, + hTexRef: cuda_types::CUtexref, + ) -> cuda_types::CUresult; + /** \brief Creates a texture reference + + \deprecated + + Creates a texture reference and returns its handle in \p *pTexRef. Once + created, the application must call ::cuTexRefSetArray() or + ::cuTexRefSetAddress() to associate the reference with allocated memory. + Other texture reference functions are used to specify the format and + interpretation (addressing, filtering, etc.) to be used when the memory is + read through this texture reference. + + \param pTexRef - Returned texture reference + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefDestroy*/ + fn cuTexRefCreate(pTexRef: *mut cuda_types::CUtexref) -> cuda_types::CUresult; + /** \brief Destroys a texture reference + + \deprecated + + Destroys the texture reference specified by \p hTexRef. + + \param hTexRef - Texture reference to destroy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuTexRefCreate*/ + fn cuTexRefDestroy(hTexRef: cuda_types::CUtexref) -> cuda_types::CUresult; + /** \brief Sets the CUDA array for a surface reference. + + \deprecated + + Sets the CUDA array \p hArray to be read and written by the surface reference + \p hSurfRef. Any previous CUDA array state associated with the surface + reference is superseded by this function. \p Flags must be set to 0. + The ::CUDA_ARRAY3D_SURFACE_LDST flag must have been set for the CUDA array. + Any CUDA array previously bound to \p hSurfRef is unbound. + + \param hSurfRef - Surface reference handle + \param hArray - CUDA array handle + \param Flags - set to 0 + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuModuleGetSurfRef, + ::cuSurfRefGetArray*/ + fn cuSurfRefSetArray( + hSurfRef: cuda_types::CUsurfref, + hArray: cuda_types::CUarray, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Passes back the CUDA array bound to a surface reference. + + \deprecated + + Returns in \p *phArray the CUDA array bound to the surface reference + \p hSurfRef, or returns ::CUDA_ERROR_INVALID_VALUE if the surface reference + is not bound to any CUDA array. + + \param phArray - Surface reference handle + \param hSurfRef - Surface reference handle + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa ::cuModuleGetSurfRef, ::cuSurfRefSetArray*/ + fn cuSurfRefGetArray( + phArray: *mut cuda_types::CUarray, + hSurfRef: cuda_types::CUsurfref, + ) -> cuda_types::CUresult; + /** \brief Creates a texture object + + Creates a texture object and returns it in \p pTexObject. \p pResDesc describes + the data to texture from. \p pTexDesc describes how the data should be sampled. + \p pResViewDesc is an optional argument that specifies an alternate format for + the data described by \p pResDesc, and also describes the subresource region + to restrict access to when texturing. \p pResViewDesc can only be specified if + the type of resource is a CUDA array or a CUDA mipmapped array. + + Texture objects are only supported on devices of compute capability 3.0 or higher. + Additionally, a texture object is an opaque value, and, as such, should only be + accessed through CUDA API calls. + + The ::CUDA_RESOURCE_DESC structure is defined as: + \code +typedef struct CUDA_RESOURCE_DESC_st +{ +CUresourcetype resType; + +union { +struct { +CUarray hArray; +} array; +struct { +CUmipmappedArray hMipmappedArray; +} mipmap; +struct { +CUdeviceptr devPtr; +CUarray_format format; +unsigned int numChannels; +size_t sizeInBytes; +} linear; +struct { +CUdeviceptr devPtr; +CUarray_format format; +unsigned int numChannels; +size_t width; +size_t height; +size_t pitchInBytes; +} pitch2D; +} res; + +unsigned int flags; +} CUDA_RESOURCE_DESC; + + \endcode + where: + - ::CUDA_RESOURCE_DESC::resType specifies the type of resource to texture from. + CUresourceType is defined as: + \code +typedef enum CUresourcetype_enum { +CU_RESOURCE_TYPE_ARRAY = 0x00, +CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01, +CU_RESOURCE_TYPE_LINEAR = 0x02, +CU_RESOURCE_TYPE_PITCH2D = 0x03 +} CUresourcetype; + \endcode + + \par + If ::CUDA_RESOURCE_DESC::resType is set to ::CU_RESOURCE_TYPE_ARRAY, ::CUDA_RESOURCE_DESC::res::array::hArray + must be set to a valid CUDA array handle. + + \par + If ::CUDA_RESOURCE_DESC::resType is set to ::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY, ::CUDA_RESOURCE_DESC::res::mipmap::hMipmappedArray + must be set to a valid CUDA mipmapped array handle. + + \par + If ::CUDA_RESOURCE_DESC::resType is set to ::CU_RESOURCE_TYPE_LINEAR, ::CUDA_RESOURCE_DESC::res::linear::devPtr + must be set to a valid device pointer, that is aligned to ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. + ::CUDA_RESOURCE_DESC::res::linear::format and ::CUDA_RESOURCE_DESC::res::linear::numChannels + describe the format of each component and the number of components per array element. ::CUDA_RESOURCE_DESC::res::linear::sizeInBytes + specifies the size of the array in bytes. The total number of elements in the linear address range cannot exceed + ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH. The number of elements is computed as (sizeInBytes / (sizeof(format) * numChannels)). + + \par + If ::CUDA_RESOURCE_DESC::resType is set to ::CU_RESOURCE_TYPE_PITCH2D, ::CUDA_RESOURCE_DESC::res::pitch2D::devPtr + must be set to a valid device pointer, that is aligned to ::CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT. + ::CUDA_RESOURCE_DESC::res::pitch2D::format and ::CUDA_RESOURCE_DESC::res::pitch2D::numChannels + describe the format of each component and the number of components per array element. ::CUDA_RESOURCE_DESC::res::pitch2D::width + and ::CUDA_RESOURCE_DESC::res::pitch2D::height specify the width and height of the array in elements, and cannot exceed + ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH and ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT respectively. + ::CUDA_RESOURCE_DESC::res::pitch2D::pitchInBytes specifies the pitch between two rows in bytes and has to be aligned to + ::CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT. Pitch cannot exceed ::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH. + + - ::flags must be set to zero. + + + The ::CUDA_TEXTURE_DESC struct is defined as + \code +typedef struct CUDA_TEXTURE_DESC_st { +CUaddress_mode addressMode[3]; +CUfilter_mode filterMode; +unsigned int flags; +unsigned int maxAnisotropy; +CUfilter_mode mipmapFilterMode; +float mipmapLevelBias; +float minMipmapLevelClamp; +float maxMipmapLevelClamp; +} CUDA_TEXTURE_DESC; + \endcode + where + - ::CUDA_TEXTURE_DESC::addressMode specifies the addressing mode for each dimension of the texture data. ::CUaddress_mode is defined as: + \code +typedef enum CUaddress_mode_enum { +CU_TR_ADDRESS_MODE_WRAP = 0, +CU_TR_ADDRESS_MODE_CLAMP = 1, +CU_TR_ADDRESS_MODE_MIRROR = 2, +CU_TR_ADDRESS_MODE_BORDER = 3 +} CUaddress_mode; + \endcode + This is ignored if ::CUDA_RESOURCE_DESC::resType is ::CU_RESOURCE_TYPE_LINEAR. Also, if the flag, ::CU_TRSF_NORMALIZED_COORDINATES + is not set, the only supported address mode is ::CU_TR_ADDRESS_MODE_CLAMP. + + - ::CUDA_TEXTURE_DESC::filterMode specifies the filtering mode to be used when fetching from the texture. CUfilter_mode is defined as: + \code +typedef enum CUfilter_mode_enum { +CU_TR_FILTER_MODE_POINT = 0, +CU_TR_FILTER_MODE_LINEAR = 1 +} CUfilter_mode; + \endcode + This is ignored if ::CUDA_RESOURCE_DESC::resType is ::CU_RESOURCE_TYPE_LINEAR. + + - ::CUDA_TEXTURE_DESC::flags can be any combination of the following: + - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of + having the texture promote integer data to floating point data in the + range [0, 1]. Note that texture with 32-bit integer format would not be + promoted, regardless of whether or not this flag is specified. + - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior + of having the texture coordinates range from [0, Dim) where Dim is the + width or height of the CUDA array. Instead, the texture coordinates + [0, 1.0) reference the entire breadth of the array dimension; Note that + for CUDA mipmapped arrays, this flag has to be set. + - ::CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION, which disables any trilinear + filtering optimizations. Trilinear optimizations improve texture filtering + performance by allowing bilinear filtering on textures in scenarios where + it can closely approximate the expected results. + - ::CU_TRSF_SEAMLESS_CUBEMAP, which enables seamless cube map filtering. + This flag can only be specified if the underlying resource is a CUDA array + or a CUDA mipmapped array that was created with the flag ::CUDA_ARRAY3D_CUBEMAP. + When seamless cube map filtering is enabled, texture address modes specified + by ::CUDA_TEXTURE_DESC::addressMode are ignored. Instead, if the ::CUDA_TEXTURE_DESC::filterMode + is set to ::CU_TR_FILTER_MODE_POINT the address mode ::CU_TR_ADDRESS_MODE_CLAMP + will be applied for all dimensions. If the ::CUDA_TEXTURE_DESC::filterMode is + set to ::CU_TR_FILTER_MODE_LINEAR seamless cube map filtering will be performed + when sampling along the cube face borders. + + - ::CUDA_TEXTURE_DESC::maxAnisotropy specifies the maximum anisotropy ratio to be used when doing anisotropic filtering. This value will be + clamped to the range [1,16]. + + - ::CUDA_TEXTURE_DESC::mipmapFilterMode specifies the filter mode when the calculated mipmap level lies between two defined mipmap levels. + + - ::CUDA_TEXTURE_DESC::mipmapLevelBias specifies the offset to be applied to the calculated mipmap level. + + - ::CUDA_TEXTURE_DESC::minMipmapLevelClamp specifies the lower end of the mipmap level range to clamp access to. + + - ::CUDA_TEXTURE_DESC::maxMipmapLevelClamp specifies the upper end of the mipmap level range to clamp access to. + + + The ::CUDA_RESOURCE_VIEW_DESC struct is defined as + \code +typedef struct CUDA_RESOURCE_VIEW_DESC_st +{ +CUresourceViewFormat format; +size_t width; +size_t height; +size_t depth; +unsigned int firstMipmapLevel; +unsigned int lastMipmapLevel; +unsigned int firstLayer; +unsigned int lastLayer; +} CUDA_RESOURCE_VIEW_DESC; + \endcode + where: + - ::CUDA_RESOURCE_VIEW_DESC::format specifies how the data contained in the CUDA array or CUDA mipmapped array should + be interpreted. Note that this can incur a change in size of the texture data. If the resource view format is a block + compressed format, then the underlying CUDA array or CUDA mipmapped array has to have a base of format ::CU_AD_FORMAT_UNSIGNED_INT32. + with 2 or 4 channels, depending on the block compressed format. For ex., BC1 and BC4 require the underlying CUDA array to have + a format of ::CU_AD_FORMAT_UNSIGNED_INT32 with 2 channels. The other BC formats require the underlying resource to have the same base + format but with 4 channels. + + - ::CUDA_RESOURCE_VIEW_DESC::width specifies the new width of the texture data. If the resource view format is a block + compressed format, this value has to be 4 times the original width of the resource. For non block compressed formats, + this value has to be equal to that of the original resource. + + - ::CUDA_RESOURCE_VIEW_DESC::height specifies the new height of the texture data. If the resource view format is a block + compressed format, this value has to be 4 times the original height of the resource. For non block compressed formats, + this value has to be equal to that of the original resource. + + - ::CUDA_RESOURCE_VIEW_DESC::depth specifies the new depth of the texture data. This value has to be equal to that of the + original resource. + + - ::CUDA_RESOURCE_VIEW_DESC::firstMipmapLevel specifies the most detailed mipmap level. This will be the new mipmap level zero. + For non-mipmapped resources, this value has to be zero.::CUDA_TEXTURE_DESC::minMipmapLevelClamp and ::CUDA_TEXTURE_DESC::maxMipmapLevelClamp + will be relative to this value. For ex., if the firstMipmapLevel is set to 2, and a minMipmapLevelClamp of 1.2 is specified, + then the actual minimum mipmap level clamp will be 3.2. + + - ::CUDA_RESOURCE_VIEW_DESC::lastMipmapLevel specifies the least detailed mipmap level. For non-mipmapped resources, this value + has to be zero. + + - ::CUDA_RESOURCE_VIEW_DESC::firstLayer specifies the first layer index for layered textures. This will be the new layer zero. + For non-layered resources, this value has to be zero. + + - ::CUDA_RESOURCE_VIEW_DESC::lastLayer specifies the last layer index for layered textures. For non-layered resources, + this value has to be zero. + + + \param pTexObject - Texture object to create + \param pResDesc - Resource descriptor + \param pTexDesc - Texture descriptor + \param pResViewDesc - Resource view descriptor + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexObjectDestroy, + ::cudaCreateTextureObject*/ + fn cuTexObjectCreate( + pTexObject: *mut cuda_types::CUtexObject, + pResDesc: *const cuda_types::CUDA_RESOURCE_DESC, + pTexDesc: *const cuda_types::CUDA_TEXTURE_DESC, + pResViewDesc: *const cuda_types::CUDA_RESOURCE_VIEW_DESC, + ) -> cuda_types::CUresult; + /** \brief Destroys a texture object + + Destroys the texture object specified by \p texObject. + + \param texObject - Texture object to destroy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexObjectCreate, + ::cudaDestroyTextureObject*/ + fn cuTexObjectDestroy(texObject: cuda_types::CUtexObject) -> cuda_types::CUresult; + /** \brief Returns a texture object's resource descriptor + + Returns the resource descriptor for the texture object specified by \p texObject. + + \param pResDesc - Resource descriptor + \param texObject - Texture object + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexObjectCreate, + ::cudaGetTextureObjectResourceDesc,*/ + fn cuTexObjectGetResourceDesc( + pResDesc: *mut cuda_types::CUDA_RESOURCE_DESC, + texObject: cuda_types::CUtexObject, + ) -> cuda_types::CUresult; + /** \brief Returns a texture object's texture descriptor + + Returns the texture descriptor for the texture object specified by \p texObject. + + \param pTexDesc - Texture descriptor + \param texObject - Texture object + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexObjectCreate, + ::cudaGetTextureObjectTextureDesc*/ + fn cuTexObjectGetTextureDesc( + pTexDesc: *mut cuda_types::CUDA_TEXTURE_DESC, + texObject: cuda_types::CUtexObject, + ) -> cuda_types::CUresult; + /** \brief Returns a texture object's resource view descriptor + + Returns the resource view descriptor for the texture object specified by \p texObject. + If no resource view was set for \p texObject, the ::CUDA_ERROR_INVALID_VALUE is returned. + + \param pResViewDesc - Resource view descriptor + \param texObject - Texture object + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTexObjectCreate, + ::cudaGetTextureObjectResourceViewDesc*/ + fn cuTexObjectGetResourceViewDesc( + pResViewDesc: *mut cuda_types::CUDA_RESOURCE_VIEW_DESC, + texObject: cuda_types::CUtexObject, + ) -> cuda_types::CUresult; + /** \brief Creates a surface object + + Creates a surface object and returns it in \p pSurfObject. \p pResDesc describes + the data to perform surface load/stores on. ::CUDA_RESOURCE_DESC::resType must be + ::CU_RESOURCE_TYPE_ARRAY and ::CUDA_RESOURCE_DESC::res::array::hArray + must be set to a valid CUDA array handle. ::CUDA_RESOURCE_DESC::flags must be set to zero. + + Surface objects are only supported on devices of compute capability 3.0 or higher. + Additionally, a surface object is an opaque value, and, as such, should only be + accessed through CUDA API calls. + + \param pSurfObject - Surface object to create + \param pResDesc - Resource descriptor + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuSurfObjectDestroy, + ::cudaCreateSurfaceObject*/ + fn cuSurfObjectCreate( + pSurfObject: *mut cuda_types::CUsurfObject, + pResDesc: *const cuda_types::CUDA_RESOURCE_DESC, + ) -> cuda_types::CUresult; + /** \brief Destroys a surface object + + Destroys the surface object specified by \p surfObject. + + \param surfObject - Surface object to destroy + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuSurfObjectCreate, + ::cudaDestroySurfaceObject*/ + fn cuSurfObjectDestroy(surfObject: cuda_types::CUsurfObject) -> cuda_types::CUresult; + /** \brief Returns a surface object's resource descriptor + + Returns the resource descriptor for the surface object specified by \p surfObject. + + \param pResDesc - Resource descriptor + \param surfObject - Surface object + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuSurfObjectCreate, + ::cudaGetSurfaceObjectResourceDesc*/ + fn cuSurfObjectGetResourceDesc( + pResDesc: *mut cuda_types::CUDA_RESOURCE_DESC, + surfObject: cuda_types::CUsurfObject, + ) -> cuda_types::CUresult; + /** \brief Create a tensor map descriptor object representing tiled memory region + + Creates a descriptor for Tensor Memory Access (TMA) object specified + by the parameters describing a tiled region and returns it in \p tensorMap. + + Tensor map objects are only supported on devices of compute capability 9.0 or higher. + Additionally, a tensor map object is an opaque value, and, as such, should only be + accessed through CUDA API calls. + + The parameters passed are bound to the following requirements: + + - \p tensorMap address must be aligned to 64 bytes. + + - \p tensorDataType has to be an enum from ::CUtensorMapDataType which is defined as: + \code +typedef enum CUtensorMapDataType_enum { +CU_TENSOR_MAP_DATA_TYPE_UINT8 = 0, // 1 byte +CU_TENSOR_MAP_DATA_TYPE_UINT16, // 2 bytes +CU_TENSOR_MAP_DATA_TYPE_UINT32, // 4 bytes +CU_TENSOR_MAP_DATA_TYPE_INT32, // 4 bytes +CU_TENSOR_MAP_DATA_TYPE_UINT64, // 8 bytes +CU_TENSOR_MAP_DATA_TYPE_INT64, // 8 bytes +CU_TENSOR_MAP_DATA_TYPE_FLOAT16, // 2 bytes +CU_TENSOR_MAP_DATA_TYPE_FLOAT32, // 4 bytes +CU_TENSOR_MAP_DATA_TYPE_FLOAT64, // 8 bytes +CU_TENSOR_MAP_DATA_TYPE_BFLOAT16, // 2 bytes +CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ, // 4 bytes +CU_TENSOR_MAP_DATA_TYPE_TFLOAT32, // 4 bytes +CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ // 4 bytes +} CUtensorMapDataType; + \endcode + + - \p tensorRank must be non-zero and less than or equal to the maximum supported dimensionality of 5. If \p interleave is not + ::CU_TENSOR_MAP_INTERLEAVE_NONE, then \p tensorRank must additionally be greater than or equal to 3. + + - \p globalAddress, which specifies the starting address of the memory region described, must be 32 byte aligned when \p interleave is + ::CU_TENSOR_MAP_INTERLEAVE_32B and 16 byte aligned otherwise. + + - \p globalDim array, which specifies tensor size of each of the \p tensorRank dimensions, must be non-zero and less than or + equal to 2^32. + + - \p globalStrides array, which specifies tensor stride of each of the lower \p tensorRank - 1 dimensions in bytes, must be a + multiple of 16 and less than 2^40. Additionally, the stride must be a multiple of 32 when \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_32B. + Each following dimension specified includes previous dimension stride: + \code +globalStrides[0] = globalDim[0] * elementSizeInBytes(tensorDataType) + padding[0]; +for (i = 1; i < tensorRank - 1; i++) +globalStrides[i] = globalStrides[i – 1] * (globalDim[i] + padding[i]); +assert(globalStrides[i] >= globalDim[i]); + \endcode + + - \p boxDim array, which specifies number of elements to be traversed along each of the \p tensorRank dimensions, must be non-zero + and less than or equal to 256. + When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_NONE, { \p boxDim[0] * elementSizeInBytes( \p tensorDataType ) } must be a multiple + of 16 bytes. + + - \p elementStrides array, which specifies the iteration step along each of the \p tensorRank dimensions, must be non-zero and less + than or equal to 8. Note that when \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_NONE, the first element of this array is ignored since + TMA doesn’t support the stride for dimension zero. + When all elements of \p elementStrides array is one, \p boxDim specifies the number of elements to load. However, if the \p elementStrides[i] + is not equal to one, then TMA loads ceil( \p boxDim[i] / \p elementStrides[i]) number of elements along i-th dimension. To load N elements along + i-th dimension, \p boxDim[i] must be set to N * \p elementStrides[i]. + + - \p interleave specifies the interleaved layout of type ::CUtensorMapInterleave, which is defined as: + \code +typedef enum CUtensorMapInterleave_enum { +CU_TENSOR_MAP_INTERLEAVE_NONE = 0, +CU_TENSOR_MAP_INTERLEAVE_16B, +CU_TENSOR_MAP_INTERLEAVE_32B +} CUtensorMapInterleave; + \endcode + TMA supports interleaved layouts like NC/8HWC8 where C8 utilizes 16 bytes in memory assuming 2 byte per channel or NC/16HWC16 where C16 + uses 32 bytes. + When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_NONE and \p swizzle is not ::CU_TENSOR_MAP_SWIZZLE_NONE, the bounding box inner dimension + (computed as \p boxDim[0] multiplied by element size derived from \p tensorDataType) must be less than or equal to the swizzle size. + - CU_TENSOR_MAP_SWIZZLE_32B implies the bounding box inner dimension will be <= 32. + - CU_TENSOR_MAP_SWIZZLE_64B implies the bounding box inner dimension will be <= 64. + - CU_TENSOR_MAP_SWIZZLE_128B implies the bounding box inner dimension will be <= 128. + + - \p swizzle, which specifies the shared memory bank swizzling pattern, has to be of type ::CUtensorMapSwizzle which is defined as: + \code +typedef enum CUtensorMapSwizzle_enum { +CU_TENSOR_MAP_SWIZZLE_NONE = 0, +CU_TENSOR_MAP_SWIZZLE_32B, +CU_TENSOR_MAP_SWIZZLE_64B, +CU_TENSOR_MAP_SWIZZLE_128B +} CUtensorMapSwizzle; + \endcode + Data are organized in a specific order in global memory; however, this may not match the order in which the application accesses data + in shared memory. This difference in data organization may cause bank conflicts when shared memory is accessed. In order to avoid this + problem, data can be loaded to shared memory with shuffling across shared memory banks. + When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_32B, \p swizzle must be ::CU_TENSOR_MAP_SWIZZLE_32B. + Other interleave modes can have any swizzling pattern. + + - \p l2Promotion specifies L2 fetch size which indicates the byte granurality at which L2 requests is filled from DRAM. It must be of + type ::CUtensorMapL2promotion, which is defined as: + \code +typedef enum CUtensorMapL2promotion_enum { +CU_TENSOR_MAP_L2_PROMOTION_NONE = 0, +CU_TENSOR_MAP_L2_PROMOTION_L2_64B, +CU_TENSOR_MAP_L2_PROMOTION_L2_128B, +CU_TENSOR_MAP_L2_PROMOTION_L2_256B +} CUtensorMapL2promotion; + \endcode + + - \p oobFill, which indicates whether zero or a special NaN constant should be used to fill out-of-bound elements, must be of type + ::CUtensorMapFloatOOBfill which is defined as: + \code +typedef enum CUtensorMapFloatOOBfill_enum { +CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE = 0, +CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA +} CUtensorMapFloatOOBfill; + \endcode + Note that ::CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA can only be used when \p tensorDataType represents a floating-point data type. + + \param tensorMap - Tensor map object to create + \param tensorDataType - Tensor data type + \param tensorRank - Dimensionality of tensor + \param globalAddress - Starting address of memory region described by tensor + \param globalDim - Array containing tensor size (number of elements) along each of the \p tensorRank dimensions + \param globalStrides - Array containing stride size (in bytes) along each of the \p tensorRank - 1 dimensions + \param boxDim - Array containing traversal box size (number of elments) along each of the \p tensorRank dimensions. Specifies how many elements to be traversed along each tensor dimension. + \param elementStrides - Array containing traversal stride in each of the \p tensorRank dimensions + \param interleave - Type of interleaved layout the tensor addresses + \param swizzle - Bank swizzling pattern inside shared memory + \param l2Promotion - L2 promotion size + \param oobFill - Indicate whether zero or special NaN constant must be used to fill out-of-bound elements + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTensorMapEncodeIm2col, + ::cuTensorMapReplaceAddress*/ + fn cuTensorMapEncodeTiled( + tensorMap: *mut cuda_types::CUtensorMap, + tensorDataType: cuda_types::CUtensorMapDataType, + tensorRank: cuda_types::cuuint32_t, + globalAddress: *mut ::core::ffi::c_void, + globalDim: *const cuda_types::cuuint64_t, + globalStrides: *const cuda_types::cuuint64_t, + boxDim: *const cuda_types::cuuint32_t, + elementStrides: *const cuda_types::cuuint32_t, + interleave: cuda_types::CUtensorMapInterleave, + swizzle: cuda_types::CUtensorMapSwizzle, + l2Promotion: cuda_types::CUtensorMapL2promotion, + oobFill: cuda_types::CUtensorMapFloatOOBfill, + ) -> cuda_types::CUresult; + /** \brief Create a tensor map descriptor object representing im2col memory region + + Creates a descriptor for Tensor Memory Access (TMA) object specified + by the parameters describing a im2col memory layout and returns it in \p tensorMap. + + Tensor map objects are only supported on devices of compute capability 9.0 or higher. + Additionally, a tensor map object is an opaque value, and, as such, should only be + accessed through CUDA API calls. + + The parameters passed are bound to the following requirements: + + - \p tensorMap address must be aligned to 64 bytes. + + - \p tensorDataType has to be an enum from ::CUtensorMapDataType which is defined as: + \code +typedef enum CUtensorMapDataType_enum { +CU_TENSOR_MAP_DATA_TYPE_UINT8 = 0, // 1 byte +CU_TENSOR_MAP_DATA_TYPE_UINT16, // 2 bytes +CU_TENSOR_MAP_DATA_TYPE_UINT32, // 4 bytes +CU_TENSOR_MAP_DATA_TYPE_INT32, // 4 bytes +CU_TENSOR_MAP_DATA_TYPE_UINT64, // 8 bytes +CU_TENSOR_MAP_DATA_TYPE_INT64, // 8 bytes +CU_TENSOR_MAP_DATA_TYPE_FLOAT16, // 2 bytes +CU_TENSOR_MAP_DATA_TYPE_FLOAT32, // 4 bytes +CU_TENSOR_MAP_DATA_TYPE_FLOAT64, // 8 bytes +CU_TENSOR_MAP_DATA_TYPE_BFLOAT16, // 2 bytes +CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ, // 4 bytes +CU_TENSOR_MAP_DATA_TYPE_TFLOAT32, // 4 bytes +CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ // 4 bytes +} CUtensorMapDataType; + \endcode + + - \p tensorRank, which specifies the number of tensor dimensions, must be 3, 4, or 5. + + - \p globalAddress, which specifies the starting address of the memory region described, must be 32 byte aligned when \p interleave is + ::CU_TENSOR_MAP_INTERLEAVE_32B and 16 byte aligned otherwise. + + - \p globalDim array, which specifies tensor size of each of the \p tensorRank dimensions, must be non-zero and less than or + equal to 2^32. + + - \p globalStrides array, which specifies tensor stride of each of the lower \p tensorRank - 1 dimensions in bytes, must be a + multiple of 16 and less than 2^40. Additionally, the stride must be a multiple of 32 when \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_32B. + Each following dimension specified includes previous dimension stride: + \code +globalStrides[0] = globalDim[0] * elementSizeInBytes(tensorDataType) + padding[0]; +for (i = 1; i < tensorRank - 1; i++) +globalStrides[i] = globalStrides[i – 1] * (globalDim[i] + padding[i]); +assert(globalStrides[i] >= globalDim[i]); + \endcode + + - \p pixelBoxLowerCorner array specifies the coordinate offsets {D, H, W} of the bounding box from top/left/front corner. The number of + offsets and their precision depend on the tensor dimensionality: + - When \p tensorRank is 3, one signed offset within range [-32768, 32767] is supported. + - When \p tensorRank is 4, two signed offsets each within range [-128, 127] are supported. + - When \p tensorRank is 5, three offsets each within range [-16, 15] are supported. + + - \p pixelBoxUpperCorner array specifies the coordinate offsets {D, H, W} of the bounding box from bottom/right/back corner. The number of + offsets and their precision depend on the tensor dimensionality: + - When \p tensorRank is 3, one signed offset within range [-32768, 32767] is supported. + - When \p tensorRank is 4, two signed offsets each within range [-128, 127] are supported. + - When \p tensorRank is 5, three offsets each within range [-16, 15] are supported. + The bounding box specified by \p pixelBoxLowerCorner and \p pixelBoxUpperCorner must have non-zero area. + + - \p channelsPerPixel, which specifies the number of elements which must be accessed along C dimension, must be less than or equal to 256. + + - \p pixelsPerColumn, which specifies the number of elements that must be accessed along the {N, D, H, W} dimensions, must be less than or + equal to 1024. + + - \p elementStrides array, which specifies the iteration step along each of the \p tensorRank dimensions, must be non-zero and less + than or equal to 8. Note that when \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_NONE, the first element of this array is ignored since + TMA doesn’t support the stride for dimension zero. + When all elements of the \p elementStrides array are one, \p boxDim specifies the number of elements to load. However, if \p elementStrides[i] + is not equal to one for some \p i, then TMA loads ceil( \p boxDim[i] / \p elementStrides[i]) number of elements along i-th dimension. + To load N elements along i-th dimension, \p boxDim[i] must be set to N * \p elementStrides[i]. + + - \p interleave specifies the interleaved layout of type ::CUtensorMapInterleave, which is defined as: + \code +typedef enum CUtensorMapInterleave_enum { +CU_TENSOR_MAP_INTERLEAVE_NONE = 0, +CU_TENSOR_MAP_INTERLEAVE_16B, +CU_TENSOR_MAP_INTERLEAVE_32B +} CUtensorMapInterleave; + \endcode + TMA supports interleaved layouts like NC/8HWC8 where C8 utilizes 16 bytes in memory assuming 2 byte per channel or NC/16HWC16 where C16 + uses 32 bytes. + When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_NONE and \p swizzle is not ::CU_TENSOR_MAP_SWIZZLE_NONE, the bounding box inner dimension + (computed as \p boxDim[0] multiplied by element size derived from \p tensorDataType) must be less than or equal to the swizzle size. + - CU_TENSOR_MAP_SWIZZLE_32B implies the bounding box inner dimension will be <= 32. + - CU_TENSOR_MAP_SWIZZLE_64B implies the bounding box inner dimension will be <= 64. + - CU_TENSOR_MAP_SWIZZLE_128B implies the bounding box inner dimension will be <= 128. + + - \p swizzle, which specifies the shared memory bank swizzling pattern, has to be of type ::CUtensorMapSwizzle which is defined as: + \code +typedef enum CUtensorMapSwizzle_enum { +CU_TENSOR_MAP_SWIZZLE_NONE = 0, +CU_TENSOR_MAP_SWIZZLE_32B, +CU_TENSOR_MAP_SWIZZLE_64B, +CU_TENSOR_MAP_SWIZZLE_128B +} CUtensorMapSwizzle; + \endcode + Data are organized in a specific order in global memory; however, this may not match the order in which the application accesses data + in shared memory. This difference in data organization may cause bank conflicts when shared memory is accessed. In order to avoid this + problem, data can be loaded to shared memory with shuffling across shared memory banks. + When \p interleave is ::CU_TENSOR_MAP_INTERLEAVE_32B, \p swizzle must be ::CU_TENSOR_MAP_SWIZZLE_32B. + Other interleave modes can have any swizzling pattern. + + - \p l2Promotion specifies L2 fetch size which indicates the byte granularity at which L2 requests are filled from DRAM. It must be of + type ::CUtensorMapL2promotion, which is defined as: + \code +typedef enum CUtensorMapL2promotion_enum { +CU_TENSOR_MAP_L2_PROMOTION_NONE = 0, +CU_TENSOR_MAP_L2_PROMOTION_L2_64B, +CU_TENSOR_MAP_L2_PROMOTION_L2_128B, +CU_TENSOR_MAP_L2_PROMOTION_L2_256B +} CUtensorMapL2promotion; + \endcode + + - \p oobFill, which indicates whether zero or a special NaN constant should be used to fill out-of-bound elements, must be of type + ::CUtensorMapFloatOOBfill which is defined as: + \code +typedef enum CUtensorMapFloatOOBfill_enum { +CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE = 0, +CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA +} CUtensorMapFloatOOBfill; + \endcode + Note that ::CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA can only be used when \p tensorDataType represents a floating-point data type. + + \param tensorMap - Tensor map object to create + \param tensorDataType - Tensor data type + \param tensorRank - Dimensionality of tensor; must be at least 3 + \param globalAddress - Starting address of memory region described by tensor + \param globalDim - Array containing tensor size (number of elements) along each of the \p tensorRank dimensions + \param globalStrides - Array containing stride size (in bytes) along each of the \p tensorRank - 1 dimensions + \param pixelBoxLowerCorner - Array containing DHW dimensions of lower box corner + \param pixelBoxUpperCorner - Array containing DHW dimensions of upper box corner + \param channelsPerPixel - Number of channels per pixel + \param pixelsPerColumn - Number of pixels per column + \param elementStrides - Array containing traversal stride in each of the \p tensorRank dimensions + \param interleave - Type of interleaved layout the tensor addresses + \param swizzle - Bank swizzling pattern inside shared memory + \param l2Promotion - L2 promotion size + \param oobFill - Indicate whether zero or special NaN constant will be used to fill out-of-bound elements + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTensorMapEncodeTiled, + ::cuTensorMapReplaceAddress*/ + fn cuTensorMapEncodeIm2col( + tensorMap: *mut cuda_types::CUtensorMap, + tensorDataType: cuda_types::CUtensorMapDataType, + tensorRank: cuda_types::cuuint32_t, + globalAddress: *mut ::core::ffi::c_void, + globalDim: *const cuda_types::cuuint64_t, + globalStrides: *const cuda_types::cuuint64_t, + pixelBoxLowerCorner: *const ::core::ffi::c_int, + pixelBoxUpperCorner: *const ::core::ffi::c_int, + channelsPerPixel: cuda_types::cuuint32_t, + pixelsPerColumn: cuda_types::cuuint32_t, + elementStrides: *const cuda_types::cuuint32_t, + interleave: cuda_types::CUtensorMapInterleave, + swizzle: cuda_types::CUtensorMapSwizzle, + l2Promotion: cuda_types::CUtensorMapL2promotion, + oobFill: cuda_types::CUtensorMapFloatOOBfill, + ) -> cuda_types::CUresult; + /** \brief Modify an existing tensor map descriptor with an updated global address + + Modifies the descriptor for Tensor Memory Access (TMA) object passed in \p tensorMap with + an updated \p globalAddress. + + Tensor map objects are only supported on devices of compute capability 9.0 or higher. + Additionally, a tensor map object is an opaque value, and, as such, should only be + accessed through CUDA API calls. + + \param tensorMap - Tensor map object to modify + \param globalAddress - Starting address of memory region described by tensor, must follow previous alignment requirements + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuTensorMapEncodeTiled, + ::cuTensorMapEncodeIm2col*/ + fn cuTensorMapReplaceAddress( + tensorMap: *mut cuda_types::CUtensorMap, + globalAddress: *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + /** \brief Queries if a device may directly access a peer device's memory. + + Returns in \p *canAccessPeer a value of 1 if contexts on \p dev are capable of + directly accessing memory from contexts on \p peerDev and 0 otherwise. + If direct access of \p peerDev from \p dev is possible, then access may be + enabled on two specific contexts by calling ::cuCtxEnablePeerAccess(). + + \param canAccessPeer - Returned access capability + \param dev - Device from which allocations on \p peerDev are to + be directly accessed. + \param peerDev - Device on which the allocations to be directly accessed + by \p dev reside. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_DEVICE + \notefnerr + + \sa + ::cuCtxEnablePeerAccess, + ::cuCtxDisablePeerAccess, + ::cudaDeviceCanAccessPeer*/ + fn cuDeviceCanAccessPeer( + canAccessPeer: *mut ::core::ffi::c_int, + dev: cuda_types::CUdevice, + peerDev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Enables direct access to memory allocations in a peer context. + + If both the current context and \p peerContext are on devices which support unified + addressing (as may be queried using ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING) and same + major compute capability, then on success all allocations from \p peerContext will + immediately be accessible by the current context. See \ref CUDA_UNIFIED for additional + details. + + Note that access granted by this call is unidirectional and that in order to access + memory from the current context in \p peerContext, a separate symmetric call + to ::cuCtxEnablePeerAccess() is required. + + Note that there are both device-wide and system-wide limitations per system + configuration, as noted in the CUDA Programming Guide under the section + "Peer-to-Peer Memory Access". + + Returns ::CUDA_ERROR_PEER_ACCESS_UNSUPPORTED if ::cuDeviceCanAccessPeer() indicates + that the ::CUdevice of the current context cannot directly access memory + from the ::CUdevice of \p peerContext. + + Returns ::CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED if direct access of + \p peerContext from the current context has already been enabled. + + Returns ::CUDA_ERROR_TOO_MANY_PEERS if direct peer access is not possible + because hardware resources required for peer access have been exhausted. + + Returns ::CUDA_ERROR_INVALID_CONTEXT if there is no current context, \p peerContext + is not a valid context, or if the current context is \p peerContext. + + Returns ::CUDA_ERROR_INVALID_VALUE if \p Flags is not 0. + + \param peerContext - Peer context to enable direct access to from the current context + \param Flags - Reserved for future use and must be set to 0 + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, + ::CUDA_ERROR_TOO_MANY_PEERS, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa + ::cuDeviceCanAccessPeer, + ::cuCtxDisablePeerAccess, + ::cudaDeviceEnablePeerAccess*/ + fn cuCtxEnablePeerAccess( + peerContext: cuda_types::CUcontext, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Disables direct access to memory allocations in a peer context and + unregisters any registered allocations. + +Returns ::CUDA_ERROR_PEER_ACCESS_NOT_ENABLED if direct peer access has + not yet been enabled from \p peerContext to the current context. + + Returns ::CUDA_ERROR_INVALID_CONTEXT if there is no current context, or if + \p peerContext is not a valid context. + + \param peerContext - Peer context to disable direct access to + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, + ::CUDA_ERROR_INVALID_CONTEXT, + \notefnerr + + \sa + ::cuDeviceCanAccessPeer, + ::cuCtxEnablePeerAccess, + ::cudaDeviceDisablePeerAccess*/ + fn cuCtxDisablePeerAccess( + peerContext: cuda_types::CUcontext, + ) -> cuda_types::CUresult; + /** \brief Queries attributes of the link between two devices. + + Returns in \p *value the value of the requested attribute \p attrib of the + link between \p srcDevice and \p dstDevice. The supported attributes are: + - ::CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK: A relative value indicating the + performance of the link between two devices. + - ::CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED P2P: 1 if P2P Access is enable. + - ::CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED: 1 if Atomic operations over + the link are supported. + - ::CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED: 1 if cudaArray can + be accessed over the link. + + Returns ::CUDA_ERROR_INVALID_DEVICE if \p srcDevice or \p dstDevice are not valid + or if they represent the same device. + + Returns ::CUDA_ERROR_INVALID_VALUE if \p attrib is not valid or if \p value is + a null pointer. + + \param value - Returned value of the requested attribute + \param attrib - The requested attribute of the link between \p srcDevice and \p dstDevice. + \param srcDevice - The source device of the target link. + \param dstDevice - The destination device of the target link. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa + ::cuCtxEnablePeerAccess, + ::cuCtxDisablePeerAccess, + ::cuDeviceCanAccessPeer, + ::cudaDeviceGetP2PAttribute*/ + fn cuDeviceGetP2PAttribute( + value: *mut ::core::ffi::c_int, + attrib: cuda_types::CUdevice_P2PAttribute, + srcDevice: cuda_types::CUdevice, + dstDevice: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Unregisters a graphics resource for access by CUDA + + Unregisters the graphics resource \p resource so it is not accessible by + CUDA unless registered again. + + If \p resource is invalid then ::CUDA_ERROR_INVALID_HANDLE is + returned. + + \param resource - Resource to unregister + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa + ::cuGraphicsD3D9RegisterResource, + ::cuGraphicsD3D10RegisterResource, + ::cuGraphicsD3D11RegisterResource, + ::cuGraphicsGLRegisterBuffer, + ::cuGraphicsGLRegisterImage, + ::cudaGraphicsUnregisterResource*/ + fn cuGraphicsUnregisterResource( + resource: cuda_types::CUgraphicsResource, + ) -> cuda_types::CUresult; + /** \brief Get an array through which to access a subresource of a mapped graphics resource. + + Returns in \p *pArray an array through which the subresource of the mapped + graphics resource \p resource which corresponds to array index \p arrayIndex + and mipmap level \p mipLevel may be accessed. The value set in \p *pArray may + change every time that \p resource is mapped. + + If \p resource is not a texture then it cannot be accessed via an array and + ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned. + If \p arrayIndex is not a valid array index for \p resource then + ::CUDA_ERROR_INVALID_VALUE is returned. + If \p mipLevel is not a valid mipmap level for \p resource then + ::CUDA_ERROR_INVALID_VALUE is returned. + If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned. + + \param pArray - Returned array through which a subresource of \p resource may be accessed + \param resource - Mapped resource to access + \param arrayIndex - Array index for array textures or cubemap face + index as defined by ::CUarray_cubemap_face for + cubemap textures for the subresource to access + \param mipLevel - Mipmap level for the subresource to access + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_MAPPED, + ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY + \notefnerr + + \sa + ::cuGraphicsResourceGetMappedPointer, + ::cudaGraphicsSubResourceGetMappedArray*/ + fn cuGraphicsSubResourceGetMappedArray( + pArray: *mut cuda_types::CUarray, + resource: cuda_types::CUgraphicsResource, + arrayIndex: ::core::ffi::c_uint, + mipLevel: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Get a mipmapped array through which to access a mapped graphics resource. + + Returns in \p *pMipmappedArray a mipmapped array through which the mapped graphics + resource \p resource. The value set in \p *pMipmappedArray may change every time + that \p resource is mapped. + + If \p resource is not a texture then it cannot be accessed via a mipmapped array and + ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY is returned. + If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned. + + \param pMipmappedArray - Returned mipmapped array through which \p resource may be accessed + \param resource - Mapped resource to access + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_MAPPED, + ::CUDA_ERROR_NOT_MAPPED_AS_ARRAY + \notefnerr + + \sa + ::cuGraphicsResourceGetMappedPointer, + ::cudaGraphicsResourceGetMappedMipmappedArray*/ + fn cuGraphicsResourceGetMappedMipmappedArray( + pMipmappedArray: *mut cuda_types::CUmipmappedArray, + resource: cuda_types::CUgraphicsResource, + ) -> cuda_types::CUresult; + /** \brief Get a device pointer through which to access a mapped graphics resource. + + Returns in \p *pDevPtr a pointer through which the mapped graphics resource + \p resource may be accessed. + Returns in \p pSize the size of the memory in bytes which may be accessed from that pointer. + The value set in \p pPointer may change every time that \p resource is mapped. + + If \p resource is not a buffer then it cannot be accessed via a pointer and + ::CUDA_ERROR_NOT_MAPPED_AS_POINTER is returned. + If \p resource is not mapped then ::CUDA_ERROR_NOT_MAPPED is returned. + * + \param pDevPtr - Returned pointer through which \p resource may be accessed + \param pSize - Returned size of the buffer accessible starting at \p *pPointer + \param resource - Mapped resource to access + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_MAPPED, + ::CUDA_ERROR_NOT_MAPPED_AS_POINTER + \notefnerr + + \sa + ::cuGraphicsMapResources, + ::cuGraphicsSubResourceGetMappedArray, + ::cudaGraphicsResourceGetMappedPointer*/ + fn cuGraphicsResourceGetMappedPointer_v2( + pDevPtr: *mut cuda_types::CUdeviceptr, + pSize: *mut usize, + resource: cuda_types::CUgraphicsResource, + ) -> cuda_types::CUresult; + /** \brief Set usage flags for mapping a graphics resource + + Set \p flags for mapping the graphics resource \p resource. + + Changes to \p flags will take effect the next time \p resource is mapped. + The \p flags argument may be any of the following: + + - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this + resource will be used. It is therefore assumed that this resource will be + read from and written to by CUDA kernels. This is the default value. + - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READONLY: Specifies that CUDA kernels which + access this resource will not write to this resource. + - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITEDISCARD: Specifies that CUDA kernels + which access this resource will not read from this resource and will + write over the entire contents of the resource, so none of the data + previously stored in the resource will be preserved. + + If \p resource is presently mapped for access by CUDA then + ::CUDA_ERROR_ALREADY_MAPPED is returned. + If \p flags is not one of the above values then ::CUDA_ERROR_INVALID_VALUE is returned. + + \param resource - Registered resource to set flags for + \param flags - Parameters for resource mapping + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_ALREADY_MAPPED + \notefnerr + + \sa + ::cuGraphicsMapResources, + ::cudaGraphicsResourceSetMapFlags*/ + fn cuGraphicsResourceSetMapFlags_v2( + resource: cuda_types::CUgraphicsResource, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Map graphics resources for access by CUDA + + Maps the \p count graphics resources in \p resources for access by CUDA. + + The resources in \p resources may be accessed by CUDA until they + are unmapped. The graphics API from which \p resources were registered + should not access any resources while they are mapped by CUDA. If an + application does so, the results are undefined. + + This function provides the synchronization guarantee that any graphics calls + issued before ::cuGraphicsMapResources() will complete before any subsequent CUDA + work issued in \p stream begins. + + If \p resources includes any duplicate entries then ::CUDA_ERROR_INVALID_HANDLE is returned. + If any of \p resources are presently mapped for access by CUDA then ::CUDA_ERROR_ALREADY_MAPPED is returned. + + \param count - Number of resources to map + \param resources - Resources to map for CUDA usage + \param hStream - Stream with which to synchronize + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_ALREADY_MAPPED, + ::CUDA_ERROR_UNKNOWN + \note_null_stream + \notefnerr + + \sa + ::cuGraphicsResourceGetMappedPointer, + ::cuGraphicsSubResourceGetMappedArray, + ::cuGraphicsUnmapResources, + ::cudaGraphicsMapResources*/ + fn cuGraphicsMapResources_ptsz( + count: ::core::ffi::c_uint, + resources: *mut cuda_types::CUgraphicsResource, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Unmap graphics resources. + + Unmaps the \p count graphics resources in \p resources. + + Once unmapped, the resources in \p resources may not be accessed by CUDA + until they are mapped again. + + This function provides the synchronization guarantee that any CUDA work issued + in \p stream before ::cuGraphicsUnmapResources() will complete before any + subsequently issued graphics work begins. + + + If \p resources includes any duplicate entries then ::CUDA_ERROR_INVALID_HANDLE is returned. + If any of \p resources are not presently mapped for access by CUDA then ::CUDA_ERROR_NOT_MAPPED is returned. + + \param count - Number of resources to unmap + \param resources - Resources to unmap + \param hStream - Stream with which to synchronize + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_MAPPED, + ::CUDA_ERROR_UNKNOWN + \note_null_stream + \notefnerr + + \sa + ::cuGraphicsMapResources, + ::cudaGraphicsUnmapResources*/ + fn cuGraphicsUnmapResources_ptsz( + count: ::core::ffi::c_uint, + resources: *mut cuda_types::CUgraphicsResource, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Returns the requested driver API function pointer + + Returns in \p **pfn the address of the CUDA driver function for the requested + CUDA version and flags. + + The CUDA version is specified as (1000 * major + 10 * minor), so CUDA 11.2 + should be specified as 11020. For a requested driver symbol, if the specified + CUDA version is greater than or equal to the CUDA version in which the driver symbol + was introduced, this API will return the function pointer to the corresponding + versioned function. + + The pointer returned by the API should be cast to a function pointer matching the + requested driver function's definition in the API header file. The function pointer + typedef can be picked up from the corresponding typedefs header file. For example, + cudaTypedefs.h consists of function pointer typedefs for driver APIs defined in cuda.h. + + The API will return ::CUDA_SUCCESS and set the returned \p pfn to NULL if the + requested driver function is not supported on the platform, no ABI + compatible driver function exists for the specified \p cudaVersion or if the + driver symbol is invalid. + + It will also set the optional \p symbolStatus to one of the values in + ::CUdriverProcAddressQueryResult with the following meanings: + - ::CU_GET_PROC_ADDRESS_SUCCESS - The requested symbol was succesfully found based + on input arguments and \p pfn is valid + - ::CU_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND - The requested symbol was not found + - ::CU_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT - The requested symbol was found but is + not supported by cudaVersion specified + + The requested flags can be: + - ::CU_GET_PROC_ADDRESS_DEFAULT: This is the default mode. This is equivalent to + ::CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM if the code is compiled with + --default-stream per-thread compilation flag or the macro CUDA_API_PER_THREAD_DEFAULT_STREAM + is defined; ::CU_GET_PROC_ADDRESS_LEGACY_STREAM otherwise. + - ::CU_GET_PROC_ADDRESS_LEGACY_STREAM: This will enable the search for all driver symbols + that match the requested driver symbol name except the corresponding per-thread versions. + - ::CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM: This will enable the search for all + driver symbols that match the requested driver symbol name including the per-thread + versions. If a per-thread version is not found, the API will return the legacy version + of the driver function. + + \param symbol - The base name of the driver API function to look for. As an example, + for the driver API ::cuMemAlloc_v2, \p symbol would be cuMemAlloc and + \p cudaVersion would be the ABI compatible CUDA version for the _v2 variant. + \param pfn - Location to return the function pointer to the requested driver function + \param cudaVersion - The CUDA version to look for the requested driver symbol + \param flags - Flags to specify search options. + \param symbolStatus - Optional location to store the status of the search for + \p symbol based on \p cudaVersion. See ::CUdriverProcAddressQueryResult + for possible values. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_SUPPORTED + \note_version_mixing + + \sa + ::cudaGetDriverEntryPoint*/ + fn cuGetProcAddress_v2( + symbol: *const ::core::ffi::c_char, + pfn: *mut *mut ::core::ffi::c_void, + cudaVersion: ::core::ffi::c_int, + flags: cuda_types::cuuint64_t, + symbolStatus: *mut cuda_types::CUdriverProcAddressQueryResult, + ) -> cuda_types::CUresult; + /** \brief Allows caller to fetch a coredump attribute value for the current context + + Returns in \p *value the requested value specified by \p attrib. It is up to the caller + to ensure that the data type and size of \p *value matches the request. + + If the caller calls this function with \p *value equal to NULL, the size of the memory + region (in bytes) expected for \p attrib will be placed in \p size. + + The supported attributes are: + - ::CU_COREDUMP_ENABLE_ON_EXCEPTION: Bool where ::true means that GPU exceptions from + this context will create a coredump at the location specified by ::CU_COREDUMP_FILE. + The default value is ::false unless set to ::true globally or locally, or the + CU_CTX_USER_COREDUMP_ENABLE flag was set during context creation. + - ::CU_COREDUMP_TRIGGER_HOST: Bool where ::true means that the host CPU will + also create a coredump. The default value is ::true unless set to ::false globally or + or locally. + - ::CU_COREDUMP_LIGHTWEIGHT: Bool where ::true means that any resulting coredumps + will not have a dump of GPU memory or non-reloc ELF images. The default value is + ::false unless set to ::true globally or locally. + - ::CU_COREDUMP_ENABLE_USER_TRIGGER: Bool where ::true means that a coredump can be + created by writing to the system pipe specified by ::CU_COREDUMP_PIPE. The default + value is ::false unless set to ::true globally or locally. + - ::CU_COREDUMP_FILE: String of up to 1023 characters that defines the location where + any coredumps generated by this context will be written. The default value is + ::core.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine running + the CUDA applications and ::PID is the process ID of the CUDA application. + - ::CU_COREDUMP_PIPE: String of up to 1023 characters that defines the name of the pipe + that will be monitored if user-triggered coredumps are enabled. The default value is + ::corepipe.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine running + the CUDA application and ::PID is the process ID of the CUDA application. + + \param attrib - The enum defining which value to fetch. + \param value - void* containing the requested data. + \param size - The size of the memory region \p value points to. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_CONTEXT_IS_DESTROYED + + \sa + ::cuCoredumpGetAttributeGlobal, + ::cuCoredumpSetAttribute, + ::cuCoredumpSetAttributeGlobal*/ + fn cuCoredumpGetAttribute( + attrib: cuda_types::CUcoredumpSettings, + value: *mut ::core::ffi::c_void, + size: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Allows caller to fetch a coredump attribute value for the entire application + + Returns in \p *value the requested value specified by \p attrib. It is up to the caller + to ensure that the data type and size of \p *value matches the request. + + If the caller calls this function with \p *value equal to NULL, the size of the memory + region (in bytes) expected for \p attrib will be placed in \p size. + + The supported attributes are: + - ::CU_COREDUMP_ENABLE_ON_EXCEPTION: Bool where ::true means that GPU exceptions from + this context will create a coredump at the location specified by ::CU_COREDUMP_FILE. + The default value is ::false. + - ::CU_COREDUMP_TRIGGER_HOST: Bool where ::true means that the host CPU will + also create a coredump. The default value is ::true. + - ::CU_COREDUMP_LIGHTWEIGHT: Bool where ::true means that any resulting coredumps + will not have a dump of GPU memory or non-reloc ELF images. The default value is + ::false. + - ::CU_COREDUMP_ENABLE_USER_TRIGGER: Bool where ::true means that a coredump can be + created by writing to the system pipe specified by ::CU_COREDUMP_PIPE. The default + value is ::false. + - ::CU_COREDUMP_FILE: String of up to 1023 characters that defines the location where + any coredumps generated by this context will be written. The default value is + ::core.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine running + the CUDA applications and ::PID is the process ID of the CUDA application. + - ::CU_COREDUMP_PIPE: String of up to 1023 characters that defines the name of the pipe + that will be monitored if user-triggered coredumps are enabled. The default value is + ::corepipe.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine running + the CUDA application and ::PID is the process ID of the CUDA application. + + \param attrib - The enum defining which value to fetch. + \param value - void* containing the requested data. + \param size - The size of the memory region \p value points to. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuCoredumpGetAttribute, + ::cuCoredumpSetAttribute, + ::cuCoredumpSetAttributeGlobal*/ + fn cuCoredumpGetAttributeGlobal( + attrib: cuda_types::CUcoredumpSettings, + value: *mut ::core::ffi::c_void, + size: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Allows caller to set a coredump attribute value for the current context + + This function should be considered an alternate interface to the CUDA-GDB environment + variables defined in this document: https://docs.nvidia.com/cuda/cuda-gdb/index.html#gpu-coredump + + An important design decision to note is that any coredump environment variable values + set before CUDA initializes will take permanent precedence over any values set with this + this function. This decision was made to ensure no change in behavior for any users that + may be currently using these variables to get coredumps. + + \p *value shall contain the requested value specified by \p set. It is up to the caller + to ensure that the data type and size of \p *value matches the request. + + If the caller calls this function with \p *value equal to NULL, the size of the memory + region (in bytes) expected for \p set will be placed in \p size. + + /note This function will return ::CUDA_ERROR_NOT_SUPPORTED if the caller attempts to set + ::CU_COREDUMP_ENABLE_ON_EXCEPTION on a GPU of with Compute Capability < 6.0. ::cuCoredumpSetAttributeGlobal + works on those platforms as an alternative. + + /note ::CU_COREDUMP_ENABLE_USER_TRIGGER and ::CU_COREDUMP_PIPE cannot be set on a per-context basis. + + The supported attributes are: + - ::CU_COREDUMP_ENABLE_ON_EXCEPTION: Bool where ::true means that GPU exceptions from + this context will create a coredump at the location specified by ::CU_COREDUMP_FILE. + The default value is ::false. + - ::CU_COREDUMP_TRIGGER_HOST: Bool where ::true means that the host CPU will + also create a coredump. The default value is ::true. + - ::CU_COREDUMP_LIGHTWEIGHT: Bool where ::true means that any resulting coredumps + will not have a dump of GPU memory or non-reloc ELF images. The default value is + ::false. + - ::CU_COREDUMP_FILE: String of up to 1023 characters that defines the location where + any coredumps generated by this context will be written. The default value is + ::core.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine running + the CUDA applications and ::PID is the process ID of the CUDA application. + + \param attrib - The enum defining which value to set. + \param value - void* containing the requested data. + \param size - The size of the memory region \p value points to. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_PERMITTED, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_CONTEXT_IS_DESTROYED, + ::CUDA_ERROR_NOT_SUPPORTED + + \sa + ::cuCoredumpGetAttributeGlobal, + ::cuCoredumpGetAttribute, + ::cuCoredumpSetAttributeGlobal*/ + fn cuCoredumpSetAttribute( + attrib: cuda_types::CUcoredumpSettings, + value: *mut ::core::ffi::c_void, + size: *mut usize, + ) -> cuda_types::CUresult; + /** \brief Allows caller to set a coredump attribute value globally + + This function should be considered an alternate interface to the CUDA-GDB environment + variables defined in this document: https://docs.nvidia.com/cuda/cuda-gdb/index.html#gpu-coredump + + An important design decision to note is that any coredump environment variable values + set before CUDA initializes will take permanent precedence over any values set with this + this function. This decision was made to ensure no change in behavior for any users that + may be currently using these variables to get coredumps. + + \p *value shall contain the requested value specified by \p set. It is up to the caller + to ensure that the data type and size of \p *value matches the request. + + If the caller calls this function with \p *value equal to NULL, the size of the memory + region (in bytes) expected for \p set will be placed in \p size. + + The supported attributes are: + - ::CU_COREDUMP_ENABLE_ON_EXCEPTION: Bool where ::true means that GPU exceptions from + this context will create a coredump at the location specified by ::CU_COREDUMP_FILE. + The default value is ::false. + - ::CU_COREDUMP_TRIGGER_HOST: Bool where ::true means that the host CPU will + also create a coredump. The default value is ::true. + - ::CU_COREDUMP_LIGHTWEIGHT: Bool where ::true means that any resulting coredumps + will not have a dump of GPU memory or non-reloc ELF images. The default value is + ::false. + - ::CU_COREDUMP_ENABLE_USER_TRIGGER: Bool where ::true means that a coredump can be + created by writing to the system pipe specified by ::CU_COREDUMP_PIPE. The default + value is ::false. + - ::CU_COREDUMP_FILE: String of up to 1023 characters that defines the location where + any coredumps generated by this context will be written. The default value is + ::core.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine running + the CUDA applications and ::PID is the process ID of the CUDA application. + - ::CU_COREDUMP_PIPE: String of up to 1023 characters that defines the name of the pipe + that will be monitored if user-triggered coredumps are enabled. This value may not be + changed after ::CU_COREDUMP_ENABLE_USER_TRIGGER is set to ::true. The default + value is ::corepipe.cuda.HOSTNAME.PID where ::HOSTNAME is the host name of the machine + running the CUDA application and ::PID is the process ID of the CUDA application. + + \param attrib - The enum defining which value to set. + \param value - void* containing the requested data. + \param size - The size of the memory region \p value points to. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_PERMITTED + + \sa + ::cuCoredumpGetAttribute, + ::cuCoredumpGetAttributeGlobal, + ::cuCoredumpSetAttribute*/ + fn cuCoredumpSetAttributeGlobal( + attrib: cuda_types::CUcoredumpSettings, + value: *mut ::core::ffi::c_void, + size: *mut usize, + ) -> cuda_types::CUresult; + /// @} + fn cuGetExportTable( + ppExportTable: *mut *const ::core::ffi::c_void, + pExportTableId: *const cuda_types::CUuuid, + ) -> cuda_types::CUresult; + /** \brief Creates a green context with a specified set of resources. + + This API creates a green context with the resources specified in the descriptor \p desc and + returns it in the handle represented by \p phCtx. This API will retain the primary context on device \p dev, + which will is released when the green context is destroyed. It is advised to have the primary context active + before calling this API to avoid the heavy cost of triggering primary context initialization and + deinitialization multiple times. + + The API does not set the green context current. In order to set it current, you need to explicitly set it current + by first converting the green context to a CUcontext using ::cuCtxFromGreenCtx and subsequently calling + ::cuCtxSetCurrent / ::cuCtxPushCurrent. It should be noted that a green context can be current to only one + thread at a time. There is no internal synchronization to make API calls accessing the same green context + from multiple threads work. + + Note: The API is not supported on 32-bit platforms. + + \param phCtx - Pointer for the output handle to the green context + \param desc - Descriptor generated via ::cuDevResourceGenerateDesc which contains the set of resources to be used + \param dev - Device on which to create the green context. + \param flags - One of the supported green context creation flags. \p CU_GREEN_CTX_DEFAULT_STREAM is required. + + The supported flags are: + - \p CU_GREEN_CTX_DEFAULT_STREAM : Creates a default stream to use inside the green context. Required. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_OUT_OF_MEMORY + + \sa + ::cuGreenCtxDestroy, + ::cuCtxFromGreenCtx, + ::cuCtxSetCurrent, + ::cuCtxPushCurrent, + ::cuDevResourceGenerateDesc, + ::cuDevicePrimaryCtxRetain, + ::cuCtxCreate, + ::cuCtxCreate_v3*/ + fn cuGreenCtxCreate( + phCtx: *mut cuda_types::CUgreenCtx, + desc: cuda_types::CUdevResourceDesc, + dev: cuda_types::CUdevice, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Destroys a green context + + Destroys the green context, releasing the primary context of the device that this green context was created for. + Any resources provisioned for this green context (that were initially available via the resource descriptor) + are released as well. + \param hCtx - Green context to be destroyed + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_CONTEXT_IS_DESTROYED + + \sa + ::cuGreenCtxCreate, + ::cuCtxDestroy*/ + fn cuGreenCtxDestroy(hCtx: cuda_types::CUgreenCtx) -> cuda_types::CUresult; + /** \brief Converts a green context into the primary context + + The API converts a green context into the primary context returned in \p pContext. It is important + to note that the converted context \p pContext is a normal primary context but with + the resources of the specified green context \p hCtx. Once converted, it can then + be used to set the context current with ::cuCtxSetCurrent or with any of the CUDA APIs + that accept a CUcontext parameter. + + Users are expected to call this API before calling any CUDA APIs that accept a + CUcontext. Failing to do so will result in the APIs returning ::CUDA_ERROR_INVALID_CONTEXT. + + \param pContext Returned primary context with green context resources + \param hCtx Green context to convert + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuGreenCtxCreate*/ + fn cuCtxFromGreenCtx( + pContext: *mut cuda_types::CUcontext, + hCtx: cuda_types::CUgreenCtx, + ) -> cuda_types::CUresult; + /** \brief Get device resources + + Get the \p type resources available to the \p device. + This may often be the starting point for further partitioning or configuring of resources. + + Note: The API is not supported on 32-bit platforms. + + \param device - Device to get resource for + \param resource - Output pointer to a CUdevResource structure + \param type - Type of resource to retrieve + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_RESOURCE_TYPE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_DEVICE + + \sa + ::cuDevResourceGenerateDesc*/ + fn cuDeviceGetDevResource( + device: cuda_types::CUdevice, + resource: *mut cuda_types::CUdevResource, + type_: cuda_types::CUdevResourceType, + ) -> cuda_types::CUresult; + /** \brief Get context resources + + Get the \p type resources available to the context represented by \p hCtx + \param hCtx - Context to get resource for + + Note: The API is not supported on 32-bit platforms. + + \param resource - Output pointer to a CUdevResource structure + \param type - Type of resource to retrieve + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_RESOURCE_TYPE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_CONTEXT + + \sa + ::cuDevResourceGenerateDesc*/ + fn cuCtxGetDevResource( + hCtx: cuda_types::CUcontext, + resource: *mut cuda_types::CUdevResource, + type_: cuda_types::CUdevResourceType, + ) -> cuda_types::CUresult; + /** \brief Get green context resources + + Get the \p type resources available to the green context represented by \p hCtx + \param hCtx - Green context to get resource for + \param resource - Output pointer to a CUdevResource structure + \param type - Type of resource to retrieve + + \return + ::CUDA_SUCCESS + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_RESOURCE_TYPE, + ::CUDA_ERROR_INVALID_VALUE + + \sa + ::cuDevResourceGenerateDesc*/ + fn cuGreenCtxGetDevResource( + hCtx: cuda_types::CUgreenCtx, + resource: *mut cuda_types::CUdevResource, + type_: cuda_types::CUdevResourceType, + ) -> cuda_types::CUresult; + /** \brief Splits \p CU_DEV_RESOURCE_TYPE_SM resources. + + Splits \p CU_DEV_RESOURCE_TYPE_SM resources into \p nbGroups, adhering to the minimum SM count specified in \p minCount + and the usage flags in \p useFlags. If \p result is NULL, the API simulates a split and provides the amount of groups that + would be created in \p nbGroups. Otherwise, \p nbGroups must point to the amount of elements in \p result and on return, + the API will overwrite \p nbGroups with the amount actually created. The groups are written to the array in \p result. + \p nbGroups can be less than the total amount if a smaller number of groups is needed. + + This API is used to spatially partition the input resource. The input resource needs to come from one of + ::cuDeviceGetDevResource, ::cuCtxGetDevResource, or ::cuGreenCtxGetDevResource. + A limitation of the API is that the output results cannot be split again without + first creating a descriptor and a green context with that descriptor. + + When creating the groups, the API will take into account the performance and functional characteristics of the + input resource, and guarantee a split that will create a disjoint set of symmetrical partitions. This may lead to less groups created + than purely dividing the total SM count by the \p minCount due to cluster requirements or + alignment and granularity requirements for the minCount. + + The \p remainder set, might not have the same functional or performance guarantees as the groups in \p result. + Its use should be carefully planned and future partitions of the \p remainder set are discouraged. + + A successful API call must either have: + - A valid array of \p result pointers of size passed in \p nbGroups, with \p Input of type \p CU_DEV_RESOURCE_TYPE_SM. + Value of \p minCount must be between 0 and the SM count specified in \p input. \p remaining and \p useFlags are optional. + - NULL passed in for \p result, with a valid integer pointer in \p nbGroups and \p Input of type \p CU_DEV_RESOURCE_TYPE_SM. + Value of \p minCount must be between 0 and the SM count specified in \p input. + This queries the number of groups that would be created by the API. + + Note: The API is not supported on 32-bit platforms. + + \param result - Output array of \p CUdevResource resources. Can be NULL to query the number of groups. + \param nbGroups - This is a pointer, specifying the number of groups that would be or should be created as described below. + \param input - Input SM resource to be split. Must be a valid \p CU_DEV_RESOURCE_TYPE_SM resource. + \param remaining - If the input resource cannot be cleanly split among \p nbGroups, the remaining is placed in here. + Can be ommitted (NULL) if the user does not need the remaining set. + \param useFlags - Flags specifying how these partitions are used or which constraints to abide by when splitting the input. + \param minCount - Minimum number of SMs required + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_RESOURCE_TYPE, + ::CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION + + \sa + ::cuGreenCtxGetDevResource, + ::cuCtxGetDevResource, + ::cuDeviceGetDevResource*/ + fn cuDevSmResourceSplitByCount( + result: *mut cuda_types::CUdevResource, + nbGroups: *mut ::core::ffi::c_uint, + input: *const cuda_types::CUdevResource, + remaining: *mut cuda_types::CUdevResource, + useFlags: ::core::ffi::c_uint, + minCount: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Generate a resource descriptor + + Generates a resource descriptor with the set of resources specified in \p resources. + The generated resource descriptor is necessary for the creation of green contexts via the ::cuGreenCtxCreate API. + The API expects \p nbResources == 1, as there is only one type of resource and merging the same + types of resource is currently not supported. + + Note: The API is not supported on 32-bit platforms. + + \param phDesc - Output descriptor + \param resources - Array of resources to be included in the descriptor + \param nbResources - Number of resources passed in \p resources + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_RESOURCE_TYPE, + ::CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION + + \sa + ::cuDevSmResourceSplitByCount*/ + fn cuDevResourceGenerateDesc( + phDesc: *mut cuda_types::CUdevResourceDesc, + resources: *mut cuda_types::CUdevResource, + nbResources: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Records an event. + + Captures in \phEvent all the activities of the green context of \phCtx + at the time of this call. \phEvent and \phCtx must be from the same + CUDA context. Calls such as ::cuEventQuery() or ::cuGreenCtxWaitEvent() will + then examine or wait for completion of the work that was captured. Uses of + \p hCtx after this call do not modify \p hEvent. + + \note The API will return an error if the specified green context \p hCtx + has a stream in the capture mode. In such a case, the call will invalidate + all the conflicting captures. + + \param hCtx - Green context to record event for + \param hEvent - Event to record + + \return + ::CUDA_SUCCESS + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE + + \sa + ::cuGreenCtxWaitEvent, + ::cuEventRecord*/ + fn cuGreenCtxRecordEvent( + hCtx: cuda_types::CUgreenCtx, + hEvent: cuda_types::CUevent, + ) -> cuda_types::CUresult; + /** \brief Make a green context wait on an event + + Makes all future work submitted to green context \phCtx wait for all work + captured in \phEvent. The synchronization will be performed on the device + and will not block the calling CPU thread. See ::cuGreenCtxRecordEvent() + for details on what is captured by an event. + + \note The API will return an error and invalidate the capture if the specified + event \p hEvent is part of an ongoing capture sequence. + + \param hCtx - Green context to wait + \param hEvent - Event to wait on (may not be NULL) + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE + + \sa + ::cuGreenCtxRecordEvent, + ::cuStreamWaitEvent*/ + fn cuGreenCtxWaitEvent( + hCtx: cuda_types::CUgreenCtx, + hEvent: cuda_types::CUevent, + ) -> cuda_types::CUresult; + /** \brief Query the green context associated with a stream + + Returns the CUDA green context that the stream is associated with, or NULL if the stream + is not associated with any green context. + + The stream handle \p hStream can refer to any of the following: + <ul> + <li> + a stream created via any of the CUDA driver APIs such as ::cuStreamCreate. + If during stream creation the context that was active in the calling thread was obtained + with cuCtxFromGreenCtx, that green context is returned in \p phCtx. + Otherwise, \p *phCtx is set to NULL instead. + </li> + <li> + special stream such as the NULL stream or ::CU_STREAM_LEGACY. + In that case if context that is active in the calling thread was obtained + with cuCtxFromGreenCtx, that green context is returned. + Otherwise, \p *phCtx is set to NULL instead. + </li> + </ul> + Passing an invalid handle will result in undefined behavior. + + \param hStream - Handle to the stream to be queried + \param phCtx - Returned green context associated with the stream + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + \notefnerr + + \sa ::cuStreamDestroy, + ::cuStreamCreateWithPriority, + ::cuStreamGetPriority, + ::cuStreamGetFlags, + ::cuStreamWaitEvent, + ::cuStreamQuery, + ::cuStreamSynchronize, + ::cuStreamAddCallback, + ::cudaStreamCreate, + ::cudaStreamCreateWithFlags*/ + fn cuStreamGetGreenCtx( + hStream: cuda_types::CUstream, + phCtx: *mut cuda_types::CUgreenCtx, + ) -> cuda_types::CUresult; + fn cuMemHostRegister( + p: *mut ::core::ffi::c_void, + bytesize: usize, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuGraphicsResourceSetMapFlags( + resource: cuda_types::CUgraphicsResource, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuLinkCreate( + numOptions: ::core::ffi::c_uint, + options: *mut cuda_types::CUjit_option, + optionValues: *mut *mut ::core::ffi::c_void, + stateOut: *mut cuda_types::CUlinkState, + ) -> cuda_types::CUresult; + fn cuLinkAddData( + state: cuda_types::CUlinkState, + type_: cuda_types::CUjitInputType, + data: *mut ::core::ffi::c_void, + size: usize, + name: *const ::core::ffi::c_char, + numOptions: ::core::ffi::c_uint, + options: *mut cuda_types::CUjit_option, + optionValues: *mut *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + fn cuLinkAddFile( + state: cuda_types::CUlinkState, + type_: cuda_types::CUjitInputType, + path: *const ::core::ffi::c_char, + numOptions: ::core::ffi::c_uint, + options: *mut cuda_types::CUjit_option, + optionValues: *mut *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + fn cuTexRefSetAddress2D_v2( + hTexRef: cuda_types::CUtexref, + desc: *const cuda_types::CUDA_ARRAY_DESCRIPTOR, + dptr: cuda_types::CUdeviceptr, + Pitch: usize, + ) -> cuda_types::CUresult; + fn cuDeviceTotalMem( + bytes: *mut ::core::ffi::c_uint, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + fn cuCtxCreate( + pctx: *mut cuda_types::CUcontext, + flags: ::core::ffi::c_uint, + dev: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + fn cuModuleGetGlobal( + dptr: *mut cuda_types::CUdeviceptr_v1, + bytes: *mut ::core::ffi::c_uint, + hmod: cuda_types::CUmodule, + name: *const ::core::ffi::c_char, + ) -> cuda_types::CUresult; + fn cuMemGetInfo( + free: *mut ::core::ffi::c_uint, + total: *mut ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemAlloc( + dptr: *mut cuda_types::CUdeviceptr_v1, + bytesize: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemAllocPitch( + dptr: *mut cuda_types::CUdeviceptr_v1, + pPitch: *mut ::core::ffi::c_uint, + WidthInBytes: ::core::ffi::c_uint, + Height: ::core::ffi::c_uint, + ElementSizeBytes: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemFree(dptr: cuda_types::CUdeviceptr_v1) -> cuda_types::CUresult; + fn cuMemGetAddressRange( + pbase: *mut cuda_types::CUdeviceptr_v1, + psize: *mut ::core::ffi::c_uint, + dptr: cuda_types::CUdeviceptr_v1, + ) -> cuda_types::CUresult; + fn cuMemAllocHost( + pp: *mut *mut ::core::ffi::c_void, + bytesize: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemHostGetDevicePointer( + pdptr: *mut cuda_types::CUdeviceptr_v1, + p: *mut ::core::ffi::c_void, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemcpyHtoD( + dstDevice: cuda_types::CUdeviceptr_v1, + srcHost: *const ::core::ffi::c_void, + ByteCount: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemcpyDtoH( + dstHost: *mut ::core::ffi::c_void, + srcDevice: cuda_types::CUdeviceptr_v1, + ByteCount: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemcpyDtoD( + dstDevice: cuda_types::CUdeviceptr_v1, + srcDevice: cuda_types::CUdeviceptr_v1, + ByteCount: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemcpyDtoA( + dstArray: cuda_types::CUarray, + dstOffset: ::core::ffi::c_uint, + srcDevice: cuda_types::CUdeviceptr_v1, + ByteCount: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemcpyAtoD( + dstDevice: cuda_types::CUdeviceptr_v1, + srcArray: cuda_types::CUarray, + srcOffset: ::core::ffi::c_uint, + ByteCount: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemcpyHtoA( + dstArray: cuda_types::CUarray, + dstOffset: ::core::ffi::c_uint, + srcHost: *const ::core::ffi::c_void, + ByteCount: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemcpyAtoH( + dstHost: *mut ::core::ffi::c_void, + srcArray: cuda_types::CUarray, + srcOffset: ::core::ffi::c_uint, + ByteCount: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemcpyAtoA( + dstArray: cuda_types::CUarray, + dstOffset: ::core::ffi::c_uint, + srcArray: cuda_types::CUarray, + srcOffset: ::core::ffi::c_uint, + ByteCount: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemcpyHtoAAsync( + dstArray: cuda_types::CUarray, + dstOffset: ::core::ffi::c_uint, + srcHost: *const ::core::ffi::c_void, + ByteCount: ::core::ffi::c_uint, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpyAtoHAsync( + dstHost: *mut ::core::ffi::c_void, + srcArray: cuda_types::CUarray, + srcOffset: ::core::ffi::c_uint, + ByteCount: ::core::ffi::c_uint, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpy2D(pCopy: *const cuda_types::CUDA_MEMCPY2D_v1) -> cuda_types::CUresult; + fn cuMemcpy2DUnaligned( + pCopy: *const cuda_types::CUDA_MEMCPY2D_v1, + ) -> cuda_types::CUresult; + fn cuMemcpy3D(pCopy: *const cuda_types::CUDA_MEMCPY3D_v1) -> cuda_types::CUresult; + fn cuMemcpyHtoDAsync( + dstDevice: cuda_types::CUdeviceptr_v1, + srcHost: *const ::core::ffi::c_void, + ByteCount: ::core::ffi::c_uint, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpyDtoHAsync( + dstHost: *mut ::core::ffi::c_void, + srcDevice: cuda_types::CUdeviceptr_v1, + ByteCount: ::core::ffi::c_uint, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpyDtoDAsync( + dstDevice: cuda_types::CUdeviceptr_v1, + srcDevice: cuda_types::CUdeviceptr_v1, + ByteCount: ::core::ffi::c_uint, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpy2DAsync( + pCopy: *const cuda_types::CUDA_MEMCPY2D_v1, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpy3DAsync( + pCopy: *const cuda_types::CUDA_MEMCPY3D_v1, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemsetD8( + dstDevice: cuda_types::CUdeviceptr_v1, + uc: ::core::ffi::c_uchar, + N: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemsetD16( + dstDevice: cuda_types::CUdeviceptr_v1, + us: ::core::ffi::c_ushort, + N: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemsetD32( + dstDevice: cuda_types::CUdeviceptr_v1, + ui: ::core::ffi::c_uint, + N: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemsetD2D8( + dstDevice: cuda_types::CUdeviceptr_v1, + dstPitch: ::core::ffi::c_uint, + uc: ::core::ffi::c_uchar, + Width: ::core::ffi::c_uint, + Height: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemsetD2D16( + dstDevice: cuda_types::CUdeviceptr_v1, + dstPitch: ::core::ffi::c_uint, + us: ::core::ffi::c_ushort, + Width: ::core::ffi::c_uint, + Height: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemsetD2D32( + dstDevice: cuda_types::CUdeviceptr_v1, + dstPitch: ::core::ffi::c_uint, + ui: ::core::ffi::c_uint, + Width: ::core::ffi::c_uint, + Height: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuArrayCreate( + pHandle: *mut cuda_types::CUarray, + pAllocateArray: *const cuda_types::CUDA_ARRAY_DESCRIPTOR_v1, + ) -> cuda_types::CUresult; + fn cuArrayGetDescriptor( + pArrayDescriptor: *mut cuda_types::CUDA_ARRAY_DESCRIPTOR_v1, + hArray: cuda_types::CUarray, + ) -> cuda_types::CUresult; + fn cuArray3DCreate( + pHandle: *mut cuda_types::CUarray, + pAllocateArray: *const cuda_types::CUDA_ARRAY3D_DESCRIPTOR_v1, + ) -> cuda_types::CUresult; + fn cuArray3DGetDescriptor( + pArrayDescriptor: *mut cuda_types::CUDA_ARRAY3D_DESCRIPTOR_v1, + hArray: cuda_types::CUarray, + ) -> cuda_types::CUresult; + fn cuTexRefSetAddress( + ByteOffset: *mut ::core::ffi::c_uint, + hTexRef: cuda_types::CUtexref, + dptr: cuda_types::CUdeviceptr_v1, + bytes: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuTexRefSetAddress2D( + hTexRef: cuda_types::CUtexref, + desc: *const cuda_types::CUDA_ARRAY_DESCRIPTOR_v1, + dptr: cuda_types::CUdeviceptr_v1, + Pitch: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuTexRefGetAddress( + pdptr: *mut cuda_types::CUdeviceptr_v1, + hTexRef: cuda_types::CUtexref, + ) -> cuda_types::CUresult; + fn cuGraphicsResourceGetMappedPointer( + pDevPtr: *mut cuda_types::CUdeviceptr_v1, + pSize: *mut ::core::ffi::c_uint, + resource: cuda_types::CUgraphicsResource, + ) -> cuda_types::CUresult; + fn cuCtxDestroy(ctx: cuda_types::CUcontext) -> cuda_types::CUresult; + fn cuCtxPopCurrent(pctx: *mut cuda_types::CUcontext) -> cuda_types::CUresult; + fn cuCtxPushCurrent(ctx: cuda_types::CUcontext) -> cuda_types::CUresult; + fn cuStreamDestroy(hStream: cuda_types::CUstream) -> cuda_types::CUresult; + fn cuEventDestroy(hEvent: cuda_types::CUevent) -> cuda_types::CUresult; + fn cuDevicePrimaryCtxRelease(dev: cuda_types::CUdevice) -> cuda_types::CUresult; + fn cuDevicePrimaryCtxReset(dev: cuda_types::CUdevice) -> cuda_types::CUresult; + fn cuDevicePrimaryCtxSetFlags( + dev: cuda_types::CUdevice, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemcpyHtoD_v2( + dstDevice: cuda_types::CUdeviceptr, + srcHost: *const ::core::ffi::c_void, + ByteCount: usize, + ) -> cuda_types::CUresult; + fn cuMemcpyDtoH_v2( + dstHost: *mut ::core::ffi::c_void, + srcDevice: cuda_types::CUdeviceptr, + ByteCount: usize, + ) -> cuda_types::CUresult; + fn cuMemcpyDtoD_v2( + dstDevice: cuda_types::CUdeviceptr, + srcDevice: cuda_types::CUdeviceptr, + ByteCount: usize, + ) -> cuda_types::CUresult; + fn cuMemcpyDtoA_v2( + dstArray: cuda_types::CUarray, + dstOffset: usize, + srcDevice: cuda_types::CUdeviceptr, + ByteCount: usize, + ) -> cuda_types::CUresult; + fn cuMemcpyAtoD_v2( + dstDevice: cuda_types::CUdeviceptr, + srcArray: cuda_types::CUarray, + srcOffset: usize, + ByteCount: usize, + ) -> cuda_types::CUresult; + fn cuMemcpyHtoA_v2( + dstArray: cuda_types::CUarray, + dstOffset: usize, + srcHost: *const ::core::ffi::c_void, + ByteCount: usize, + ) -> cuda_types::CUresult; + fn cuMemcpyAtoH_v2( + dstHost: *mut ::core::ffi::c_void, + srcArray: cuda_types::CUarray, + srcOffset: usize, + ByteCount: usize, + ) -> cuda_types::CUresult; + fn cuMemcpyAtoA_v2( + dstArray: cuda_types::CUarray, + dstOffset: usize, + srcArray: cuda_types::CUarray, + srcOffset: usize, + ByteCount: usize, + ) -> cuda_types::CUresult; + fn cuMemcpyHtoAAsync_v2( + dstArray: cuda_types::CUarray, + dstOffset: usize, + srcHost: *const ::core::ffi::c_void, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpyAtoHAsync_v2( + dstHost: *mut ::core::ffi::c_void, + srcArray: cuda_types::CUarray, + srcOffset: usize, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpy2D_v2(pCopy: *const cuda_types::CUDA_MEMCPY2D) -> cuda_types::CUresult; + fn cuMemcpy2DUnaligned_v2( + pCopy: *const cuda_types::CUDA_MEMCPY2D, + ) -> cuda_types::CUresult; + fn cuMemcpy3D_v2(pCopy: *const cuda_types::CUDA_MEMCPY3D) -> cuda_types::CUresult; + fn cuMemcpyHtoDAsync_v2( + dstDevice: cuda_types::CUdeviceptr, + srcHost: *const ::core::ffi::c_void, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpyDtoHAsync_v2( + dstHost: *mut ::core::ffi::c_void, + srcDevice: cuda_types::CUdeviceptr, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpyDtoDAsync_v2( + dstDevice: cuda_types::CUdeviceptr, + srcDevice: cuda_types::CUdeviceptr, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpy2DAsync_v2( + pCopy: *const cuda_types::CUDA_MEMCPY2D, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpy3DAsync_v2( + pCopy: *const cuda_types::CUDA_MEMCPY3D, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemsetD8_v2( + dstDevice: cuda_types::CUdeviceptr, + uc: ::core::ffi::c_uchar, + N: usize, + ) -> cuda_types::CUresult; + fn cuMemsetD16_v2( + dstDevice: cuda_types::CUdeviceptr, + us: ::core::ffi::c_ushort, + N: usize, + ) -> cuda_types::CUresult; + fn cuMemsetD32_v2( + dstDevice: cuda_types::CUdeviceptr, + ui: ::core::ffi::c_uint, + N: usize, + ) -> cuda_types::CUresult; + fn cuMemsetD2D8_v2( + dstDevice: cuda_types::CUdeviceptr, + dstPitch: usize, + uc: ::core::ffi::c_uchar, + Width: usize, + Height: usize, + ) -> cuda_types::CUresult; + fn cuMemsetD2D16_v2( + dstDevice: cuda_types::CUdeviceptr, + dstPitch: usize, + us: ::core::ffi::c_ushort, + Width: usize, + Height: usize, + ) -> cuda_types::CUresult; + fn cuMemsetD2D32_v2( + dstDevice: cuda_types::CUdeviceptr, + dstPitch: usize, + ui: ::core::ffi::c_uint, + Width: usize, + Height: usize, + ) -> cuda_types::CUresult; + fn cuMemcpy( + dst: cuda_types::CUdeviceptr, + src: cuda_types::CUdeviceptr, + ByteCount: usize, + ) -> cuda_types::CUresult; + fn cuMemcpyAsync( + dst: cuda_types::CUdeviceptr, + src: cuda_types::CUdeviceptr, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpyPeer( + dstDevice: cuda_types::CUdeviceptr, + dstContext: cuda_types::CUcontext, + srcDevice: cuda_types::CUdeviceptr, + srcContext: cuda_types::CUcontext, + ByteCount: usize, + ) -> cuda_types::CUresult; + fn cuMemcpyPeerAsync( + dstDevice: cuda_types::CUdeviceptr, + dstContext: cuda_types::CUcontext, + srcDevice: cuda_types::CUdeviceptr, + srcContext: cuda_types::CUcontext, + ByteCount: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemcpy3DPeer( + pCopy: *const cuda_types::CUDA_MEMCPY3D_PEER, + ) -> cuda_types::CUresult; + fn cuMemcpy3DPeerAsync( + pCopy: *const cuda_types::CUDA_MEMCPY3D_PEER, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemsetD8Async( + dstDevice: cuda_types::CUdeviceptr, + uc: ::core::ffi::c_uchar, + N: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemsetD16Async( + dstDevice: cuda_types::CUdeviceptr, + us: ::core::ffi::c_ushort, + N: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemsetD32Async( + dstDevice: cuda_types::CUdeviceptr, + ui: ::core::ffi::c_uint, + N: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemsetD2D8Async( + dstDevice: cuda_types::CUdeviceptr, + dstPitch: usize, + uc: ::core::ffi::c_uchar, + Width: usize, + Height: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemsetD2D16Async( + dstDevice: cuda_types::CUdeviceptr, + dstPitch: usize, + us: ::core::ffi::c_ushort, + Width: usize, + Height: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemsetD2D32Async( + dstDevice: cuda_types::CUdeviceptr, + dstPitch: usize, + ui: ::core::ffi::c_uint, + Width: usize, + Height: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuStreamGetPriority( + hStream: cuda_types::CUstream, + priority: *mut ::core::ffi::c_int, + ) -> cuda_types::CUresult; + fn cuStreamGetId( + hStream: cuda_types::CUstream, + streamId: *mut ::core::ffi::c_ulonglong, + ) -> cuda_types::CUresult; + fn cuStreamGetFlags( + hStream: cuda_types::CUstream, + flags: *mut ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamGetCtx( + hStream: cuda_types::CUstream, + pctx: *mut cuda_types::CUcontext, + ) -> cuda_types::CUresult; + fn cuStreamWaitEvent( + hStream: cuda_types::CUstream, + hEvent: cuda_types::CUevent, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamAddCallback( + hStream: cuda_types::CUstream, + callback: cuda_types::CUstreamCallback, + userData: *mut ::core::ffi::c_void, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamAttachMemAsync( + hStream: cuda_types::CUstream, + dptr: cuda_types::CUdeviceptr, + length: usize, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamQuery(hStream: cuda_types::CUstream) -> cuda_types::CUresult; + fn cuStreamSynchronize(hStream: cuda_types::CUstream) -> cuda_types::CUresult; + fn cuEventRecord( + hEvent: cuda_types::CUevent, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuEventRecordWithFlags( + hEvent: cuda_types::CUevent, + hStream: cuda_types::CUstream, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuLaunchKernel( + f: cuda_types::CUfunction, + gridDimX: ::core::ffi::c_uint, + gridDimY: ::core::ffi::c_uint, + gridDimZ: ::core::ffi::c_uint, + blockDimX: ::core::ffi::c_uint, + blockDimY: ::core::ffi::c_uint, + blockDimZ: ::core::ffi::c_uint, + sharedMemBytes: ::core::ffi::c_uint, + hStream: cuda_types::CUstream, + kernelParams: *mut *mut ::core::ffi::c_void, + extra: *mut *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + fn cuLaunchKernelEx( + config: *const cuda_types::CUlaunchConfig, + f: cuda_types::CUfunction, + kernelParams: *mut *mut ::core::ffi::c_void, + extra: *mut *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + fn cuLaunchHostFunc( + hStream: cuda_types::CUstream, + fn_: cuda_types::CUhostFn, + userData: *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + fn cuGraphicsMapResources( + count: ::core::ffi::c_uint, + resources: *mut cuda_types::CUgraphicsResource, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuGraphicsUnmapResources( + count: ::core::ffi::c_uint, + resources: *mut cuda_types::CUgraphicsResource, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuStreamWriteValue32( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint32_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamWaitValue32( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint32_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamWriteValue64( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint64_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamWaitValue64( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint64_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamBatchMemOp( + stream: cuda_types::CUstream, + count: ::core::ffi::c_uint, + paramArray: *mut cuda_types::CUstreamBatchMemOpParams, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamWriteValue32_ptsz( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint32_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamWaitValue32_ptsz( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint32_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamWriteValue64_ptsz( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint64_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamWaitValue64_ptsz( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint64_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamBatchMemOp_ptsz( + stream: cuda_types::CUstream, + count: ::core::ffi::c_uint, + paramArray: *mut cuda_types::CUstreamBatchMemOpParams, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamWriteValue32_v2( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint32_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamWaitValue32_v2( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint32_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamWriteValue64_v2( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint64_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamWaitValue64_v2( + stream: cuda_types::CUstream, + addr: cuda_types::CUdeviceptr, + value: cuda_types::cuuint64_t, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamBatchMemOp_v2( + stream: cuda_types::CUstream, + count: ::core::ffi::c_uint, + paramArray: *mut cuda_types::CUstreamBatchMemOpParams, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuMemPrefetchAsync( + devPtr: cuda_types::CUdeviceptr, + count: usize, + dstDevice: cuda_types::CUdevice, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemPrefetchAsync_v2( + devPtr: cuda_types::CUdeviceptr, + count: usize, + location: cuda_types::CUmemLocation, + flags: ::core::ffi::c_uint, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuLaunchCooperativeKernel( + f: cuda_types::CUfunction, + gridDimX: ::core::ffi::c_uint, + gridDimY: ::core::ffi::c_uint, + gridDimZ: ::core::ffi::c_uint, + blockDimX: ::core::ffi::c_uint, + blockDimY: ::core::ffi::c_uint, + blockDimZ: ::core::ffi::c_uint, + sharedMemBytes: ::core::ffi::c_uint, + hStream: cuda_types::CUstream, + kernelParams: *mut *mut ::core::ffi::c_void, + ) -> cuda_types::CUresult; + fn cuSignalExternalSemaphoresAsync( + extSemArray: *const cuda_types::CUexternalSemaphore, + paramsArray: *const cuda_types::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS, + numExtSems: ::core::ffi::c_uint, + stream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuWaitExternalSemaphoresAsync( + extSemArray: *const cuda_types::CUexternalSemaphore, + paramsArray: *const cuda_types::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS, + numExtSems: ::core::ffi::c_uint, + stream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuStreamBeginCapture(hStream: cuda_types::CUstream) -> cuda_types::CUresult; + fn cuStreamBeginCapture_ptsz(hStream: cuda_types::CUstream) -> cuda_types::CUresult; + fn cuStreamBeginCapture_v2( + hStream: cuda_types::CUstream, + mode: cuda_types::CUstreamCaptureMode, + ) -> cuda_types::CUresult; + fn cuStreamBeginCaptureToGraph( + hStream: cuda_types::CUstream, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + dependencyData: *const cuda_types::CUgraphEdgeData, + numDependencies: usize, + mode: cuda_types::CUstreamCaptureMode, + ) -> cuda_types::CUresult; + fn cuStreamEndCapture( + hStream: cuda_types::CUstream, + phGraph: *mut cuda_types::CUgraph, + ) -> cuda_types::CUresult; + fn cuStreamIsCapturing( + hStream: cuda_types::CUstream, + captureStatus: *mut cuda_types::CUstreamCaptureStatus, + ) -> cuda_types::CUresult; + fn cuStreamGetCaptureInfo( + hStream: cuda_types::CUstream, + captureStatus_out: *mut cuda_types::CUstreamCaptureStatus, + id_out: *mut cuda_types::cuuint64_t, + ) -> cuda_types::CUresult; + fn cuStreamGetCaptureInfo_ptsz( + hStream: cuda_types::CUstream, + captureStatus_out: *mut cuda_types::CUstreamCaptureStatus, + id_out: *mut cuda_types::cuuint64_t, + ) -> cuda_types::CUresult; + fn cuStreamGetCaptureInfo_v2( + hStream: cuda_types::CUstream, + captureStatus_out: *mut cuda_types::CUstreamCaptureStatus, + id_out: *mut cuda_types::cuuint64_t, + graph_out: *mut cuda_types::CUgraph, + dependencies_out: *mut *const cuda_types::CUgraphNode, + numDependencies_out: *mut usize, + ) -> cuda_types::CUresult; + fn cuStreamGetCaptureInfo_v3( + hStream: cuda_types::CUstream, + captureStatus_out: *mut cuda_types::CUstreamCaptureStatus, + id_out: *mut cuda_types::cuuint64_t, + graph_out: *mut cuda_types::CUgraph, + dependencies_out: *mut *const cuda_types::CUgraphNode, + edgeData_out: *mut *const cuda_types::CUgraphEdgeData, + numDependencies_out: *mut usize, + ) -> cuda_types::CUresult; + fn cuGraphAddKernelNode( + phGraphNode: *mut cuda_types::CUgraphNode, + hGraph: cuda_types::CUgraph, + dependencies: *const cuda_types::CUgraphNode, + numDependencies: usize, + nodeParams: *const cuda_types::CUDA_KERNEL_NODE_PARAMS_v1, + ) -> cuda_types::CUresult; + fn cuGraphKernelNodeGetParams( + hNode: cuda_types::CUgraphNode, + nodeParams: *mut cuda_types::CUDA_KERNEL_NODE_PARAMS_v1, + ) -> cuda_types::CUresult; + fn cuGraphKernelNodeSetParams( + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_KERNEL_NODE_PARAMS_v1, + ) -> cuda_types::CUresult; + fn cuGraphExecKernelNodeSetParams( + hGraphExec: cuda_types::CUgraphExec, + hNode: cuda_types::CUgraphNode, + nodeParams: *const cuda_types::CUDA_KERNEL_NODE_PARAMS_v1, + ) -> cuda_types::CUresult; + fn cuGraphInstantiateWithParams( + phGraphExec: *mut cuda_types::CUgraphExec, + hGraph: cuda_types::CUgraph, + instantiateParams: *mut cuda_types::CUDA_GRAPH_INSTANTIATE_PARAMS, + ) -> cuda_types::CUresult; + fn cuGraphExecUpdate( + hGraphExec: cuda_types::CUgraphExec, + hGraph: cuda_types::CUgraph, + hErrorNode_out: *mut cuda_types::CUgraphNode, + updateResult_out: *mut cuda_types::CUgraphExecUpdateResult, + ) -> cuda_types::CUresult; + fn cuGraphUpload( + hGraph: cuda_types::CUgraphExec, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuGraphLaunch( + hGraph: cuda_types::CUgraphExec, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuStreamCopyAttributes( + dstStream: cuda_types::CUstream, + srcStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuStreamGetAttribute( + hStream: cuda_types::CUstream, + attr: cuda_types::CUstreamAttrID, + value: *mut cuda_types::CUstreamAttrValue, + ) -> cuda_types::CUresult; + fn cuStreamSetAttribute( + hStream: cuda_types::CUstream, + attr: cuda_types::CUstreamAttrID, + param: *const cuda_types::CUstreamAttrValue, + ) -> cuda_types::CUresult; + fn cuIpcOpenMemHandle( + pdptr: *mut cuda_types::CUdeviceptr, + handle: cuda_types::CUipcMemHandle, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuGraphInstantiate( + phGraphExec: *mut cuda_types::CUgraphExec, + hGraph: cuda_types::CUgraph, + phErrorNode: *mut cuda_types::CUgraphNode, + logBuffer: *mut ::core::ffi::c_char, + bufferSize: usize, + ) -> cuda_types::CUresult; + fn cuGraphInstantiate_v2( + phGraphExec: *mut cuda_types::CUgraphExec, + hGraph: cuda_types::CUgraph, + phErrorNode: *mut cuda_types::CUgraphNode, + logBuffer: *mut ::core::ffi::c_char, + bufferSize: usize, + ) -> cuda_types::CUresult; + fn cuMemMapArrayAsync( + mapInfoList: *mut cuda_types::CUarrayMapInfo, + count: ::core::ffi::c_uint, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemFreeAsync( + dptr: cuda_types::CUdeviceptr, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemAllocAsync( + dptr: *mut cuda_types::CUdeviceptr, + bytesize: usize, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuMemAllocFromPoolAsync( + dptr: *mut cuda_types::CUdeviceptr, + bytesize: usize, + pool: cuda_types::CUmemoryPool, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuStreamUpdateCaptureDependencies( + hStream: cuda_types::CUstream, + dependencies: *mut cuda_types::CUgraphNode, + numDependencies: usize, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuStreamUpdateCaptureDependencies_v2( + hStream: cuda_types::CUstream, + dependencies: *mut cuda_types::CUgraphNode, + dependencyData: *const cuda_types::CUgraphEdgeData, + numDependencies: usize, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuGetProcAddress( + symbol: *const ::core::ffi::c_char, + pfn: *mut *mut ::core::ffi::c_void, + cudaVersion: ::core::ffi::c_int, + flags: cuda_types::cuuint64_t, + ) -> cuda_types::CUresult; + /** \brief Initialize the profiling. + + \deprecated + + Note that this function is deprecated and should not be used. + Starting with CUDA 12.0, it always returns error code ::CUDA_ERROR_NOT_SUPPORTED. + + Using this API user can initialize the CUDA profiler by specifying + the configuration file, output file and output file format. This + API is generally used to profile different set of counters by + looping the kernel launch. The \p configFile parameter can be used + to select profiling options including profiler counters. Refer to + the "Compute Command Line Profiler User Guide" for supported + profiler options and counters. + + Limitation: The CUDA profiler cannot be initialized with this API + if another profiling tool is already active, as indicated by the + ::CUDA_ERROR_PROFILER_DISABLED return code. + + Typical usage of the profiling APIs is as follows: + + for each set of counters/options\n + {\n + cuProfilerInitialize(); //Initialize profiling, set the counters or options in the config file \n + ...\n + cuProfilerStart(); \n + // code to be profiled \n + cuProfilerStop(); \n + ...\n + cuProfilerStart(); \n + // code to be profiled \n + cuProfilerStop(); \n + ...\n + }\n + + \param configFile - Name of the config file that lists the counters/options + for profiling. + \param outputFile - Name of the outputFile where the profiling results will + be stored. + \param outputMode - outputMode, can be ::CU_OUT_KEY_VALUE_PAIR or ::CU_OUT_CSV. + + \return + ::CUDA_ERROR_NOT_SUPPORTED + \notefnerr + + \sa + ::cuProfilerStart, + ::cuProfilerStop,*/ + fn cuProfilerInitialize( + configFile: *const ::core::ffi::c_char, + outputFile: *const ::core::ffi::c_char, + outputMode: cuda_types::CUoutput_mode, + ) -> cuda_types::CUresult; + /** \brief Enable profiling. + + Enables profile collection by the active profiling tool for the + current context. If profiling is already enabled, then + cuProfilerStart() has no effect. + + cuProfilerStart and cuProfilerStop APIs are used to + programmatically control the profiling granularity by allowing + profiling to be done only on selective pieces of code. + + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_CONTEXT + \notefnerr + + \sa + ::cuProfilerInitialize, + ::cuProfilerStop, + ::cudaProfilerStart*/ + fn cuProfilerStart() -> cuda_types::CUresult; + /** \brief Disable profiling. + + Disables profile collection by the active profiling tool for the + current context. If profiling is already disabled, then + cuProfilerStop() has no effect. + + cuProfilerStart and cuProfilerStop APIs are used to + programmatically control the profiling granularity by allowing + profiling to be done only on selective pieces of code. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_CONTEXT + \notefnerr + + \sa + ::cuProfilerInitialize, + ::cuProfilerStart, + ::cudaProfilerStop*/ + fn cuProfilerStop() -> cuda_types::CUresult; + /** \brief Registers an OpenGL buffer object + + Registers the buffer object specified by \p buffer for access by + CUDA. A handle to the registered object is returned as \p + pCudaResource. The register flags \p Flags specify the intended usage, + as follows: + + - ::CU_GRAPHICS_REGISTER_FLAGS_NONE: Specifies no hints about how this + resource will be used. It is therefore assumed that this resource will be + read from and written to by CUDA. This is the default value. + - ::CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: Specifies that CUDA + will not write to this resource. + - ::CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that + CUDA will not read from this resource and will write over the + entire contents of the resource, so none of the data previously + stored in the resource will be preserved. + + \param pCudaResource - Pointer to the returned object handle + \param buffer - name of buffer object to be registered + \param Flags - Register flags + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_ALREADY_MAPPED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_OPERATING_SYSTEM + \notefnerr + + \sa + ::cuGraphicsUnregisterResource, + ::cuGraphicsMapResources, + ::cuGraphicsResourceGetMappedPointer, + ::cudaGraphicsGLRegisterBuffer*/ + fn cuGraphicsGLRegisterBuffer( + pCudaResource: *mut cuda_types::CUgraphicsResource, + buffer: cuda_types::GLuint, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Register an OpenGL texture or renderbuffer object + + Registers the texture or renderbuffer object specified by \p image for access by CUDA. + A handle to the registered object is returned as \p pCudaResource. + + \p target must match the type of the object, and must be one of ::GL_TEXTURE_2D, + ::GL_TEXTURE_RECTANGLE, ::GL_TEXTURE_CUBE_MAP, ::GL_TEXTURE_3D, ::GL_TEXTURE_2D_ARRAY, + or ::GL_RENDERBUFFER. + + The register flags \p Flags specify the intended usage, as follows: + + - ::CU_GRAPHICS_REGISTER_FLAGS_NONE: Specifies no hints about how this + resource will be used. It is therefore assumed that this resource will be + read from and written to by CUDA. This is the default value. + - ::CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: Specifies that CUDA + will not write to this resource. + - ::CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that + CUDA will not read from this resource and will write over the + entire contents of the resource, so none of the data previously + stored in the resource will be preserved. + - ::CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST: Specifies that CUDA will + bind this resource to a surface reference. + - ::CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER: Specifies that CUDA will perform + texture gather operations on this resource. + + The following image formats are supported. For brevity's sake, the list is abbreviated. + For ex., {GL_R, GL_RG} X {8, 16} would expand to the following 4 formats + {GL_R8, GL_R16, GL_RG8, GL_RG16} : + - GL_RED, GL_RG, GL_RGBA, GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY + - {GL_R, GL_RG, GL_RGBA} X {8, 16, 16F, 32F, 8UI, 16UI, 32UI, 8I, 16I, 32I} + - {GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY} X + {8, 16, 16F_ARB, 32F_ARB, 8UI_EXT, 16UI_EXT, 32UI_EXT, 8I_EXT, 16I_EXT, 32I_EXT} + + The following image classes are currently disallowed: + - Textures with borders + - Multisampled renderbuffers + + \param pCudaResource - Pointer to the returned object handle + \param image - name of texture or renderbuffer object to be registered + \param target - Identifies the type of object specified by \p image + \param Flags - Register flags + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_ALREADY_MAPPED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_OPERATING_SYSTEM + \notefnerr + + \sa + ::cuGraphicsUnregisterResource, + ::cuGraphicsMapResources, + ::cuGraphicsSubResourceGetMappedArray, + ::cudaGraphicsGLRegisterImage*/ + fn cuGraphicsGLRegisterImage( + pCudaResource: *mut cuda_types::CUgraphicsResource, + image: cuda_types::GLuint, + target: cuda_types::GLenum, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Gets the CUDA devices associated with the current OpenGL context + + Returns in \p *pCudaDeviceCount the number of CUDA-compatible devices + corresponding to the current OpenGL context. Also returns in \p *pCudaDevices + at most cudaDeviceCount of the CUDA-compatible devices corresponding to + the current OpenGL context. If any of the GPUs being used by the current OpenGL + context are not CUDA capable then the call will return CUDA_ERROR_NO_DEVICE. + + The \p deviceList argument may be any of the following: + - ::CU_GL_DEVICE_LIST_ALL: Query all devices used by the current OpenGL context. + - ::CU_GL_DEVICE_LIST_CURRENT_FRAME: Query the devices used by the current OpenGL context to + render the current frame (in SLI). + - ::CU_GL_DEVICE_LIST_NEXT_FRAME: Query the devices used by the current OpenGL context to + render the next frame (in SLI). Note that this is a prediction, it can't be guaranteed that + this is correct in all cases. + + \param pCudaDeviceCount - Returned number of CUDA devices. + \param pCudaDevices - Returned CUDA devices. + \param cudaDeviceCount - The size of the output device array pCudaDevices. + \param deviceList - The set of devices to return. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_NO_DEVICE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_GRAPHICS_CONTEXT, + ::CUDA_ERROR_OPERATING_SYSTEM + + \notefnerr + + \sa + ::cuWGLGetDevice, + ::cudaGLGetDevices*/ + fn cuGLGetDevices_v2( + pCudaDeviceCount: *mut ::core::ffi::c_uint, + pCudaDevices: *mut cuda_types::CUdevice, + cudaDeviceCount: ::core::ffi::c_uint, + deviceList: cuda_types::CUGLDeviceList, + ) -> cuda_types::CUresult; + /** \brief Create a CUDA context for interoperability with OpenGL + + \deprecated This function is deprecated as of Cuda 5.0. + + This function is deprecated and should no longer be used. It is + no longer necessary to associate a CUDA context with an OpenGL + context in order to achieve maximum interoperability performance. + + \param pCtx - Returned CUDA context + \param Flags - Options for CUDA context creation + \param device - Device on which to create the context + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY + \notefnerr + + \sa ::cuCtxCreate, ::cuGLInit, ::cuGLMapBufferObject, + ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject, + ::cuGLUnregisterBufferObject, ::cuGLMapBufferObjectAsync, + ::cuGLUnmapBufferObjectAsync, ::cuGLSetBufferObjectMapFlags, + ::cuWGLGetDevice*/ + fn cuGLCtxCreate_v2( + pCtx: *mut cuda_types::CUcontext, + Flags: ::core::ffi::c_uint, + device: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + /** \brief Initializes OpenGL interoperability + + \deprecated This function is deprecated as of Cuda 3.0. + + Initializes OpenGL interoperability. This function is deprecated + and calling it is no longer required. It may fail if the needed + OpenGL driver facilities are not available. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa ::cuGLMapBufferObject, + ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject, + ::cuGLUnregisterBufferObject, ::cuGLMapBufferObjectAsync, + ::cuGLUnmapBufferObjectAsync, ::cuGLSetBufferObjectMapFlags, + ::cuWGLGetDevice*/ + fn cuGLInit() -> cuda_types::CUresult; + /** \brief Registers an OpenGL buffer object + + \deprecated This function is deprecated as of Cuda 3.0. + + Registers the buffer object specified by \p buffer for access by + CUDA. This function must be called before CUDA can map the buffer + object. There must be a valid OpenGL context bound to the current + thread when this function is called, and the buffer name is + resolved by that context. + + \param buffer - The name of the buffer object to register. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_ALREADY_MAPPED + \notefnerr + + \sa ::cuGraphicsGLRegisterBuffer*/ + fn cuGLRegisterBufferObject(buffer: cuda_types::GLuint) -> cuda_types::CUresult; + /** \brief Maps an OpenGL buffer object + + \deprecated This function is deprecated as of Cuda 3.0. + + Maps the buffer object specified by \p buffer into the address space of the + current CUDA context and returns in \p *dptr and \p *size the base pointer + and size of the resulting mapping. + + There must be a valid OpenGL context bound to the current thread + when this function is called. This must be the same context, or a + member of the same shareGroup, as the context that was bound when + the buffer was registered. + + All streams in the current CUDA context are synchronized with the + current GL context. + + \param dptr - Returned mapped base pointer + \param size - Returned size of mapping + \param buffer - The name of the buffer object to map + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_MAP_FAILED + \notefnerr + + \sa ::cuGraphicsMapResources*/ + fn cuGLMapBufferObject_v2_ptds( + dptr: *mut cuda_types::CUdeviceptr, + size: *mut usize, + buffer: cuda_types::GLuint, + ) -> cuda_types::CUresult; + /** \brief Unmaps an OpenGL buffer object + + \deprecated This function is deprecated as of Cuda 3.0. + + Unmaps the buffer object specified by \p buffer for access by CUDA. + + There must be a valid OpenGL context bound to the current thread + when this function is called. This must be the same context, or a + member of the same shareGroup, as the context that was bound when + the buffer was registered. + + All streams in the current CUDA context are synchronized with the + current GL context. + + \param buffer - Buffer object to unmap + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuGraphicsUnmapResources*/ + fn cuGLUnmapBufferObject(buffer: cuda_types::GLuint) -> cuda_types::CUresult; + /** \brief Unregister an OpenGL buffer object + + \deprecated This function is deprecated as of Cuda 3.0. + + Unregisters the buffer object specified by \p buffer. This + releases any resources associated with the registered buffer. + After this call, the buffer may no longer be mapped for access by + CUDA. + + There must be a valid OpenGL context bound to the current thread + when this function is called. This must be the same context, or a + member of the same shareGroup, as the context that was bound when + the buffer was registered. + + \param buffer - Name of the buffer object to unregister + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuGraphicsUnregisterResource*/ + fn cuGLUnregisterBufferObject(buffer: cuda_types::GLuint) -> cuda_types::CUresult; + /** \brief Set the map flags for an OpenGL buffer object + + \deprecated This function is deprecated as of Cuda 3.0. + + Sets the map flags for the buffer object specified by \p buffer. + + Changes to \p Flags will take effect the next time \p buffer is mapped. + The \p Flags argument may be any of the following: + - ::CU_GL_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this + resource will be used. It is therefore assumed that this resource will be + read from and written to by CUDA kernels. This is the default value. + - ::CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA kernels which + access this resource will not write to this resource. + - ::CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA kernels + which access this resource will not read from this resource and will + write over the entire contents of the resource, so none of the data + previously stored in the resource will be preserved. + + If \p buffer has not been registered for use with CUDA, then + ::CUDA_ERROR_INVALID_HANDLE is returned. If \p buffer is presently + mapped for access by CUDA, then ::CUDA_ERROR_ALREADY_MAPPED is returned. + + There must be a valid OpenGL context bound to the current thread + when this function is called. This must be the same context, or a + member of the same shareGroup, as the context that was bound when + the buffer was registered. + + \param buffer - Buffer object to unmap + \param Flags - Map flags + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_ALREADY_MAPPED, + ::CUDA_ERROR_INVALID_CONTEXT, + \notefnerr + + \sa ::cuGraphicsResourceSetMapFlags*/ + fn cuGLSetBufferObjectMapFlags( + buffer: cuda_types::GLuint, + Flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Maps an OpenGL buffer object + + \deprecated This function is deprecated as of Cuda 3.0. + + Maps the buffer object specified by \p buffer into the address space of the + current CUDA context and returns in \p *dptr and \p *size the base pointer + and size of the resulting mapping. + + There must be a valid OpenGL context bound to the current thread + when this function is called. This must be the same context, or a + member of the same shareGroup, as the context that was bound when + the buffer was registered. + + Stream \p hStream in the current CUDA context is synchronized with + the current GL context. + + \param dptr - Returned mapped base pointer + \param size - Returned size of mapping + \param buffer - The name of the buffer object to map + \param hStream - Stream to synchronize + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_MAP_FAILED + \notefnerr + + \sa ::cuGraphicsMapResources*/ + fn cuGLMapBufferObjectAsync_v2_ptsz( + dptr: *mut cuda_types::CUdeviceptr, + size: *mut usize, + buffer: cuda_types::GLuint, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Unmaps an OpenGL buffer object + + \deprecated This function is deprecated as of Cuda 3.0. + + Unmaps the buffer object specified by \p buffer for access by CUDA. + + There must be a valid OpenGL context bound to the current thread + when this function is called. This must be the same context, or a + member of the same shareGroup, as the context that was bound when + the buffer was registered. + + Stream \p hStream in the current CUDA context is synchronized with + the current GL context. + + \param buffer - Name of the buffer object to unmap + \param hStream - Stream to synchronize + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuGraphicsUnmapResources*/ + fn cuGLUnmapBufferObjectAsync( + buffer: cuda_types::GLuint, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuGLGetDevices( + pCudaDeviceCount: *mut ::core::ffi::c_uint, + pCudaDevices: *mut cuda_types::CUdevice, + cudaDeviceCount: ::core::ffi::c_uint, + deviceList: cuda_types::CUGLDeviceList, + ) -> cuda_types::CUresult; + fn cuGLMapBufferObject_v2( + dptr: *mut cuda_types::CUdeviceptr, + size: *mut usize, + buffer: cuda_types::GLuint, + ) -> cuda_types::CUresult; + fn cuGLMapBufferObjectAsync_v2( + dptr: *mut cuda_types::CUdeviceptr, + size: *mut usize, + buffer: cuda_types::GLuint, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + fn cuGLCtxCreate( + pCtx: *mut cuda_types::CUcontext, + Flags: ::core::ffi::c_uint, + device: cuda_types::CUdevice, + ) -> cuda_types::CUresult; + fn cuGLMapBufferObject( + dptr: *mut cuda_types::CUdeviceptr_v1, + size: *mut ::core::ffi::c_uint, + buffer: cuda_types::GLuint, + ) -> cuda_types::CUresult; + fn cuGLMapBufferObjectAsync( + dptr: *mut cuda_types::CUdeviceptr_v1, + size: *mut ::core::ffi::c_uint, + buffer: cuda_types::GLuint, + hStream: cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Registers an EGL image + + Registers the EGLImageKHR specified by \p image for access by + CUDA. A handle to the registered object is returned as \p pCudaResource. + Additional Mapping/Unmapping is not required for the registered resource and + ::cuGraphicsResourceGetMappedEglFrame can be directly called on the \p pCudaResource. + + The application will be responsible for synchronizing access to shared objects. + The application must ensure that any pending operation which access the objects have completed + before passing control to CUDA. This may be accomplished by issuing and waiting for + glFinish command on all GLcontexts (for OpenGL and likewise for other APIs). + The application will be also responsible for ensuring that any pending operation on the + registered CUDA resource has completed prior to executing subsequent commands in other APIs + accesing the same memory objects. + This can be accomplished by calling cuCtxSynchronize or cuEventSynchronize (preferably). + + The surface's intended usage is specified using \p flags, as follows: + + - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this + resource will be used. It is therefore assumed that this resource will be + read from and written to by CUDA. This is the default value. + - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA + will not write to this resource. + - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that + CUDA will not read from this resource and will write over the + entire contents of the resource, so none of the data previously + stored in the resource will be preserved. + + The EGLImageKHR is an object which can be used to create EGLImage target resource. It is defined as a void pointer. + typedef void* EGLImageKHR + + \param pCudaResource - Pointer to the returned object handle + \param image - An EGLImageKHR image which can be used to create target resource. + \param flags - Map flags + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_ALREADY_MAPPED, + ::CUDA_ERROR_INVALID_CONTEXT, + + \sa ::cuGraphicsEGLRegisterImage, ::cuGraphicsUnregisterResource, + ::cuGraphicsResourceSetMapFlags, ::cuGraphicsMapResources, + ::cuGraphicsUnmapResources, + ::cudaGraphicsEGLRegisterImage*/ + fn cuGraphicsEGLRegisterImage( + pCudaResource: *mut cuda_types::CUgraphicsResource, + image: cuda_types::EGLImageKHR, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Connect CUDA to EGLStream as a consumer. + + Connect CUDA as a consumer to EGLStreamKHR specified by \p stream. + + The EGLStreamKHR is an EGL object that transfers a sequence of image frames from one + API to another. + + \param conn - Pointer to the returned connection handle + \param stream - EGLStreamKHR handle + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_CONTEXT, + + \sa ::cuEGLStreamConsumerConnect, ::cuEGLStreamConsumerDisconnect, + ::cuEGLStreamConsumerAcquireFrame, ::cuEGLStreamConsumerReleaseFrame, + ::cudaEGLStreamConsumerConnect*/ + fn cuEGLStreamConsumerConnect( + conn: *mut cuda_types::CUeglStreamConnection, + stream: cuda_types::EGLStreamKHR, + ) -> cuda_types::CUresult; + /** \brief Connect CUDA to EGLStream as a consumer with given flags. + + Connect CUDA as a consumer to EGLStreamKHR specified by \p stream with specified \p flags defined by CUeglResourceLocationFlags. + + The flags specify whether the consumer wants to access frames from system memory or video memory. + Default is ::CU_EGL_RESOURCE_LOCATION_VIDMEM. + + \param conn - Pointer to the returned connection handle + \param stream - EGLStreamKHR handle + \param flags - Flags denote intended location - system or video. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_CONTEXT, + + \sa ::cuEGLStreamConsumerConnect, ::cuEGLStreamConsumerDisconnect, + ::cuEGLStreamConsumerAcquireFrame, ::cuEGLStreamConsumerReleaseFrame, + ::cudaEGLStreamConsumerConnectWithFlags*/ + fn cuEGLStreamConsumerConnectWithFlags( + conn: *mut cuda_types::CUeglStreamConnection, + stream: cuda_types::EGLStreamKHR, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Disconnect CUDA as a consumer to EGLStream . + + Disconnect CUDA as a consumer to EGLStreamKHR. + + \param conn - Conection to disconnect. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_CONTEXT, + + \sa ::cuEGLStreamConsumerConnect, ::cuEGLStreamConsumerDisconnect, + ::cuEGLStreamConsumerAcquireFrame, ::cuEGLStreamConsumerReleaseFrame, + ::cudaEGLStreamConsumerDisconnect*/ + fn cuEGLStreamConsumerDisconnect( + conn: *mut cuda_types::CUeglStreamConnection, + ) -> cuda_types::CUresult; + /** \brief Acquire an image frame from the EGLStream with CUDA as a consumer. + + Acquire an image frame from EGLStreamKHR. This API can also acquire an old frame presented + by the producer unless explicitly disabled by setting EGL_SUPPORT_REUSE_NV flag to EGL_FALSE + during stream initialization. By default, EGLStream is created with this flag set to EGL_TRUE. + ::cuGraphicsResourceGetMappedEglFrame can be called on \p pCudaResource to get + ::CUeglFrame. + + \param conn - Connection on which to acquire + \param pCudaResource - CUDA resource on which the stream frame will be mapped for use. + \param pStream - CUDA stream for synchronization and any data migrations + implied by ::CUeglResourceLocationFlags. + \param timeout - Desired timeout in usec for a new frame to be acquired. + If set as ::CUDA_EGL_INFINITE_TIMEOUT, acquire waits infinitely. + After timeout occurs CUDA consumer tries to acquire an old frame + if available and EGL_SUPPORT_REUSE_NV flag is set. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_LAUNCH_TIMEOUT, + + \sa ::cuEGLStreamConsumerConnect, ::cuEGLStreamConsumerDisconnect, + ::cuEGLStreamConsumerAcquireFrame, ::cuEGLStreamConsumerReleaseFrame, + ::cudaEGLStreamConsumerAcquireFrame*/ + fn cuEGLStreamConsumerAcquireFrame( + conn: *mut cuda_types::CUeglStreamConnection, + pCudaResource: *mut cuda_types::CUgraphicsResource, + pStream: *mut cuda_types::CUstream, + timeout: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Releases the last frame acquired from the EGLStream. + + Release the acquired image frame specified by \p pCudaResource to EGLStreamKHR. + If EGL_SUPPORT_REUSE_NV flag is set to EGL_TRUE, at the time of EGL creation + this API doesn't release the last frame acquired on the EGLStream. + By default, EGLStream is created with this flag set to EGL_TRUE. + + \param conn - Connection on which to release + \param pCudaResource - CUDA resource whose corresponding frame is to be released + \param pStream - CUDA stream on which release will be done. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + + \sa ::cuEGLStreamConsumerConnect, ::cuEGLStreamConsumerDisconnect, + ::cuEGLStreamConsumerAcquireFrame, ::cuEGLStreamConsumerReleaseFrame, + ::cudaEGLStreamConsumerReleaseFrame*/ + fn cuEGLStreamConsumerReleaseFrame( + conn: *mut cuda_types::CUeglStreamConnection, + pCudaResource: cuda_types::CUgraphicsResource, + pStream: *mut cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Connect CUDA to EGLStream as a producer. + + Connect CUDA as a producer to EGLStreamKHR specified by \p stream. + + The EGLStreamKHR is an EGL object that transfers a sequence of image frames from one + API to another. + + \param conn - Pointer to the returned connection handle + \param stream - EGLStreamKHR handle + \param width - width of the image to be submitted to the stream + \param height - height of the image to be submitted to the stream + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_CONTEXT, + + \sa ::cuEGLStreamProducerConnect, ::cuEGLStreamProducerDisconnect, + ::cuEGLStreamProducerPresentFrame, + ::cudaEGLStreamProducerConnect*/ + fn cuEGLStreamProducerConnect( + conn: *mut cuda_types::CUeglStreamConnection, + stream: cuda_types::EGLStreamKHR, + width: cuda_types::EGLint, + height: cuda_types::EGLint, + ) -> cuda_types::CUresult; + /** \brief Disconnect CUDA as a producer to EGLStream . + + Disconnect CUDA as a producer to EGLStreamKHR. + + \param conn - Conection to disconnect. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_INVALID_CONTEXT, + + \sa ::cuEGLStreamProducerConnect, ::cuEGLStreamProducerDisconnect, + ::cuEGLStreamProducerPresentFrame, + ::cudaEGLStreamProducerDisconnect*/ + fn cuEGLStreamProducerDisconnect( + conn: *mut cuda_types::CUeglStreamConnection, + ) -> cuda_types::CUresult; + /** \brief Present a CUDA eglFrame to the EGLStream with CUDA as a producer. + + When a frame is presented by the producer, it gets associated with the EGLStream + and thus it is illegal to free the frame before the producer is disconnected. + If a frame is freed and reused it may lead to undefined behavior. + + If producer and consumer are on different GPUs (iGPU and dGPU) then frametype + ::CU_EGL_FRAME_TYPE_ARRAY is not supported. ::CU_EGL_FRAME_TYPE_PITCH can be used for + such cross-device applications. + + The ::CUeglFrame is defined as: + \code + typedef struct CUeglFrame_st { + union { + CUarray pArray[MAX_PLANES]; + void* pPitch[MAX_PLANES]; + } frame; + unsigned int width; + unsigned int height; + unsigned int depth; + unsigned int pitch; + unsigned int planeCount; + unsigned int numChannels; + CUeglFrameType frameType; + CUeglColorFormat eglColorFormat; + CUarray_format cuFormat; + } CUeglFrame; + \endcode + + For ::CUeglFrame of type ::CU_EGL_FRAME_TYPE_PITCH, the application may present sub-region of a memory + allocation. In that case, the pitched pointer will specify the start address of the sub-region in + the allocation and corresponding ::CUeglFrame fields will specify the dimensions of the sub-region. + + \param conn - Connection on which to present the CUDA array + \param eglframe - CUDA Eglstream Proucer Frame handle to be sent to the consumer over EglStream. + \param pStream - CUDA stream on which to present the frame. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + + \sa ::cuEGLStreamProducerConnect, ::cuEGLStreamProducerDisconnect, + ::cuEGLStreamProducerReturnFrame, + ::cudaEGLStreamProducerPresentFrame*/ + fn cuEGLStreamProducerPresentFrame( + conn: *mut cuda_types::CUeglStreamConnection, + eglframe: cuda_types::CUeglFrame, + pStream: *mut cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Return the CUDA eglFrame to the EGLStream released by the consumer. + + This API can potentially return CUDA_ERROR_LAUNCH_TIMEOUT if the consumer has not + returned a frame to EGL stream. If timeout is returned the application can retry. + + \param conn - Connection on which to return + \param eglframe - CUDA Eglstream Proucer Frame handle returned from the consumer over EglStream. + \param pStream - CUDA stream on which to return the frame. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_LAUNCH_TIMEOUT + + \sa ::cuEGLStreamProducerConnect, ::cuEGLStreamProducerDisconnect, + ::cuEGLStreamProducerPresentFrame, + ::cudaEGLStreamProducerReturnFrame*/ + fn cuEGLStreamProducerReturnFrame( + conn: *mut cuda_types::CUeglStreamConnection, + eglframe: *mut cuda_types::CUeglFrame, + pStream: *mut cuda_types::CUstream, + ) -> cuda_types::CUresult; + /** \brief Get an eglFrame through which to access a registered EGL graphics resource. + + Returns in \p *eglFrame an eglFrame pointer through which the registered graphics resource + \p resource may be accessed. + This API can only be called for registered EGL graphics resources. + + The ::CUeglFrame is defined as: + \code + typedef struct CUeglFrame_st { + union { + CUarray pArray[MAX_PLANES]; + void* pPitch[MAX_PLANES]; + } frame; + unsigned int width; + unsigned int height; + unsigned int depth; + unsigned int pitch; + unsigned int planeCount; + unsigned int numChannels; + CUeglFrameType frameType; + CUeglColorFormat eglColorFormat; + CUarray_format cuFormat; + } CUeglFrame; + \endcode + + If \p resource is not registered then ::CUDA_ERROR_NOT_MAPPED is returned. + * + \param eglFrame - Returned eglFrame. + \param resource - Registered resource to access. + \param index - Index for cubemap surfaces. + \param mipLevel - Mipmap level for the subresource to access. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_MAPPED + + \sa + ::cuGraphicsMapResources, + ::cuGraphicsSubResourceGetMappedArray, + ::cuGraphicsResourceGetMappedPointer, + ::cudaGraphicsResourceGetMappedEglFrame*/ + fn cuGraphicsResourceGetMappedEglFrame( + eglFrame: *mut cuda_types::CUeglFrame, + resource: cuda_types::CUgraphicsResource, + index: ::core::ffi::c_uint, + mipLevel: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Creates an event from EGLSync object + + Creates an event *phEvent from an EGLSyncKHR eglSync with the flags specified + via \p flags. Valid flags include: + - ::CU_EVENT_DEFAULT: Default event creation flag. + - ::CU_EVENT_BLOCKING_SYNC: Specifies that the created event should use blocking + synchronization. A CPU thread that uses ::cuEventSynchronize() to wait on + an event created with this flag will block until the event has actually + been completed. + + Once the \p eglSync gets destroyed, ::cuEventDestroy is the only API + that can be invoked on the event. + + ::cuEventRecord and TimingData are not supported for events created from EGLSync. + + The EGLSyncKHR is an opaque handle to an EGL sync object. + typedef void* EGLSyncKHR + + \param phEvent - Returns newly created event + \param eglSync - Opaque handle to EGLSync object + \param flags - Event creation flags + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY + + \sa + ::cuEventQuery, + ::cuEventSynchronize, + ::cuEventDestroy*/ + fn cuEventCreateFromEGLSync( + phEvent: *mut cuda_types::CUevent, + eglSync: cuda_types::EGLSyncKHR, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Gets the CUDA device associated with a VDPAU device + + Returns in \p *pDevice the CUDA device associated with a \p vdpDevice, if + applicable. + + \param pDevice - Device associated with vdpDevice + \param vdpDevice - A VdpDevice handle + \param vdpGetProcAddress - VDPAU's VdpGetProcAddress function pointer + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + + \sa ::cuCtxCreate, ::cuVDPAUCtxCreate, ::cuGraphicsVDPAURegisterVideoSurface, + ::cuGraphicsVDPAURegisterOutputSurface, ::cuGraphicsUnregisterResource, + ::cuGraphicsResourceSetMapFlags, ::cuGraphicsMapResources, + ::cuGraphicsUnmapResources, ::cuGraphicsSubResourceGetMappedArray, + ::cudaVDPAUGetDevice*/ + fn cuVDPAUGetDevice( + pDevice: *mut cuda_types::CUdevice, + vdpDevice: cuda_types::VdpDevice, + vdpGetProcAddress: cuda_types::VdpGetProcAddress, + ) -> cuda_types::CUresult; + /** \brief Create a CUDA context for interoperability with VDPAU + + Creates a new CUDA context, initializes VDPAU interoperability, and + associates the CUDA context with the calling thread. It must be called + before performing any other VDPAU interoperability operations. It may fail + if the needed VDPAU driver facilities are not available. For usage of the + \p flags parameter, see ::cuCtxCreate(). + + \param pCtx - Returned CUDA context + \param flags - Options for CUDA context creation + \param device - Device on which to create the context + \param vdpDevice - The VdpDevice to interop with + \param vdpGetProcAddress - VDPAU's VdpGetProcAddress function pointer + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_OUT_OF_MEMORY + \notefnerr + + \sa ::cuCtxCreate, ::cuGraphicsVDPAURegisterVideoSurface, + ::cuGraphicsVDPAURegisterOutputSurface, ::cuGraphicsUnregisterResource, + ::cuGraphicsResourceSetMapFlags, ::cuGraphicsMapResources, + ::cuGraphicsUnmapResources, ::cuGraphicsSubResourceGetMappedArray, + ::cuVDPAUGetDevice*/ + fn cuVDPAUCtxCreate_v2( + pCtx: *mut cuda_types::CUcontext, + flags: ::core::ffi::c_uint, + device: cuda_types::CUdevice, + vdpDevice: cuda_types::VdpDevice, + vdpGetProcAddress: cuda_types::VdpGetProcAddress, + ) -> cuda_types::CUresult; + /** \brief Registers a VDPAU VdpVideoSurface object + + Registers the VdpVideoSurface specified by \p vdpSurface for access by + CUDA. A handle to the registered object is returned as \p pCudaResource. + The surface's intended usage is specified using \p flags, as follows: + + - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this + resource will be used. It is therefore assumed that this resource will be + read from and written to by CUDA. This is the default value. + - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA + will not write to this resource. + - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that + CUDA will not read from this resource and will write over the + entire contents of the resource, so none of the data previously + stored in the resource will be preserved. + + The VdpVideoSurface is presented as an array of subresources that may be + accessed using pointers returned by ::cuGraphicsSubResourceGetMappedArray. + The exact number of valid \p arrayIndex values depends on the VDPAU surface + format. The mapping is shown in the table below. \p mipLevel must be 0. + + \htmlonly + <table> + <tr><th>VdpChromaType </th><th>arrayIndex</th><th>Size </th><th>Format</th><th>Content </th></tr> + <tr><td rowspan="4" valign="top">VDP_CHROMA_TYPE_420</td><td>0 </td><td>w x h/2</td><td>R8 </td><td>Top-field luma </td></tr> + <tr> <td>1 </td><td>w x h/2</td><td>R8 </td><td>Bottom-field luma </td></tr> + <tr> <td>2 </td><td>w/2 x h/4</td><td>R8G8 </td><td>Top-field chroma </td></tr> + <tr> <td>3 </td><td>w/2 x h/4</td><td>R8G8 </td><td>Bottom-field chroma</td></tr> + <tr><td rowspan="4" valign="top">VDP_CHROMA_TYPE_422</td><td>0 </td><td>w x h/2</td><td>R8 </td><td>Top-field luma </td></tr> + <tr> <td>1 </td><td>w x h/2</td><td>R8 </td><td>Bottom-field luma </td></tr> + <tr> <td>2 </td><td>w/2 x h/2</td><td>R8G8 </td><td>Top-field chroma </td></tr> + <tr> <td>3 </td><td>w/2 x h/2</td><td>R8G8 </td><td>Bottom-field chroma</td></tr> + </table> + \endhtmlonly + + \latexonly + \begin{tabular}{|l|l|l|l|l|} + \hline + VdpChromaType & arrayIndex & Size & Format & Content \\ + \hline + VDP\_CHROMA\_TYPE\_420 & 0 & w x h/2 & R8 & Top-field luma \\ + & 1 & w x h/2 & R8 & Bottom-field luma \\ + & 2 & w/2 x h/4 & R8G8 & Top-field chroma \\ + & 3 & w/2 x h/4 & R8G8 & Bottom-field chroma \\ + \hline + VDP\_CHROMA\_TYPE\_422 & 0 & w x h/2 & R8 & Top-field luma \\ + & 1 & w x h/2 & R8 & Bottom-field luma \\ + & 2 & w/2 x h/2 & R8G8 & Top-field chroma \\ + & 3 & w/2 x h/2 & R8G8 & Bottom-field chroma \\ + \hline + \end{tabular} + \endlatexonly + + \param pCudaResource - Pointer to the returned object handle + \param vdpSurface - The VdpVideoSurface to be registered + \param flags - Map flags + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_ALREADY_MAPPED, + ::CUDA_ERROR_INVALID_CONTEXT, + \notefnerr + + \sa ::cuCtxCreate, ::cuVDPAUCtxCreate, + ::cuGraphicsVDPAURegisterOutputSurface, ::cuGraphicsUnregisterResource, + ::cuGraphicsResourceSetMapFlags, ::cuGraphicsMapResources, + ::cuGraphicsUnmapResources, ::cuGraphicsSubResourceGetMappedArray, + ::cuVDPAUGetDevice, + ::cudaGraphicsVDPAURegisterVideoSurface*/ + fn cuGraphicsVDPAURegisterVideoSurface( + pCudaResource: *mut cuda_types::CUgraphicsResource, + vdpSurface: cuda_types::VdpVideoSurface, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + /** \brief Registers a VDPAU VdpOutputSurface object + + Registers the VdpOutputSurface specified by \p vdpSurface for access by + CUDA. A handle to the registered object is returned as \p pCudaResource. + The surface's intended usage is specified using \p flags, as follows: + + - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this + resource will be used. It is therefore assumed that this resource will be + read from and written to by CUDA. This is the default value. + - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA + will not write to this resource. + - ::CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that + CUDA will not read from this resource and will write over the + entire contents of the resource, so none of the data previously + stored in the resource will be preserved. + + The VdpOutputSurface is presented as an array of subresources that may be + accessed using pointers returned by ::cuGraphicsSubResourceGetMappedArray. + The exact number of valid \p arrayIndex values depends on the VDPAU surface + format. The mapping is shown in the table below. \p mipLevel must be 0. + + \htmlonly + <table> + <tr><th>VdpRGBAFormat </th><th>arrayIndex</th><th>Size </th><th>Format </th><th>Content </th></tr> + <tr><td>VDP_RGBA_FORMAT_B8G8R8A8 </td><td>0 </td><td>w x h</td><td>ARGB8 </td><td>Entire surface</td></tr> + <tr><td>VDP_RGBA_FORMAT_R10G10B10A2</td><td>0 </td><td>w x h</td><td>A2BGR10</td><td>Entire surface</td></tr> + </table> + \endhtmlonly + + \latexonly + \begin{tabular}{|l|l|l|l|l|} + \hline + VdpRGBAFormat & arrayIndex & Size & Format & Content \\ + \hline + VDP\_RGBA\_FORMAT\_B8G8R8A8 & 0 & w x h & ARGB8 & Entire surface \\ + VDP\_RGBA\_FORMAT\_R10G10B10A2 & 0 & w x h & A2BGR10 & Entire surface \\ + \hline + \end{tabular} + \endlatexonly + + \param pCudaResource - Pointer to the returned object handle + \param vdpSurface - The VdpOutputSurface to be registered + \param flags - Map flags + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_ALREADY_MAPPED, + ::CUDA_ERROR_INVALID_CONTEXT, + \notefnerr + + \sa ::cuCtxCreate, ::cuVDPAUCtxCreate, + ::cuGraphicsVDPAURegisterVideoSurface, ::cuGraphicsUnregisterResource, + ::cuGraphicsResourceSetMapFlags, ::cuGraphicsMapResources, + ::cuGraphicsUnmapResources, ::cuGraphicsSubResourceGetMappedArray, + ::cuVDPAUGetDevice, + ::cudaGraphicsVDPAURegisterOutputSurface*/ + fn cuGraphicsVDPAURegisterOutputSurface( + pCudaResource: *mut cuda_types::CUgraphicsResource, + vdpSurface: cuda_types::VdpOutputSurface, + flags: ::core::ffi::c_uint, + ) -> cuda_types::CUresult; + fn cuVDPAUCtxCreate( + pCtx: *mut cuda_types::CUcontext, + flags: ::core::ffi::c_uint, + device: cuda_types::CUdevice, + vdpDevice: cuda_types::VdpDevice, + vdpGetProcAddress: cuda_types::VdpGetProcAddress, + ) -> cuda_types::CUresult; +} |