Diffstat (limited to 'zluda_blaslt/src/cublaslt.rs')
-rw-r--r-- | zluda_blaslt/src/cublaslt.rs | 5372
1 file changed, 5372 insertions, 0 deletions
diff --git a/zluda_blaslt/src/cublaslt.rs b/zluda_blaslt/src/cublaslt.rs new file mode 100644 index 0000000..bba58e3 --- /dev/null +++ b/zluda_blaslt/src/cublaslt.rs @@ -0,0 +1,5372 @@ +/* automatically generated by rust-bindgen 0.66.1 */ + +#[repr(C)] +#[repr(align(8))] +#[derive(Copy, Clone)] +pub struct float2 { + pub x: f32, + pub y: f32, +} +#[repr(C)] +#[repr(align(16))] +#[derive(Copy, Clone)] +pub struct double2 { + pub x: f64, + pub y: f64, +} +#[doc = " *\n *\n *"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct dim3 { + pub x: ::std::os::raw::c_uint, + pub y: ::std::os::raw::c_uint, + pub z: ::std::os::raw::c_uint, +} +impl cudaError { + #[doc = " The API call returned with no errors. In the case of query calls, this\n also means that the operation being queried is complete (see\n ::cudaEventQuery() and ::cudaStreamQuery())."] + pub const cudaSuccess: cudaError = cudaError(0); +} +impl cudaError { + #[doc = " This indicates that one or more of the parameters passed to the API call\n is not within an acceptable range of values."] + pub const cudaErrorInvalidValue: cudaError = cudaError(1); +} +impl cudaError { + #[doc = " The API call failed because it was unable to allocate enough memory to\n perform the requested operation."] + pub const cudaErrorMemoryAllocation: cudaError = cudaError(2); +} +impl cudaError { + #[doc = " The API call failed because the CUDA driver and runtime could not be\n initialized."] + pub const cudaErrorInitializationError: cudaError = cudaError(3); +} +impl cudaError { + #[doc = " This indicates that a CUDA Runtime API call cannot be executed because\n it is being called during process shut down, at a point in time after\n CUDA driver has been unloaded."] + pub const cudaErrorCudartUnloading: cudaError = cudaError(4); +} +impl cudaError { + #[doc = " This indicates profiler is not initialized for this run. This can\n happen when the application is running with external profiling tools\n like visual profiler."] + pub const cudaErrorProfilerDisabled: cudaError = cudaError(5); +} +impl cudaError { + #[doc = " \\deprecated\n This error return is deprecated as of CUDA 5.0. It is no longer an error\n to attempt to enable/disable the profiling via ::cudaProfilerStart or\n ::cudaProfilerStop without initialization."] + pub const cudaErrorProfilerNotInitialized: cudaError = cudaError(6); +} +impl cudaError { + #[doc = " \\deprecated\n This error return is deprecated as of CUDA 5.0. It is no longer an error\n to call cudaProfilerStart() when profiling is already enabled."] + pub const cudaErrorProfilerAlreadyStarted: cudaError = cudaError(7); +} +impl cudaError { + #[doc = " \\deprecated\n This error return is deprecated as of CUDA 5.0. It is no longer an error\n to call cudaProfilerStop() when profiling is already disabled."] + pub const cudaErrorProfilerAlreadyStopped: cudaError = cudaError(8); +} +impl cudaError { + #[doc = " This indicates that a kernel launch is requesting resources that can\n never be satisfied by the current device. Requesting more shared memory\n per block than the device supports will trigger this error, as will\n requesting too many threads or blocks. 
See ::cudaDeviceProp for more\n device limitations."] + pub const cudaErrorInvalidConfiguration: cudaError = cudaError(9); +} +impl cudaError { + #[doc = " This indicates that one or more of the pitch-related parameters passed\n to the API call is not within the acceptable range for pitch."] + pub const cudaErrorInvalidPitchValue: cudaError = cudaError(12); +} +impl cudaError { + #[doc = " This indicates that the symbol name/identifier passed to the API call\n is not a valid name or identifier."] + pub const cudaErrorInvalidSymbol: cudaError = cudaError(13); +} +impl cudaError { + #[doc = " This indicates that at least one host pointer passed to the API call is\n not a valid host pointer.\n \\deprecated\n This error return is deprecated as of CUDA 10.1."] + pub const cudaErrorInvalidHostPointer: cudaError = cudaError(16); +} +impl cudaError { + #[doc = " This indicates that at least one device pointer passed to the API call is\n not a valid device pointer.\n \\deprecated\n This error return is deprecated as of CUDA 10.1."] + pub const cudaErrorInvalidDevicePointer: cudaError = cudaError(17); +} +impl cudaError { + #[doc = " This indicates that the texture passed to the API call is not a valid\n texture."] + pub const cudaErrorInvalidTexture: cudaError = cudaError(18); +} +impl cudaError { + #[doc = " This indicates that the texture binding is not valid. This occurs if you\n call ::cudaGetTextureAlignmentOffset() with an unbound texture."] + pub const cudaErrorInvalidTextureBinding: cudaError = cudaError(19); +} +impl cudaError { + #[doc = " This indicates that the channel descriptor passed to the API call is not\n valid. This occurs if the format is not one of the formats specified by\n ::cudaChannelFormatKind, or if one of the dimensions is invalid."] + pub const cudaErrorInvalidChannelDescriptor: cudaError = cudaError(20); +} +impl cudaError { + #[doc = " This indicates that the direction of the memcpy passed to the API call is\n not one of the types specified by ::cudaMemcpyKind."] + pub const cudaErrorInvalidMemcpyDirection: cudaError = cudaError(21); +} +impl cudaError { + #[doc = " This indicated that the user has taken the address of a constant variable,\n which was forbidden up until the CUDA 3.1 release.\n \\deprecated\n This error return is deprecated as of CUDA 3.1. Variables in constant\n memory may now have their address taken by the runtime via\n ::cudaGetSymbolAddress()."] + pub const cudaErrorAddressOfConstant: cudaError = cudaError(22); +} +impl cudaError { + #[doc = " This indicated that a texture fetch was not able to be performed.\n This was previously used for device emulation of texture operations.\n \\deprecated\n This error return is deprecated as of CUDA 3.1. Device emulation mode was\n removed with the CUDA 3.1 release."] + pub const cudaErrorTextureFetchFailed: cudaError = cudaError(23); +} +impl cudaError { + #[doc = " This indicated that a texture was not bound for access.\n This was previously used for device emulation of texture operations.\n \\deprecated\n This error return is deprecated as of CUDA 3.1. Device emulation mode was\n removed with the CUDA 3.1 release."] + pub const cudaErrorTextureNotBound: cudaError = cudaError(24); +} +impl cudaError { + #[doc = " This indicated that a synchronization operation had failed.\n This was previously used for some device emulation functions.\n \\deprecated\n This error return is deprecated as of CUDA 3.1. 
Device emulation mode was\n removed with the CUDA 3.1 release."] + pub const cudaErrorSynchronizationError: cudaError = cudaError(25); +} +impl cudaError { + #[doc = " This indicates that a non-float texture was being accessed with linear\n filtering. This is not supported by CUDA."] + pub const cudaErrorInvalidFilterSetting: cudaError = cudaError(26); +} +impl cudaError { + #[doc = " This indicates that an attempt was made to read a non-float texture as a\n normalized float. This is not supported by CUDA."] + pub const cudaErrorInvalidNormSetting: cudaError = cudaError(27); +} +impl cudaError { + #[doc = " Mixing of device and device emulation code was not allowed.\n \\deprecated\n This error return is deprecated as of CUDA 3.1. Device emulation mode was\n removed with the CUDA 3.1 release."] + pub const cudaErrorMixedDeviceExecution: cudaError = cudaError(28); +} +impl cudaError { + #[doc = " This indicates that the API call is not yet implemented. Production\n releases of CUDA will never return this error.\n \\deprecated\n This error return is deprecated as of CUDA 4.1."] + pub const cudaErrorNotYetImplemented: cudaError = cudaError(31); +} +impl cudaError { + #[doc = " This indicated that an emulated device pointer exceeded the 32-bit address\n range.\n \\deprecated\n This error return is deprecated as of CUDA 3.1. Device emulation mode was\n removed with the CUDA 3.1 release."] + pub const cudaErrorMemoryValueTooLarge: cudaError = cudaError(32); +} +impl cudaError { + #[doc = " This indicates that the CUDA driver that the application has loaded is a\n stub library. Applications that run with the stub rather than a real\n driver loaded will result in CUDA API returning this error."] + pub const cudaErrorStubLibrary: cudaError = cudaError(34); +} +impl cudaError { + #[doc = " This indicates that the installed NVIDIA CUDA driver is older than the\n CUDA runtime library. This is not a supported configuration. Users should\n install an updated NVIDIA display driver to allow the application to run."] + pub const cudaErrorInsufficientDriver: cudaError = cudaError(35); +} +impl cudaError { + #[doc = " This indicates that the API call requires a newer CUDA driver than the one\n currently installed. Users should install an updated NVIDIA CUDA driver\n to allow the API call to succeed."] + pub const cudaErrorCallRequiresNewerDriver: cudaError = cudaError(36); +} +impl cudaError { + #[doc = " This indicates that the surface passed to the API call is not a valid\n surface."] + pub const cudaErrorInvalidSurface: cudaError = cudaError(37); +} +impl cudaError { + #[doc = " This indicates that multiple global or constant variables (across separate\n CUDA source files in the application) share the same string name."] + pub const cudaErrorDuplicateVariableName: cudaError = cudaError(43); +} +impl cudaError { + #[doc = " This indicates that multiple textures (across separate CUDA source\n files in the application) share the same string name."] + pub const cudaErrorDuplicateTextureName: cudaError = cudaError(44); +} +impl cudaError { + #[doc = " This indicates that multiple surfaces (across separate CUDA source\n files in the application) share the same string name."] + pub const cudaErrorDuplicateSurfaceName: cudaError = cudaError(45); +} +impl cudaError { + #[doc = " This indicates that all CUDA devices are busy or unavailable at the current\n time. 
Devices are often busy/unavailable due to use of\n ::cudaComputeModeProhibited, ::cudaComputeModeExclusiveProcess, or when long\n running CUDA kernels have filled up the GPU and are blocking new work\n from starting. They can also be unavailable due to memory constraints\n on a device that already has active CUDA work being performed."] + pub const cudaErrorDevicesUnavailable: cudaError = cudaError(46); +} +impl cudaError { + #[doc = " This indicates that the current context is not compatible with this\n the CUDA Runtime. This can only occur if you are using CUDA\n Runtime/Driver interoperability and have created an existing Driver\n context using the driver API. The Driver context may be incompatible\n either because the Driver context was created using an older version\n of the API, because the Runtime API call expects a primary driver\n context and the Driver context is not primary, or because the Driver\n context has been destroyed. Please see \\ref CUDART_DRIVER \"Interactions\n with the CUDA Driver API\" for more information."] + pub const cudaErrorIncompatibleDriverContext: cudaError = cudaError(49); +} +impl cudaError { + #[doc = " The device function being invoked (usually via ::cudaLaunchKernel()) was not\n previously configured via the ::cudaConfigureCall() function."] + pub const cudaErrorMissingConfiguration: cudaError = cudaError(52); +} +impl cudaError { + #[doc = " This indicated that a previous kernel launch failed. This was previously\n used for device emulation of kernel launches.\n \\deprecated\n This error return is deprecated as of CUDA 3.1. Device emulation mode was\n removed with the CUDA 3.1 release."] + pub const cudaErrorPriorLaunchFailure: cudaError = cudaError(53); +} +impl cudaError { + #[doc = " This error indicates that a device runtime grid launch did not occur\n because the depth of the child grid would exceed the maximum supported\n number of nested grid launches."] + pub const cudaErrorLaunchMaxDepthExceeded: cudaError = cudaError(65); +} +impl cudaError { + #[doc = " This error indicates that a grid launch did not occur because the kernel\n uses file-scoped textures which are unsupported by the device runtime.\n Kernels launched via the device runtime only support textures created with\n the Texture Object API's."] + pub const cudaErrorLaunchFileScopedTex: cudaError = cudaError(66); +} +impl cudaError { + #[doc = " This error indicates that a grid launch did not occur because the kernel\n uses file-scoped surfaces which are unsupported by the device runtime.\n Kernels launched via the device runtime only support surfaces created with\n the Surface Object API's."] + pub const cudaErrorLaunchFileScopedSurf: cudaError = cudaError(67); +} +impl cudaError { + #[doc = " This error indicates that a call to ::cudaDeviceSynchronize made from\n the device runtime failed because the call was made at grid depth greater\n than than either the default (2 levels of grids) or user specified device\n limit ::cudaLimitDevRuntimeSyncDepth. 
To be able to synchronize on\n launched grids at a greater depth successfully, the maximum nested\n depth at which ::cudaDeviceSynchronize will be called must be specified\n with the ::cudaLimitDevRuntimeSyncDepth limit to the ::cudaDeviceSetLimit\n api before the host-side launch of a kernel using the device runtime.\n Keep in mind that additional levels of sync depth require the runtime\n to reserve large amounts of device memory that cannot be used for\n user allocations."] + pub const cudaErrorSyncDepthExceeded: cudaError = cudaError(68); +} +impl cudaError { + #[doc = " This error indicates that a device runtime grid launch failed because\n the launch would exceed the limit ::cudaLimitDevRuntimePendingLaunchCount.\n For this launch to proceed successfully, ::cudaDeviceSetLimit must be\n called to set the ::cudaLimitDevRuntimePendingLaunchCount to be higher\n than the upper bound of outstanding launches that can be issued to the\n device runtime. Keep in mind that raising the limit of pending device\n runtime launches will require the runtime to reserve device memory that\n cannot be used for user allocations."] + pub const cudaErrorLaunchPendingCountExceeded: cudaError = cudaError(69); +} +impl cudaError { + #[doc = " The requested device function does not exist or is not compiled for the\n proper device architecture."] + pub const cudaErrorInvalidDeviceFunction: cudaError = cudaError(98); +} +impl cudaError { + #[doc = " This indicates that no CUDA-capable devices were detected by the installed\n CUDA driver."] + pub const cudaErrorNoDevice: cudaError = cudaError(100); +} +impl cudaError { + #[doc = " This indicates that the device ordinal supplied by the user does not\n correspond to a valid CUDA device or that the action requested is\n invalid for the specified device."] + pub const cudaErrorInvalidDevice: cudaError = cudaError(101); +} +impl cudaError { + #[doc = " This indicates that the device doesn't have a valid Grid License."] + pub const cudaErrorDeviceNotLicensed: cudaError = cudaError(102); +} +impl cudaError { + #[doc = " By default, the CUDA runtime may perform a minimal set of self-tests,\n as well as CUDA driver tests, to establish the validity of both.\n Introduced in CUDA 11.2, this error return indicates that at least one\n of these tests has failed and the validity of either the runtime\n or the driver could not be established."] + pub const cudaErrorSoftwareValidityNotEstablished: cudaError = cudaError(103); +} +impl cudaError { + #[doc = " This indicates an internal startup failure in the CUDA runtime."] + pub const cudaErrorStartupFailure: cudaError = cudaError(127); +} +impl cudaError { + #[doc = " This indicates that the device kernel image is invalid."] + pub const cudaErrorInvalidKernelImage: cudaError = cudaError(200); +} +impl cudaError { + #[doc = " This most frequently indicates that there is no context bound to the\n current thread. This can also be returned if the context passed to an\n API call is not a valid handle (such as a context that has had\n ::cuCtxDestroy() invoked on it). This can also be returned if a user\n mixes different API versions (i.e. 
3010 context with 3020 API calls).\n See ::cuCtxGetApiVersion() for more details."] + pub const cudaErrorDeviceUninitialized: cudaError = cudaError(201); +} +impl cudaError { + #[doc = " This indicates that the buffer object could not be mapped."] + pub const cudaErrorMapBufferObjectFailed: cudaError = cudaError(205); +} +impl cudaError { + #[doc = " This indicates that the buffer object could not be unmapped."] + pub const cudaErrorUnmapBufferObjectFailed: cudaError = cudaError(206); +} +impl cudaError { + #[doc = " This indicates that the specified array is currently mapped and thus\n cannot be destroyed."] + pub const cudaErrorArrayIsMapped: cudaError = cudaError(207); +} +impl cudaError { + #[doc = " This indicates that the resource is already mapped."] + pub const cudaErrorAlreadyMapped: cudaError = cudaError(208); +} +impl cudaError { + #[doc = " This indicates that there is no kernel image available that is suitable\n for the device. This can occur when a user specifies code generation\n options for a particular CUDA source file that do not include the\n corresponding device configuration."] + pub const cudaErrorNoKernelImageForDevice: cudaError = cudaError(209); +} +impl cudaError { + #[doc = " This indicates that a resource has already been acquired."] + pub const cudaErrorAlreadyAcquired: cudaError = cudaError(210); +} +impl cudaError { + #[doc = " This indicates that a resource is not mapped."] + pub const cudaErrorNotMapped: cudaError = cudaError(211); +} +impl cudaError { + #[doc = " This indicates that a mapped resource is not available for access as an\n array."] + pub const cudaErrorNotMappedAsArray: cudaError = cudaError(212); +} +impl cudaError { + #[doc = " This indicates that a mapped resource is not available for access as a\n pointer."] + pub const cudaErrorNotMappedAsPointer: cudaError = cudaError(213); +} +impl cudaError { + #[doc = " This indicates that an uncorrectable ECC error was detected during\n execution."] + pub const cudaErrorECCUncorrectable: cudaError = cudaError(214); +} +impl cudaError { + #[doc = " This indicates that the ::cudaLimit passed to the API call is not\n supported by the active device."] + pub const cudaErrorUnsupportedLimit: cudaError = cudaError(215); +} +impl cudaError { + #[doc = " This indicates that a call tried to access an exclusive-thread device that\n is already in use by a different thread."] + pub const cudaErrorDeviceAlreadyInUse: cudaError = cudaError(216); +} +impl cudaError { + #[doc = " This error indicates that P2P access is not supported across the given\n devices."] + pub const cudaErrorPeerAccessUnsupported: cudaError = cudaError(217); +} +impl cudaError { + #[doc = " A PTX compilation failed. The runtime may fall back to compiling PTX if\n an application does not contain a suitable binary for the current device."] + pub const cudaErrorInvalidPtx: cudaError = cudaError(218); +} +impl cudaError { + #[doc = " This indicates an error with the OpenGL or DirectX context."] + pub const cudaErrorInvalidGraphicsContext: cudaError = cudaError(219); +} +impl cudaError { + #[doc = " This indicates that an uncorrectable NVLink error was detected during the\n execution."] + pub const cudaErrorNvlinkUncorrectable: cudaError = cudaError(220); +} +impl cudaError { + #[doc = " This indicates that the PTX JIT compiler library was not found. The JIT Compiler\n library is used for PTX compilation. 
The runtime may fall back to compiling PTX\n if an application does not contain a suitable binary for the current device."] + pub const cudaErrorJitCompilerNotFound: cudaError = cudaError(221); +} +impl cudaError { + #[doc = " This indicates that the provided PTX was compiled with an unsupported toolchain.\n The most common reason for this, is the PTX was generated by a compiler newer\n than what is supported by the CUDA driver and PTX JIT compiler."] + pub const cudaErrorUnsupportedPtxVersion: cudaError = cudaError(222); +} +impl cudaError { + #[doc = " This indicates that the JIT compilation was disabled. The JIT compilation compiles\n PTX. The runtime may fall back to compiling PTX if an application does not contain\n a suitable binary for the current device."] + pub const cudaErrorJitCompilationDisabled: cudaError = cudaError(223); +} +impl cudaError { + #[doc = " This indicates that the provided execution affinity is not supported by the device."] + pub const cudaErrorUnsupportedExecAffinity: cudaError = cudaError(224); +} +impl cudaError { + #[doc = " This indicates that the device kernel source is invalid."] + pub const cudaErrorInvalidSource: cudaError = cudaError(300); +} +impl cudaError { + #[doc = " This indicates that the file specified was not found."] + pub const cudaErrorFileNotFound: cudaError = cudaError(301); +} +impl cudaError { + #[doc = " This indicates that a link to a shared object failed to resolve."] + pub const cudaErrorSharedObjectSymbolNotFound: cudaError = cudaError(302); +} +impl cudaError { + #[doc = " This indicates that initialization of a shared object failed."] + pub const cudaErrorSharedObjectInitFailed: cudaError = cudaError(303); +} +impl cudaError { + #[doc = " This error indicates that an OS call failed."] + pub const cudaErrorOperatingSystem: cudaError = cudaError(304); +} +impl cudaError { + #[doc = " This indicates that a resource handle passed to the API call was not\n valid. Resource handles are opaque types like ::cudaStream_t and\n ::cudaEvent_t."] + pub const cudaErrorInvalidResourceHandle: cudaError = cudaError(400); +} +impl cudaError { + #[doc = " This indicates that a resource required by the API call is not in a\n valid state to perform the requested operation."] + pub const cudaErrorIllegalState: cudaError = cudaError(401); +} +impl cudaError { + #[doc = " This indicates that a named symbol was not found. Examples of symbols\n are global/constant variable names, driver function names, texture names,\n and surface names."] + pub const cudaErrorSymbolNotFound: cudaError = cudaError(500); +} +impl cudaError { + #[doc = " This indicates that asynchronous operations issued previously have not\n completed yet. This result is not actually an error, but must be indicated\n differently than ::cudaSuccess (which indicates completion). Calls that\n may return this value include ::cudaEventQuery() and ::cudaStreamQuery()."] + pub const cudaErrorNotReady: cudaError = cudaError(600); +} +impl cudaError { + #[doc = " The device encountered a load or store instruction on an invalid memory address.\n This leaves the process in an inconsistent state and any further CUDA work\n will return the same error. To continue using CUDA, the process must be terminated\n and relaunched."] + pub const cudaErrorIllegalAddress: cudaError = cudaError(700); +} +impl cudaError { + #[doc = " This indicates that a launch did not occur because it did not have\n appropriate resources. 
Although this error is similar to\n ::cudaErrorInvalidConfiguration, this error usually indicates that the\n user has attempted to pass too many arguments to the device kernel, or the\n kernel launch specifies too many threads for the kernel's register count."] + pub const cudaErrorLaunchOutOfResources: cudaError = cudaError(701); +} +impl cudaError { + #[doc = " This indicates that the device kernel took too long to execute. This can\n only occur if timeouts are enabled - see the device property\n \\ref ::cudaDeviceProp::kernelExecTimeoutEnabled \"kernelExecTimeoutEnabled\"\n for more information.\n This leaves the process in an inconsistent state and any further CUDA work\n will return the same error. To continue using CUDA, the process must be terminated\n and relaunched."] + pub const cudaErrorLaunchTimeout: cudaError = cudaError(702); +} +impl cudaError { + #[doc = " This error indicates a kernel launch that uses an incompatible texturing\n mode."] + pub const cudaErrorLaunchIncompatibleTexturing: cudaError = cudaError(703); +} +impl cudaError { + #[doc = " This error indicates that a call to ::cudaDeviceEnablePeerAccess() is\n trying to re-enable peer addressing on from a context which has already\n had peer addressing enabled."] + pub const cudaErrorPeerAccessAlreadyEnabled: cudaError = cudaError(704); +} +impl cudaError { + #[doc = " This error indicates that ::cudaDeviceDisablePeerAccess() is trying to\n disable peer addressing which has not been enabled yet via\n ::cudaDeviceEnablePeerAccess()."] + pub const cudaErrorPeerAccessNotEnabled: cudaError = cudaError(705); +} +impl cudaError { + #[doc = " This indicates that the user has called ::cudaSetValidDevices(),\n ::cudaSetDeviceFlags(), ::cudaD3D9SetDirect3DDevice(),\n ::cudaD3D10SetDirect3DDevice, ::cudaD3D11SetDirect3DDevice(), or\n ::cudaVDPAUSetVDPAUDevice() after initializing the CUDA runtime by\n calling non-device management operations (allocating memory and\n launching kernels are examples of non-device management operations).\n This error can also be returned if using runtime/driver\n interoperability and there is an existing ::CUcontext active on the\n host thread."] + pub const cudaErrorSetOnActiveProcess: cudaError = cudaError(708); +} +impl cudaError { + #[doc = " This error indicates that the context current to the calling thread\n has been destroyed using ::cuCtxDestroy, or is a primary context which\n has not yet been initialized."] + pub const cudaErrorContextIsDestroyed: cudaError = cudaError(709); +} +impl cudaError { + #[doc = " An assert triggered in device code during kernel execution. The device\n cannot be used again. All existing allocations are invalid. 
To continue\n using CUDA, the process must be terminated and relaunched."] + pub const cudaErrorAssert: cudaError = cudaError(710); +} +impl cudaError { + #[doc = " This error indicates that the hardware resources required to enable\n peer access have been exhausted for one or more of the devices\n passed to ::cudaEnablePeerAccess()."] + pub const cudaErrorTooManyPeers: cudaError = cudaError(711); +} +impl cudaError { + #[doc = " This error indicates that the memory range passed to ::cudaHostRegister()\n has already been registered."] + pub const cudaErrorHostMemoryAlreadyRegistered: cudaError = cudaError(712); +} +impl cudaError { + #[doc = " This error indicates that the pointer passed to ::cudaHostUnregister()\n does not correspond to any currently registered memory region."] + pub const cudaErrorHostMemoryNotRegistered: cudaError = cudaError(713); +} +impl cudaError { + #[doc = " Device encountered an error in the call stack during kernel execution,\n possibly due to stack corruption or exceeding the stack size limit.\n This leaves the process in an inconsistent state and any further CUDA work\n will return the same error. To continue using CUDA, the process must be terminated\n and relaunched."] + pub const cudaErrorHardwareStackError: cudaError = cudaError(714); +} +impl cudaError { + #[doc = " The device encountered an illegal instruction during kernel execution\n This leaves the process in an inconsistent state and any further CUDA work\n will return the same error. To continue using CUDA, the process must be terminated\n and relaunched."] + pub const cudaErrorIllegalInstruction: cudaError = cudaError(715); +} +impl cudaError { + #[doc = " The device encountered a load or store instruction\n on a memory address which is not aligned.\n This leaves the process in an inconsistent state and any further CUDA work\n will return the same error. To continue using CUDA, the process must be terminated\n and relaunched."] + pub const cudaErrorMisalignedAddress: cudaError = cudaError(716); +} +impl cudaError { + #[doc = " While executing a kernel, the device encountered an instruction\n which can only operate on memory locations in certain address spaces\n (global, shared, or local), but was supplied a memory address not\n belonging to an allowed address space.\n This leaves the process in an inconsistent state and any further CUDA work\n will return the same error. To continue using CUDA, the process must be terminated\n and relaunched."] + pub const cudaErrorInvalidAddressSpace: cudaError = cudaError(717); +} +impl cudaError { + #[doc = " The device encountered an invalid program counter.\n This leaves the process in an inconsistent state and any further CUDA work\n will return the same error. To continue using CUDA, the process must be terminated\n and relaunched."] + pub const cudaErrorInvalidPc: cudaError = cudaError(718); +} +impl cudaError { + #[doc = " An exception occurred on the device while executing a kernel. Common\n causes include dereferencing an invalid device pointer and accessing\n out of bounds shared memory. Less common cases can be system specific - more\n information about these cases can be found in the system specific user guide.\n This leaves the process in an inconsistent state and any further CUDA work\n will return the same error. 
To continue using CUDA, the process must be terminated\n and relaunched."] + pub const cudaErrorLaunchFailure: cudaError = cudaError(719); +} +impl cudaError { + #[doc = " This error indicates that the number of blocks launched per grid for a kernel that was\n launched via either ::cudaLaunchCooperativeKernel or ::cudaLaunchCooperativeKernelMultiDevice\n exceeds the maximum number of blocks as allowed by ::cudaOccupancyMaxActiveBlocksPerMultiprocessor\n or ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors\n as specified by the device attribute ::cudaDevAttrMultiProcessorCount."] + pub const cudaErrorCooperativeLaunchTooLarge: cudaError = cudaError(720); +} +impl cudaError { + #[doc = " This error indicates the attempted operation is not permitted."] + pub const cudaErrorNotPermitted: cudaError = cudaError(800); +} +impl cudaError { + #[doc = " This error indicates the attempted operation is not supported\n on the current system or device."] + pub const cudaErrorNotSupported: cudaError = cudaError(801); +} +impl cudaError { + #[doc = " This error indicates that the system is not yet ready to start any CUDA\n work. To continue using CUDA, verify the system configuration is in a\n valid state and all required driver daemons are actively running.\n More information about this error can be found in the system specific\n user guide."] + pub const cudaErrorSystemNotReady: cudaError = cudaError(802); +} +impl cudaError { + #[doc = " This error indicates that there is a mismatch between the versions of\n the display driver and the CUDA driver. Refer to the compatibility documentation\n for supported versions."] + pub const cudaErrorSystemDriverMismatch: cudaError = cudaError(803); +} +impl cudaError { + #[doc = " This error indicates that the system was upgraded to run with forward compatibility\n but the visible hardware detected by CUDA does not support this configuration.\n Refer to the compatibility documentation for the supported hardware matrix or ensure\n that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES\n environment variable."] + pub const cudaErrorCompatNotSupportedOnDevice: cudaError = cudaError(804); +} +impl cudaError { + #[doc = " This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server."] + pub const cudaErrorMpsConnectionFailed: cudaError = cudaError(805); +} +impl cudaError { + #[doc = " This error indicates that the remote procedural call between the MPS server and the MPS client failed."] + pub const cudaErrorMpsRpcFailure: cudaError = cudaError(806); +} +impl cudaError { + #[doc = " This error indicates that the MPS server is not ready to accept new MPS client requests.\n This error can be returned when the MPS server is in the process of recovering from a fatal failure."] + pub const cudaErrorMpsServerNotReady: cudaError = cudaError(807); +} +impl cudaError { + #[doc = " This error indicates that the hardware resources required to create MPS client have been exhausted."] + pub const cudaErrorMpsMaxClientsReached: cudaError = cudaError(808); +} +impl cudaError { + #[doc = " This error indicates the the hardware resources required to device connections have been exhausted."] + pub const cudaErrorMpsMaxConnectionsReached: cudaError = cudaError(809); +} +impl cudaError { + #[doc = " This error indicates that the MPS client has been terminated by the server. 
To continue using CUDA, the process must be terminated and relaunched."] + pub const cudaErrorMpsClientTerminated: cudaError = cudaError(810); +} +impl cudaError { + #[doc = " The operation is not permitted when the stream is capturing."] + pub const cudaErrorStreamCaptureUnsupported: cudaError = cudaError(900); +} +impl cudaError { + #[doc = " The current capture sequence on the stream has been invalidated due to\n a previous error."] + pub const cudaErrorStreamCaptureInvalidated: cudaError = cudaError(901); +} +impl cudaError { + #[doc = " The operation would have resulted in a merge of two independent capture\n sequences."] + pub const cudaErrorStreamCaptureMerge: cudaError = cudaError(902); +} +impl cudaError { + #[doc = " The capture was not initiated in this stream."] + pub const cudaErrorStreamCaptureUnmatched: cudaError = cudaError(903); +} +impl cudaError { + #[doc = " The capture sequence contains a fork that was not joined to the primary\n stream."] + pub const cudaErrorStreamCaptureUnjoined: cudaError = cudaError(904); +} +impl cudaError { + #[doc = " A dependency would have been created which crosses the capture sequence\n boundary. Only implicit in-stream ordering dependencies are allowed to\n cross the boundary."] + pub const cudaErrorStreamCaptureIsolation: cudaError = cudaError(905); +} +impl cudaError { + #[doc = " The operation would have resulted in a disallowed implicit dependency on\n a current capture sequence from cudaStreamLegacy."] + pub const cudaErrorStreamCaptureImplicit: cudaError = cudaError(906); +} +impl cudaError { + #[doc = " The operation is not permitted on an event which was last recorded in a\n capturing stream."] + pub const cudaErrorCapturedEvent: cudaError = cudaError(907); +} +impl cudaError { + #[doc = " A stream capture sequence not initiated with the ::cudaStreamCaptureModeRelaxed\n argument to ::cudaStreamBeginCapture was passed to ::cudaStreamEndCapture in a\n different thread."] + pub const cudaErrorStreamCaptureWrongThread: cudaError = cudaError(908); +} +impl cudaError { + #[doc = " This indicates that the wait operation has timed out."] + pub const cudaErrorTimeout: cudaError = cudaError(909); +} +impl cudaError { + #[doc = " This error indicates that the graph update was not performed because it included\n changes which violated constraints specific to instantiated graph update."] + pub const cudaErrorGraphExecUpdateFailure: cudaError = cudaError(910); +} +impl cudaError { + #[doc = " This indicates that an async error has occurred in a device outside of CUDA.\n If CUDA was waiting for an external device's signal before consuming shared data,\n the external device signaled an error indicating that the data is not valid for\n consumption. This leaves the process in an inconsistent state and any further CUDA\n work will return the same error. To continue using CUDA, the process must be\n terminated and relaunched."] + pub const cudaErrorExternalDevice: cudaError = cudaError(911); +} +impl cudaError { + #[doc = " This indicates that a kernel launch error has occurred due to cluster\n misconfiguration."] + pub const cudaErrorInvalidClusterSize: cudaError = cudaError(912); +} +impl cudaError { + #[doc = " This indicates that an unknown internal error has occurred."] + pub const cudaErrorUnknown: cudaError = cudaError(999); +} +impl cudaError { + #[doc = " Any unhandled CUDA driver error is added to this value and returned via\n the runtime. 
Production releases of CUDA should not return such errors.\n \\deprecated\n This error return is deprecated as of CUDA 4.1."] + pub const cudaErrorApiFailureBase: cudaError = cudaError(10000); +} +#[repr(transparent)] +#[doc = " CUDA error types"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaError(pub ::std::os::raw::c_uint); +impl cudaChannelFormatKind { + #[doc = "< Signed channel format"] + pub const cudaChannelFormatKindSigned: cudaChannelFormatKind = cudaChannelFormatKind(0); +} +impl cudaChannelFormatKind { + #[doc = "< Unsigned channel format"] + pub const cudaChannelFormatKindUnsigned: cudaChannelFormatKind = cudaChannelFormatKind(1); +} +impl cudaChannelFormatKind { + #[doc = "< Float channel format"] + pub const cudaChannelFormatKindFloat: cudaChannelFormatKind = cudaChannelFormatKind(2); +} +impl cudaChannelFormatKind { + #[doc = "< No channel format"] + pub const cudaChannelFormatKindNone: cudaChannelFormatKind = cudaChannelFormatKind(3); +} +impl cudaChannelFormatKind { + #[doc = "< Unsigned 8-bit integers, planar 4:2:0 YUV format"] + pub const cudaChannelFormatKindNV12: cudaChannelFormatKind = cudaChannelFormatKind(4); +} +impl cudaChannelFormatKind { + #[doc = "< 1 channel unsigned 8-bit normalized integer"] + pub const cudaChannelFormatKindUnsignedNormalized8X1: cudaChannelFormatKind = + cudaChannelFormatKind(5); +} +impl cudaChannelFormatKind { + #[doc = "< 2 channel unsigned 8-bit normalized integer"] + pub const cudaChannelFormatKindUnsignedNormalized8X2: cudaChannelFormatKind = + cudaChannelFormatKind(6); +} +impl cudaChannelFormatKind { + #[doc = "< 4 channel unsigned 8-bit normalized integer"] + pub const cudaChannelFormatKindUnsignedNormalized8X4: cudaChannelFormatKind = + cudaChannelFormatKind(7); +} +impl cudaChannelFormatKind { + #[doc = "< 1 channel unsigned 16-bit normalized integer"] + pub const cudaChannelFormatKindUnsignedNormalized16X1: cudaChannelFormatKind = + cudaChannelFormatKind(8); +} +impl cudaChannelFormatKind { + #[doc = "< 2 channel unsigned 16-bit normalized integer"] + pub const cudaChannelFormatKindUnsignedNormalized16X2: cudaChannelFormatKind = + cudaChannelFormatKind(9); +} +impl cudaChannelFormatKind { + #[doc = "< 4 channel unsigned 16-bit normalized integer"] + pub const cudaChannelFormatKindUnsignedNormalized16X4: cudaChannelFormatKind = + cudaChannelFormatKind(10); +} +impl cudaChannelFormatKind { + #[doc = "< 1 channel signed 8-bit normalized integer"] + pub const cudaChannelFormatKindSignedNormalized8X1: cudaChannelFormatKind = + cudaChannelFormatKind(11); +} +impl cudaChannelFormatKind { + #[doc = "< 2 channel signed 8-bit normalized integer"] + pub const cudaChannelFormatKindSignedNormalized8X2: cudaChannelFormatKind = + cudaChannelFormatKind(12); +} +impl cudaChannelFormatKind { + #[doc = "< 4 channel signed 8-bit normalized integer"] + pub const cudaChannelFormatKindSignedNormalized8X4: cudaChannelFormatKind = + cudaChannelFormatKind(13); +} +impl cudaChannelFormatKind { + #[doc = "< 1 channel signed 16-bit normalized integer"] + pub const cudaChannelFormatKindSignedNormalized16X1: cudaChannelFormatKind = + cudaChannelFormatKind(14); +} +impl cudaChannelFormatKind { + #[doc = "< 2 channel signed 16-bit normalized integer"] + pub const cudaChannelFormatKindSignedNormalized16X2: cudaChannelFormatKind = + cudaChannelFormatKind(15); +} +impl cudaChannelFormatKind { + #[doc = "< 4 channel signed 16-bit normalized integer"] + pub const cudaChannelFormatKindSignedNormalized16X4: cudaChannelFormatKind = + 
cudaChannelFormatKind(16); +} +impl cudaChannelFormatKind { + #[doc = "< 4 channel unsigned normalized block-compressed (BC1 compression) format"] + pub const cudaChannelFormatKindUnsignedBlockCompressed1: cudaChannelFormatKind = + cudaChannelFormatKind(17); +} +impl cudaChannelFormatKind { + #[doc = "< 4 channel unsigned normalized block-compressed (BC1 compression) format with sRGB encoding"] + pub const cudaChannelFormatKindUnsignedBlockCompressed1SRGB: cudaChannelFormatKind = + cudaChannelFormatKind(18); +} +impl cudaChannelFormatKind { + #[doc = "< 4 channel unsigned normalized block-compressed (BC2 compression) format"] + pub const cudaChannelFormatKindUnsignedBlockCompressed2: cudaChannelFormatKind = + cudaChannelFormatKind(19); +} +impl cudaChannelFormatKind { + #[doc = "< 4 channel unsigned normalized block-compressed (BC2 compression) format with sRGB encoding"] + pub const cudaChannelFormatKindUnsignedBlockCompressed2SRGB: cudaChannelFormatKind = + cudaChannelFormatKind(20); +} +impl cudaChannelFormatKind { + #[doc = "< 4 channel unsigned normalized block-compressed (BC3 compression) format"] + pub const cudaChannelFormatKindUnsignedBlockCompressed3: cudaChannelFormatKind = + cudaChannelFormatKind(21); +} +impl cudaChannelFormatKind { + #[doc = "< 4 channel unsigned normalized block-compressed (BC3 compression) format with sRGB encoding"] + pub const cudaChannelFormatKindUnsignedBlockCompressed3SRGB: cudaChannelFormatKind = + cudaChannelFormatKind(22); +} +impl cudaChannelFormatKind { + #[doc = "< 1 channel unsigned normalized block-compressed (BC4 compression) format"] + pub const cudaChannelFormatKindUnsignedBlockCompressed4: cudaChannelFormatKind = + cudaChannelFormatKind(23); +} +impl cudaChannelFormatKind { + #[doc = "< 1 channel signed normalized block-compressed (BC4 compression) format"] + pub const cudaChannelFormatKindSignedBlockCompressed4: cudaChannelFormatKind = + cudaChannelFormatKind(24); +} +impl cudaChannelFormatKind { + #[doc = "< 2 channel unsigned normalized block-compressed (BC5 compression) format"] + pub const cudaChannelFormatKindUnsignedBlockCompressed5: cudaChannelFormatKind = + cudaChannelFormatKind(25); +} +impl cudaChannelFormatKind { + #[doc = "< 2 channel signed normalized block-compressed (BC5 compression) format"] + pub const cudaChannelFormatKindSignedBlockCompressed5: cudaChannelFormatKind = + cudaChannelFormatKind(26); +} +impl cudaChannelFormatKind { + #[doc = "< 3 channel unsigned half-float block-compressed (BC6H compression) format"] + pub const cudaChannelFormatKindUnsignedBlockCompressed6H: cudaChannelFormatKind = + cudaChannelFormatKind(27); +} +impl cudaChannelFormatKind { + #[doc = "< 3 channel signed half-float block-compressed (BC6H compression) format"] + pub const cudaChannelFormatKindSignedBlockCompressed6H: cudaChannelFormatKind = + cudaChannelFormatKind(28); +} +impl cudaChannelFormatKind { + #[doc = "< 4 channel unsigned normalized block-compressed (BC7 compression) format"] + pub const cudaChannelFormatKindUnsignedBlockCompressed7: cudaChannelFormatKind = + cudaChannelFormatKind(29); +} +impl cudaChannelFormatKind { + #[doc = "< 4 channel unsigned normalized block-compressed (BC7 compression) format with sRGB encoding"] + pub const cudaChannelFormatKindUnsignedBlockCompressed7SRGB: cudaChannelFormatKind = + cudaChannelFormatKind(30); +} +#[repr(transparent)] +#[doc = " Channel format kind"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaChannelFormatKind(pub ::std::os::raw::c_uint); +#[doc = " CUDA Channel 
format descriptor"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaChannelFormatDesc { + #[doc = "< x"] + pub x: ::std::os::raw::c_int, + #[doc = "< y"] + pub y: ::std::os::raw::c_int, + #[doc = "< z"] + pub z: ::std::os::raw::c_int, + #[doc = "< w"] + pub w: ::std::os::raw::c_int, + #[doc = "< Channel format kind"] + pub f: cudaChannelFormatKind, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaArray { + _unused: [u8; 0], +} +#[doc = " CUDA array"] +pub type cudaArray_t = *mut cudaArray; +#[doc = " CUDA array (as source copy argument)"] +pub type cudaArray_const_t = *const cudaArray; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaMipmappedArray { + _unused: [u8; 0], +} +#[doc = " CUDA mipmapped array"] +pub type cudaMipmappedArray_t = *mut cudaMipmappedArray; +#[doc = " CUDA mipmapped array (as source argument)"] +pub type cudaMipmappedArray_const_t = *const cudaMipmappedArray; +#[doc = " Sparse CUDA array and CUDA mipmapped array properties"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaArraySparseProperties { + pub tileExtent: cudaArraySparseProperties__bindgen_ty_1, + #[doc = "< First mip level at which the mip tail begins"] + pub miptailFirstLevel: ::std::os::raw::c_uint, + #[doc = "< Total size of the mip tail."] + pub miptailSize: ::std::os::raw::c_ulonglong, + #[doc = "< Flags will either be zero or ::cudaArraySparsePropertiesSingleMipTail"] + pub flags: ::std::os::raw::c_uint, + pub reserved: [::std::os::raw::c_uint; 4usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaArraySparseProperties__bindgen_ty_1 { + #[doc = "< Tile width in elements"] + pub width: ::std::os::raw::c_uint, + #[doc = "< Tile height in elements"] + pub height: ::std::os::raw::c_uint, + #[doc = "< Tile depth in elements"] + pub depth: ::std::os::raw::c_uint, +} +#[doc = " CUDA array and CUDA mipmapped array memory requirements"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaArrayMemoryRequirements { + #[doc = "< Total size of the array."] + pub size: usize, + #[doc = "< Alignment necessary for mapping the array."] + pub alignment: usize, + pub reserved: [::std::os::raw::c_uint; 4usize], +} +impl cudaMemoryType { + #[doc = "< Unregistered memory"] + pub const cudaMemoryTypeUnregistered: cudaMemoryType = cudaMemoryType(0); +} +impl cudaMemoryType { + #[doc = "< Host memory"] + pub const cudaMemoryTypeHost: cudaMemoryType = cudaMemoryType(1); +} +impl cudaMemoryType { + #[doc = "< Device memory"] + pub const cudaMemoryTypeDevice: cudaMemoryType = cudaMemoryType(2); +} +impl cudaMemoryType { + #[doc = "< Managed memory"] + pub const cudaMemoryTypeManaged: cudaMemoryType = cudaMemoryType(3); +} +#[repr(transparent)] +#[doc = " CUDA memory types"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaMemoryType(pub ::std::os::raw::c_uint); +impl cudaMemcpyKind { + #[doc = "< Host -> Host"] + pub const cudaMemcpyHostToHost: cudaMemcpyKind = cudaMemcpyKind(0); +} +impl cudaMemcpyKind { + #[doc = "< Host -> Device"] + pub const cudaMemcpyHostToDevice: cudaMemcpyKind = cudaMemcpyKind(1); +} +impl cudaMemcpyKind { + #[doc = "< Device -> Host"] + pub const cudaMemcpyDeviceToHost: cudaMemcpyKind = cudaMemcpyKind(2); +} +impl cudaMemcpyKind { + #[doc = "< Device -> Device"] + pub const cudaMemcpyDeviceToDevice: cudaMemcpyKind = cudaMemcpyKind(3); +} +impl cudaMemcpyKind { + #[doc = "< Direction of the transfer is inferred from the pointer values. 
Requires unified virtual addressing"] + pub const cudaMemcpyDefault: cudaMemcpyKind = cudaMemcpyKind(4); +} +#[repr(transparent)] +#[doc = " CUDA memory copy types"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaMemcpyKind(pub ::std::os::raw::c_uint); +#[doc = " CUDA Pitched memory pointer\n\n \\sa ::make_cudaPitchedPtr"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaPitchedPtr { + #[doc = "< Pointer to allocated memory"] + pub ptr: *mut ::std::os::raw::c_void, + #[doc = "< Pitch of allocated memory in bytes"] + pub pitch: usize, + #[doc = "< Logical width of allocation in elements"] + pub xsize: usize, + #[doc = "< Logical height of allocation in elements"] + pub ysize: usize, +} +#[doc = " CUDA extent\n\n \\sa ::make_cudaExtent"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExtent { + #[doc = "< Width in elements when referring to array memory, in bytes when referring to linear memory"] + pub width: usize, + #[doc = "< Height in elements"] + pub height: usize, + #[doc = "< Depth in elements"] + pub depth: usize, +} +#[doc = " CUDA 3D position\n\n \\sa ::make_cudaPos"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaPos { + #[doc = "< x"] + pub x: usize, + #[doc = "< y"] + pub y: usize, + #[doc = "< z"] + pub z: usize, +} +#[doc = " CUDA 3D memory copying parameters"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaMemcpy3DParms { + #[doc = "< Source memory address"] + pub srcArray: cudaArray_t, + #[doc = "< Source position offset"] + pub srcPos: cudaPos, + #[doc = "< Pitched source memory address"] + pub srcPtr: cudaPitchedPtr, + #[doc = "< Destination memory address"] + pub dstArray: cudaArray_t, + #[doc = "< Destination position offset"] + pub dstPos: cudaPos, + #[doc = "< Pitched destination memory address"] + pub dstPtr: cudaPitchedPtr, + #[doc = "< Requested memory copy size"] + pub extent: cudaExtent, + #[doc = "< Type of transfer"] + pub kind: cudaMemcpyKind, +} +#[doc = " CUDA 3D cross-device memory copying parameters"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaMemcpy3DPeerParms { + #[doc = "< Source memory address"] + pub srcArray: cudaArray_t, + #[doc = "< Source position offset"] + pub srcPos: cudaPos, + #[doc = "< Pitched source memory address"] + pub srcPtr: cudaPitchedPtr, + #[doc = "< Source device"] + pub srcDevice: ::std::os::raw::c_int, + #[doc = "< Destination memory address"] + pub dstArray: cudaArray_t, + #[doc = "< Destination position offset"] + pub dstPos: cudaPos, + #[doc = "< Pitched destination memory address"] + pub dstPtr: cudaPitchedPtr, + #[doc = "< Destination device"] + pub dstDevice: ::std::os::raw::c_int, + #[doc = "< Requested memory copy size"] + pub extent: cudaExtent, +} +#[doc = " CUDA Memset node parameters"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaMemsetParams { + #[doc = "< Destination device pointer"] + pub dst: *mut ::std::os::raw::c_void, + #[doc = "< Pitch of destination device pointer. Unused if height is 1"] + pub pitch: usize, + #[doc = "< Value to be set"] + pub value: ::std::os::raw::c_uint, + #[doc = "< Size of each element in bytes. 
Must be 1, 2, or 4."] + pub elementSize: ::std::os::raw::c_uint, + #[doc = "< Width of the row in elements"] + pub width: usize, + #[doc = "< Number of rows"] + pub height: usize, +} +impl cudaAccessProperty { + #[doc = "< Normal cache persistence."] + pub const cudaAccessPropertyNormal: cudaAccessProperty = cudaAccessProperty(0); +} +impl cudaAccessProperty { + #[doc = "< Streaming access is less likely to persit from cache."] + pub const cudaAccessPropertyStreaming: cudaAccessProperty = cudaAccessProperty(1); +} +impl cudaAccessProperty { + #[doc = "< Persisting access is more likely to persist in cache."] + pub const cudaAccessPropertyPersisting: cudaAccessProperty = cudaAccessProperty(2); +} +#[repr(transparent)] +#[doc = " Specifies performance hint with ::cudaAccessPolicyWindow for hitProp and missProp members."] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaAccessProperty(pub ::std::os::raw::c_uint); +#[doc = " Specifies an access policy for a window, a contiguous extent of memory\n beginning at base_ptr and ending at base_ptr + num_bytes.\n Partition into many segments and assign segments such that.\n sum of \"hit segments\" / window == approx. ratio.\n sum of \"miss segments\" / window == approx 1-ratio.\n Segments and ratio specifications are fitted to the capabilities of\n the architecture.\n Accesses in a hit segment apply the hitProp access policy.\n Accesses in a miss segment apply the missProp access policy."] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaAccessPolicyWindow { + #[doc = "< Starting address of the access policy window. CUDA driver may align it."] + pub base_ptr: *mut ::std::os::raw::c_void, + #[doc = "< Size in bytes of the window policy. CUDA driver may restrict the maximum size and alignment."] + pub num_bytes: usize, + #[doc = "< hitRatio specifies percentage of lines assigned hitProp, rest are assigned missProp."] + pub hitRatio: f32, + #[doc = "< ::CUaccessProperty set for hit."] + pub hitProp: cudaAccessProperty, + #[doc = "< ::CUaccessProperty set for miss. 
Must be either NORMAL or STREAMING."] + pub missProp: cudaAccessProperty, +} +#[doc = " CUDA host function\n \\param userData Argument value passed to the function"] +pub type cudaHostFn_t = + ::std::option::Option<unsafe extern "C" fn(userData: *mut ::std::os::raw::c_void)>; +#[doc = " CUDA host node parameters"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaHostNodeParams { + #[doc = "< The function to call when the node executes"] + pub fn_: cudaHostFn_t, + #[doc = "< Argument to pass to the function"] + pub userData: *mut ::std::os::raw::c_void, +} +impl cudaStreamCaptureStatus { + #[doc = "< Stream is not capturing"] + pub const cudaStreamCaptureStatusNone: cudaStreamCaptureStatus = cudaStreamCaptureStatus(0); +} +impl cudaStreamCaptureStatus { + #[doc = "< Stream is actively capturing"] + pub const cudaStreamCaptureStatusActive: cudaStreamCaptureStatus = cudaStreamCaptureStatus(1); +} +impl cudaStreamCaptureStatus { + #[doc = "< Stream is part of a capture sequence that\nhas been invalidated, but not terminated"] + pub const cudaStreamCaptureStatusInvalidated: cudaStreamCaptureStatus = + cudaStreamCaptureStatus(2); +} +#[repr(transparent)] +#[doc = " Possible stream capture statuses returned by ::cudaStreamIsCapturing"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaStreamCaptureStatus(pub ::std::os::raw::c_uint); +impl cudaStreamCaptureMode { + pub const cudaStreamCaptureModeGlobal: cudaStreamCaptureMode = cudaStreamCaptureMode(0); +} +impl cudaStreamCaptureMode { + pub const cudaStreamCaptureModeThreadLocal: cudaStreamCaptureMode = cudaStreamCaptureMode(1); +} +impl cudaStreamCaptureMode { + pub const cudaStreamCaptureModeRelaxed: cudaStreamCaptureMode = cudaStreamCaptureMode(2); +} +#[repr(transparent)] +#[doc = " Possible modes for stream capture thread interactions. For more details see\n ::cudaStreamBeginCapture and ::cudaThreadExchangeStreamCaptureMode"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaStreamCaptureMode(pub ::std::os::raw::c_uint); +impl cudaSynchronizationPolicy { + pub const cudaSyncPolicyAuto: cudaSynchronizationPolicy = cudaSynchronizationPolicy(1); +} +impl cudaSynchronizationPolicy { + pub const cudaSyncPolicySpin: cudaSynchronizationPolicy = cudaSynchronizationPolicy(2); +} +impl cudaSynchronizationPolicy { + pub const cudaSyncPolicyYield: cudaSynchronizationPolicy = cudaSynchronizationPolicy(3); +} +impl cudaSynchronizationPolicy { + pub const cudaSyncPolicyBlockingSync: cudaSynchronizationPolicy = cudaSynchronizationPolicy(4); +} +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaSynchronizationPolicy(pub ::std::os::raw::c_uint); +impl cudaClusterSchedulingPolicy { + #[doc = "< the default policy"] + pub const cudaClusterSchedulingPolicyDefault: cudaClusterSchedulingPolicy = + cudaClusterSchedulingPolicy(0); +} +impl cudaClusterSchedulingPolicy { + #[doc = "< spread the blocks within a cluster to the SMs"] + pub const cudaClusterSchedulingPolicySpread: cudaClusterSchedulingPolicy = + cudaClusterSchedulingPolicy(1); +} +impl cudaClusterSchedulingPolicy { + #[doc = "< allow the hardware to load-balance the blocks in a cluster to the SMs"] + pub const cudaClusterSchedulingPolicyLoadBalancing: cudaClusterSchedulingPolicy = + cudaClusterSchedulingPolicy(2); +} +#[repr(transparent)] +#[doc = " Cluster scheduling policies. 
These may be passed to ::cudaFuncSetAttribute"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaClusterSchedulingPolicy(pub ::std::os::raw::c_uint); +impl cudaStreamUpdateCaptureDependenciesFlags { + #[doc = "< Add new nodes to the dependency set"] + pub const cudaStreamAddCaptureDependencies: cudaStreamUpdateCaptureDependenciesFlags = + cudaStreamUpdateCaptureDependenciesFlags(0); +} +impl cudaStreamUpdateCaptureDependenciesFlags { + #[doc = "< Replace the dependency set with the new nodes"] + pub const cudaStreamSetCaptureDependencies: cudaStreamUpdateCaptureDependenciesFlags = + cudaStreamUpdateCaptureDependenciesFlags(1); +} +#[repr(transparent)] +#[doc = " Flags for ::cudaStreamUpdateCaptureDependencies"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaStreamUpdateCaptureDependenciesFlags(pub ::std::os::raw::c_uint); +impl cudaUserObjectFlags { + #[doc = "< Indicates the destructor execution is not synchronized by any CUDA handle."] + pub const cudaUserObjectNoDestructorSync: cudaUserObjectFlags = cudaUserObjectFlags(1); +} +#[repr(transparent)] +#[doc = " Flags for user objects for graphs"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaUserObjectFlags(pub ::std::os::raw::c_uint); +impl cudaUserObjectRetainFlags { + #[doc = "< Transfer references from the caller rather than creating new references."] + pub const cudaGraphUserObjectMove: cudaUserObjectRetainFlags = cudaUserObjectRetainFlags(1); +} +#[repr(transparent)] +#[doc = " Flags for retaining user object references for graphs"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaUserObjectRetainFlags(pub ::std::os::raw::c_uint); +#[doc = " CUDA graphics interop resource"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaGraphicsResource { + _unused: [u8; 0], +} +impl cudaGraphicsRegisterFlags { + #[doc = "< Default"] + pub const cudaGraphicsRegisterFlagsNone: cudaGraphicsRegisterFlags = + cudaGraphicsRegisterFlags(0); +} +impl cudaGraphicsRegisterFlags { + #[doc = "< CUDA will not write to this resource"] + pub const cudaGraphicsRegisterFlagsReadOnly: cudaGraphicsRegisterFlags = + cudaGraphicsRegisterFlags(1); +} +impl cudaGraphicsRegisterFlags { + #[doc = "< CUDA will only write to and will not read from this resource"] + pub const cudaGraphicsRegisterFlagsWriteDiscard: cudaGraphicsRegisterFlags = + cudaGraphicsRegisterFlags(2); +} +impl cudaGraphicsRegisterFlags { + #[doc = "< CUDA will bind this resource to a surface reference"] + pub const cudaGraphicsRegisterFlagsSurfaceLoadStore: cudaGraphicsRegisterFlags = + cudaGraphicsRegisterFlags(4); +} +impl cudaGraphicsRegisterFlags { + #[doc = "< CUDA will perform texture gather operations on this resource"] + pub const cudaGraphicsRegisterFlagsTextureGather: cudaGraphicsRegisterFlags = + cudaGraphicsRegisterFlags(8); +} +#[repr(transparent)] +#[doc = " CUDA graphics interop register flags"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaGraphicsRegisterFlags(pub ::std::os::raw::c_uint); +impl cudaGraphicsMapFlags { + #[doc = "< Default; Assume resource can be read/written"] + pub const cudaGraphicsMapFlagsNone: cudaGraphicsMapFlags = cudaGraphicsMapFlags(0); +} +impl cudaGraphicsMapFlags { + #[doc = "< CUDA will not write to this resource"] + pub const cudaGraphicsMapFlagsReadOnly: cudaGraphicsMapFlags = cudaGraphicsMapFlags(1); +} +impl cudaGraphicsMapFlags { + #[doc = "< CUDA will only write to and will not read from this resource"] + pub const cudaGraphicsMapFlagsWriteDiscard: cudaGraphicsMapFlags = 
cudaGraphicsMapFlags(2); +} +#[repr(transparent)] +#[doc = " CUDA graphics interop map flags"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaGraphicsMapFlags(pub ::std::os::raw::c_uint); +impl cudaGraphicsCubeFace { + #[doc = "< Positive X face of cubemap"] + pub const cudaGraphicsCubeFacePositiveX: cudaGraphicsCubeFace = cudaGraphicsCubeFace(0); +} +impl cudaGraphicsCubeFace { + #[doc = "< Negative X face of cubemap"] + pub const cudaGraphicsCubeFaceNegativeX: cudaGraphicsCubeFace = cudaGraphicsCubeFace(1); +} +impl cudaGraphicsCubeFace { + #[doc = "< Positive Y face of cubemap"] + pub const cudaGraphicsCubeFacePositiveY: cudaGraphicsCubeFace = cudaGraphicsCubeFace(2); +} +impl cudaGraphicsCubeFace { + #[doc = "< Negative Y face of cubemap"] + pub const cudaGraphicsCubeFaceNegativeY: cudaGraphicsCubeFace = cudaGraphicsCubeFace(3); +} +impl cudaGraphicsCubeFace { + #[doc = "< Positive Z face of cubemap"] + pub const cudaGraphicsCubeFacePositiveZ: cudaGraphicsCubeFace = cudaGraphicsCubeFace(4); +} +impl cudaGraphicsCubeFace { + #[doc = "< Negative Z face of cubemap"] + pub const cudaGraphicsCubeFaceNegativeZ: cudaGraphicsCubeFace = cudaGraphicsCubeFace(5); +} +#[repr(transparent)] +#[doc = " CUDA graphics interop array indices for cube maps"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaGraphicsCubeFace(pub ::std::os::raw::c_uint); +impl cudaResourceType { + #[doc = "< Array resource"] + pub const cudaResourceTypeArray: cudaResourceType = cudaResourceType(0); +} +impl cudaResourceType { + #[doc = "< Mipmapped array resource"] + pub const cudaResourceTypeMipmappedArray: cudaResourceType = cudaResourceType(1); +} +impl cudaResourceType { + #[doc = "< Linear resource"] + pub const cudaResourceTypeLinear: cudaResourceType = cudaResourceType(2); +} +impl cudaResourceType { + #[doc = "< Pitch 2D resource"] + pub const cudaResourceTypePitch2D: cudaResourceType = cudaResourceType(3); +} +#[repr(transparent)] +#[doc = " CUDA resource types"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaResourceType(pub ::std::os::raw::c_uint); +impl cudaResourceViewFormat { + #[doc = "< No resource view format (use underlying resource format)"] + pub const cudaResViewFormatNone: cudaResourceViewFormat = cudaResourceViewFormat(0); +} +impl cudaResourceViewFormat { + #[doc = "< 1 channel unsigned 8-bit integers"] + pub const cudaResViewFormatUnsignedChar1: cudaResourceViewFormat = cudaResourceViewFormat(1); +} +impl cudaResourceViewFormat { + #[doc = "< 2 channel unsigned 8-bit integers"] + pub const cudaResViewFormatUnsignedChar2: cudaResourceViewFormat = cudaResourceViewFormat(2); +} +impl cudaResourceViewFormat { + #[doc = "< 4 channel unsigned 8-bit integers"] + pub const cudaResViewFormatUnsignedChar4: cudaResourceViewFormat = cudaResourceViewFormat(3); +} +impl cudaResourceViewFormat { + #[doc = "< 1 channel signed 8-bit integers"] + pub const cudaResViewFormatSignedChar1: cudaResourceViewFormat = cudaResourceViewFormat(4); +} +impl cudaResourceViewFormat { + #[doc = "< 2 channel signed 8-bit integers"] + pub const cudaResViewFormatSignedChar2: cudaResourceViewFormat = cudaResourceViewFormat(5); +} +impl cudaResourceViewFormat { + #[doc = "< 4 channel signed 8-bit integers"] + pub const cudaResViewFormatSignedChar4: cudaResourceViewFormat = cudaResourceViewFormat(6); +} +impl cudaResourceViewFormat { + #[doc = "< 1 channel unsigned 16-bit integers"] + pub const cudaResViewFormatUnsignedShort1: cudaResourceViewFormat = cudaResourceViewFormat(7); +} +impl 
cudaResourceViewFormat { + #[doc = "< 2 channel unsigned 16-bit integers"] + pub const cudaResViewFormatUnsignedShort2: cudaResourceViewFormat = cudaResourceViewFormat(8); +} +impl cudaResourceViewFormat { + #[doc = "< 4 channel unsigned 16-bit integers"] + pub const cudaResViewFormatUnsignedShort4: cudaResourceViewFormat = cudaResourceViewFormat(9); +} +impl cudaResourceViewFormat { + #[doc = "< 1 channel signed 16-bit integers"] + pub const cudaResViewFormatSignedShort1: cudaResourceViewFormat = cudaResourceViewFormat(10); +} +impl cudaResourceViewFormat { + #[doc = "< 2 channel signed 16-bit integers"] + pub const cudaResViewFormatSignedShort2: cudaResourceViewFormat = cudaResourceViewFormat(11); +} +impl cudaResourceViewFormat { + #[doc = "< 4 channel signed 16-bit integers"] + pub const cudaResViewFormatSignedShort4: cudaResourceViewFormat = cudaResourceViewFormat(12); +} +impl cudaResourceViewFormat { + #[doc = "< 1 channel unsigned 32-bit integers"] + pub const cudaResViewFormatUnsignedInt1: cudaResourceViewFormat = cudaResourceViewFormat(13); +} +impl cudaResourceViewFormat { + #[doc = "< 2 channel unsigned 32-bit integers"] + pub const cudaResViewFormatUnsignedInt2: cudaResourceViewFormat = cudaResourceViewFormat(14); +} +impl cudaResourceViewFormat { + #[doc = "< 4 channel unsigned 32-bit integers"] + pub const cudaResViewFormatUnsignedInt4: cudaResourceViewFormat = cudaResourceViewFormat(15); +} +impl cudaResourceViewFormat { + #[doc = "< 1 channel signed 32-bit integers"] + pub const cudaResViewFormatSignedInt1: cudaResourceViewFormat = cudaResourceViewFormat(16); +} +impl cudaResourceViewFormat { + #[doc = "< 2 channel signed 32-bit integers"] + pub const cudaResViewFormatSignedInt2: cudaResourceViewFormat = cudaResourceViewFormat(17); +} +impl cudaResourceViewFormat { + #[doc = "< 4 channel signed 32-bit integers"] + pub const cudaResViewFormatSignedInt4: cudaResourceViewFormat = cudaResourceViewFormat(18); +} +impl cudaResourceViewFormat { + #[doc = "< 1 channel 16-bit floating point"] + pub const cudaResViewFormatHalf1: cudaResourceViewFormat = cudaResourceViewFormat(19); +} +impl cudaResourceViewFormat { + #[doc = "< 2 channel 16-bit floating point"] + pub const cudaResViewFormatHalf2: cudaResourceViewFormat = cudaResourceViewFormat(20); +} +impl cudaResourceViewFormat { + #[doc = "< 4 channel 16-bit floating point"] + pub const cudaResViewFormatHalf4: cudaResourceViewFormat = cudaResourceViewFormat(21); +} +impl cudaResourceViewFormat { + #[doc = "< 1 channel 32-bit floating point"] + pub const cudaResViewFormatFloat1: cudaResourceViewFormat = cudaResourceViewFormat(22); +} +impl cudaResourceViewFormat { + #[doc = "< 2 channel 32-bit floating point"] + pub const cudaResViewFormatFloat2: cudaResourceViewFormat = cudaResourceViewFormat(23); +} +impl cudaResourceViewFormat { + #[doc = "< 4 channel 32-bit floating point"] + pub const cudaResViewFormatFloat4: cudaResourceViewFormat = cudaResourceViewFormat(24); +} +impl cudaResourceViewFormat { + #[doc = "< Block compressed 1"] + pub const cudaResViewFormatUnsignedBlockCompressed1: cudaResourceViewFormat = + cudaResourceViewFormat(25); +} +impl cudaResourceViewFormat { + #[doc = "< Block compressed 2"] + pub const cudaResViewFormatUnsignedBlockCompressed2: cudaResourceViewFormat = + cudaResourceViewFormat(26); +} +impl cudaResourceViewFormat { + #[doc = "< Block compressed 3"] + pub const cudaResViewFormatUnsignedBlockCompressed3: cudaResourceViewFormat = + cudaResourceViewFormat(27); +} +impl cudaResourceViewFormat { + #[doc 
= "< Block compressed 4 unsigned"] + pub const cudaResViewFormatUnsignedBlockCompressed4: cudaResourceViewFormat = + cudaResourceViewFormat(28); +} +impl cudaResourceViewFormat { + #[doc = "< Block compressed 4 signed"] + pub const cudaResViewFormatSignedBlockCompressed4: cudaResourceViewFormat = + cudaResourceViewFormat(29); +} +impl cudaResourceViewFormat { + #[doc = "< Block compressed 5 unsigned"] + pub const cudaResViewFormatUnsignedBlockCompressed5: cudaResourceViewFormat = + cudaResourceViewFormat(30); +} +impl cudaResourceViewFormat { + #[doc = "< Block compressed 5 signed"] + pub const cudaResViewFormatSignedBlockCompressed5: cudaResourceViewFormat = + cudaResourceViewFormat(31); +} +impl cudaResourceViewFormat { + #[doc = "< Block compressed 6 unsigned half-float"] + pub const cudaResViewFormatUnsignedBlockCompressed6H: cudaResourceViewFormat = + cudaResourceViewFormat(32); +} +impl cudaResourceViewFormat { + #[doc = "< Block compressed 6 signed half-float"] + pub const cudaResViewFormatSignedBlockCompressed6H: cudaResourceViewFormat = + cudaResourceViewFormat(33); +} +impl cudaResourceViewFormat { + #[doc = "< Block compressed 7"] + pub const cudaResViewFormatUnsignedBlockCompressed7: cudaResourceViewFormat = + cudaResourceViewFormat(34); +} +#[repr(transparent)] +#[doc = " CUDA texture resource view formats"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaResourceViewFormat(pub ::std::os::raw::c_uint); +#[doc = " CUDA resource descriptor"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaResourceDesc { + #[doc = "< Resource type"] + pub resType: cudaResourceType, + pub res: cudaResourceDesc__bindgen_ty_1, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union cudaResourceDesc__bindgen_ty_1 { + pub array: cudaResourceDesc__bindgen_ty_1__bindgen_ty_1, + pub mipmap: cudaResourceDesc__bindgen_ty_1__bindgen_ty_2, + pub linear: cudaResourceDesc__bindgen_ty_1__bindgen_ty_3, + pub pitch2D: cudaResourceDesc__bindgen_ty_1__bindgen_ty_4, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaResourceDesc__bindgen_ty_1__bindgen_ty_1 { + #[doc = "< CUDA array"] + pub array: cudaArray_t, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaResourceDesc__bindgen_ty_1__bindgen_ty_2 { + #[doc = "< CUDA mipmapped array"] + pub mipmap: cudaMipmappedArray_t, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaResourceDesc__bindgen_ty_1__bindgen_ty_3 { + #[doc = "< Device pointer"] + pub devPtr: *mut ::std::os::raw::c_void, + #[doc = "< Channel descriptor"] + pub desc: cudaChannelFormatDesc, + #[doc = "< Size in bytes"] + pub sizeInBytes: usize, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaResourceDesc__bindgen_ty_1__bindgen_ty_4 { + #[doc = "< Device pointer"] + pub devPtr: *mut ::std::os::raw::c_void, + #[doc = "< Channel descriptor"] + pub desc: cudaChannelFormatDesc, + #[doc = "< Width of the array in elements"] + pub width: usize, + #[doc = "< Height of the array in elements"] + pub height: usize, + #[doc = "< Pitch between two rows in bytes"] + pub pitchInBytes: usize, +} +#[doc = " CUDA resource view descriptor"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaResourceViewDesc { + #[doc = "< Resource view format"] + pub format: cudaResourceViewFormat, + #[doc = "< Width of the resource view"] + pub width: usize, + #[doc = "< Height of the resource view"] + pub height: usize, + #[doc = "< Depth of the resource view"] + pub depth: usize, + #[doc = "< First defined mipmap level"] + pub firstMipmapLevel: ::std::os::raw::c_uint, + #[doc = "< Last defined mipmap 
level"] + pub lastMipmapLevel: ::std::os::raw::c_uint, + #[doc = "< First layer index"] + pub firstLayer: ::std::os::raw::c_uint, + #[doc = "< Last layer index"] + pub lastLayer: ::std::os::raw::c_uint, +} +#[doc = " CUDA pointer attributes"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaPointerAttributes { + #[doc = " The type of memory - ::cudaMemoryTypeUnregistered, ::cudaMemoryTypeHost,\n ::cudaMemoryTypeDevice or ::cudaMemoryTypeManaged."] + pub type_: cudaMemoryType, + #[doc = " The device against which the memory was allocated or registered.\n If the memory type is ::cudaMemoryTypeDevice then this identifies\n the device on which the memory referred physically resides. If\n the memory type is ::cudaMemoryTypeHost or::cudaMemoryTypeManaged then\n this identifies the device which was current when the memory was allocated\n or registered (and if that device is deinitialized then this allocation\n will vanish with that device's state)."] + pub device: ::std::os::raw::c_int, + #[doc = " The address which may be dereferenced on the current device to access\n the memory or NULL if no such address exists."] + pub devicePointer: *mut ::std::os::raw::c_void, + #[doc = " The address which may be dereferenced on the host to access the\n memory or NULL if no such address exists.\n\n \\note CUDA doesn't check if unregistered memory is allocated so this field\n may contain invalid pointer if an invalid pointer has been passed to CUDA."] + pub hostPointer: *mut ::std::os::raw::c_void, +} +#[doc = " CUDA function attributes"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaFuncAttributes { + #[doc = " The size in bytes of statically-allocated shared memory per block\n required by this function. This does not include dynamically-allocated\n shared memory requested by the user at runtime."] + pub sharedSizeBytes: usize, + #[doc = " The size in bytes of user-allocated constant memory required by this\n function."] + pub constSizeBytes: usize, + #[doc = " The size in bytes of local memory used by each thread of this function."] + pub localSizeBytes: usize, + #[doc = " The maximum number of threads per block, beyond which a launch of the\n function would fail. This number depends on both the function and the\n device on which the function is currently loaded."] + pub maxThreadsPerBlock: ::std::os::raw::c_int, + #[doc = " The number of registers used by each thread of this function."] + pub numRegs: ::std::os::raw::c_int, + #[doc = " The PTX virtual architecture version for which the function was\n compiled. This value is the major PTX version * 10 + the minor PTX\n version, so a PTX version 1.3 function would return the value 13."] + pub ptxVersion: ::std::os::raw::c_int, + #[doc = " The binary architecture version for which the function was compiled.\n This value is the major binary version * 10 + the minor binary version,\n so a binary version 1.3 function would return the value 13."] + pub binaryVersion: ::std::os::raw::c_int, + #[doc = " The attribute to indicate whether the function has been compiled with\n user specified option \"-Xptxas --dlcm=ca\" set."] + pub cacheModeCA: ::std::os::raw::c_int, + #[doc = " The maximum size in bytes of dynamic shared memory per block for\n this function. 
Any launch must have a dynamic shared memory size\n smaller than this value."] + pub maxDynamicSharedSizeBytes: ::std::os::raw::c_int, + #[doc = " On devices where the L1 cache and shared memory use the same hardware resources,\n this sets the shared memory carveout preference, in percent of the maximum shared memory.\n Refer to ::cudaDevAttrMaxSharedMemoryPerMultiprocessor.\n This is only a hint, and the driver can choose a different ratio if required to execute the function.\n See ::cudaFuncSetAttribute"] + pub preferredShmemCarveout: ::std::os::raw::c_int, +} +impl cudaFuncAttribute { + #[doc = "< Maximum dynamic shared memory size"] + pub const cudaFuncAttributeMaxDynamicSharedMemorySize: cudaFuncAttribute = cudaFuncAttribute(8); +} +impl cudaFuncAttribute { + #[doc = "< Preferred shared memory-L1 cache split"] + pub const cudaFuncAttributePreferredSharedMemoryCarveout: cudaFuncAttribute = + cudaFuncAttribute(9); +} +impl cudaFuncAttribute { + #[doc = "< Indicator to enforce valid cluster dimension specification on kernel launch"] + pub const cudaFuncAttributeClusterDimMustBeSet: cudaFuncAttribute = cudaFuncAttribute(10); +} +impl cudaFuncAttribute { + #[doc = "< Required cluster width"] + pub const cudaFuncAttributeRequiredClusterWidth: cudaFuncAttribute = cudaFuncAttribute(11); +} +impl cudaFuncAttribute { + #[doc = "< Required cluster height"] + pub const cudaFuncAttributeRequiredClusterHeight: cudaFuncAttribute = cudaFuncAttribute(12); +} +impl cudaFuncAttribute { + #[doc = "< Required cluster depth"] + pub const cudaFuncAttributeRequiredClusterDepth: cudaFuncAttribute = cudaFuncAttribute(13); +} +impl cudaFuncAttribute { + #[doc = "< Whether non-portable cluster scheduling policy is supported"] + pub const cudaFuncAttributeNonPortableClusterSizeAllowed: cudaFuncAttribute = + cudaFuncAttribute(14); +} +impl cudaFuncAttribute { + #[doc = "< Required cluster scheduling policy preference"] + pub const cudaFuncAttributeClusterSchedulingPolicyPreference: cudaFuncAttribute = + cudaFuncAttribute(15); +} +impl cudaFuncAttribute { + pub const cudaFuncAttributeMax: cudaFuncAttribute = cudaFuncAttribute(16); +} +#[repr(transparent)] +#[doc = " CUDA function attributes that can be set using ::cudaFuncSetAttribute"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaFuncAttribute(pub ::std::os::raw::c_uint); +impl cudaFuncCache { + #[doc = "< Default function cache configuration, no preference"] + pub const cudaFuncCachePreferNone: cudaFuncCache = cudaFuncCache(0); +} +impl cudaFuncCache { + #[doc = "< Prefer larger shared memory and smaller L1 cache"] + pub const cudaFuncCachePreferShared: cudaFuncCache = cudaFuncCache(1); +} +impl cudaFuncCache { + #[doc = "< Prefer larger L1 cache and smaller shared memory"] + pub const cudaFuncCachePreferL1: cudaFuncCache = cudaFuncCache(2); +} +impl cudaFuncCache { + #[doc = "< Prefer equal size L1 cache and shared memory"] + pub const cudaFuncCachePreferEqual: cudaFuncCache = cudaFuncCache(3); +} +#[repr(transparent)] +#[doc = " CUDA function cache configurations"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaFuncCache(pub ::std::os::raw::c_uint); +impl cudaSharedMemConfig { + pub const cudaSharedMemBankSizeDefault: cudaSharedMemConfig = cudaSharedMemConfig(0); +} +impl cudaSharedMemConfig { + pub const cudaSharedMemBankSizeFourByte: cudaSharedMemConfig = cudaSharedMemConfig(1); +} +impl cudaSharedMemConfig { + pub const cudaSharedMemBankSizeEightByte: cudaSharedMemConfig = cudaSharedMemConfig(2); +} +#[repr(transparent)] +#[doc 
= " CUDA shared memory configuration"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaSharedMemConfig(pub ::std::os::raw::c_uint); +impl cudaSharedCarveout { + #[doc = "< No preference for shared memory or L1 (default)"] + pub const cudaSharedmemCarveoutDefault: cudaSharedCarveout = cudaSharedCarveout(-1); +} +impl cudaSharedCarveout { + #[doc = "< Prefer maximum available shared memory, minimum L1 cache"] + pub const cudaSharedmemCarveoutMaxShared: cudaSharedCarveout = cudaSharedCarveout(100); +} +impl cudaSharedCarveout { + #[doc = "< Prefer maximum available L1 cache, minimum shared memory"] + pub const cudaSharedmemCarveoutMaxL1: cudaSharedCarveout = cudaSharedCarveout(0); +} +#[repr(transparent)] +#[doc = " Shared memory carveout configurations. These may be passed to cudaFuncSetAttribute"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaSharedCarveout(pub ::std::os::raw::c_int); +impl cudaComputeMode { + #[doc = "< Default compute mode (Multiple threads can use ::cudaSetDevice() with this device)"] + pub const cudaComputeModeDefault: cudaComputeMode = cudaComputeMode(0); +} +impl cudaComputeMode { + #[doc = "< Compute-exclusive-thread mode (Only one thread in one process will be able to use ::cudaSetDevice() with this device)"] + pub const cudaComputeModeExclusive: cudaComputeMode = cudaComputeMode(1); +} +impl cudaComputeMode { + #[doc = "< Compute-prohibited mode (No threads can use ::cudaSetDevice() with this device)"] + pub const cudaComputeModeProhibited: cudaComputeMode = cudaComputeMode(2); +} +impl cudaComputeMode { + #[doc = "< Compute-exclusive-process mode (Many threads in one process will be able to use ::cudaSetDevice() with this device)"] + pub const cudaComputeModeExclusiveProcess: cudaComputeMode = cudaComputeMode(3); +} +#[repr(transparent)] +#[doc = " CUDA device compute modes"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaComputeMode(pub ::std::os::raw::c_uint); +impl cudaLimit { + #[doc = "< GPU thread stack size"] + pub const cudaLimitStackSize: cudaLimit = cudaLimit(0); +} +impl cudaLimit { + #[doc = "< GPU printf FIFO size"] + pub const cudaLimitPrintfFifoSize: cudaLimit = cudaLimit(1); +} +impl cudaLimit { + #[doc = "< GPU malloc heap size"] + pub const cudaLimitMallocHeapSize: cudaLimit = cudaLimit(2); +} +impl cudaLimit { + #[doc = "< GPU device runtime synchronize depth"] + pub const cudaLimitDevRuntimeSyncDepth: cudaLimit = cudaLimit(3); +} +impl cudaLimit { + #[doc = "< GPU device runtime pending launch count"] + pub const cudaLimitDevRuntimePendingLaunchCount: cudaLimit = cudaLimit(4); +} +impl cudaLimit { + #[doc = "< A value between 0 and 128 that indicates the maximum fetch granularity of L2 (in Bytes). 
This is a hint"] + pub const cudaLimitMaxL2FetchGranularity: cudaLimit = cudaLimit(5); +} +impl cudaLimit { + #[doc = "< A size in bytes for L2 persisting lines cache size"] + pub const cudaLimitPersistingL2CacheSize: cudaLimit = cudaLimit(6); +} +#[repr(transparent)] +#[doc = " CUDA Limits"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaLimit(pub ::std::os::raw::c_uint); +impl cudaMemoryAdvise { + #[doc = "< Data will mostly be read and only occassionally be written to"] + pub const cudaMemAdviseSetReadMostly: cudaMemoryAdvise = cudaMemoryAdvise(1); +} +impl cudaMemoryAdvise { + #[doc = "< Undo the effect of ::cudaMemAdviseSetReadMostly"] + pub const cudaMemAdviseUnsetReadMostly: cudaMemoryAdvise = cudaMemoryAdvise(2); +} +impl cudaMemoryAdvise { + #[doc = "< Set the preferred location for the data as the specified device"] + pub const cudaMemAdviseSetPreferredLocation: cudaMemoryAdvise = cudaMemoryAdvise(3); +} +impl cudaMemoryAdvise { + #[doc = "< Clear the preferred location for the data"] + pub const cudaMemAdviseUnsetPreferredLocation: cudaMemoryAdvise = cudaMemoryAdvise(4); +} +impl cudaMemoryAdvise { + #[doc = "< Data will be accessed by the specified device, so prevent page faults as much as possible"] + pub const cudaMemAdviseSetAccessedBy: cudaMemoryAdvise = cudaMemoryAdvise(5); +} +impl cudaMemoryAdvise { + #[doc = "< Let the Unified Memory subsystem decide on the page faulting policy for the specified device"] + pub const cudaMemAdviseUnsetAccessedBy: cudaMemoryAdvise = cudaMemoryAdvise(6); +} +#[repr(transparent)] +#[doc = " CUDA Memory Advise values"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaMemoryAdvise(pub ::std::os::raw::c_uint); +impl cudaMemRangeAttribute { + #[doc = "< Whether the range will mostly be read and only occassionally be written to"] + pub const cudaMemRangeAttributeReadMostly: cudaMemRangeAttribute = cudaMemRangeAttribute(1); +} +impl cudaMemRangeAttribute { + #[doc = "< The preferred location of the range"] + pub const cudaMemRangeAttributePreferredLocation: cudaMemRangeAttribute = + cudaMemRangeAttribute(2); +} +impl cudaMemRangeAttribute { + #[doc = "< Memory range has ::cudaMemAdviseSetAccessedBy set for specified device"] + pub const cudaMemRangeAttributeAccessedBy: cudaMemRangeAttribute = cudaMemRangeAttribute(3); +} +impl cudaMemRangeAttribute { + #[doc = "< The last location to which the range was prefetched"] + pub const cudaMemRangeAttributeLastPrefetchLocation: cudaMemRangeAttribute = + cudaMemRangeAttribute(4); +} +#[repr(transparent)] +#[doc = " CUDA range attributes"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaMemRangeAttribute(pub ::std::os::raw::c_uint); +impl cudaOutputMode { + #[doc = "< Output mode Key-Value pair format."] + pub const cudaKeyValuePair: cudaOutputMode = cudaOutputMode(0); +} +impl cudaOutputMode { + #[doc = "< Output mode Comma separated values format."] + pub const cudaCSV: cudaOutputMode = cudaOutputMode(1); +} +#[repr(transparent)] +#[doc = " CUDA Profiler Output modes"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaOutputMode(pub ::std::os::raw::c_uint); +impl cudaFlushGPUDirectRDMAWritesOptions { + #[doc = "< ::cudaDeviceFlushGPUDirectRDMAWrites() and its CUDA Driver API counterpart are supported on the device."] + pub const cudaFlushGPUDirectRDMAWritesOptionHost: cudaFlushGPUDirectRDMAWritesOptions = + cudaFlushGPUDirectRDMAWritesOptions(1); +} +impl cudaFlushGPUDirectRDMAWritesOptions { + #[doc = "< The ::CU_STREAM_WAIT_VALUE_FLUSH flag and the 
::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the CUDA device."] + pub const cudaFlushGPUDirectRDMAWritesOptionMemOps: cudaFlushGPUDirectRDMAWritesOptions = + cudaFlushGPUDirectRDMAWritesOptions(2); +} +#[repr(transparent)] +#[doc = " CUDA GPUDirect RDMA flush writes APIs supported on the device"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaFlushGPUDirectRDMAWritesOptions(pub ::std::os::raw::c_uint); +impl cudaGPUDirectRDMAWritesOrdering { + #[doc = "< The device does not natively support ordering of GPUDirect RDMA writes. ::cudaFlushGPUDirectRDMAWrites() can be leveraged if supported."] + pub const cudaGPUDirectRDMAWritesOrderingNone: cudaGPUDirectRDMAWritesOrdering = + cudaGPUDirectRDMAWritesOrdering(0); +} +impl cudaGPUDirectRDMAWritesOrdering { + #[doc = "< Natively, the device can consistently consume GPUDirect RDMA writes, although other CUDA devices may not."] + pub const cudaGPUDirectRDMAWritesOrderingOwner: cudaGPUDirectRDMAWritesOrdering = + cudaGPUDirectRDMAWritesOrdering(100); +} +impl cudaGPUDirectRDMAWritesOrdering { + #[doc = "< Any CUDA device in the system can consistently consume GPUDirect RDMA writes to this device."] + pub const cudaGPUDirectRDMAWritesOrderingAllDevices: cudaGPUDirectRDMAWritesOrdering = + cudaGPUDirectRDMAWritesOrdering(200); +} +#[repr(transparent)] +#[doc = " CUDA GPUDirect RDMA flush writes ordering features of the device"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaGPUDirectRDMAWritesOrdering(pub ::std::os::raw::c_uint); +impl cudaFlushGPUDirectRDMAWritesScope { + #[doc = "< Blocks until remote writes are visible to the CUDA device context owning the data."] + pub const cudaFlushGPUDirectRDMAWritesToOwner: cudaFlushGPUDirectRDMAWritesScope = + cudaFlushGPUDirectRDMAWritesScope(100); +} +impl cudaFlushGPUDirectRDMAWritesScope { + #[doc = "< Blocks until remote writes are visible to all CUDA device contexts."] + pub const cudaFlushGPUDirectRDMAWritesToAllDevices: cudaFlushGPUDirectRDMAWritesScope = + cudaFlushGPUDirectRDMAWritesScope(200); +} +#[repr(transparent)] +#[doc = " CUDA GPUDirect RDMA flush writes scopes"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaFlushGPUDirectRDMAWritesScope(pub ::std::os::raw::c_uint); +impl cudaFlushGPUDirectRDMAWritesTarget { + #[doc = "< Sets the target for ::cudaDeviceFlushGPUDirectRDMAWrites() to the currently active CUDA device context."] + pub const cudaFlushGPUDirectRDMAWritesTargetCurrentDevice: cudaFlushGPUDirectRDMAWritesTarget = + cudaFlushGPUDirectRDMAWritesTarget(0); +} +#[repr(transparent)] +#[doc = " CUDA GPUDirect RDMA flush writes targets"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaFlushGPUDirectRDMAWritesTarget(pub ::std::os::raw::c_uint); +impl cudaDeviceAttr { + #[doc = "< Maximum number of threads per block"] + pub const cudaDevAttrMaxThreadsPerBlock: cudaDeviceAttr = cudaDeviceAttr(1); +} +impl cudaDeviceAttr { + #[doc = "< Maximum block dimension X"] + pub const cudaDevAttrMaxBlockDimX: cudaDeviceAttr = cudaDeviceAttr(2); +} +impl cudaDeviceAttr { + #[doc = "< Maximum block dimension Y"] + pub const cudaDevAttrMaxBlockDimY: cudaDeviceAttr = cudaDeviceAttr(3); +} +impl cudaDeviceAttr { + #[doc = "< Maximum block dimension Z"] + pub const cudaDevAttrMaxBlockDimZ: cudaDeviceAttr = cudaDeviceAttr(4); +} +impl cudaDeviceAttr { + #[doc = "< Maximum grid dimension X"] + pub const cudaDevAttrMaxGridDimX: cudaDeviceAttr = cudaDeviceAttr(5); +} +impl cudaDeviceAttr { + #[doc = "< Maximum grid dimension Y"] + pub const 
cudaDevAttrMaxGridDimY: cudaDeviceAttr = cudaDeviceAttr(6); +} +impl cudaDeviceAttr { + #[doc = "< Maximum grid dimension Z"] + pub const cudaDevAttrMaxGridDimZ: cudaDeviceAttr = cudaDeviceAttr(7); +} +impl cudaDeviceAttr { + #[doc = "< Maximum shared memory available per block in bytes"] + pub const cudaDevAttrMaxSharedMemoryPerBlock: cudaDeviceAttr = cudaDeviceAttr(8); +} +impl cudaDeviceAttr { + #[doc = "< Memory available on device for __constant__ variables in a CUDA C kernel in bytes"] + pub const cudaDevAttrTotalConstantMemory: cudaDeviceAttr = cudaDeviceAttr(9); +} +impl cudaDeviceAttr { + #[doc = "< Warp size in threads"] + pub const cudaDevAttrWarpSize: cudaDeviceAttr = cudaDeviceAttr(10); +} +impl cudaDeviceAttr { + #[doc = "< Maximum pitch in bytes allowed by memory copies"] + pub const cudaDevAttrMaxPitch: cudaDeviceAttr = cudaDeviceAttr(11); +} +impl cudaDeviceAttr { + #[doc = "< Maximum number of 32-bit registers available per block"] + pub const cudaDevAttrMaxRegistersPerBlock: cudaDeviceAttr = cudaDeviceAttr(12); +} +impl cudaDeviceAttr { + #[doc = "< Peak clock frequency in kilohertz"] + pub const cudaDevAttrClockRate: cudaDeviceAttr = cudaDeviceAttr(13); +} +impl cudaDeviceAttr { + #[doc = "< Alignment requirement for textures"] + pub const cudaDevAttrTextureAlignment: cudaDeviceAttr = cudaDeviceAttr(14); +} +impl cudaDeviceAttr { + #[doc = "< Device can possibly copy memory and execute a kernel concurrently"] + pub const cudaDevAttrGpuOverlap: cudaDeviceAttr = cudaDeviceAttr(15); +} +impl cudaDeviceAttr { + #[doc = "< Number of multiprocessors on device"] + pub const cudaDevAttrMultiProcessorCount: cudaDeviceAttr = cudaDeviceAttr(16); +} +impl cudaDeviceAttr { + #[doc = "< Specifies whether there is a run time limit on kernels"] + pub const cudaDevAttrKernelExecTimeout: cudaDeviceAttr = cudaDeviceAttr(17); +} +impl cudaDeviceAttr { + #[doc = "< Device is integrated with host memory"] + pub const cudaDevAttrIntegrated: cudaDeviceAttr = cudaDeviceAttr(18); +} +impl cudaDeviceAttr { + #[doc = "< Device can map host memory into CUDA address space"] + pub const cudaDevAttrCanMapHostMemory: cudaDeviceAttr = cudaDeviceAttr(19); +} +impl cudaDeviceAttr { + #[doc = "< Compute mode (See ::cudaComputeMode for details)"] + pub const cudaDevAttrComputeMode: cudaDeviceAttr = cudaDeviceAttr(20); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 1D texture width"] + pub const cudaDevAttrMaxTexture1DWidth: cudaDeviceAttr = cudaDeviceAttr(21); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 2D texture width"] + pub const cudaDevAttrMaxTexture2DWidth: cudaDeviceAttr = cudaDeviceAttr(22); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 2D texture height"] + pub const cudaDevAttrMaxTexture2DHeight: cudaDeviceAttr = cudaDeviceAttr(23); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 3D texture width"] + pub const cudaDevAttrMaxTexture3DWidth: cudaDeviceAttr = cudaDeviceAttr(24); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 3D texture height"] + pub const cudaDevAttrMaxTexture3DHeight: cudaDeviceAttr = cudaDeviceAttr(25); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 3D texture depth"] + pub const cudaDevAttrMaxTexture3DDepth: cudaDeviceAttr = cudaDeviceAttr(26); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 2D layered texture width"] + pub const cudaDevAttrMaxTexture2DLayeredWidth: cudaDeviceAttr = cudaDeviceAttr(27); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 2D layered texture height"] + pub const cudaDevAttrMaxTexture2DLayeredHeight: cudaDeviceAttr = cudaDeviceAttr(28); +} +impl 
cudaDeviceAttr { + #[doc = "< Maximum layers in a 2D layered texture"] + pub const cudaDevAttrMaxTexture2DLayeredLayers: cudaDeviceAttr = cudaDeviceAttr(29); +} +impl cudaDeviceAttr { + #[doc = "< Alignment requirement for surfaces"] + pub const cudaDevAttrSurfaceAlignment: cudaDeviceAttr = cudaDeviceAttr(30); +} +impl cudaDeviceAttr { + #[doc = "< Device can possibly execute multiple kernels concurrently"] + pub const cudaDevAttrConcurrentKernels: cudaDeviceAttr = cudaDeviceAttr(31); +} +impl cudaDeviceAttr { + #[doc = "< Device has ECC support enabled"] + pub const cudaDevAttrEccEnabled: cudaDeviceAttr = cudaDeviceAttr(32); +} +impl cudaDeviceAttr { + #[doc = "< PCI bus ID of the device"] + pub const cudaDevAttrPciBusId: cudaDeviceAttr = cudaDeviceAttr(33); +} +impl cudaDeviceAttr { + #[doc = "< PCI device ID of the device"] + pub const cudaDevAttrPciDeviceId: cudaDeviceAttr = cudaDeviceAttr(34); +} +impl cudaDeviceAttr { + #[doc = "< Device is using TCC driver model"] + pub const cudaDevAttrTccDriver: cudaDeviceAttr = cudaDeviceAttr(35); +} +impl cudaDeviceAttr { + #[doc = "< Peak memory clock frequency in kilohertz"] + pub const cudaDevAttrMemoryClockRate: cudaDeviceAttr = cudaDeviceAttr(36); +} +impl cudaDeviceAttr { + #[doc = "< Global memory bus width in bits"] + pub const cudaDevAttrGlobalMemoryBusWidth: cudaDeviceAttr = cudaDeviceAttr(37); +} +impl cudaDeviceAttr { + #[doc = "< Size of L2 cache in bytes"] + pub const cudaDevAttrL2CacheSize: cudaDeviceAttr = cudaDeviceAttr(38); +} +impl cudaDeviceAttr { + #[doc = "< Maximum resident threads per multiprocessor"] + pub const cudaDevAttrMaxThreadsPerMultiProcessor: cudaDeviceAttr = cudaDeviceAttr(39); +} +impl cudaDeviceAttr { + #[doc = "< Number of asynchronous engines"] + pub const cudaDevAttrAsyncEngineCount: cudaDeviceAttr = cudaDeviceAttr(40); +} +impl cudaDeviceAttr { + #[doc = "< Device shares a unified address space with the host"] + pub const cudaDevAttrUnifiedAddressing: cudaDeviceAttr = cudaDeviceAttr(41); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 1D layered texture width"] + pub const cudaDevAttrMaxTexture1DLayeredWidth: cudaDeviceAttr = cudaDeviceAttr(42); +} +impl cudaDeviceAttr { + #[doc = "< Maximum layers in a 1D layered texture"] + pub const cudaDevAttrMaxTexture1DLayeredLayers: cudaDeviceAttr = cudaDeviceAttr(43); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 2D texture width if cudaArrayTextureGather is set"] + pub const cudaDevAttrMaxTexture2DGatherWidth: cudaDeviceAttr = cudaDeviceAttr(45); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 2D texture height if cudaArrayTextureGather is set"] + pub const cudaDevAttrMaxTexture2DGatherHeight: cudaDeviceAttr = cudaDeviceAttr(46); +} +impl cudaDeviceAttr { + #[doc = "< Alternate maximum 3D texture width"] + pub const cudaDevAttrMaxTexture3DWidthAlt: cudaDeviceAttr = cudaDeviceAttr(47); +} +impl cudaDeviceAttr { + #[doc = "< Alternate maximum 3D texture height"] + pub const cudaDevAttrMaxTexture3DHeightAlt: cudaDeviceAttr = cudaDeviceAttr(48); +} +impl cudaDeviceAttr { + #[doc = "< Alternate maximum 3D texture depth"] + pub const cudaDevAttrMaxTexture3DDepthAlt: cudaDeviceAttr = cudaDeviceAttr(49); +} +impl cudaDeviceAttr { + #[doc = "< PCI domain ID of the device"] + pub const cudaDevAttrPciDomainId: cudaDeviceAttr = cudaDeviceAttr(50); +} +impl cudaDeviceAttr { + #[doc = "< Pitch alignment requirement for textures"] + pub const cudaDevAttrTexturePitchAlignment: cudaDeviceAttr = cudaDeviceAttr(51); +} +impl cudaDeviceAttr { + #[doc = "< Maximum cubemap texture 
width/height"] + pub const cudaDevAttrMaxTextureCubemapWidth: cudaDeviceAttr = cudaDeviceAttr(52); +} +impl cudaDeviceAttr { + #[doc = "< Maximum cubemap layered texture width/height"] + pub const cudaDevAttrMaxTextureCubemapLayeredWidth: cudaDeviceAttr = cudaDeviceAttr(53); +} +impl cudaDeviceAttr { + #[doc = "< Maximum layers in a cubemap layered texture"] + pub const cudaDevAttrMaxTextureCubemapLayeredLayers: cudaDeviceAttr = cudaDeviceAttr(54); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 1D surface width"] + pub const cudaDevAttrMaxSurface1DWidth: cudaDeviceAttr = cudaDeviceAttr(55); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 2D surface width"] + pub const cudaDevAttrMaxSurface2DWidth: cudaDeviceAttr = cudaDeviceAttr(56); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 2D surface height"] + pub const cudaDevAttrMaxSurface2DHeight: cudaDeviceAttr = cudaDeviceAttr(57); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 3D surface width"] + pub const cudaDevAttrMaxSurface3DWidth: cudaDeviceAttr = cudaDeviceAttr(58); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 3D surface height"] + pub const cudaDevAttrMaxSurface3DHeight: cudaDeviceAttr = cudaDeviceAttr(59); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 3D surface depth"] + pub const cudaDevAttrMaxSurface3DDepth: cudaDeviceAttr = cudaDeviceAttr(60); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 1D layered surface width"] + pub const cudaDevAttrMaxSurface1DLayeredWidth: cudaDeviceAttr = cudaDeviceAttr(61); +} +impl cudaDeviceAttr { + #[doc = "< Maximum layers in a 1D layered surface"] + pub const cudaDevAttrMaxSurface1DLayeredLayers: cudaDeviceAttr = cudaDeviceAttr(62); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 2D layered surface width"] + pub const cudaDevAttrMaxSurface2DLayeredWidth: cudaDeviceAttr = cudaDeviceAttr(63); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 2D layered surface height"] + pub const cudaDevAttrMaxSurface2DLayeredHeight: cudaDeviceAttr = cudaDeviceAttr(64); +} +impl cudaDeviceAttr { + #[doc = "< Maximum layers in a 2D layered surface"] + pub const cudaDevAttrMaxSurface2DLayeredLayers: cudaDeviceAttr = cudaDeviceAttr(65); +} +impl cudaDeviceAttr { + #[doc = "< Maximum cubemap surface width"] + pub const cudaDevAttrMaxSurfaceCubemapWidth: cudaDeviceAttr = cudaDeviceAttr(66); +} +impl cudaDeviceAttr { + #[doc = "< Maximum cubemap layered surface width"] + pub const cudaDevAttrMaxSurfaceCubemapLayeredWidth: cudaDeviceAttr = cudaDeviceAttr(67); +} +impl cudaDeviceAttr { + #[doc = "< Maximum layers in a cubemap layered surface"] + pub const cudaDevAttrMaxSurfaceCubemapLayeredLayers: cudaDeviceAttr = cudaDeviceAttr(68); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 1D linear texture width"] + pub const cudaDevAttrMaxTexture1DLinearWidth: cudaDeviceAttr = cudaDeviceAttr(69); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 2D linear texture width"] + pub const cudaDevAttrMaxTexture2DLinearWidth: cudaDeviceAttr = cudaDeviceAttr(70); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 2D linear texture height"] + pub const cudaDevAttrMaxTexture2DLinearHeight: cudaDeviceAttr = cudaDeviceAttr(71); +} +impl cudaDeviceAttr { + #[doc = "< Maximum 2D linear texture pitch in bytes"] + pub const cudaDevAttrMaxTexture2DLinearPitch: cudaDeviceAttr = cudaDeviceAttr(72); +} +impl cudaDeviceAttr { + #[doc = "< Maximum mipmapped 2D texture width"] + pub const cudaDevAttrMaxTexture2DMipmappedWidth: cudaDeviceAttr = cudaDeviceAttr(73); +} +impl cudaDeviceAttr { + #[doc = "< Maximum mipmapped 2D texture height"] + pub const 
cudaDevAttrMaxTexture2DMipmappedHeight: cudaDeviceAttr = cudaDeviceAttr(74); +} +impl cudaDeviceAttr { + #[doc = "< Major compute capability version number"] + pub const cudaDevAttrComputeCapabilityMajor: cudaDeviceAttr = cudaDeviceAttr(75); +} +impl cudaDeviceAttr { + #[doc = "< Minor compute capability version number"] + pub const cudaDevAttrComputeCapabilityMinor: cudaDeviceAttr = cudaDeviceAttr(76); +} +impl cudaDeviceAttr { + #[doc = "< Maximum mipmapped 1D texture width"] + pub const cudaDevAttrMaxTexture1DMipmappedWidth: cudaDeviceAttr = cudaDeviceAttr(77); +} +impl cudaDeviceAttr { + #[doc = "< Device supports stream priorities"] + pub const cudaDevAttrStreamPrioritiesSupported: cudaDeviceAttr = cudaDeviceAttr(78); +} +impl cudaDeviceAttr { + #[doc = "< Device supports caching globals in L1"] + pub const cudaDevAttrGlobalL1CacheSupported: cudaDeviceAttr = cudaDeviceAttr(79); +} +impl cudaDeviceAttr { + #[doc = "< Device supports caching locals in L1"] + pub const cudaDevAttrLocalL1CacheSupported: cudaDeviceAttr = cudaDeviceAttr(80); +} +impl cudaDeviceAttr { + #[doc = "< Maximum shared memory available per multiprocessor in bytes"] + pub const cudaDevAttrMaxSharedMemoryPerMultiprocessor: cudaDeviceAttr = cudaDeviceAttr(81); +} +impl cudaDeviceAttr { + #[doc = "< Maximum number of 32-bit registers available per multiprocessor"] + pub const cudaDevAttrMaxRegistersPerMultiprocessor: cudaDeviceAttr = cudaDeviceAttr(82); +} +impl cudaDeviceAttr { + #[doc = "< Device can allocate managed memory on this system"] + pub const cudaDevAttrManagedMemory: cudaDeviceAttr = cudaDeviceAttr(83); +} +impl cudaDeviceAttr { + #[doc = "< Device is on a multi-GPU board"] + pub const cudaDevAttrIsMultiGpuBoard: cudaDeviceAttr = cudaDeviceAttr(84); +} +impl cudaDeviceAttr { + #[doc = "< Unique identifier for a group of devices on the same multi-GPU board"] + pub const cudaDevAttrMultiGpuBoardGroupID: cudaDeviceAttr = cudaDeviceAttr(85); +} +impl cudaDeviceAttr { + #[doc = "< Link between the device and the host supports native atomic operations"] + pub const cudaDevAttrHostNativeAtomicSupported: cudaDeviceAttr = cudaDeviceAttr(86); +} +impl cudaDeviceAttr { + #[doc = "< Ratio of single precision performance (in floating-point operations per second) to double precision performance"] + pub const cudaDevAttrSingleToDoublePrecisionPerfRatio: cudaDeviceAttr = cudaDeviceAttr(87); +} +impl cudaDeviceAttr { + #[doc = "< Device supports coherently accessing pageable memory without calling cudaHostRegister on it"] + pub const cudaDevAttrPageableMemoryAccess: cudaDeviceAttr = cudaDeviceAttr(88); +} +impl cudaDeviceAttr { + #[doc = "< Device can coherently access managed memory concurrently with the CPU"] + pub const cudaDevAttrConcurrentManagedAccess: cudaDeviceAttr = cudaDeviceAttr(89); +} +impl cudaDeviceAttr { + #[doc = "< Device supports Compute Preemption"] + pub const cudaDevAttrComputePreemptionSupported: cudaDeviceAttr = cudaDeviceAttr(90); +} +impl cudaDeviceAttr { + #[doc = "< Device can access host registered memory at the same virtual address as the CPU"] + pub const cudaDevAttrCanUseHostPointerForRegisteredMem: cudaDeviceAttr = cudaDeviceAttr(91); +} +impl cudaDeviceAttr { + pub const cudaDevAttrReserved92: cudaDeviceAttr = cudaDeviceAttr(92); +} +impl cudaDeviceAttr { + pub const cudaDevAttrReserved93: cudaDeviceAttr = cudaDeviceAttr(93); +} +impl cudaDeviceAttr { + pub const cudaDevAttrReserved94: cudaDeviceAttr = cudaDeviceAttr(94); +} +impl cudaDeviceAttr { + #[doc = "< Device supports launching 
cooperative kernels via ::cudaLaunchCooperativeKernel"] + pub const cudaDevAttrCooperativeLaunch: cudaDeviceAttr = cudaDeviceAttr(95); +} +impl cudaDeviceAttr { + #[doc = "< Deprecated, cudaLaunchCooperativeKernelMultiDevice is deprecated."] + pub const cudaDevAttrCooperativeMultiDeviceLaunch: cudaDeviceAttr = cudaDeviceAttr(96); +} +impl cudaDeviceAttr { + #[doc = "< The maximum optin shared memory per block. This value may vary by chip. See ::cudaFuncSetAttribute"] + pub const cudaDevAttrMaxSharedMemoryPerBlockOptin: cudaDeviceAttr = cudaDeviceAttr(97); +} +impl cudaDeviceAttr { + #[doc = "< Device supports flushing of outstanding remote writes."] + pub const cudaDevAttrCanFlushRemoteWrites: cudaDeviceAttr = cudaDeviceAttr(98); +} +impl cudaDeviceAttr { + #[doc = "< Device supports host memory registration via ::cudaHostRegister."] + pub const cudaDevAttrHostRegisterSupported: cudaDeviceAttr = cudaDeviceAttr(99); +} +impl cudaDeviceAttr { + #[doc = "< Device accesses pageable memory via the host's page tables."] + pub const cudaDevAttrPageableMemoryAccessUsesHostPageTables: cudaDeviceAttr = + cudaDeviceAttr(100); +} +impl cudaDeviceAttr { + #[doc = "< Host can directly access managed memory on the device without migration."] + pub const cudaDevAttrDirectManagedMemAccessFromHost: cudaDeviceAttr = cudaDeviceAttr(101); +} +impl cudaDeviceAttr { + #[doc = "< Maximum number of blocks per multiprocessor"] + pub const cudaDevAttrMaxBlocksPerMultiprocessor: cudaDeviceAttr = cudaDeviceAttr(106); +} +impl cudaDeviceAttr { + #[doc = "< Maximum L2 persisting lines capacity setting in bytes."] + pub const cudaDevAttrMaxPersistingL2CacheSize: cudaDeviceAttr = cudaDeviceAttr(108); +} +impl cudaDeviceAttr { + #[doc = "< Maximum value of cudaAccessPolicyWindow::num_bytes."] + pub const cudaDevAttrMaxAccessPolicyWindowSize: cudaDeviceAttr = cudaDeviceAttr(109); +} +impl cudaDeviceAttr { + #[doc = "< Shared memory reserved by CUDA driver per block in bytes"] + pub const cudaDevAttrReservedSharedMemoryPerBlock: cudaDeviceAttr = cudaDeviceAttr(111); +} +impl cudaDeviceAttr { + #[doc = "< Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays"] + pub const cudaDevAttrSparseCudaArraySupported: cudaDeviceAttr = cudaDeviceAttr(112); +} +impl cudaDeviceAttr { + #[doc = "< Device supports using the ::cudaHostRegister flag cudaHostRegisterReadOnly to register memory that must be mapped as read-only to the GPU"] + pub const cudaDevAttrHostRegisterReadOnlySupported: cudaDeviceAttr = cudaDeviceAttr(113); +} +impl cudaDeviceAttr { + #[doc = "< External timeline semaphore interop is supported on the device"] + pub const cudaDevAttrTimelineSemaphoreInteropSupported: cudaDeviceAttr = cudaDeviceAttr(114); +} +impl cudaDeviceAttr { + #[doc = "< Deprecated, External timeline semaphore interop is supported on the device"] + pub const cudaDevAttrMaxTimelineSemaphoreInteropSupported: cudaDeviceAttr = cudaDeviceAttr(114); +} +impl cudaDeviceAttr { + #[doc = "< Device supports using the ::cudaMallocAsync and ::cudaMemPool family of APIs"] + pub const cudaDevAttrMemoryPoolsSupported: cudaDeviceAttr = cudaDeviceAttr(115); +} +impl cudaDeviceAttr { + #[doc = "< Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information)"] + pub const cudaDevAttrGPUDirectRDMASupported: cudaDeviceAttr = cudaDeviceAttr(116); +} +impl cudaDeviceAttr { + #[doc = "< The returned attribute shall be interpreted as a bitmask, where the individual bits are listed in the 
::cudaFlushGPUDirectRDMAWritesOptions enum"] + pub const cudaDevAttrGPUDirectRDMAFlushWritesOptions: cudaDeviceAttr = cudaDeviceAttr(117); +} +impl cudaDeviceAttr { + #[doc = "< GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See ::cudaGPUDirectRDMAWritesOrdering for the numerical values returned here."] + pub const cudaDevAttrGPUDirectRDMAWritesOrdering: cudaDeviceAttr = cudaDeviceAttr(118); +} +impl cudaDeviceAttr { + #[doc = "< Handle types supported with mempool based IPC"] + pub const cudaDevAttrMemoryPoolSupportedHandleTypes: cudaDeviceAttr = cudaDeviceAttr(119); +} +impl cudaDeviceAttr { + #[doc = "< Indicates device supports cluster launch"] + pub const cudaDevAttrClusterLaunch: cudaDeviceAttr = cudaDeviceAttr(120); +} +impl cudaDeviceAttr { + #[doc = "< Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays"] + pub const cudaDevAttrDeferredMappingCudaArraySupported: cudaDeviceAttr = cudaDeviceAttr(121); +} +impl cudaDeviceAttr { + pub const cudaDevAttrMax: cudaDeviceAttr = cudaDeviceAttr(122); +} +#[repr(transparent)] +#[doc = " CUDA device attributes"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaDeviceAttr(pub ::std::os::raw::c_uint); +impl cudaMemPoolAttr { + #[doc = " (value type = int)\n Allow cuMemAllocAsync to use memory asynchronously freed\n in another streams as long as a stream ordering dependency\n of the allocating stream on the free action exists.\n Cuda events and null stream interactions can create the required\n stream ordered dependencies. (default enabled)"] + pub const cudaMemPoolReuseFollowEventDependencies: cudaMemPoolAttr = cudaMemPoolAttr(1); +} +impl cudaMemPoolAttr { + #[doc = " (value type = int)\n Allow reuse of already completed frees when there is no dependency\n between the free and allocation. (default enabled)"] + pub const cudaMemPoolReuseAllowOpportunistic: cudaMemPoolAttr = cudaMemPoolAttr(2); +} +impl cudaMemPoolAttr { + #[doc = " (value type = int)\n Allow cuMemAllocAsync to insert new stream dependencies\n in order to establish the stream ordering required to reuse\n a piece of memory released by cuFreeAsync (default enabled)."] + pub const cudaMemPoolReuseAllowInternalDependencies: cudaMemPoolAttr = cudaMemPoolAttr(3); +} +impl cudaMemPoolAttr { + #[doc = " (value type = cuuint64_t)\n Amount of reserved memory in bytes to hold onto before trying\n to release memory back to the OS. When more than the release\n threshold bytes of memory are held by the memory pool, the\n allocator will try to release memory back to the OS on the\n next call to stream, event or context synchronize. (default 0)"] + pub const cudaMemPoolAttrReleaseThreshold: cudaMemPoolAttr = cudaMemPoolAttr(4); +} +impl cudaMemPoolAttr { + #[doc = " (value type = cuuint64_t)\n Amount of backing memory currently allocated for the mempool."] + pub const cudaMemPoolAttrReservedMemCurrent: cudaMemPoolAttr = cudaMemPoolAttr(5); +} +impl cudaMemPoolAttr { + #[doc = " (value type = cuuint64_t)\n High watermark of backing memory allocated for the mempool since the\n last time it was reset. 
High watermark can only be reset to zero."] + pub const cudaMemPoolAttrReservedMemHigh: cudaMemPoolAttr = cudaMemPoolAttr(6); +} +impl cudaMemPoolAttr { + #[doc = " (value type = cuuint64_t)\n Amount of memory from the pool that is currently in use by the application."] + pub const cudaMemPoolAttrUsedMemCurrent: cudaMemPoolAttr = cudaMemPoolAttr(7); +} +impl cudaMemPoolAttr { + #[doc = " (value type = cuuint64_t)\n High watermark of the amount of memory from the pool that was in use by the application since\n the last time it was reset. High watermark can only be reset to zero."] + pub const cudaMemPoolAttrUsedMemHigh: cudaMemPoolAttr = cudaMemPoolAttr(8); +} +#[repr(transparent)] +#[doc = " CUDA memory pool attributes"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaMemPoolAttr(pub ::std::os::raw::c_uint); +impl cudaMemLocationType { + pub const cudaMemLocationTypeInvalid: cudaMemLocationType = cudaMemLocationType(0); +} +impl cudaMemLocationType { + #[doc = "< Location is a device location, thus id is a device ordinal"] + pub const cudaMemLocationTypeDevice: cudaMemLocationType = cudaMemLocationType(1); +} +#[repr(transparent)] +#[doc = " Specifies the type of location"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaMemLocationType(pub ::std::os::raw::c_uint); +#[doc = " Specifies a memory location.\n\n To specify a gpu, set type = ::cudaMemLocationTypeDevice and set id = the gpu's device ordinal."] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaMemLocation { + #[doc = "< Specifies the location type, which modifies the meaning of id."] + pub type_: cudaMemLocationType, + #[doc = "< identifier for a given this location's ::CUmemLocationType."] + pub id: ::std::os::raw::c_int, +} +impl cudaMemAccessFlags { + #[doc = "< Default, make the address range not accessible"] + pub const cudaMemAccessFlagsProtNone: cudaMemAccessFlags = cudaMemAccessFlags(0); +} +impl cudaMemAccessFlags { + #[doc = "< Make the address range read accessible"] + pub const cudaMemAccessFlagsProtRead: cudaMemAccessFlags = cudaMemAccessFlags(1); +} +impl cudaMemAccessFlags { + #[doc = "< Make the address range read-write accessible"] + pub const cudaMemAccessFlagsProtReadWrite: cudaMemAccessFlags = cudaMemAccessFlags(3); +} +#[repr(transparent)] +#[doc = " Specifies the memory protection flags for mapping."] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaMemAccessFlags(pub ::std::os::raw::c_uint); +#[doc = " Memory access descriptor"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaMemAccessDesc { + #[doc = "< Location on which the request is to change it's accessibility"] + pub location: cudaMemLocation, + #[doc = "< ::CUmemProt accessibility flags to set on the request"] + pub flags: cudaMemAccessFlags, +} +impl cudaMemAllocationType { + pub const cudaMemAllocationTypeInvalid: cudaMemAllocationType = cudaMemAllocationType(0); +} +impl cudaMemAllocationType { + #[doc = " This allocation type is 'pinned', i.e. cannot migrate from its current\n location while the application is actively using it"] + pub const cudaMemAllocationTypePinned: cudaMemAllocationType = cudaMemAllocationType(1); +} +impl cudaMemAllocationType { + #[doc = " This allocation type is 'pinned', i.e. 
cannot migrate from its current\n location while the application is actively using it"] + pub const cudaMemAllocationTypeMax: cudaMemAllocationType = cudaMemAllocationType(2147483647); +} +#[repr(transparent)] +#[doc = " Defines the allocation types available"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaMemAllocationType(pub ::std::os::raw::c_uint); +impl cudaMemAllocationHandleType { + #[doc = "< Does not allow any export mechanism. >"] + pub const cudaMemHandleTypeNone: cudaMemAllocationHandleType = cudaMemAllocationHandleType(0); +} +impl cudaMemAllocationHandleType { + #[doc = "< Allows a file descriptor to be used for exporting. Permitted only on POSIX systems. (int)"] + pub const cudaMemHandleTypePosixFileDescriptor: cudaMemAllocationHandleType = + cudaMemAllocationHandleType(1); +} +impl cudaMemAllocationHandleType { + #[doc = "< Allows a Win32 NT handle to be used for exporting. (HANDLE)"] + pub const cudaMemHandleTypeWin32: cudaMemAllocationHandleType = cudaMemAllocationHandleType(2); +} +impl cudaMemAllocationHandleType { + #[doc = "< Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE)"] + pub const cudaMemHandleTypeWin32Kmt: cudaMemAllocationHandleType = + cudaMemAllocationHandleType(4); +} +#[repr(transparent)] +#[doc = " Flags for specifying particular handle types"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaMemAllocationHandleType(pub ::std::os::raw::c_uint); +#[doc = " Specifies the properties of allocations made from the pool."] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaMemPoolProps { + #[doc = "< Allocation type. Currently must be specified as cudaMemAllocationTypePinned"] + pub allocType: cudaMemAllocationType, + #[doc = "< Handle types that will be supported by allocations from the pool."] + pub handleTypes: cudaMemAllocationHandleType, + #[doc = "< Location allocations should reside."] + pub location: cudaMemLocation, + #[doc = " Windows-specific LPSECURITYATTRIBUTES required when\n ::cudaMemHandleTypeWin32 is specified. This security attribute defines\n the scope of which exported allocations may be tranferred to other\n processes. In all other cases, this field is required to be zero."] + pub win32SecurityAttributes: *mut ::std::os::raw::c_void, + #[doc = "< reserved for future use, must be 0"] + pub reserved: [::std::os::raw::c_uchar; 64usize], +} +#[doc = " Opaque data for exporting a pool allocation"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaMemPoolPtrExportData { + pub reserved: [::std::os::raw::c_uchar; 64usize], +} +#[doc = " Memory allocation node parameters"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaMemAllocNodeParams { + #[doc = "< in: array of memory access descriptors. Used to describe peer GPU access"] + pub poolProps: cudaMemPoolProps, + #[doc = "< in: number of memory access descriptors. 
Must not exceed the number of GPUs."] + pub accessDescs: *const cudaMemAccessDesc, + #[doc = "< in: Number of `accessDescs`s"] + pub accessDescCount: usize, + #[doc = "< in: size in bytes of the requested allocation"] + pub bytesize: usize, + #[doc = "< out: address of the allocation returned by CUDA"] + pub dptr: *mut ::std::os::raw::c_void, +} +impl cudaGraphMemAttributeType { + #[doc = " (value type = cuuint64_t)\n Amount of memory, in bytes, currently associated with graphs."] + pub const cudaGraphMemAttrUsedMemCurrent: cudaGraphMemAttributeType = + cudaGraphMemAttributeType(0); +} +impl cudaGraphMemAttributeType { + #[doc = " (value type = cuuint64_t)\n High watermark of memory, in bytes, associated with graphs since the\n last time it was reset. High watermark can only be reset to zero."] + pub const cudaGraphMemAttrUsedMemHigh: cudaGraphMemAttributeType = cudaGraphMemAttributeType(1); +} +impl cudaGraphMemAttributeType { + #[doc = " (value type = cuuint64_t)\n Amount of memory, in bytes, currently allocated for use by\n the CUDA graphs asynchronous allocator."] + pub const cudaGraphMemAttrReservedMemCurrent: cudaGraphMemAttributeType = + cudaGraphMemAttributeType(2); +} +impl cudaGraphMemAttributeType { + #[doc = " (value type = cuuint64_t)\n High watermark of memory, in bytes, currently allocated for use by\n the CUDA graphs asynchronous allocator."] + pub const cudaGraphMemAttrReservedMemHigh: cudaGraphMemAttributeType = + cudaGraphMemAttributeType(3); +} +#[repr(transparent)] +#[doc = " Graph memory attributes"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaGraphMemAttributeType(pub ::std::os::raw::c_uint); +impl cudaDeviceP2PAttr { + #[doc = "< A relative value indicating the performance of the link between two devices"] + pub const cudaDevP2PAttrPerformanceRank: cudaDeviceP2PAttr = cudaDeviceP2PAttr(1); +} +impl cudaDeviceP2PAttr { + #[doc = "< Peer access is enabled"] + pub const cudaDevP2PAttrAccessSupported: cudaDeviceP2PAttr = cudaDeviceP2PAttr(2); +} +impl cudaDeviceP2PAttr { + #[doc = "< Native atomic operation over the link supported"] + pub const cudaDevP2PAttrNativeAtomicSupported: cudaDeviceP2PAttr = cudaDeviceP2PAttr(3); +} +impl cudaDeviceP2PAttr { + #[doc = "< Accessing CUDA arrays over the link supported"] + pub const cudaDevP2PAttrCudaArrayAccessSupported: cudaDeviceP2PAttr = cudaDeviceP2PAttr(4); +} +#[repr(transparent)] +#[doc = " CUDA device P2P attributes"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaDeviceP2PAttr(pub ::std::os::raw::c_uint); +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUuuid_st { + pub bytes: [::std::os::raw::c_char; 16usize], +} +pub type cudaUUID_t = CUuuid_st; +#[doc = " CUDA device properties"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaDeviceProp { + #[doc = "< ASCII string identifying device"] + pub name: [::std::os::raw::c_char; 256usize], + #[doc = "< 16-byte unique identifier"] + pub uuid: cudaUUID_t, + #[doc = "< 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms"] + pub luid: [::std::os::raw::c_char; 8usize], + #[doc = "< LUID device node mask. 
Value is undefined on TCC and non-Windows platforms"] + pub luidDeviceNodeMask: ::std::os::raw::c_uint, + #[doc = "< Global memory available on device in bytes"] + pub totalGlobalMem: usize, + #[doc = "< Shared memory available per block in bytes"] + pub sharedMemPerBlock: usize, + #[doc = "< 32-bit registers available per block"] + pub regsPerBlock: ::std::os::raw::c_int, + #[doc = "< Warp size in threads"] + pub warpSize: ::std::os::raw::c_int, + #[doc = "< Maximum pitch in bytes allowed by memory copies"] + pub memPitch: usize, + #[doc = "< Maximum number of threads per block"] + pub maxThreadsPerBlock: ::std::os::raw::c_int, + #[doc = "< Maximum size of each dimension of a block"] + pub maxThreadsDim: [::std::os::raw::c_int; 3usize], + #[doc = "< Maximum size of each dimension of a grid"] + pub maxGridSize: [::std::os::raw::c_int; 3usize], + #[doc = "< Clock frequency in kilohertz"] + pub clockRate: ::std::os::raw::c_int, + #[doc = "< Constant memory available on device in bytes"] + pub totalConstMem: usize, + #[doc = "< Major compute capability"] + pub major: ::std::os::raw::c_int, + #[doc = "< Minor compute capability"] + pub minor: ::std::os::raw::c_int, + #[doc = "< Alignment requirement for textures"] + pub textureAlignment: usize, + #[doc = "< Pitch alignment requirement for texture references bound to pitched memory"] + pub texturePitchAlignment: usize, + #[doc = "< Device can concurrently copy memory and execute a kernel. Deprecated. Use instead asyncEngineCount."] + pub deviceOverlap: ::std::os::raw::c_int, + #[doc = "< Number of multiprocessors on device"] + pub multiProcessorCount: ::std::os::raw::c_int, + #[doc = "< Specified whether there is a run time limit on kernels"] + pub kernelExecTimeoutEnabled: ::std::os::raw::c_int, + #[doc = "< Device is integrated as opposed to discrete"] + pub integrated: ::std::os::raw::c_int, + #[doc = "< Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer"] + pub canMapHostMemory: ::std::os::raw::c_int, + #[doc = "< Compute mode (See ::cudaComputeMode)"] + pub computeMode: ::std::os::raw::c_int, + #[doc = "< Maximum 1D texture size"] + pub maxTexture1D: ::std::os::raw::c_int, + #[doc = "< Maximum 1D mipmapped texture size"] + pub maxTexture1DMipmap: ::std::os::raw::c_int, + #[doc = "< Deprecated, do not use. 
Use cudaDeviceGetTexture1DLinearMaxWidth() or cuDeviceGetTexture1DLinearMaxWidth() instead."] + pub maxTexture1DLinear: ::std::os::raw::c_int, + #[doc = "< Maximum 2D texture dimensions"] + pub maxTexture2D: [::std::os::raw::c_int; 2usize], + #[doc = "< Maximum 2D mipmapped texture dimensions"] + pub maxTexture2DMipmap: [::std::os::raw::c_int; 2usize], + #[doc = "< Maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory"] + pub maxTexture2DLinear: [::std::os::raw::c_int; 3usize], + #[doc = "< Maximum 2D texture dimensions if texture gather operations have to be performed"] + pub maxTexture2DGather: [::std::os::raw::c_int; 2usize], + #[doc = "< Maximum 3D texture dimensions"] + pub maxTexture3D: [::std::os::raw::c_int; 3usize], + #[doc = "< Maximum alternate 3D texture dimensions"] + pub maxTexture3DAlt: [::std::os::raw::c_int; 3usize], + #[doc = "< Maximum Cubemap texture dimensions"] + pub maxTextureCubemap: ::std::os::raw::c_int, + #[doc = "< Maximum 1D layered texture dimensions"] + pub maxTexture1DLayered: [::std::os::raw::c_int; 2usize], + #[doc = "< Maximum 2D layered texture dimensions"] + pub maxTexture2DLayered: [::std::os::raw::c_int; 3usize], + #[doc = "< Maximum Cubemap layered texture dimensions"] + pub maxTextureCubemapLayered: [::std::os::raw::c_int; 2usize], + #[doc = "< Maximum 1D surface size"] + pub maxSurface1D: ::std::os::raw::c_int, + #[doc = "< Maximum 2D surface dimensions"] + pub maxSurface2D: [::std::os::raw::c_int; 2usize], + #[doc = "< Maximum 3D surface dimensions"] + pub maxSurface3D: [::std::os::raw::c_int; 3usize], + #[doc = "< Maximum 1D layered surface dimensions"] + pub maxSurface1DLayered: [::std::os::raw::c_int; 2usize], + #[doc = "< Maximum 2D layered surface dimensions"] + pub maxSurface2DLayered: [::std::os::raw::c_int; 3usize], + #[doc = "< Maximum Cubemap surface dimensions"] + pub maxSurfaceCubemap: ::std::os::raw::c_int, + #[doc = "< Maximum Cubemap layered surface dimensions"] + pub maxSurfaceCubemapLayered: [::std::os::raw::c_int; 2usize], + #[doc = "< Alignment requirements for surfaces"] + pub surfaceAlignment: usize, + #[doc = "< Device can possibly execute multiple kernels concurrently"] + pub concurrentKernels: ::std::os::raw::c_int, + #[doc = "< Device has ECC support enabled"] + pub ECCEnabled: ::std::os::raw::c_int, + #[doc = "< PCI bus ID of the device"] + pub pciBusID: ::std::os::raw::c_int, + #[doc = "< PCI device ID of the device"] + pub pciDeviceID: ::std::os::raw::c_int, + #[doc = "< PCI domain ID of the device"] + pub pciDomainID: ::std::os::raw::c_int, + #[doc = "< 1 if device is a Tesla device using TCC driver, 0 otherwise"] + pub tccDriver: ::std::os::raw::c_int, + #[doc = "< Number of asynchronous engines"] + pub asyncEngineCount: ::std::os::raw::c_int, + #[doc = "< Device shares a unified address space with the host"] + pub unifiedAddressing: ::std::os::raw::c_int, + #[doc = "< Peak memory clock frequency in kilohertz"] + pub memoryClockRate: ::std::os::raw::c_int, + #[doc = "< Global memory bus width in bits"] + pub memoryBusWidth: ::std::os::raw::c_int, + #[doc = "< Size of L2 cache in bytes"] + pub l2CacheSize: ::std::os::raw::c_int, + #[doc = "< Device's maximum l2 persisting lines capacity setting in bytes"] + pub persistingL2CacheMaxSize: ::std::os::raw::c_int, + #[doc = "< Maximum resident threads per multiprocessor"] + pub maxThreadsPerMultiProcessor: ::std::os::raw::c_int, + #[doc = "< Device supports stream priorities"] + pub streamPrioritiesSupported: ::std::os::raw::c_int, + #[doc = "< 
Device supports caching globals in L1"] + pub globalL1CacheSupported: ::std::os::raw::c_int, + #[doc = "< Device supports caching locals in L1"] + pub localL1CacheSupported: ::std::os::raw::c_int, + #[doc = "< Shared memory available per multiprocessor in bytes"] + pub sharedMemPerMultiprocessor: usize, + #[doc = "< 32-bit registers available per multiprocessor"] + pub regsPerMultiprocessor: ::std::os::raw::c_int, + #[doc = "< Device supports allocating managed memory on this system"] + pub managedMemory: ::std::os::raw::c_int, + #[doc = "< Device is on a multi-GPU board"] + pub isMultiGpuBoard: ::std::os::raw::c_int, + #[doc = "< Unique identifier for a group of devices on the same multi-GPU board"] + pub multiGpuBoardGroupID: ::std::os::raw::c_int, + #[doc = "< Link between the device and the host supports native atomic operations"] + pub hostNativeAtomicSupported: ::std::os::raw::c_int, + #[doc = "< Ratio of single precision performance (in floating-point operations per second) to double precision performance"] + pub singleToDoublePrecisionPerfRatio: ::std::os::raw::c_int, + #[doc = "< Device supports coherently accessing pageable memory without calling cudaHostRegister on it"] + pub pageableMemoryAccess: ::std::os::raw::c_int, + #[doc = "< Device can coherently access managed memory concurrently with the CPU"] + pub concurrentManagedAccess: ::std::os::raw::c_int, + #[doc = "< Device supports Compute Preemption"] + pub computePreemptionSupported: ::std::os::raw::c_int, + #[doc = "< Device can access host registered memory at the same virtual address as the CPU"] + pub canUseHostPointerForRegisteredMem: ::std::os::raw::c_int, + #[doc = "< Device supports launching cooperative kernels via ::cudaLaunchCooperativeKernel"] + pub cooperativeLaunch: ::std::os::raw::c_int, + #[doc = "< Deprecated, cudaLaunchCooperativeKernelMultiDevice is deprecated."] + pub cooperativeMultiDeviceLaunch: ::std::os::raw::c_int, + #[doc = "< Per device maximum shared memory per block usable by special opt in"] + pub sharedMemPerBlockOptin: usize, + #[doc = "< Device accesses pageable memory via the host's page tables"] + pub pageableMemoryAccessUsesHostPageTables: ::std::os::raw::c_int, + #[doc = "< Host can directly access managed memory on the device without migration."] + pub directManagedMemAccessFromHost: ::std::os::raw::c_int, + #[doc = "< Maximum number of resident blocks per multiprocessor"] + pub maxBlocksPerMultiProcessor: ::std::os::raw::c_int, + #[doc = "< The maximum value of ::cudaAccessPolicyWindow::num_bytes."] + pub accessPolicyMaxWindowSize: ::std::os::raw::c_int, + #[doc = "< Shared memory reserved by CUDA driver per block in bytes"] + pub reservedSharedMemPerBlock: usize, +} +#[doc = " CUDA IPC event handle"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaIpcEventHandle_st { + pub reserved: [::std::os::raw::c_char; 64usize], +} +#[doc = " CUDA IPC event handle"] +pub type cudaIpcEventHandle_t = cudaIpcEventHandle_st; +#[doc = " CUDA IPC memory handle"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaIpcMemHandle_st { + pub reserved: [::std::os::raw::c_char; 64usize], +} +#[doc = " CUDA IPC memory handle"] +pub type cudaIpcMemHandle_t = cudaIpcMemHandle_st; +impl cudaExternalMemoryHandleType { + #[doc = " Handle is an opaque file descriptor"] + pub const cudaExternalMemoryHandleTypeOpaqueFd: cudaExternalMemoryHandleType = + cudaExternalMemoryHandleType(1); +} +impl cudaExternalMemoryHandleType { + #[doc = " Handle is an opaque shared NT handle"] + pub const 
cudaExternalMemoryHandleTypeOpaqueWin32: cudaExternalMemoryHandleType = + cudaExternalMemoryHandleType(2); +} +impl cudaExternalMemoryHandleType { + #[doc = " Handle is an opaque, globally shared handle"] + pub const cudaExternalMemoryHandleTypeOpaqueWin32Kmt: cudaExternalMemoryHandleType = + cudaExternalMemoryHandleType(3); +} +impl cudaExternalMemoryHandleType { + #[doc = " Handle is a D3D12 heap object"] + pub const cudaExternalMemoryHandleTypeD3D12Heap: cudaExternalMemoryHandleType = + cudaExternalMemoryHandleType(4); +} +impl cudaExternalMemoryHandleType { + #[doc = " Handle is a D3D12 committed resource"] + pub const cudaExternalMemoryHandleTypeD3D12Resource: cudaExternalMemoryHandleType = + cudaExternalMemoryHandleType(5); +} +impl cudaExternalMemoryHandleType { + #[doc = " Handle is a shared NT handle to a D3D11 resource"] + pub const cudaExternalMemoryHandleTypeD3D11Resource: cudaExternalMemoryHandleType = + cudaExternalMemoryHandleType(6); +} +impl cudaExternalMemoryHandleType { + #[doc = " Handle is a globally shared handle to a D3D11 resource"] + pub const cudaExternalMemoryHandleTypeD3D11ResourceKmt: cudaExternalMemoryHandleType = + cudaExternalMemoryHandleType(7); +} +impl cudaExternalMemoryHandleType { + #[doc = " Handle is an NvSciBuf object"] + pub const cudaExternalMemoryHandleTypeNvSciBuf: cudaExternalMemoryHandleType = + cudaExternalMemoryHandleType(8); +} +#[repr(transparent)] +#[doc = " External memory handle types"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaExternalMemoryHandleType(pub ::std::os::raw::c_uint); +#[doc = " External memory handle descriptor"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalMemoryHandleDesc { + #[doc = " Type of the handle"] + pub type_: cudaExternalMemoryHandleType, + pub handle: cudaExternalMemoryHandleDesc__bindgen_ty_1, + #[doc = " Size of the memory allocation"] + pub size: ::std::os::raw::c_ulonglong, + #[doc = " Flags must either be zero or ::cudaExternalMemoryDedicated"] + pub flags: ::std::os::raw::c_uint, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union cudaExternalMemoryHandleDesc__bindgen_ty_1 { + #[doc = " File descriptor referencing the memory object. Valid\n when type is\n ::cudaExternalMemoryHandleTypeOpaqueFd"] + pub fd: ::std::os::raw::c_int, + pub win32: cudaExternalMemoryHandleDesc__bindgen_ty_1__bindgen_ty_1, + #[doc = " A handle representing NvSciBuf Object. Valid when type\n is ::cudaExternalMemoryHandleTypeNvSciBuf"] + pub nvSciBufObject: *const ::std::os::raw::c_void, +} +#[doc = " Win32 handle referencing the semaphore object. Valid when\n type is one of the following:\n - ::cudaExternalMemoryHandleTypeOpaqueWin32\n - ::cudaExternalMemoryHandleTypeOpaqueWin32Kmt\n - ::cudaExternalMemoryHandleTypeD3D12Heap\n - ::cudaExternalMemoryHandleTypeD3D12Resource\n - ::cudaExternalMemoryHandleTypeD3D11Resource\n - ::cudaExternalMemoryHandleTypeD3D11ResourceKmt\n Exactly one of 'handle' and 'name' must be non-NULL. If\n type is one of the following:\n ::cudaExternalMemoryHandleTypeOpaqueWin32Kmt\n ::cudaExternalMemoryHandleTypeD3D11ResourceKmt\n then 'name' must be NULL."] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalMemoryHandleDesc__bindgen_ty_1__bindgen_ty_1 { + #[doc = " Valid NT handle. 
Must be NULL if 'name' is non-NULL"] + pub handle: *mut ::std::os::raw::c_void, + #[doc = " Name of a valid memory object.\n Must be NULL if 'handle' is non-NULL."] + pub name: *const ::std::os::raw::c_void, +} +#[doc = " External memory buffer descriptor"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalMemoryBufferDesc { + #[doc = " Offset into the memory object where the buffer's base is"] + pub offset: ::std::os::raw::c_ulonglong, + #[doc = " Size of the buffer"] + pub size: ::std::os::raw::c_ulonglong, + #[doc = " Flags reserved for future use. Must be zero."] + pub flags: ::std::os::raw::c_uint, +} +#[doc = " External memory mipmap descriptor"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalMemoryMipmappedArrayDesc { + #[doc = " Offset into the memory object where the base level of the\n mipmap chain is."] + pub offset: ::std::os::raw::c_ulonglong, + #[doc = " Format of base level of the mipmap chain"] + pub formatDesc: cudaChannelFormatDesc, + #[doc = " Dimensions of base level of the mipmap chain"] + pub extent: cudaExtent, + #[doc = " Flags associated with CUDA mipmapped arrays.\n See ::cudaMallocMipmappedArray"] + pub flags: ::std::os::raw::c_uint, + #[doc = " Total number of levels in the mipmap chain"] + pub numLevels: ::std::os::raw::c_uint, +} +impl cudaExternalSemaphoreHandleType { + #[doc = " Handle is an opaque file descriptor"] + pub const cudaExternalSemaphoreHandleTypeOpaqueFd: cudaExternalSemaphoreHandleType = + cudaExternalSemaphoreHandleType(1); +} +impl cudaExternalSemaphoreHandleType { + #[doc = " Handle is an opaque shared NT handle"] + pub const cudaExternalSemaphoreHandleTypeOpaqueWin32: cudaExternalSemaphoreHandleType = + cudaExternalSemaphoreHandleType(2); +} +impl cudaExternalSemaphoreHandleType { + #[doc = " Handle is an opaque, globally shared handle"] + pub const cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt: cudaExternalSemaphoreHandleType = + cudaExternalSemaphoreHandleType(3); +} +impl cudaExternalSemaphoreHandleType { + #[doc = " Handle is a shared NT handle referencing a D3D12 fence object"] + pub const cudaExternalSemaphoreHandleTypeD3D12Fence: cudaExternalSemaphoreHandleType = + cudaExternalSemaphoreHandleType(4); +} +impl cudaExternalSemaphoreHandleType { + #[doc = " Handle is a shared NT handle referencing a D3D11 fence object"] + pub const cudaExternalSemaphoreHandleTypeD3D11Fence: cudaExternalSemaphoreHandleType = + cudaExternalSemaphoreHandleType(5); +} +impl cudaExternalSemaphoreHandleType { + #[doc = " Opaque handle to NvSciSync Object"] + pub const cudaExternalSemaphoreHandleTypeNvSciSync: cudaExternalSemaphoreHandleType = + cudaExternalSemaphoreHandleType(6); +} +impl cudaExternalSemaphoreHandleType { + #[doc = " Handle is a shared NT handle referencing a D3D11 keyed mutex object"] + pub const cudaExternalSemaphoreHandleTypeKeyedMutex: cudaExternalSemaphoreHandleType = + cudaExternalSemaphoreHandleType(7); +} +impl cudaExternalSemaphoreHandleType { + #[doc = " Handle is a shared KMT handle referencing a D3D11 keyed mutex object"] + pub const cudaExternalSemaphoreHandleTypeKeyedMutexKmt: cudaExternalSemaphoreHandleType = + cudaExternalSemaphoreHandleType(8); +} +impl cudaExternalSemaphoreHandleType { + #[doc = " Handle is an opaque handle file descriptor referencing a timeline semaphore"] + pub const cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd: cudaExternalSemaphoreHandleType = + cudaExternalSemaphoreHandleType(9); +} +impl cudaExternalSemaphoreHandleType { + #[doc = " Handle is an opaque handle file 
descriptor referencing a timeline semaphore"] + pub const cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32: + cudaExternalSemaphoreHandleType = cudaExternalSemaphoreHandleType(10); +} +#[repr(transparent)] +#[doc = " External semaphore handle types"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaExternalSemaphoreHandleType(pub ::std::os::raw::c_uint); +#[doc = " External semaphore handle descriptor"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreHandleDesc { + #[doc = " Type of the handle"] + pub type_: cudaExternalSemaphoreHandleType, + pub handle: cudaExternalSemaphoreHandleDesc__bindgen_ty_1, + #[doc = " Flags reserved for the future. Must be zero."] + pub flags: ::std::os::raw::c_uint, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union cudaExternalSemaphoreHandleDesc__bindgen_ty_1 { + #[doc = " File descriptor referencing the semaphore object. Valid when\n type is one of the following:\n - ::cudaExternalSemaphoreHandleTypeOpaqueFd\n - ::cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd"] + pub fd: ::std::os::raw::c_int, + pub win32: cudaExternalSemaphoreHandleDesc__bindgen_ty_1__bindgen_ty_1, + #[doc = " Valid NvSciSyncObj. Must be non NULL"] + pub nvSciSyncObj: *const ::std::os::raw::c_void, +} +#[doc = " Win32 handle referencing the semaphore object. Valid when\n type is one of the following:\n - ::cudaExternalSemaphoreHandleTypeOpaqueWin32\n - ::cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt\n - ::cudaExternalSemaphoreHandleTypeD3D12Fence\n - ::cudaExternalSemaphoreHandleTypeD3D11Fence\n - ::cudaExternalSemaphoreHandleTypeKeyedMutex\n - ::cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32\n Exactly one of 'handle' and 'name' must be non-NULL. If\n type is one of the following:\n ::cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt\n ::cudaExternalSemaphoreHandleTypeKeyedMutexKmt\n then 'name' must be NULL."] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreHandleDesc__bindgen_ty_1__bindgen_ty_1 { + #[doc = " Valid NT handle. 
Must be NULL if 'name' is non-NULL"] + pub handle: *mut ::std::os::raw::c_void, + #[doc = " Name of a valid synchronization primitive.\n Must be NULL if 'handle' is non-NULL."] + pub name: *const ::std::os::raw::c_void, +} +#[doc = " External semaphore signal parameters(deprecated)"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreSignalParams_v1 { + pub params: cudaExternalSemaphoreSignalParams_v1__bindgen_ty_1, + #[doc = " Only when ::cudaExternalSemaphoreSignalParams is used to\n signal a ::cudaExternalSemaphore_t of type\n ::cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is\n ::cudaExternalSemaphoreSignalSkipNvSciBufMemSync: which indicates\n that while signaling the ::cudaExternalSemaphore_t, no memory\n synchronization operations should be performed for any external memory\n object imported as ::cudaExternalMemoryHandleTypeNvSciBuf.\n For all other types of ::cudaExternalSemaphore_t, flags must be zero."] + pub flags: ::std::os::raw::c_uint, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreSignalParams_v1__bindgen_ty_1 { + pub fence: cudaExternalSemaphoreSignalParams_v1__bindgen_ty_1__bindgen_ty_1, + pub nvSciSync: cudaExternalSemaphoreSignalParams_v1__bindgen_ty_1__bindgen_ty_2, + pub keyedMutex: cudaExternalSemaphoreSignalParams_v1__bindgen_ty_1__bindgen_ty_3, +} +#[doc = " Parameters for fence objects"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreSignalParams_v1__bindgen_ty_1__bindgen_ty_1 { + #[doc = " Value of fence to be signaled"] + pub value: ::std::os::raw::c_ulonglong, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union cudaExternalSemaphoreSignalParams_v1__bindgen_ty_1__bindgen_ty_2 { + #[doc = " Pointer to NvSciSyncFence. Valid if ::cudaExternalSemaphoreHandleType\n is of type ::cudaExternalSemaphoreHandleTypeNvSciSync."] + pub fence: *mut ::std::os::raw::c_void, + pub reserved: ::std::os::raw::c_ulonglong, +} +#[doc = " Parameters for keyed mutex objects"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreSignalParams_v1__bindgen_ty_1__bindgen_ty_3 { + pub key: ::std::os::raw::c_ulonglong, +} +#[doc = " External semaphore wait parameters(deprecated)"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreWaitParams_v1 { + pub params: cudaExternalSemaphoreWaitParams_v1__bindgen_ty_1, + #[doc = " Only when ::cudaExternalSemaphoreSignalParams is used to\n signal a ::cudaExternalSemaphore_t of type\n ::cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is\n ::cudaExternalSemaphoreSignalSkipNvSciBufMemSync: which indicates\n that while waiting for the ::cudaExternalSemaphore_t, no memory\n synchronization operations should be performed for any external memory\n object imported as ::cudaExternalMemoryHandleTypeNvSciBuf.\n For all other types of ::cudaExternalSemaphore_t, flags must be zero."] + pub flags: ::std::os::raw::c_uint, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreWaitParams_v1__bindgen_ty_1 { + pub fence: cudaExternalSemaphoreWaitParams_v1__bindgen_ty_1__bindgen_ty_1, + pub nvSciSync: cudaExternalSemaphoreWaitParams_v1__bindgen_ty_1__bindgen_ty_2, + pub keyedMutex: cudaExternalSemaphoreWaitParams_v1__bindgen_ty_1__bindgen_ty_3, +} +#[doc = " Parameters for fence objects"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreWaitParams_v1__bindgen_ty_1__bindgen_ty_1 { + #[doc = " Value of fence to be waited on"] + pub value: ::std::os::raw::c_ulonglong, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union 
cudaExternalSemaphoreWaitParams_v1__bindgen_ty_1__bindgen_ty_2 { + #[doc = " Pointer to NvSciSyncFence. Valid if ::cudaExternalSemaphoreHandleType\n is of type ::cudaExternalSemaphoreHandleTypeNvSciSync."] + pub fence: *mut ::std::os::raw::c_void, + pub reserved: ::std::os::raw::c_ulonglong, +} +#[doc = " Parameters for keyed mutex objects"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreWaitParams_v1__bindgen_ty_1__bindgen_ty_3 { + #[doc = " Value of key to acquire the mutex with"] + pub key: ::std::os::raw::c_ulonglong, + #[doc = " Timeout in milliseconds to wait to acquire the mutex"] + pub timeoutMs: ::std::os::raw::c_uint, +} +#[doc = " External semaphore signal parameters, compatible with driver type"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreSignalParams { + pub params: cudaExternalSemaphoreSignalParams__bindgen_ty_1, + #[doc = " Only when ::cudaExternalSemaphoreSignalParams is used to\n signal a ::cudaExternalSemaphore_t of type\n ::cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is\n ::cudaExternalSemaphoreSignalSkipNvSciBufMemSync: which indicates\n that while signaling the ::cudaExternalSemaphore_t, no memory\n synchronization operations should be performed for any external memory\n object imported as ::cudaExternalMemoryHandleTypeNvSciBuf.\n For all other types of ::cudaExternalSemaphore_t, flags must be zero."] + pub flags: ::std::os::raw::c_uint, + pub reserved: [::std::os::raw::c_uint; 16usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreSignalParams__bindgen_ty_1 { + pub fence: cudaExternalSemaphoreSignalParams__bindgen_ty_1__bindgen_ty_1, + pub nvSciSync: cudaExternalSemaphoreSignalParams__bindgen_ty_1__bindgen_ty_2, + pub keyedMutex: cudaExternalSemaphoreSignalParams__bindgen_ty_1__bindgen_ty_3, + pub reserved: [::std::os::raw::c_uint; 12usize], +} +#[doc = " Parameters for fence objects"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreSignalParams__bindgen_ty_1__bindgen_ty_1 { + #[doc = " Value of fence to be signaled"] + pub value: ::std::os::raw::c_ulonglong, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union cudaExternalSemaphoreSignalParams__bindgen_ty_1__bindgen_ty_2 { + #[doc = " Pointer to NvSciSyncFence. 
Valid if ::cudaExternalSemaphoreHandleType\n is of type ::cudaExternalSemaphoreHandleTypeNvSciSync."] + pub fence: *mut ::std::os::raw::c_void, + pub reserved: ::std::os::raw::c_ulonglong, +} +#[doc = " Parameters for keyed mutex objects"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreSignalParams__bindgen_ty_1__bindgen_ty_3 { + pub key: ::std::os::raw::c_ulonglong, +} +#[doc = " External semaphore wait parameters, compatible with driver type"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreWaitParams { + pub params: cudaExternalSemaphoreWaitParams__bindgen_ty_1, + #[doc = " Only when ::cudaExternalSemaphoreSignalParams is used to\n signal a ::cudaExternalSemaphore_t of type\n ::cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is\n ::cudaExternalSemaphoreSignalSkipNvSciBufMemSync: which indicates\n that while waiting for the ::cudaExternalSemaphore_t, no memory\n synchronization operations should be performed for any external memory\n object imported as ::cudaExternalMemoryHandleTypeNvSciBuf.\n For all other types of ::cudaExternalSemaphore_t, flags must be zero."] + pub flags: ::std::os::raw::c_uint, + pub reserved: [::std::os::raw::c_uint; 16usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreWaitParams__bindgen_ty_1 { + pub fence: cudaExternalSemaphoreWaitParams__bindgen_ty_1__bindgen_ty_1, + pub nvSciSync: cudaExternalSemaphoreWaitParams__bindgen_ty_1__bindgen_ty_2, + pub keyedMutex: cudaExternalSemaphoreWaitParams__bindgen_ty_1__bindgen_ty_3, + pub reserved: [::std::os::raw::c_uint; 10usize], +} +#[doc = " Parameters for fence objects"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreWaitParams__bindgen_ty_1__bindgen_ty_1 { + #[doc = " Value of fence to be waited on"] + pub value: ::std::os::raw::c_ulonglong, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union cudaExternalSemaphoreWaitParams__bindgen_ty_1__bindgen_ty_2 { + #[doc = " Pointer to NvSciSyncFence. 
Valid if ::cudaExternalSemaphoreHandleType\n is of type ::cudaExternalSemaphoreHandleTypeNvSciSync."] + pub fence: *mut ::std::os::raw::c_void, + pub reserved: ::std::os::raw::c_ulonglong, +} +#[doc = " Parameters for keyed mutex objects"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreWaitParams__bindgen_ty_1__bindgen_ty_3 { + #[doc = " Value of key to acquire the mutex with"] + pub key: ::std::os::raw::c_ulonglong, + #[doc = " Timeout in milliseconds to wait to acquire the mutex"] + pub timeoutMs: ::std::os::raw::c_uint, +} +#[doc = " CUDA Error types"] +pub use self::cudaError as cudaError_t; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUstream_st { + _unused: [u8; 0], +} +#[doc = " CUDA stream"] +pub type cudaStream_t = *mut CUstream_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUevent_st { + _unused: [u8; 0], +} +#[doc = " CUDA event types"] +pub type cudaEvent_t = *mut CUevent_st; +#[doc = " CUDA graphics resource types"] +pub type cudaGraphicsResource_t = *mut cudaGraphicsResource; +#[doc = " CUDA output file modes"] +pub use self::cudaOutputMode as cudaOutputMode_t; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUexternalMemory_st { + _unused: [u8; 0], +} +#[doc = " CUDA external memory"] +pub type cudaExternalMemory_t = *mut CUexternalMemory_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUexternalSemaphore_st { + _unused: [u8; 0], +} +#[doc = " CUDA external semaphore"] +pub type cudaExternalSemaphore_t = *mut CUexternalSemaphore_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUgraph_st { + _unused: [u8; 0], +} +#[doc = " CUDA graph"] +pub type cudaGraph_t = *mut CUgraph_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUgraphNode_st { + _unused: [u8; 0], +} +#[doc = " CUDA graph node."] +pub type cudaGraphNode_t = *mut CUgraphNode_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUuserObject_st { + _unused: [u8; 0], +} +#[doc = " CUDA user object for graphs"] +pub type cudaUserObject_t = *mut CUuserObject_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUfunc_st { + _unused: [u8; 0], +} +#[doc = " CUDA function"] +pub type cudaFunction_t = *mut CUfunc_st; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct CUmemPoolHandle_st { + _unused: [u8; 0], +} +#[doc = " CUDA memory pool"] +pub type cudaMemPool_t = *mut CUmemPoolHandle_st; +impl cudaCGScope { + #[doc = "< Invalid cooperative group scope"] + pub const cudaCGScopeInvalid: cudaCGScope = cudaCGScope(0); +} +impl cudaCGScope { + #[doc = "< Scope represented by a grid_group"] + pub const cudaCGScopeGrid: cudaCGScope = cudaCGScope(1); +} +impl cudaCGScope { + #[doc = "< Scope represented by a multi_grid_group"] + pub const cudaCGScopeMultiGrid: cudaCGScope = cudaCGScope(2); +} +#[repr(transparent)] +#[doc = " CUDA cooperative group scope"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaCGScope(pub ::std::os::raw::c_uint); +#[doc = " CUDA launch parameters"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaLaunchParams { + #[doc = "< Device function symbol"] + pub func: *mut ::std::os::raw::c_void, + #[doc = "< Grid dimentions"] + pub gridDim: dim3, + #[doc = "< Block dimentions"] + pub blockDim: dim3, + #[doc = "< Arguments"] + pub args: *mut *mut ::std::os::raw::c_void, + #[doc = "< Shared memory"] + pub sharedMem: usize, + #[doc = "< Stream identifier"] + pub stream: cudaStream_t, +} +#[doc = " CUDA GPU kernel node parameters"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaKernelNodeParams { + #[doc = "< Kernel to launch"] + pub func: *mut 
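+// Editor's illustrative sketch, not part of the rust-bindgen output: populating the
+// cudaKernelNodeParams declared here (its remaining fields continue just below). The
+// kernel symbol `my_kernel_sym` and the device pointer `dev_buf` are assumptions made
+// only for this example.
+//
+//     let mut dev_buf: *mut ::std::os::raw::c_void = std::ptr::null_mut(); // assumed device allocation
+//     let mut args = [&mut dev_buf as *mut _ as *mut ::std::os::raw::c_void];
+//     let node_params = cudaKernelNodeParams {
+//         func: my_kernel_sym,                  // *mut c_void naming the kernel (assumed)
+//         gridDim: dim3 { x: 64, y: 1, z: 1 },
+//         blockDim: dim3 { x: 256, y: 1, z: 1 },
+//         sharedMemBytes: 0,
+//         kernelParams: args.as_mut_ptr(),      // array of pointers to the individual arguments
+//         extra: std::ptr::null_mut(),
+//     };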
::std::os::raw::c_void, + #[doc = "< Grid dimensions"] + pub gridDim: dim3, + #[doc = "< Block dimensions"] + pub blockDim: dim3, + #[doc = "< Dynamic shared-memory size per thread block in bytes"] + pub sharedMemBytes: ::std::os::raw::c_uint, + #[doc = "< Array of pointers to individual kernel arguments"] + pub kernelParams: *mut *mut ::std::os::raw::c_void, + #[doc = "< Pointer to kernel arguments in the \"extra\" format"] + pub extra: *mut *mut ::std::os::raw::c_void, +} +#[doc = " External semaphore signal node parameters"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreSignalNodeParams { + #[doc = "< Array of external semaphore handles."] + pub extSemArray: *mut cudaExternalSemaphore_t, + #[doc = "< Array of external semaphore signal parameters."] + pub paramsArray: *const cudaExternalSemaphoreSignalParams, + #[doc = "< Number of handles and parameters supplied in extSemArray and paramsArray."] + pub numExtSems: ::std::os::raw::c_uint, +} +#[doc = " External semaphore wait node parameters"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaExternalSemaphoreWaitNodeParams { + #[doc = "< Array of external semaphore handles."] + pub extSemArray: *mut cudaExternalSemaphore_t, + #[doc = "< Array of external semaphore wait parameters."] + pub paramsArray: *const cudaExternalSemaphoreWaitParams, + #[doc = "< Number of handles and parameters supplied in extSemArray and paramsArray."] + pub numExtSems: ::std::os::raw::c_uint, +} +impl cudaGraphNodeType { + #[doc = "< GPU kernel node"] + pub const cudaGraphNodeTypeKernel: cudaGraphNodeType = cudaGraphNodeType(0); +} +impl cudaGraphNodeType { + #[doc = "< Memcpy node"] + pub const cudaGraphNodeTypeMemcpy: cudaGraphNodeType = cudaGraphNodeType(1); +} +impl cudaGraphNodeType { + #[doc = "< Memset node"] + pub const cudaGraphNodeTypeMemset: cudaGraphNodeType = cudaGraphNodeType(2); +} +impl cudaGraphNodeType { + #[doc = "< Host (executable) node"] + pub const cudaGraphNodeTypeHost: cudaGraphNodeType = cudaGraphNodeType(3); +} +impl cudaGraphNodeType { + #[doc = "< Node which executes an embedded graph"] + pub const cudaGraphNodeTypeGraph: cudaGraphNodeType = cudaGraphNodeType(4); +} +impl cudaGraphNodeType { + #[doc = "< Empty (no-op) node"] + pub const cudaGraphNodeTypeEmpty: cudaGraphNodeType = cudaGraphNodeType(5); +} +impl cudaGraphNodeType { + #[doc = "< External event wait node"] + pub const cudaGraphNodeTypeWaitEvent: cudaGraphNodeType = cudaGraphNodeType(6); +} +impl cudaGraphNodeType { + #[doc = "< External event record node"] + pub const cudaGraphNodeTypeEventRecord: cudaGraphNodeType = cudaGraphNodeType(7); +} +impl cudaGraphNodeType { + #[doc = "< External semaphore signal node"] + pub const cudaGraphNodeTypeExtSemaphoreSignal: cudaGraphNodeType = cudaGraphNodeType(8); +} +impl cudaGraphNodeType { + #[doc = "< External semaphore wait node"] + pub const cudaGraphNodeTypeExtSemaphoreWait: cudaGraphNodeType = cudaGraphNodeType(9); +} +impl cudaGraphNodeType { + #[doc = "< Memory allocation node"] + pub const cudaGraphNodeTypeMemAlloc: cudaGraphNodeType = cudaGraphNodeType(10); +} +impl cudaGraphNodeType { + #[doc = "< Memory free node"] + pub const cudaGraphNodeTypeMemFree: cudaGraphNodeType = cudaGraphNodeType(11); +} +impl cudaGraphNodeType { + pub const cudaGraphNodeTypeCount: cudaGraphNodeType = cudaGraphNodeType(12); +} +#[repr(transparent)] +#[doc = " CUDA Graph node types"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaGraphNodeType(pub ::std::os::raw::c_uint); +#[repr(C)] +#[derive(Copy, 
Clone)] +pub struct CUgraphExec_st { + _unused: [u8; 0], +} +#[doc = " CUDA executable (launchable) graph"] +pub type cudaGraphExec_t = *mut CUgraphExec_st; +impl cudaGraphExecUpdateResult { + #[doc = "< The update succeeded"] + pub const cudaGraphExecUpdateSuccess: cudaGraphExecUpdateResult = cudaGraphExecUpdateResult(0); +} +impl cudaGraphExecUpdateResult { + #[doc = "< The update failed for an unexpected reason which is described in the return value of the function"] + pub const cudaGraphExecUpdateError: cudaGraphExecUpdateResult = cudaGraphExecUpdateResult(1); +} +impl cudaGraphExecUpdateResult { + #[doc = "< The update failed because the topology changed"] + pub const cudaGraphExecUpdateErrorTopologyChanged: cudaGraphExecUpdateResult = + cudaGraphExecUpdateResult(2); +} +impl cudaGraphExecUpdateResult { + #[doc = "< The update failed because a node type changed"] + pub const cudaGraphExecUpdateErrorNodeTypeChanged: cudaGraphExecUpdateResult = + cudaGraphExecUpdateResult(3); +} +impl cudaGraphExecUpdateResult { + #[doc = "< The update failed because the function of a kernel node changed (CUDA driver < 11.2)"] + pub const cudaGraphExecUpdateErrorFunctionChanged: cudaGraphExecUpdateResult = + cudaGraphExecUpdateResult(4); +} +impl cudaGraphExecUpdateResult { + #[doc = "< The update failed because the parameters changed in a way that is not supported"] + pub const cudaGraphExecUpdateErrorParametersChanged: cudaGraphExecUpdateResult = + cudaGraphExecUpdateResult(5); +} +impl cudaGraphExecUpdateResult { + #[doc = "< The update failed because something about the node is not supported"] + pub const cudaGraphExecUpdateErrorNotSupported: cudaGraphExecUpdateResult = + cudaGraphExecUpdateResult(6); +} +impl cudaGraphExecUpdateResult { + #[doc = "< The update failed because the function of a kernel node changed in an unsupported way"] + pub const cudaGraphExecUpdateErrorUnsupportedFunctionChange: cudaGraphExecUpdateResult = + cudaGraphExecUpdateResult(7); +} +impl cudaGraphExecUpdateResult { + #[doc = "< The update failed because the node attributes changed in a way that is not supported"] + pub const cudaGraphExecUpdateErrorAttributesChanged: cudaGraphExecUpdateResult = + cudaGraphExecUpdateResult(8); +} +#[repr(transparent)] +#[doc = " CUDA Graph Update error types"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaGraphExecUpdateResult(pub ::std::os::raw::c_uint); +impl cudaGetDriverEntryPointFlags { + #[doc = "< Default search mode for driver symbols."] + pub const cudaEnableDefault: cudaGetDriverEntryPointFlags = cudaGetDriverEntryPointFlags(0); +} +impl cudaGetDriverEntryPointFlags { + #[doc = "< Search for legacy versions of driver symbols."] + pub const cudaEnableLegacyStream: cudaGetDriverEntryPointFlags = + cudaGetDriverEntryPointFlags(1); +} +impl cudaGetDriverEntryPointFlags { + #[doc = "< Search for per-thread versions of driver symbols."] + pub const cudaEnablePerThreadDefaultStream: cudaGetDriverEntryPointFlags = + cudaGetDriverEntryPointFlags(2); +} +#[repr(transparent)] +#[doc = " Flags to specify search options to be used with ::cudaGetDriverEntryPoint\n For more details see ::cuGetProcAddress"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaGetDriverEntryPointFlags(pub ::std::os::raw::c_uint); +impl cudaGraphDebugDotFlags { + #[doc = "< Output all debug data as if every debug flag is enabled"] + pub const cudaGraphDebugDotFlagsVerbose: cudaGraphDebugDotFlags = cudaGraphDebugDotFlags(1); +} +impl cudaGraphDebugDotFlags { + #[doc = "< Adds 
cudaKernelNodeParams to output"] + pub const cudaGraphDebugDotFlagsKernelNodeParams: cudaGraphDebugDotFlags = + cudaGraphDebugDotFlags(4); +} +impl cudaGraphDebugDotFlags { + #[doc = "< Adds cudaMemcpy3DParms to output"] + pub const cudaGraphDebugDotFlagsMemcpyNodeParams: cudaGraphDebugDotFlags = + cudaGraphDebugDotFlags(8); +} +impl cudaGraphDebugDotFlags { + #[doc = "< Adds cudaMemsetParams to output"] + pub const cudaGraphDebugDotFlagsMemsetNodeParams: cudaGraphDebugDotFlags = + cudaGraphDebugDotFlags(16); +} +impl cudaGraphDebugDotFlags { + #[doc = "< Adds cudaHostNodeParams to output"] + pub const cudaGraphDebugDotFlagsHostNodeParams: cudaGraphDebugDotFlags = + cudaGraphDebugDotFlags(32); +} +impl cudaGraphDebugDotFlags { + #[doc = "< Adds cudaEvent_t handle from record and wait nodes to output"] + pub const cudaGraphDebugDotFlagsEventNodeParams: cudaGraphDebugDotFlags = + cudaGraphDebugDotFlags(64); +} +impl cudaGraphDebugDotFlags { + #[doc = "< Adds cudaExternalSemaphoreSignalNodeParams values to output"] + pub const cudaGraphDebugDotFlagsExtSemasSignalNodeParams: cudaGraphDebugDotFlags = + cudaGraphDebugDotFlags(128); +} +impl cudaGraphDebugDotFlags { + #[doc = "< Adds cudaExternalSemaphoreWaitNodeParams to output"] + pub const cudaGraphDebugDotFlagsExtSemasWaitNodeParams: cudaGraphDebugDotFlags = + cudaGraphDebugDotFlags(256); +} +impl cudaGraphDebugDotFlags { + #[doc = "< Adds cudaKernelNodeAttrID values to output"] + pub const cudaGraphDebugDotFlagsKernelNodeAttributes: cudaGraphDebugDotFlags = + cudaGraphDebugDotFlags(512); +} +impl cudaGraphDebugDotFlags { + #[doc = "< Adds node handles and every kernel function handle to output"] + pub const cudaGraphDebugDotFlagsHandles: cudaGraphDebugDotFlags = cudaGraphDebugDotFlags(1024); +} +#[repr(transparent)] +#[doc = " CUDA Graph debug write options"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaGraphDebugDotFlags(pub ::std::os::raw::c_uint); +impl cudaGraphInstantiateFlags { + #[doc = "< Automatically free memory allocated in a graph before relaunching."] + pub const cudaGraphInstantiateFlagAutoFreeOnLaunch: cudaGraphInstantiateFlags = + cudaGraphInstantiateFlags(1); +} +impl cudaGraphInstantiateFlags { + #[doc = "< Run the graph using the per-node priority attributes rather than the\npriority of the stream it is launched into."] + pub const cudaGraphInstantiateFlagUseNodePriority: cudaGraphInstantiateFlags = + cudaGraphInstantiateFlags(8); +} +#[repr(transparent)] +#[doc = " Flags for instantiating a graph"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaGraphInstantiateFlags(pub ::std::os::raw::c_uint); +impl cudaLaunchAttributeID { + #[doc = "< Ignored entry, for convenient composition"] + pub const cudaLaunchAttributeIgnore: cudaLaunchAttributeID = cudaLaunchAttributeID(0); +} +impl cudaLaunchAttributeID { + #[doc = "< Valid for streams, graph nodes, launches."] + pub const cudaLaunchAttributeAccessPolicyWindow: cudaLaunchAttributeID = + cudaLaunchAttributeID(1); +} +impl cudaLaunchAttributeID { + #[doc = "< Valid for graph nodes, launches."] + pub const cudaLaunchAttributeCooperative: cudaLaunchAttributeID = cudaLaunchAttributeID(2); +} +impl cudaLaunchAttributeID { + #[doc = "< Valid for streams."] + pub const cudaLaunchAttributeSynchronizationPolicy: cudaLaunchAttributeID = + cudaLaunchAttributeID(3); +} +impl cudaLaunchAttributeID { + #[doc = "< Valid for graph nodes, launches."] + pub const cudaLaunchAttributeClusterDimension: cudaLaunchAttributeID = cudaLaunchAttributeID(4); +} +impl 
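+// Editor's illustrative note, not part of the rust-bindgen output: the
+// cudaGraphDebugDotFlags values above are bit masks, so a caller combines several of
+// them by OR-ing the inner field of the #[repr(transparent)] newtype, e.g.:
+//
+//     let flags = cudaGraphDebugDotFlags(
+//         cudaGraphDebugDotFlags::cudaGraphDebugDotFlagsKernelNodeParams.0
+//             | cudaGraphDebugDotFlags::cudaGraphDebugDotFlagsMemsetNodeParams.0,
+//     );
+//
+// The combined value is consumed by the CUDA runtime's cudaGraphDebugDotPrint entry point.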
cudaLaunchAttributeID { + #[doc = "< Valid for graph nodes, launches."] + pub const cudaLaunchAttributeClusterSchedulingPolicyPreference: cudaLaunchAttributeID = + cudaLaunchAttributeID(5); +} +impl cudaLaunchAttributeID { + #[doc = "< Valid for launches. Setting\nprogrammaticStreamSerializationAllowed to non-0\nsignals that the kernel will use programmatic\nmeans to resolve its stream dependency, so that\nthe CUDA runtime should opportunistically allow\nthe grid's execution to overlap with the previous\nkernel in the stream, if that kernel requests the\noverlap."] + pub const cudaLaunchAttributeProgrammaticStreamSerialization: cudaLaunchAttributeID = + cudaLaunchAttributeID(6); +} +impl cudaLaunchAttributeID { + #[doc = "< Valid for launches. Event recorded through this launch\nattribute is guaranteed to only trigger after all\nblock in the associated kernel trigger the event. A\nblock can trigger the event through PTX\ngriddepcontrol.launch_dependents. A trigger can also\nbe inserted at the beginning of each block's execution\nif triggerAtBlockStart is set to non-0. Note that\ndependents (including the CPU thread calling\ncudaEventSynchronize()) are not guaranteed to observe\nthe release precisely when it is released. For\nexample, cudaEventSynchronize() may only observe the\nevent trigger long after the associated kernel has\ncompleted. This recording type is primarily meant for\nestablishing programmatic dependency between device\ntasks. The event supplied must not be an interprocess\nor interop event. The event must disable timing\n(i.e. created with ::cudaEventDisableTiming flag\nset)."] + pub const cudaLaunchAttributeProgrammaticEvent: cudaLaunchAttributeID = + cudaLaunchAttributeID(7); +} +impl cudaLaunchAttributeID { + #[doc = "< Valid for graph nodes."] + pub const cudaLaunchAttributePriority: cudaLaunchAttributeID = cudaLaunchAttributeID(8); +} +#[repr(transparent)] +#[doc = " Launch attributes enum; used as id field of ::cudaLaunchAttribute"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaLaunchAttributeID(pub ::std::os::raw::c_uint); +#[doc = " Launch attributes union; used as value field of ::cudaLaunchAttribute"] +#[repr(C)] +#[derive(Copy, Clone)] +pub union cudaLaunchAttributeValue { + pub pad: [::std::os::raw::c_char; 64usize], + pub accessPolicyWindow: cudaAccessPolicyWindow, + pub cooperative: ::std::os::raw::c_int, + pub syncPolicy: cudaSynchronizationPolicy, + pub clusterDim: cudaLaunchAttributeValue__bindgen_ty_1, + pub clusterSchedulingPolicyPreference: cudaClusterSchedulingPolicy, + pub programmaticStreamSerializationAllowed: ::std::os::raw::c_int, + pub programmaticEvent: cudaLaunchAttributeValue__bindgen_ty_2, + pub priority: ::std::os::raw::c_int, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaLaunchAttributeValue__bindgen_ty_1 { + pub x: ::std::os::raw::c_uint, + pub y: ::std::os::raw::c_uint, + pub z: ::std::os::raw::c_uint, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaLaunchAttributeValue__bindgen_ty_2 { + pub event: cudaEvent_t, + pub flags: ::std::os::raw::c_int, + pub triggerAtBlockStart: ::std::os::raw::c_int, +} +#[doc = " Launch attribute"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cudaLaunchAttribute_st { + pub id: cudaLaunchAttributeID, + pub pad: [::std::os::raw::c_char; 4usize], + pub val: cudaLaunchAttributeValue, +} +#[doc = " Launch attribute"] +pub type cudaLaunchAttribute = cudaLaunchAttribute_st; +#[doc = " CUDA extensible launch configuration"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct 
cudaLaunchConfig_st { + #[doc = "< Grid dimentions"] + pub gridDim: dim3, + #[doc = "< Block dimentions"] + pub blockDim: dim3, + #[doc = "< Dynamic shared-memory size per thread block in bytes"] + pub dynamicSmemBytes: usize, + #[doc = "< Stream identifier"] + pub stream: cudaStream_t, + #[doc = "< nullable if numAttrs == 0"] + pub attrs: *mut cudaLaunchAttribute, + #[doc = "< Number of attributes populated in attrs"] + pub numAttrs: ::std::os::raw::c_uint, +} +#[doc = " CUDA extensible launch configuration"] +pub type cudaLaunchConfig_t = cudaLaunchConfig_st; +pub type __off_t = ::std::os::raw::c_long; +pub type __off64_t = ::std::os::raw::c_long; +pub type cuFloatComplex = float2; +pub type cuDoubleComplex = double2; +pub type cuComplex = cuFloatComplex; +impl cudaDataType_t { + pub const CUDA_R_16F: cudaDataType_t = cudaDataType_t(2); +} +impl cudaDataType_t { + pub const CUDA_C_16F: cudaDataType_t = cudaDataType_t(6); +} +impl cudaDataType_t { + pub const CUDA_R_16BF: cudaDataType_t = cudaDataType_t(14); +} +impl cudaDataType_t { + pub const CUDA_C_16BF: cudaDataType_t = cudaDataType_t(15); +} +impl cudaDataType_t { + pub const CUDA_R_32F: cudaDataType_t = cudaDataType_t(0); +} +impl cudaDataType_t { + pub const CUDA_C_32F: cudaDataType_t = cudaDataType_t(4); +} +impl cudaDataType_t { + pub const CUDA_R_64F: cudaDataType_t = cudaDataType_t(1); +} +impl cudaDataType_t { + pub const CUDA_C_64F: cudaDataType_t = cudaDataType_t(5); +} +impl cudaDataType_t { + pub const CUDA_R_4I: cudaDataType_t = cudaDataType_t(16); +} +impl cudaDataType_t { + pub const CUDA_C_4I: cudaDataType_t = cudaDataType_t(17); +} +impl cudaDataType_t { + pub const CUDA_R_4U: cudaDataType_t = cudaDataType_t(18); +} +impl cudaDataType_t { + pub const CUDA_C_4U: cudaDataType_t = cudaDataType_t(19); +} +impl cudaDataType_t { + pub const CUDA_R_8I: cudaDataType_t = cudaDataType_t(3); +} +impl cudaDataType_t { + pub const CUDA_C_8I: cudaDataType_t = cudaDataType_t(7); +} +impl cudaDataType_t { + pub const CUDA_R_8U: cudaDataType_t = cudaDataType_t(8); +} +impl cudaDataType_t { + pub const CUDA_C_8U: cudaDataType_t = cudaDataType_t(9); +} +impl cudaDataType_t { + pub const CUDA_R_16I: cudaDataType_t = cudaDataType_t(20); +} +impl cudaDataType_t { + pub const CUDA_C_16I: cudaDataType_t = cudaDataType_t(21); +} +impl cudaDataType_t { + pub const CUDA_R_16U: cudaDataType_t = cudaDataType_t(22); +} +impl cudaDataType_t { + pub const CUDA_C_16U: cudaDataType_t = cudaDataType_t(23); +} +impl cudaDataType_t { + pub const CUDA_R_32I: cudaDataType_t = cudaDataType_t(10); +} +impl cudaDataType_t { + pub const CUDA_C_32I: cudaDataType_t = cudaDataType_t(11); +} +impl cudaDataType_t { + pub const CUDA_R_32U: cudaDataType_t = cudaDataType_t(12); +} +impl cudaDataType_t { + pub const CUDA_C_32U: cudaDataType_t = cudaDataType_t(13); +} +impl cudaDataType_t { + pub const CUDA_R_64I: cudaDataType_t = cudaDataType_t(24); +} +impl cudaDataType_t { + pub const CUDA_C_64I: cudaDataType_t = cudaDataType_t(25); +} +impl cudaDataType_t { + pub const CUDA_R_64U: cudaDataType_t = cudaDataType_t(26); +} +impl cudaDataType_t { + pub const CUDA_C_64U: cudaDataType_t = cudaDataType_t(27); +} +impl cudaDataType_t { + pub const CUDA_R_8F_E4M3: cudaDataType_t = cudaDataType_t(28); +} +impl cudaDataType_t { + pub const CUDA_R_8F_E5M2: cudaDataType_t = cudaDataType_t(29); +} +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cudaDataType_t(pub ::std::os::raw::c_uint); +pub use self::cudaDataType_t as cudaDataType; +impl 
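+// Editor's illustrative note, not part of the rust-bindgen output: a caller selecting
+// one of the cudaDataType_t descriptors above for its element type might wrap the choice
+// in small helpers like these (the helpers are assumptions; only the constants are real):
+//
+//     fn dtype_f32() -> cudaDataType_t { cudaDataType_t::CUDA_R_32F }
+//     fn dtype_f64() -> cudaDataType_t { cudaDataType_t::CUDA_R_64F }
+//     fn dtype_c32() -> cudaDataType_t { cudaDataType_t::CUDA_C_32F } // elements are cuComplex (float2)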
libraryPropertyType_t { + pub const MAJOR_VERSION: libraryPropertyType_t = libraryPropertyType_t(0); +} +impl libraryPropertyType_t { + pub const MINOR_VERSION: libraryPropertyType_t = libraryPropertyType_t(1); +} +impl libraryPropertyType_t { + pub const PATCH_LEVEL: libraryPropertyType_t = libraryPropertyType_t(2); +} +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct libraryPropertyType_t(pub ::std::os::raw::c_uint); +pub use self::libraryPropertyType_t as libraryPropertyType; +impl cublasStatus_t { + pub const CUBLAS_STATUS_SUCCESS: cublasStatus_t = cublasStatus_t(0); +} +impl cublasStatus_t { + pub const CUBLAS_STATUS_NOT_INITIALIZED: cublasStatus_t = cublasStatus_t(1); +} +impl cublasStatus_t { + pub const CUBLAS_STATUS_ALLOC_FAILED: cublasStatus_t = cublasStatus_t(3); +} +impl cublasStatus_t { + pub const CUBLAS_STATUS_INVALID_VALUE: cublasStatus_t = cublasStatus_t(7); +} +impl cublasStatus_t { + pub const CUBLAS_STATUS_ARCH_MISMATCH: cublasStatus_t = cublasStatus_t(8); +} +impl cublasStatus_t { + pub const CUBLAS_STATUS_MAPPING_ERROR: cublasStatus_t = cublasStatus_t(11); +} +impl cublasStatus_t { + pub const CUBLAS_STATUS_EXECUTION_FAILED: cublasStatus_t = cublasStatus_t(13); +} +impl cublasStatus_t { + pub const CUBLAS_STATUS_INTERNAL_ERROR: cublasStatus_t = cublasStatus_t(14); +} +impl cublasStatus_t { + pub const CUBLAS_STATUS_NOT_SUPPORTED: cublasStatus_t = cublasStatus_t(15); +} +impl cublasStatus_t { + pub const CUBLAS_STATUS_LICENSE_ERROR: cublasStatus_t = cublasStatus_t(16); +} +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasStatus_t(pub ::std::os::raw::c_uint); +impl cublasFillMode_t { + pub const CUBLAS_FILL_MODE_LOWER: cublasFillMode_t = cublasFillMode_t(0); +} +impl cublasFillMode_t { + pub const CUBLAS_FILL_MODE_UPPER: cublasFillMode_t = cublasFillMode_t(1); +} +impl cublasFillMode_t { + pub const CUBLAS_FILL_MODE_FULL: cublasFillMode_t = cublasFillMode_t(2); +} +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasFillMode_t(pub ::std::os::raw::c_uint); +impl cublasDiagType_t { + pub const CUBLAS_DIAG_NON_UNIT: cublasDiagType_t = cublasDiagType_t(0); +} +impl cublasDiagType_t { + pub const CUBLAS_DIAG_UNIT: cublasDiagType_t = cublasDiagType_t(1); +} +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasDiagType_t(pub ::std::os::raw::c_uint); +impl cublasSideMode_t { + pub const CUBLAS_SIDE_LEFT: cublasSideMode_t = cublasSideMode_t(0); +} +impl cublasSideMode_t { + pub const CUBLAS_SIDE_RIGHT: cublasSideMode_t = cublasSideMode_t(1); +} +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasSideMode_t(pub ::std::os::raw::c_uint); +impl cublasOperation_t { + pub const CUBLAS_OP_N: cublasOperation_t = cublasOperation_t(0); +} +impl cublasOperation_t { + pub const CUBLAS_OP_T: cublasOperation_t = cublasOperation_t(1); +} +impl cublasOperation_t { + pub const CUBLAS_OP_C: cublasOperation_t = cublasOperation_t(2); +} +impl cublasOperation_t { + pub const CUBLAS_OP_HERMITAN: cublasOperation_t = cublasOperation_t(2); +} +impl cublasOperation_t { + pub const CUBLAS_OP_CONJG: cublasOperation_t = cublasOperation_t(3); +} +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasOperation_t(pub ::std::os::raw::c_uint); +impl cublasPointerMode_t { + pub const CUBLAS_POINTER_MODE_HOST: cublasPointerMode_t = cublasPointerMode_t(0); +} +impl cublasPointerMode_t { + pub const CUBLAS_POINTER_MODE_DEVICE: 
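+// Editor's illustrative note, not part of the rust-bindgen output: because these status
+// and mode types are #[repr(transparent)] newtypes deriving PartialEq, callers check a
+// result by comparing it against the associated constants, e.g.:
+//
+//     fn check(status: cublasStatus_t) -> Result<(), cublasStatus_t> {
+//         if status == cublasStatus_t::CUBLAS_STATUS_SUCCESS {
+//             Ok(())
+//         } else {
+//             Err(status)
+//         }
+//     }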
cublasPointerMode_t = cublasPointerMode_t(1); +} +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasPointerMode_t(pub ::std::os::raw::c_uint); +impl cublasAtomicsMode_t { + pub const CUBLAS_ATOMICS_NOT_ALLOWED: cublasAtomicsMode_t = cublasAtomicsMode_t(0); +} +impl cublasAtomicsMode_t { + pub const CUBLAS_ATOMICS_ALLOWED: cublasAtomicsMode_t = cublasAtomicsMode_t(1); +} +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasAtomicsMode_t(pub ::std::os::raw::c_uint); +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_DFALT: cublasGemmAlgo_t = cublasGemmAlgo_t(-1); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_DEFAULT: cublasGemmAlgo_t = cublasGemmAlgo_t(-1); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO0: cublasGemmAlgo_t = cublasGemmAlgo_t(0); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO1: cublasGemmAlgo_t = cublasGemmAlgo_t(1); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO2: cublasGemmAlgo_t = cublasGemmAlgo_t(2); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO3: cublasGemmAlgo_t = cublasGemmAlgo_t(3); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO4: cublasGemmAlgo_t = cublasGemmAlgo_t(4); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO5: cublasGemmAlgo_t = cublasGemmAlgo_t(5); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO6: cublasGemmAlgo_t = cublasGemmAlgo_t(6); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO7: cublasGemmAlgo_t = cublasGemmAlgo_t(7); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO8: cublasGemmAlgo_t = cublasGemmAlgo_t(8); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO9: cublasGemmAlgo_t = cublasGemmAlgo_t(9); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO10: cublasGemmAlgo_t = cublasGemmAlgo_t(10); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO11: cublasGemmAlgo_t = cublasGemmAlgo_t(11); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO12: cublasGemmAlgo_t = cublasGemmAlgo_t(12); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO13: cublasGemmAlgo_t = cublasGemmAlgo_t(13); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO14: cublasGemmAlgo_t = cublasGemmAlgo_t(14); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO15: cublasGemmAlgo_t = cublasGemmAlgo_t(15); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO16: cublasGemmAlgo_t = cublasGemmAlgo_t(16); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO17: cublasGemmAlgo_t = cublasGemmAlgo_t(17); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO18: cublasGemmAlgo_t = cublasGemmAlgo_t(18); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO19: cublasGemmAlgo_t = cublasGemmAlgo_t(19); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO20: cublasGemmAlgo_t = cublasGemmAlgo_t(20); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO21: cublasGemmAlgo_t = cublasGemmAlgo_t(21); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO22: cublasGemmAlgo_t = cublasGemmAlgo_t(22); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO23: cublasGemmAlgo_t = cublasGemmAlgo_t(23); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_DEFAULT_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(99); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_DFALT_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(99); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO0_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(100); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO1_TENSOR_OP: 
cublasGemmAlgo_t = cublasGemmAlgo_t(101); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO2_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(102); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO3_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(103); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO4_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(104); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO5_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(105); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO6_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(106); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO7_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(107); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO8_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(108); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO9_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(109); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO10_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(110); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO11_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(111); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO12_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(112); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO13_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(113); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO14_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(114); +} +impl cublasGemmAlgo_t { + pub const CUBLAS_GEMM_ALGO15_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(115); +} +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasGemmAlgo_t(pub ::std::os::raw::c_int); +impl cublasMath_t { + pub const CUBLAS_DEFAULT_MATH: cublasMath_t = cublasMath_t(0); +} +impl cublasMath_t { + pub const CUBLAS_TENSOR_OP_MATH: cublasMath_t = cublasMath_t(1); +} +impl cublasMath_t { + pub const CUBLAS_PEDANTIC_MATH: cublasMath_t = cublasMath_t(2); +} +impl cublasMath_t { + pub const CUBLAS_TF32_TENSOR_OP_MATH: cublasMath_t = cublasMath_t(3); +} +impl cublasMath_t { + pub const CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION: cublasMath_t = cublasMath_t(16); +} +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasMath_t(pub ::std::os::raw::c_uint); +pub use self::cudaDataType as cublasDataType_t; +impl cublasComputeType_t { + pub const CUBLAS_COMPUTE_16F: cublasComputeType_t = cublasComputeType_t(64); +} +impl cublasComputeType_t { + pub const CUBLAS_COMPUTE_16F_PEDANTIC: cublasComputeType_t = cublasComputeType_t(65); +} +impl cublasComputeType_t { + pub const CUBLAS_COMPUTE_32F: cublasComputeType_t = cublasComputeType_t(68); +} +impl cublasComputeType_t { + pub const CUBLAS_COMPUTE_32F_PEDANTIC: cublasComputeType_t = cublasComputeType_t(69); +} +impl cublasComputeType_t { + pub const CUBLAS_COMPUTE_32F_FAST_16F: cublasComputeType_t = cublasComputeType_t(74); +} +impl cublasComputeType_t { + pub const CUBLAS_COMPUTE_32F_FAST_16BF: cublasComputeType_t = cublasComputeType_t(75); +} +impl cublasComputeType_t { + pub const CUBLAS_COMPUTE_32F_FAST_TF32: cublasComputeType_t = cublasComputeType_t(77); +} +impl cublasComputeType_t { + pub const CUBLAS_COMPUTE_64F: cublasComputeType_t = cublasComputeType_t(70); +} +impl cublasComputeType_t { + pub const CUBLAS_COMPUTE_64F_PEDANTIC: cublasComputeType_t = cublasComputeType_t(71); +} +impl cublasComputeType_t { + pub const CUBLAS_COMPUTE_32I: cublasComputeType_t = cublasComputeType_t(72); +} +impl 
cublasComputeType_t { + pub const CUBLAS_COMPUTE_32I_PEDANTIC: cublasComputeType_t = cublasComputeType_t(73); +} +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasComputeType_t(pub ::std::os::raw::c_uint); +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cublasContext { + _unused: [u8; 0], +} +pub type cublasHandle_t = *mut cublasContext; +pub type cublasLogCallback = + ::std::option::Option<unsafe extern "C" fn(msg: *const ::std::os::raw::c_char)>; +pub type FILE = _IO_FILE; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct _IO_marker { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct _IO_codecvt { + _unused: [u8; 0], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct _IO_wide_data { + _unused: [u8; 0], +} +pub type _IO_lock_t = ::std::os::raw::c_void; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct _IO_FILE { + pub _flags: ::std::os::raw::c_int, + pub _IO_read_ptr: *mut ::std::os::raw::c_char, + pub _IO_read_end: *mut ::std::os::raw::c_char, + pub _IO_read_base: *mut ::std::os::raw::c_char, + pub _IO_write_base: *mut ::std::os::raw::c_char, + pub _IO_write_ptr: *mut ::std::os::raw::c_char, + pub _IO_write_end: *mut ::std::os::raw::c_char, + pub _IO_buf_base: *mut ::std::os::raw::c_char, + pub _IO_buf_end: *mut ::std::os::raw::c_char, + pub _IO_save_base: *mut ::std::os::raw::c_char, + pub _IO_backup_base: *mut ::std::os::raw::c_char, + pub _IO_save_end: *mut ::std::os::raw::c_char, + pub _markers: *mut _IO_marker, + pub _chain: *mut _IO_FILE, + pub _fileno: ::std::os::raw::c_int, + pub _flags2: ::std::os::raw::c_int, + pub _old_offset: __off_t, + pub _cur_column: ::std::os::raw::c_ushort, + pub _vtable_offset: ::std::os::raw::c_schar, + pub _shortbuf: [::std::os::raw::c_char; 1usize], + pub _lock: *mut _IO_lock_t, + pub _offset: __off64_t, + pub _codecvt: *mut _IO_codecvt, + pub _wide_data: *mut _IO_wide_data, + pub _freeres_list: *mut _IO_FILE, + pub _freeres_buf: *mut ::std::os::raw::c_void, + pub __pad5: usize, + pub _mode: ::std::os::raw::c_int, + pub _unused2: [::std::os::raw::c_char; 20usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cublasLtContext { + _unused: [u8; 0], +} +#[doc = " Opaque structure holding CUBLASLT context"] +pub type cublasLtHandle_t = *mut cublasLtContext; + +#[no_mangle] +pub unsafe extern "system" fn cublasLtCreate(lightHandle: *mut cublasLtHandle_t) -> cublasStatus_t { + crate::create(lightHandle) +} + +#[no_mangle] +pub unsafe extern "system" fn cublasLtDestroy(lightHandle: cublasLtHandle_t) -> cublasStatus_t { + crate::unsupported() +} + +#[no_mangle] +pub unsafe extern "system" fn cublasLtGetStatusName( + status: cublasStatus_t, +) -> *const ::std::os::raw::c_char { + unimplemented!() +} + +#[no_mangle] +pub unsafe extern "system" fn cublasLtGetStatusString( + status: cublasStatus_t, +) -> *const ::std::os::raw::c_char { + unimplemented!() +} + +#[no_mangle] +pub unsafe extern "system" fn cublasLtGetVersion() -> usize { + crate::get_version() +} + +#[no_mangle] +pub unsafe extern "system" fn cublasLtGetCudartVersion() -> usize { + unimplemented!() +} + +#[no_mangle] +pub unsafe extern "system" fn cublasLtGetProperty( + type_: libraryPropertyType, + value: *mut ::std::os::raw::c_int, +) -> cublasStatus_t { + crate::unsupported() +} + +#[no_mangle] +pub unsafe extern "system" fn cublasLtHeuristicsCacheGetCapacity( + capacity: *mut usize, +) -> cublasStatus_t { + crate::unsupported() +} + +#[no_mangle] +pub unsafe extern "system" fn cublasLtHeuristicsCacheSetCapacity( + capacity: usize, +) -> 
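// Illustrative sketch, not part of the generated bindings: driving the exported
// handle entry points from Rust. It assumes only what the declarations above show
// (cublasLtCreate delegates to crate::create, cublasLtDestroy is currently routed
// to crate::unsupported()) plus a CUBLAS_STATUS_SUCCESS constant on cublasStatus_t,
// as in cuBLAS.
unsafe fn handle_lifecycle_sketch() {
    let mut handle: cublasLtHandle_t = std::ptr::null_mut();
    // Initialize the cuBLASLt-compatible context.
    let status = cublasLtCreate(&mut handle);
    assert!(status == cublasStatus_t::CUBLAS_STATUS_SUCCESS);
    // Version query goes through crate::get_version().
    let version = cublasLtGetVersion();
    println!("cublasLt-compatible version: {}", version);
    // Tear the context down again; in this build the call reports an unsupported
    // status, so the result is observed rather than asserted.
    let _ = cublasLtDestroy(handle);
}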
cublasStatus_t { + crate::unsupported() +} +#[doc = " Semi-opaque descriptor for matrix memory layout"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cublasLtMatrixLayoutOpaque_t { + pub data: [u64; 8usize], +} +#[doc = " Opaque descriptor for matrix memory layout"] +pub type cublasLtMatrixLayout_t = *mut cublasLtMatrixLayoutOpaque_t; +#[doc = " Semi-opaque algorithm descriptor (to avoid complicated alloc/free schemes)\n\n This structure can be trivially serialized and later restored for use with the same version of cuBLAS library to save\n on selecting the right configuration again."] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cublasLtMatmulAlgo_t { + pub data: [u64; 8usize], +} +#[doc = " Semi-opaque descriptor for cublasLtMatmul() operation details"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cublasLtMatmulDescOpaque_t { + pub data: [u64; 23usize], +} +#[doc = " Opaque descriptor for cublasLtMatmul() operation details"] +pub type cublasLtMatmulDesc_t = *mut cublasLtMatmulDescOpaque_t; +#[doc = " Semi-opaque descriptor for cublasLtMatrixTransform() operation details"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cublasLtMatrixTransformDescOpaque_t { + pub data: [u64; 8usize], +} +#[doc = " Opaque descriptor for cublasLtMatrixTransform() operation details"] +pub type cublasLtMatrixTransformDesc_t = *mut cublasLtMatrixTransformDescOpaque_t; +#[doc = " Semi-opaque descriptor for cublasLtMatmulPreference() operation details"] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cublasLtMatmulPreferenceOpaque_t { + pub data: [u64; 10usize], +} +#[doc = " Opaque descriptor for cublasLtMatmulAlgoGetHeuristic() configuration"] +pub type cublasLtMatmulPreference_t = *mut cublasLtMatmulPreferenceOpaque_t; +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_UNDEFINED: cublasLtMatmulTile_t = cublasLtMatmulTile_t(0); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_8x8: cublasLtMatmulTile_t = cublasLtMatmulTile_t(1); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_8x16: cublasLtMatmulTile_t = cublasLtMatmulTile_t(2); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_16x8: cublasLtMatmulTile_t = cublasLtMatmulTile_t(3); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_8x32: cublasLtMatmulTile_t = cublasLtMatmulTile_t(4); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_16x16: cublasLtMatmulTile_t = cublasLtMatmulTile_t(5); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_32x8: cublasLtMatmulTile_t = cublasLtMatmulTile_t(6); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_8x64: cublasLtMatmulTile_t = cublasLtMatmulTile_t(7); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_16x32: cublasLtMatmulTile_t = cublasLtMatmulTile_t(8); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_32x16: cublasLtMatmulTile_t = cublasLtMatmulTile_t(9); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_64x8: cublasLtMatmulTile_t = cublasLtMatmulTile_t(10); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_32x32: cublasLtMatmulTile_t = cublasLtMatmulTile_t(11); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_32x64: cublasLtMatmulTile_t = cublasLtMatmulTile_t(12); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_64x32: cublasLtMatmulTile_t = cublasLtMatmulTile_t(13); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_32x128: cublasLtMatmulTile_t = cublasLtMatmulTile_t(14); +} +impl 
cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_64x64: cublasLtMatmulTile_t = cublasLtMatmulTile_t(15); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_128x32: cublasLtMatmulTile_t = cublasLtMatmulTile_t(16); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_64x128: cublasLtMatmulTile_t = cublasLtMatmulTile_t(17); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_128x64: cublasLtMatmulTile_t = cublasLtMatmulTile_t(18); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_64x256: cublasLtMatmulTile_t = cublasLtMatmulTile_t(19); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_128x128: cublasLtMatmulTile_t = cublasLtMatmulTile_t(20); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_256x64: cublasLtMatmulTile_t = cublasLtMatmulTile_t(21); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_64x512: cublasLtMatmulTile_t = cublasLtMatmulTile_t(22); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_128x256: cublasLtMatmulTile_t = cublasLtMatmulTile_t(23); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_256x128: cublasLtMatmulTile_t = cublasLtMatmulTile_t(24); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_512x64: cublasLtMatmulTile_t = cublasLtMatmulTile_t(25); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_64x96: cublasLtMatmulTile_t = cublasLtMatmulTile_t(26); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_96x64: cublasLtMatmulTile_t = cublasLtMatmulTile_t(27); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_96x128: cublasLtMatmulTile_t = cublasLtMatmulTile_t(28); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_128x160: cublasLtMatmulTile_t = cublasLtMatmulTile_t(29); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_160x128: cublasLtMatmulTile_t = cublasLtMatmulTile_t(30); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_192x128: cublasLtMatmulTile_t = cublasLtMatmulTile_t(31); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_128x192: cublasLtMatmulTile_t = cublasLtMatmulTile_t(32); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_128x96: cublasLtMatmulTile_t = cublasLtMatmulTile_t(33); +} +impl cublasLtMatmulTile_t { + pub const CUBLASLT_MATMUL_TILE_END: cublasLtMatmulTile_t = cublasLtMatmulTile_t(34); +} +#[repr(transparent)] +#[doc = " Tile size (in C/D matrix Rows x Cols)\n\n General order of tile IDs is sorted by size first and by first dimension second."] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtMatmulTile_t(pub ::std::os::raw::c_uint); +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_UNDEFINED: cublasLtMatmulStages_t = cublasLtMatmulStages_t(0); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_16x1: cublasLtMatmulStages_t = cublasLtMatmulStages_t(1); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_16x2: cublasLtMatmulStages_t = cublasLtMatmulStages_t(2); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_16x3: cublasLtMatmulStages_t = cublasLtMatmulStages_t(3); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_16x4: cublasLtMatmulStages_t = cublasLtMatmulStages_t(4); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_16x5: cublasLtMatmulStages_t = cublasLtMatmulStages_t(5); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_16x6: 
cublasLtMatmulStages_t = cublasLtMatmulStages_t(6); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_32x1: cublasLtMatmulStages_t = cublasLtMatmulStages_t(7); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_32x2: cublasLtMatmulStages_t = cublasLtMatmulStages_t(8); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_32x3: cublasLtMatmulStages_t = cublasLtMatmulStages_t(9); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_32x4: cublasLtMatmulStages_t = cublasLtMatmulStages_t(10); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_32x5: cublasLtMatmulStages_t = cublasLtMatmulStages_t(11); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_32x6: cublasLtMatmulStages_t = cublasLtMatmulStages_t(12); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_64x1: cublasLtMatmulStages_t = cublasLtMatmulStages_t(13); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_64x2: cublasLtMatmulStages_t = cublasLtMatmulStages_t(14); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_64x3: cublasLtMatmulStages_t = cublasLtMatmulStages_t(15); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_64x4: cublasLtMatmulStages_t = cublasLtMatmulStages_t(16); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_64x5: cublasLtMatmulStages_t = cublasLtMatmulStages_t(17); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_64x6: cublasLtMatmulStages_t = cublasLtMatmulStages_t(18); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_128x1: cublasLtMatmulStages_t = cublasLtMatmulStages_t(19); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_128x2: cublasLtMatmulStages_t = cublasLtMatmulStages_t(20); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_128x3: cublasLtMatmulStages_t = cublasLtMatmulStages_t(21); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_128x4: cublasLtMatmulStages_t = cublasLtMatmulStages_t(22); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_128x5: cublasLtMatmulStages_t = cublasLtMatmulStages_t(23); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_128x6: cublasLtMatmulStages_t = cublasLtMatmulStages_t(24); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_32x10: cublasLtMatmulStages_t = cublasLtMatmulStages_t(25); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_8x4: cublasLtMatmulStages_t = cublasLtMatmulStages_t(26); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_16x10: cublasLtMatmulStages_t = cublasLtMatmulStages_t(27); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_8x5: cublasLtMatmulStages_t = cublasLtMatmulStages_t(28); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_16x80: cublasLtMatmulStages_t = cublasLtMatmulStages_t(29); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_64x80: cublasLtMatmulStages_t = cublasLtMatmulStages_t(30); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_8x3: cublasLtMatmulStages_t = cublasLtMatmulStages_t(31); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_8xAUTO: cublasLtMatmulStages_t = cublasLtMatmulStages_t(32); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_16xAUTO: cublasLtMatmulStages_t = cublasLtMatmulStages_t(33); +} +impl cublasLtMatmulStages_t { + 
pub const CUBLASLT_MATMUL_STAGES_32xAUTO: cublasLtMatmulStages_t = cublasLtMatmulStages_t(34); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_64xAUTO: cublasLtMatmulStages_t = cublasLtMatmulStages_t(35); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_128xAUTO: cublasLtMatmulStages_t = cublasLtMatmulStages_t(36); +} +impl cublasLtMatmulStages_t { + pub const CUBLASLT_MATMUL_STAGES_END: cublasLtMatmulStages_t = cublasLtMatmulStages_t(37); +} +#[repr(transparent)] +#[doc = " Size and number of stages in which elements are read into shared memory\n\n General order of stages IDs is sorted by stage size first and by number of stages second."] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtMatmulStages_t(pub ::std::os::raw::c_uint); +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_AUTO: cublasLtClusterShape_t = cublasLtClusterShape_t(0); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(2); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_2x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(3); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_4x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(4); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x2x1: cublasLtClusterShape_t = cublasLtClusterShape_t(5); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_2x2x1: cublasLtClusterShape_t = cublasLtClusterShape_t(6); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_4x2x1: cublasLtClusterShape_t = cublasLtClusterShape_t(7); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x4x1: cublasLtClusterShape_t = cublasLtClusterShape_t(8); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_2x4x1: cublasLtClusterShape_t = cublasLtClusterShape_t(9); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_4x4x1: cublasLtClusterShape_t = cublasLtClusterShape_t(10); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_8x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(11); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x8x1: cublasLtClusterShape_t = cublasLtClusterShape_t(12); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_8x2x1: cublasLtClusterShape_t = cublasLtClusterShape_t(13); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_2x8x1: cublasLtClusterShape_t = cublasLtClusterShape_t(14); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_16x1x1: 
cublasLtClusterShape_t = cublasLtClusterShape_t(15); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x16x1: cublasLtClusterShape_t = cublasLtClusterShape_t(16); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_3x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(17); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_5x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(18); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_6x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(19); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_7x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(20); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_9x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(21); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_10x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(22); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_11x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(23); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_12x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(24); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_13x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(25); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_14x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(26); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_15x1x1: cublasLtClusterShape_t = cublasLtClusterShape_t(27); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_3x2x1: cublasLtClusterShape_t = cublasLtClusterShape_t(28); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_5x2x1: cublasLtClusterShape_t = cublasLtClusterShape_t(29); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_6x2x1: cublasLtClusterShape_t = cublasLtClusterShape_t(30); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_7x2x1: cublasLtClusterShape_t = cublasLtClusterShape_t(31); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x3x1: cublasLtClusterShape_t = cublasLtClusterShape_t(32); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_2x3x1: cublasLtClusterShape_t = cublasLtClusterShape_t(33); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const 
CUBLASLT_CLUSTER_SHAPE_3x3x1: cublasLtClusterShape_t = cublasLtClusterShape_t(34); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_4x3x1: cublasLtClusterShape_t = cublasLtClusterShape_t(35); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_5x3x1: cublasLtClusterShape_t = cublasLtClusterShape_t(36); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_3x4x1: cublasLtClusterShape_t = cublasLtClusterShape_t(37); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x5x1: cublasLtClusterShape_t = cublasLtClusterShape_t(38); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_2x5x1: cublasLtClusterShape_t = cublasLtClusterShape_t(39); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_3x5x1: cublasLtClusterShape_t = cublasLtClusterShape_t(40); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x6x1: cublasLtClusterShape_t = cublasLtClusterShape_t(41); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_2x6x1: cublasLtClusterShape_t = cublasLtClusterShape_t(42); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x7x1: cublasLtClusterShape_t = cublasLtClusterShape_t(43); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_2x7x1: cublasLtClusterShape_t = cublasLtClusterShape_t(44); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x9x1: cublasLtClusterShape_t = cublasLtClusterShape_t(45); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x10x1: cublasLtClusterShape_t = cublasLtClusterShape_t(46); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x11x1: cublasLtClusterShape_t = cublasLtClusterShape_t(47); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x12x1: cublasLtClusterShape_t = cublasLtClusterShape_t(48); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x13x1: cublasLtClusterShape_t = cublasLtClusterShape_t(49); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x14x1: cublasLtClusterShape_t = cublasLtClusterShape_t(50); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_1x15x1: cublasLtClusterShape_t = cublasLtClusterShape_t(51); +} +impl cublasLtClusterShape_t { + #[doc = " Let library pick cluster shape automatically"] + pub const CUBLASLT_CLUSTER_SHAPE_END: cublasLtClusterShape_t = cublasLtClusterShape_t(52); +} +#[repr(transparent)] +#[doc = " Thread Block Cluster size\n\n Typically 
dimensioned similar to cublasLtMatmulTile_t, with the third coordinate unused at this time."] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtClusterShape_t(pub ::std::os::raw::c_uint); +impl cublasLtMatmulInnerShape_t { + pub const CUBLASLT_MATMUL_INNER_SHAPE_UNDEFINED: cublasLtMatmulInnerShape_t = + cublasLtMatmulInnerShape_t(0); +} +impl cublasLtMatmulInnerShape_t { + pub const CUBLASLT_MATMUL_INNER_SHAPE_MMA884: cublasLtMatmulInnerShape_t = + cublasLtMatmulInnerShape_t(1); +} +impl cublasLtMatmulInnerShape_t { + pub const CUBLASLT_MATMUL_INNER_SHAPE_MMA1684: cublasLtMatmulInnerShape_t = + cublasLtMatmulInnerShape_t(2); +} +impl cublasLtMatmulInnerShape_t { + pub const CUBLASLT_MATMUL_INNER_SHAPE_MMA1688: cublasLtMatmulInnerShape_t = + cublasLtMatmulInnerShape_t(3); +} +impl cublasLtMatmulInnerShape_t { + pub const CUBLASLT_MATMUL_INNER_SHAPE_MMA16816: cublasLtMatmulInnerShape_t = + cublasLtMatmulInnerShape_t(4); +} +impl cublasLtMatmulInnerShape_t { + pub const CUBLASLT_MATMUL_INNER_SHAPE_END: cublasLtMatmulInnerShape_t = + cublasLtMatmulInnerShape_t(5); +} +#[repr(transparent)] +#[doc = " Inner size of the kernel\n\n Represents various aspects of internal kernel design, that don't impact CUDA grid size but may have other more subtle\n effects.\n"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtMatmulInnerShape_t(pub ::std::os::raw::c_uint); +impl cublasLtPointerMode_t { + #[doc = " matches CUBLAS_POINTER_MODE_HOST, pointer targets a single value host memory"] + pub const CUBLASLT_POINTER_MODE_HOST: cublasLtPointerMode_t = cublasLtPointerMode_t(0); +} +impl cublasLtPointerMode_t { + #[doc = " matches CUBLAS_POINTER_MODE_DEVICE, pointer targets a single value device memory"] + pub const CUBLASLT_POINTER_MODE_DEVICE: cublasLtPointerMode_t = cublasLtPointerMode_t(1); +} +impl cublasLtPointerMode_t { + #[doc = " pointer targets an array in device memory"] + pub const CUBLASLT_POINTER_MODE_DEVICE_VECTOR: cublasLtPointerMode_t = cublasLtPointerMode_t(2); +} +impl cublasLtPointerMode_t { + #[doc = " alpha pointer targets an array in device memory, beta is zero. 
Note:\nCUBLASLT_MATMUL_DESC_ALPHA_VECTOR_BATCH_STRIDE is not supported, must be 0."] + pub const CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO: cublasLtPointerMode_t = + cublasLtPointerMode_t(3); +} +impl cublasLtPointerMode_t { + #[doc = " alpha pointer targets an array in device memory, beta is a single value in host memory."] + pub const CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_HOST: cublasLtPointerMode_t = + cublasLtPointerMode_t(4); +} +#[repr(transparent)] +#[doc = " Pointer mode to use for alpha/beta"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtPointerMode_t(pub ::std::os::raw::c_uint); +impl cublasLtPointerModeMask_t { + #[doc = " no initial filtering is performed when querying pointer mode capabilities, will use gemm pointer mode defined in\noperation description"] + pub const CUBLASLT_POINTER_MODE_MASK_NO_FILTERING: cublasLtPointerModeMask_t = + cublasLtPointerModeMask_t(0); +} +impl cublasLtPointerModeMask_t { + #[doc = " see CUBLASLT_POINTER_MODE_HOST"] + pub const CUBLASLT_POINTER_MODE_MASK_HOST: cublasLtPointerModeMask_t = + cublasLtPointerModeMask_t(1); +} +impl cublasLtPointerModeMask_t { + #[doc = " see CUBLASLT_POINTER_MODE_DEVICE"] + pub const CUBLASLT_POINTER_MODE_MASK_DEVICE: cublasLtPointerModeMask_t = + cublasLtPointerModeMask_t(2); +} +impl cublasLtPointerModeMask_t { + #[doc = " see CUBLASLT_POINTER_MODE_DEVICE_VECTOR"] + pub const CUBLASLT_POINTER_MODE_MASK_DEVICE_VECTOR: cublasLtPointerModeMask_t = + cublasLtPointerModeMask_t(4); +} +impl cublasLtPointerModeMask_t { + #[doc = " see CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO"] + pub const CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_ZERO: cublasLtPointerModeMask_t = + cublasLtPointerModeMask_t(8); +} +impl cublasLtPointerModeMask_t { + #[doc = " see CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_HOST"] + pub const CUBLASLT_POINTER_MODE_MASK_ALPHA_DEVICE_VECTOR_BETA_HOST: cublasLtPointerModeMask_t = + cublasLtPointerModeMask_t(16); +} +#[repr(transparent)] +#[doc = " Mask to define and query pointer mode capability"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtPointerModeMask_t(pub ::std::os::raw::c_uint); +pub type cublasLtNumericalImplFlags_t = u64; + +#[doc = " Execute matrix multiplication (D = alpha * op(A) * op(B) + beta * C).\n\n \\retval CUBLAS_STATUS_NOT_INITIALIZED if cuBLASLt handle has not been initialized\n \\retval CUBLAS_STATUS_INVALID_VALUE if parameters are in conflict or in an impossible configuration; e.g.\n when workspaceSizeInBytes is less than workspace required by configured\n algo\n \\retval CUBLAS_STATUS_NOT_SUPPORTED if current implementation on selected device doesn't support configured\n operation\n \\retval CUBLAS_STATUS_ARCH_MISMATCH if configured operation cannot be run using selected device\n \\retval CUBLAS_STATUS_EXECUTION_FAILED if cuda reported execution error from the device\n \\retval CUBLAS_STATUS_SUCCESS if the operation completed successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmul( + lightHandle: cublasLtHandle_t, + computeDesc: cublasLtMatmulDesc_t, + alpha: *const ::std::os::raw::c_void, + A: *const ::std::os::raw::c_void, + Adesc: cublasLtMatrixLayout_t, + B: *const ::std::os::raw::c_void, + Bdesc: cublasLtMatrixLayout_t, + beta: *const ::std::os::raw::c_void, + C: *const ::std::os::raw::c_void, + Cdesc: cublasLtMatrixLayout_t, + D: *mut ::std::os::raw::c_void, + Ddesc: cublasLtMatrixLayout_t, + algo: *const cublasLtMatmulAlgo_t, + workspace: *mut ::std::os::raw::c_void, + 
workspaceSizeInBytes: usize, + stream: cudaStream_t, +) -> cublasStatus_t { + crate::matmul( + lightHandle, + computeDesc, + alpha, + A, + Adesc, + B, + Bdesc, + beta, + C, + Cdesc, + D, + Ddesc, + algo, + workspace, + workspaceSizeInBytes, + stream, + ) +} + +#[doc = " Matrix layout conversion helper (C = alpha * op(A) + beta * op(B))\n\n Can be used to change memory order of data or to scale and shift the values.\n\n \\retval CUBLAS_STATUS_NOT_INITIALIZED if cuBLASLt handle has not been initialized\n \\retval CUBLAS_STATUS_INVALID_VALUE if parameters are in conflict or in an impossible configuration; e.g.\n when A is not NULL, but Adesc is NULL\n \\retval CUBLAS_STATUS_NOT_SUPPORTED if current implementation on selected device doesn't support configured\n operation\n \\retval CUBLAS_STATUS_ARCH_MISMATCH if configured operation cannot be run using selected device\n \\retval CUBLAS_STATUS_EXECUTION_FAILED if cuda reported execution error from the device\n \\retval CUBLAS_STATUS_SUCCESS if the operation completed successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatrixTransform( + lightHandle: cublasLtHandle_t, + transformDesc: cublasLtMatrixTransformDesc_t, + alpha: *const ::std::os::raw::c_void, + A: *const ::std::os::raw::c_void, + Adesc: cublasLtMatrixLayout_t, + beta: *const ::std::os::raw::c_void, + B: *const ::std::os::raw::c_void, + Bdesc: cublasLtMatrixLayout_t, + C: *mut ::std::os::raw::c_void, + Cdesc: cublasLtMatrixLayout_t, + stream: cudaStream_t, +) -> cublasStatus_t { + crate::unsupported() +} +impl cublasLtOrder_t { + #[doc = " Column-major\n\n Leading dimension is the stride (in elements) to the beginning of next column in memory."] + pub const CUBLASLT_ORDER_COL: cublasLtOrder_t = cublasLtOrder_t(0); +} +impl cublasLtOrder_t { + #[doc = " Row major\n\n Leading dimension is the stride (in elements) to the beginning of next row in memory."] + pub const CUBLASLT_ORDER_ROW: cublasLtOrder_t = cublasLtOrder_t(1); +} +impl cublasLtOrder_t { + #[doc = " Column-major ordered tiles of 32 columns.\n\n Leading dimension is the stride (in elements) to the beginning of next group of 32-columns. E.g. if matrix has 33\n columns and 2 rows, ld must be at least (32) * 2 = 64."] + pub const CUBLASLT_ORDER_COL32: cublasLtOrder_t = cublasLtOrder_t(2); +} +impl cublasLtOrder_t { + #[doc = " Column-major ordered tiles of composite tiles with total 32 columns and 8 rows, tile composed of interleaved\n inner tiles of 4 columns within 4 even or odd rows in an alternating pattern.\n\n Leading dimension is the stride (in elements) to the beginning of the first 32 column x 8 row tile for the next\n 32-wide group of columns. E.g. if matrix has 33 columns and 1 row, ld must be at least (32 * 8) * 1 = 256."] + pub const CUBLASLT_ORDER_COL4_4R2_8C: cublasLtOrder_t = cublasLtOrder_t(3); +} +impl cublasLtOrder_t { + #[doc = " Column-major ordered tiles of composite tiles with total 32 columns ands 32 rows.\n Element offset within the tile is calculated as (((row%8)/2*4+row/8)*2+row%2)*32+col.\n\n Leading dimension is the stride (in elements) to the beginning of the first 32 column x 32 row tile for the next\n 32-wide group of columns. E.g. 
if matrix has 33 columns and 1 row, ld must be at least (32*32)*1 = 1024."] + pub const CUBLASLT_ORDER_COL32_2R_4R4: cublasLtOrder_t = cublasLtOrder_t(4); +} +#[repr(transparent)] +#[doc = " Enum for data ordering"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtOrder_t(pub ::std::os::raw::c_uint); +impl cublasLtMatrixLayoutAttribute_t { + #[doc = " Data type, see cudaDataType.\n\n uint32_t"] + pub const CUBLASLT_MATRIX_LAYOUT_TYPE: cublasLtMatrixLayoutAttribute_t = + cublasLtMatrixLayoutAttribute_t(0); +} +impl cublasLtMatrixLayoutAttribute_t { + #[doc = " Memory order of the data, see cublasLtOrder_t.\n\n int32_t, default: CUBLASLT_ORDER_COL"] + pub const CUBLASLT_MATRIX_LAYOUT_ORDER: cublasLtMatrixLayoutAttribute_t = + cublasLtMatrixLayoutAttribute_t(1); +} +impl cublasLtMatrixLayoutAttribute_t { + #[doc = " Number of rows.\n\n Usually only values that can be expressed as int32_t are supported.\n\n uint64_t"] + pub const CUBLASLT_MATRIX_LAYOUT_ROWS: cublasLtMatrixLayoutAttribute_t = + cublasLtMatrixLayoutAttribute_t(2); +} +impl cublasLtMatrixLayoutAttribute_t { + #[doc = " Number of columns.\n\n Usually only values that can be expressed as int32_t are supported.\n\n uint64_t"] + pub const CUBLASLT_MATRIX_LAYOUT_COLS: cublasLtMatrixLayoutAttribute_t = + cublasLtMatrixLayoutAttribute_t(3); +} +impl cublasLtMatrixLayoutAttribute_t { + #[doc = " Matrix leading dimension.\n\n For CUBLASLT_ORDER_COL this is stride (in elements) of matrix column, for more details and documentation for\n other memory orders see documentation for cublasLtOrder_t values.\n\n Currently only non-negative values are supported, must be large enough so that matrix memory locations are not\n overlapping (e.g. greater or equal to CUBLASLT_MATRIX_LAYOUT_ROWS in case of CUBLASLT_ORDER_COL).\n\n int64_t;"] + pub const CUBLASLT_MATRIX_LAYOUT_LD: cublasLtMatrixLayoutAttribute_t = + cublasLtMatrixLayoutAttribute_t(4); +} +impl cublasLtMatrixLayoutAttribute_t { + #[doc = " Number of matmul operations to perform in the batch.\n\n See also CUBLASLT_ALGO_CAP_STRIDED_BATCH_SUPPORT\n\n int32_t, default: 1"] + pub const CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT: cublasLtMatrixLayoutAttribute_t = + cublasLtMatrixLayoutAttribute_t(5); +} +impl cublasLtMatrixLayoutAttribute_t { + #[doc = " Stride (in elements) to the next matrix for strided batch operation.\n\n When matrix type is planar-complex (CUBLASLT_MATRIX_LAYOUT_PLANE_OFFSET != 0), batch stride\n is interpreted by cublasLtMatmul() in number of real valued sub-elements. E.g. for data of type CUDA_C_16F,\n offset of 1024B is encoded as a stride of value 512 (since each element of the real and imaginary matrices\n is a 2B (16bit) floating point type).\n\n NOTE: A bug in cublasLtMatrixTransform() causes it to interpret the batch stride for a planar-complex matrix\n as if it was specified in number of complex elements. Therefore an offset of 1024B must be encoded as stride\n value 256 when calling cublasLtMatrixTransform() (each complex element is 4B with real and imaginary values 2B\n each). 
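// Worked example (illustrative, not generated): the leading-dimension rules spelled
// out in the cublasLtOrder_t comments above, as a helper. The COL/ROW cases follow
// directly from the text; for the tiled orders the row count is rounded up to the
// tile height, which reproduces the "33 columns" examples above but should be
// checked against the cuBLAS documentation before relying on it.
fn min_leading_dimension(order: cublasLtOrder_t, rows: u64, cols: u64) -> u64 {
    fn round_up(x: u64, to: u64) -> u64 {
        (x + to - 1) / to * to
    }
    if order == cublasLtOrder_t::CUBLASLT_ORDER_COL {
        rows // stride to the next column: at least the row count
    } else if order == cublasLtOrder_t::CUBLASLT_ORDER_ROW {
        cols // stride to the next row: at least the column count
    } else if order == cublasLtOrder_t::CUBLASLT_ORDER_COL32 {
        32 * rows // 33 cols x 2 rows -> 32 * 2 = 64, as in the comment above
    } else if order == cublasLtOrder_t::CUBLASLT_ORDER_COL4_4R2_8C {
        32 * round_up(rows, 8) // 33 cols x 1 row -> (32 * 8) * 1 = 256
    } else if order == cublasLtOrder_t::CUBLASLT_ORDER_COL32_2R_4R4 {
        32 * round_up(rows, 32) // 33 cols x 1 row -> (32 * 32) * 1 = 1024
    } else {
        panic!("unexpected cublasLtOrder_t")
    }
}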
This behavior is expected to be corrected in the next major cuBLAS version.\n\n int64_t, default: 0"] + pub const CUBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET: cublasLtMatrixLayoutAttribute_t = + cublasLtMatrixLayoutAttribute_t(6); +} +impl cublasLtMatrixLayoutAttribute_t { + #[doc = " Stride (in bytes) to the imaginary plane for planar complex layout.\n\n int64_t, default: 0 - 0 means that layout is regular (real and imaginary parts of complex numbers are interleaved\n in memory in each element)"] + pub const CUBLASLT_MATRIX_LAYOUT_PLANE_OFFSET: cublasLtMatrixLayoutAttribute_t = + cublasLtMatrixLayoutAttribute_t(7); +} +#[repr(transparent)] +#[doc = " Attributes of memory layout"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtMatrixLayoutAttribute_t(pub ::std::os::raw::c_uint); + +#[doc = " Internal. Do not use directly."] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatrixLayoutInit_internal( + matLayout: cublasLtMatrixLayout_t, + size: usize, + type_: cudaDataType, + rows: u64, + cols: u64, + ld: i64, +) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Create new matrix layout descriptor.\n\n \\retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated\n \\retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatrixLayoutCreate( + matLayout: *mut cublasLtMatrixLayout_t, + type_: cudaDataType, + rows: u64, + cols: u64, + ld: i64, +) -> cublasStatus_t { + crate::matrix_layout_create(matLayout, type_, rows, cols, ld) +} + +#[doc = " Destroy matrix layout descriptor.\n\n \\retval CUBLAS_STATUS_SUCCESS if operation was successful"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatrixLayoutDestroy( + matLayout: cublasLtMatrixLayout_t, +) -> cublasStatus_t { + crate::matrix_layout_destroy(matLayout) +} + +#[doc = " Set matrix layout descriptor attribute.\n\n \\param[in] matLayout The descriptor\n \\param[in] attr The attribute\n \\param[in] buf memory address containing the new value\n \\param[in] sizeInBytes size of buf buffer for verification (in bytes)\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for\n selected attribute\n \\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatrixLayoutSetAttribute( + matLayout: cublasLtMatrixLayout_t, + attr: cublasLtMatrixLayoutAttribute_t, + buf: *const ::std::os::raw::c_void, + sizeInBytes: usize, +) -> cublasStatus_t { + crate::matrix_layout_set_attribute(matLayout, attr, buf, sizeInBytes) +} + +#[doc = " Get matrix layout descriptor attribute.\n\n \\param[in] matLayout The descriptor\n \\param[in] attr The attribute\n \\param[out] buf memory address containing the new value\n \\param[in] sizeInBytes size of buf buffer for verification (in bytes)\n \\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. 
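// Illustrative sketch: creating a column-major layout through the entry points
// above and overriding one attribute. CUDA_R_32F is assumed to come from the
// cudaDataType bindings elsewhere in this crate, and CUBLAS_STATUS_SUCCESS from
// cublasStatus_t; status handling is kept minimal.
unsafe fn layout_sketch() -> cublasLtMatrixLayout_t {
    let mut layout: cublasLtMatrixLayout_t = std::ptr::null_mut();
    // 1024 rows x 512 columns of f32; ld = rows for the default CUBLASLT_ORDER_COL.
    let status = cublasLtMatrixLayoutCreate(&mut layout, cudaDataType::CUDA_R_32F, 1024, 512, 1024);
    assert!(status == cublasStatus_t::CUBLAS_STATUS_SUCCESS);
    // Batch the operation: CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT is documented as int32_t.
    // (A strided batch would also set CUBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET.)
    let batch_count: i32 = 4;
    let status = cublasLtMatrixLayoutSetAttribute(
        layout,
        cublasLtMatrixLayoutAttribute_t::CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT,
        &batch_count as *const i32 as *const ::std::os::raw::c_void,
        std::mem::size_of::<i32>(),
    );
    assert!(status == cublasStatus_t::CUBLAS_STATUS_SUCCESS);
    layout
}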
If sizeInBytes is non-zero: number of\n bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero\n and buf is NULL or sizeInBytes doesn't match size of internal storage for\n selected attribute\n \\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatrixLayoutGetAttribute( + matLayout: cublasLtMatrixLayout_t, + attr: cublasLtMatrixLayoutAttribute_t, + buf: *mut ::std::os::raw::c_void, + sizeInBytes: usize, + sizeWritten: *mut usize, +) -> cublasStatus_t { + crate::unsupported() +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Compute type, see cudaDataType. Defines data type used for multiply and accumulate operations and the\n accumulator during matrix multiplication.\n\n int32_t"] + pub const CUBLASLT_MATMUL_DESC_COMPUTE_TYPE: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(0); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Scale type, see cudaDataType. Defines data type of alpha and beta. Accumulator and value from matrix C are\n typically converted to scale type before final scaling. Value is then converted from scale type to type of matrix\n D before being stored in memory.\n\n int32_t, default: same as CUBLASLT_MATMUL_DESC_COMPUTE_TYPE"] + pub const CUBLASLT_MATMUL_DESC_SCALE_TYPE: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(1); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Pointer mode of alpha and beta, see cublasLtPointerMode_t. When CUBLASLT_POINTER_MODE_DEVICE_VECTOR is in use,\n alpha/beta vector lenghts must match number of output matrix rows.\n\n int32_t, default: CUBLASLT_POINTER_MODE_HOST"] + pub const CUBLASLT_MATMUL_DESC_POINTER_MODE: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(2); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Transform of matrix A, see cublasOperation_t.\n\n int32_t, default: CUBLAS_OP_N"] + pub const CUBLASLT_MATMUL_DESC_TRANSA: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(3); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Transform of matrix B, see cublasOperation_t.\n\n int32_t, default: CUBLAS_OP_N"] + pub const CUBLASLT_MATMUL_DESC_TRANSB: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(4); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Transform of matrix C, see cublasOperation_t.\n\n Currently only CUBLAS_OP_N is supported.\n\n int32_t, default: CUBLAS_OP_N"] + pub const CUBLASLT_MATMUL_DESC_TRANSC: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(5); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Matrix fill mode, see cublasFillMode_t.\n\n int32_t, default: CUBLAS_FILL_MODE_FULL"] + pub const CUBLASLT_MATMUL_DESC_FILL_MODE: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(6); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Epilogue function, see cublasLtEpilogue_t.\n\n uint32_t, default: CUBLASLT_EPILOGUE_DEFAULT"] + pub const CUBLASLT_MATMUL_DESC_EPILOGUE: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(7); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Bias or bias gradient vector pointer in the device memory.\n\n Bias case. See CUBLASLT_EPILOGUE_BIAS.\n For bias data type see CUBLASLT_MATMUL_DESC_BIAS_DATA_TYPE.\n\n Bias vector length must match matrix D rows count.\n\n Bias gradient case. 
See CUBLASLT_EPILOGUE_DRELU_BGRAD and CUBLASLT_EPILOGUE_DGELU_BGRAD.\n Bias gradient vector elements are the same type as the output elements\n (Ctype) with the exception of IMMA kernels (see above).\n\n Routines that don't dereference this pointer, like cublasLtMatmulAlgoGetHeuristic()\n depend on its value to determine expected pointer alignment.\n\n Bias case: const void *, default: NULL\n Bias gradient case: void *, default: NULL"] + pub const CUBLASLT_MATMUL_DESC_BIAS_POINTER: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(8); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Batch stride for bias or bias gradient vector.\n\n Used together with CUBLASLT_MATMUL_DESC_BIAS_POINTER when matrix D's CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT > 1.\n\n int64_t, default: 0"] + pub const CUBLASLT_MATMUL_DESC_BIAS_BATCH_STRIDE: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(10); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Pointer for epilogue auxiliary buffer.\n\n - Output vector for ReLu bit-mask in forward pass when CUBLASLT_EPILOGUE_RELU_AUX\n or CUBLASLT_EPILOGUE_RELU_AUX_BIAS epilogue is used.\n - Input vector for ReLu bit-mask in backward pass when\n CUBLASLT_EPILOGUE_DRELU_BGRAD epilogue is used.\n\n - Output of GELU input matrix in forward pass when\n CUBLASLT_EPILOGUE_GELU_AUX_BIAS epilogue is used.\n - Input of GELU input matrix for backward pass when\n CUBLASLT_EPILOGUE_DGELU_BGRAD epilogue is used.\n\n For aux data type see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_DATA_TYPE.\n\n Routines that don't dereference this pointer, like cublasLtMatmulAlgoGetHeuristic()\n depend on its value to determine expected pointer alignment.\n\n Requires setting CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD attribute.\n\n Forward pass: void *, default: NULL\n Backward pass: const void *, default: NULL"] + pub const CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(11); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Leading dimension for epilogue auxiliary buffer.\n\n - ReLu bit-mask matrix leading dimension in elements (i.e. bits)\n when CUBLASLT_EPILOGUE_RELU_AUX, CUBLASLT_EPILOGUE_RELU_AUX_BIAS or CUBLASLT_EPILOGUE_DRELU_BGRAD epilogue is\n used. Must be divisible by 128 and be no less than the number of rows in the output matrix.\n\n - GELU input matrix leading dimension in elements\n when CUBLASLT_EPILOGUE_GELU_AUX_BIAS or CUBLASLT_EPILOGUE_DGELU_BGRAD epilogue used.\n Must be divisible by 8 and be no less than the number of rows in the output matrix.\n\n int64_t, default: 0"] + pub const CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(12); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Batch stride for epilogue auxiliary buffer.\n\n - ReLu bit-mask matrix batch stride in elements (i.e. bits)\n when CUBLASLT_EPILOGUE_RELU_AUX, CUBLASLT_EPILOGUE_RELU_AUX_BIAS or CUBLASLT_EPILOGUE_DRELU_BGRAD epilogue is\n used. 
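// Illustrative sketch: enabling the bias epilogue described above on an existing
// matmul descriptor. cublasLtEpilogue_t and CUBLASLT_EPILOGUE_BIAS are assumed to
// be generated further down in this file (the comments above only reference them),
// and d_bias is assumed to be a device pointer whose length matches the row count
// of matrix D, as the documentation requires. cublasLtMatmulDescSetAttribute is
// the setter declared later in this file.
unsafe fn enable_bias_sketch(
    desc: cublasLtMatmulDesc_t,
    d_bias: *const ::std::os::raw::c_void,
) -> cublasStatus_t {
    // The epilogue attribute is documented as uint32_t.
    let epilogue = cublasLtEpilogue_t::CUBLASLT_EPILOGUE_BIAS;
    let status = cublasLtMatmulDescSetAttribute(
        desc,
        cublasLtMatmulDescAttributes_t::CUBLASLT_MATMUL_DESC_EPILOGUE,
        &epilogue as *const cublasLtEpilogue_t as *const ::std::os::raw::c_void,
        std::mem::size_of::<cublasLtEpilogue_t>(),
    );
    if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS {
        return status;
    }
    // The bias pointer attribute is documented as `const void *`, so the payload
    // passed to the setter is the pointer value itself.
    cublasLtMatmulDescSetAttribute(
        desc,
        cublasLtMatmulDescAttributes_t::CUBLASLT_MATMUL_DESC_BIAS_POINTER,
        &d_bias as *const *const ::std::os::raw::c_void as *const ::std::os::raw::c_void,
        std::mem::size_of::<*const ::std::os::raw::c_void>(),
    )
}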
Must be divisible by 128.\n\n - GELU input matrix batch stride in elements\n when CUBLASLT_EPILOGUE_GELU_AUX_BIAS or CUBLASLT_EPILOGUE_DGELU_BGRAD epilogue used.\n Must be divisible by 8.\n\n int64_t, default: 0"] + pub const CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_BATCH_STRIDE: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(13); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Batch stride for alpha vector.\n\n Used together with CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_HOST when matrix D's\n CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT > 1. If CUBLASLT_POINTER_MODE_ALPHA_DEVICE_VECTOR_BETA_ZERO is set then\n CUBLASLT_MATMUL_DESC_ALPHA_VECTOR_BATCH_STRIDE must be set to 0 as this mode doesnt supported batched alpha vector.\n\n int64_t, default: 0"] + pub const CUBLASLT_MATMUL_DESC_ALPHA_VECTOR_BATCH_STRIDE: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(14); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Number of SMs to target for parallel execution. Optimizes heuristics for execution on a different number of SMs\n when user expects a concurrent stream to be using some of the device resources.\n\n int32_t, default: 0 - use the number reported by the device."] + pub const CUBLASLT_MATMUL_DESC_SM_COUNT_TARGET: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(15); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Device pointer to the scale factor value that converts data in matrix A to the compute data type range.\n\n The scaling factor value must have the same type as the compute type.\n\n If not specified, or set to NULL, the scaling factor is assumed to be 1.\n\n If set for an unsupported matrix data, scale, and compute type combination, calling cublasLtMatmul()\n will return CUBLAS_INVALID_VALUE.\n\n const void *, default: NULL"] + pub const CUBLASLT_MATMUL_DESC_A_SCALE_POINTER: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(17); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Device pointer to the scale factor value to convert data in matrix B to compute data type range.\n\n The scaling factor value must have the same type as the compute type.\n\n If not specified, or set to NULL, the scaling factor is assumed to be 1.\n\n If set for an unsupported matrix data, scale, and compute type combination, calling cublasLtMatmul()\n will return CUBLAS_INVALID_VALUE.\n\n const void *, default: NULL"] + pub const CUBLASLT_MATMUL_DESC_B_SCALE_POINTER: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(18); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Device pointer to the scale factor value to convert data in matrix C to compute data type range.\n\n The scaling factor value must have the same type as the compute type.\n\n If not specified, or set to NULL, the scaling factor is assumed to be 1.\n\n If set for an unsupported matrix data, scale, and compute type combination, calling cublasLtMatmul()\n will return CUBLAS_INVALID_VALUE.\n\n const void *, default: NULL"] + pub const CUBLASLT_MATMUL_DESC_C_SCALE_POINTER: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(19); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Device pointer to the scale factor value to convert data in matrix D to compute data type range.\n\n The scaling factor value must have the same type as the compute type.\n\n If not specified, or set to NULL, the scaling factor is assumed to be 1.\n\n If set for an unsupported matrix data, scale, and compute type combination, calling cublasLtMatmul()\n 
will return CUBLAS_INVALID_VALUE.\n\n const void *, default: NULL"] + pub const CUBLASLT_MATMUL_DESC_D_SCALE_POINTER: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(20); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Device pointer to the memory location that on completion will be set to the maximum of absolute values in the\n output matrix.\n\n The computed value has the same type as the compute type.\n\n If not specified or set to NULL, the maximum absolute value is not computed. If set for an unsupported matrix\n data, scale, and compute type combination, calling cublasLtMatmul() will return CUBLAS_INVALID_VALUE.\n\n void *, default: NULL"] + pub const CUBLASLT_MATMUL_DESC_AMAX_D_POINTER: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(21); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Type of the data to be stored to the memory pointed to by CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.\n\n If unset, the data type defaults to the type of elements of the output matrix with some exceptions, see details\n below.\n\n ReLu uses a bit-mask.\n\n GELU input matrix elements type is the same as the type of elements of\n the output matrix with some exceptions, see details below.\n\n For fp8 kernels with output type CUDA_R_8F_E4M3 the aux data type can be CUDA_R_8F_E4M3 or CUDA_R_16F with some\n restrictions. See https://docs.nvidia.com/cuda/cublas/index.html#cublasLtMatmulDescAttributes_t for more details.\n\n If set for an unsupported matrix data, scale, and compute type combination, calling cublasLtMatmul()\n will return CUBLAS_INVALID_VALUE.\n\n int32_t based on cudaDataType, default: -1"] + pub const CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_DATA_TYPE: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(22); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Device pointer to the scaling factor value to convert results from compute type data range to storage\n data range in the auxiliary matrix that is set via CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.\n\n The scaling factor value must have the same type as the compute type.\n\n If not specified, or set to NULL, the scaling factor is assumed to be 1. If set for an unsupported matrix data,\n scale, and compute type combination, calling cublasLtMatmul() will return CUBLAS_INVALID_VALUE.\n\n void *, default: NULL"] + pub const CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_SCALE_POINTER: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(23); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Device pointer to the memory location that on completion will be set to the maximum of absolute values in the\n buffer that is set via CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER.\n\n The computed value has the same type as the compute type.\n\n If not specified or set to NULL, the maximum absolute value is not computed. 
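// Illustrative sketch: wiring up the per-matrix scale factors and the amax output
// described above, as used for narrow-precision (e.g. FP8) matmuls. d_a_scale,
// d_b_scale, and d_amax_d are assumed to be valid device pointers to single values
// of the compute type; all three attributes are documented as raw pointers, so the
// attribute payload is the pointer value itself.
unsafe fn set_scaling_sketch(
    desc: cublasLtMatmulDesc_t,
    d_a_scale: *const ::std::os::raw::c_void,
    d_b_scale: *const ::std::os::raw::c_void,
    d_amax_d: *mut ::std::os::raw::c_void,
) -> cublasStatus_t {
    let attrs = [
        (cublasLtMatmulDescAttributes_t::CUBLASLT_MATMUL_DESC_A_SCALE_POINTER, d_a_scale),
        (cublasLtMatmulDescAttributes_t::CUBLASLT_MATMUL_DESC_B_SCALE_POINTER, d_b_scale),
        (cublasLtMatmulDescAttributes_t::CUBLASLT_MATMUL_DESC_AMAX_D_POINTER, d_amax_d as *const ::std::os::raw::c_void),
    ];
    for (attr, ptr) in attrs {
        let status = cublasLtMatmulDescSetAttribute(
            desc,
            attr,
            &ptr as *const *const ::std::os::raw::c_void as *const ::std::os::raw::c_void,
            std::mem::size_of::<*const ::std::os::raw::c_void>(),
        );
        if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS {
            return status;
        }
    }
    cublasStatus_t::CUBLAS_STATUS_SUCCESS
}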
If set for an unsupported matrix\n data, scale, and compute type combination, calling cublasLtMatmul() will return CUBLAS_INVALID_VALUE.\n\n void *, default: NULL"] + pub const CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_AMAX_POINTER: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(24); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Flag for managing fp8 fast accumulation mode.\n When enabled, problem execution might be faster but at the cost of lower accuracy because intermediate results\n will not periodically be promoted to a higher precision.\n\n int8_t, default: 0 - fast accumulation mode is disabled."] + pub const CUBLASLT_MATMUL_DESC_FAST_ACCUM: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(25); +} +impl cublasLtMatmulDescAttributes_t { + #[doc = " Type of bias or bias gradient vector in the device memory.\n\n Bias case: see CUBLASLT_EPILOGUE_BIAS.\n\n Bias vector elements are the same type as the elements of output matrix (Dtype) with the following exceptions:\n - IMMA kernels with computeType=CUDA_R_32I and Ctype=CUDA_R_8I where the bias vector elements\n are the same type as alpha, beta (CUBLASLT_MATMUL_DESC_SCALE_TYPE=CUDA_R_32F)\n - fp8 kernels with an output type of CUDA_R_32F, CUDA_R_8F_E4M3 or CUDA_R_8F_E5M2, See\n https://docs.nvidia.com/cuda/cublas/index.html#cublasLtMatmul for details.\n\n int32_t based on cudaDataType, default: -1"] + pub const CUBLASLT_MATMUL_DESC_BIAS_DATA_TYPE: cublasLtMatmulDescAttributes_t = + cublasLtMatmulDescAttributes_t(26); +} +#[repr(transparent)] +#[doc = " Matmul descriptor attributes to define details of the operation."] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtMatmulDescAttributes_t(pub ::std::os::raw::c_uint); + +#[doc = " Internal. Do not use directly."] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulDescInit_internal( + matmulDesc: cublasLtMatmulDesc_t, + size: usize, + computeType: cublasComputeType_t, + scaleType: cudaDataType_t, +) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Create new matmul operation descriptor.\n\n \\retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated\n \\retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulDescCreate( + matmulDesc: *mut cublasLtMatmulDesc_t, + computeType: cublasComputeType_t, + scaleType: cudaDataType_t, +) -> cublasStatus_t { + crate::matmul_desc_create(matmulDesc, computeType, scaleType) +} + +#[doc = " Destroy matmul operation descriptor.\n\n \\retval CUBLAS_STATUS_SUCCESS if operation was successful"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulDescDestroy( + matmulDesc: cublasLtMatmulDesc_t, +) -> cublasStatus_t { + crate::matmul_desc_destroy(matmulDesc) +} + +#[doc = " Set matmul operation descriptor attribute.\n\n \\param[in] matmulDesc The descriptor\n \\param[in] attr The attribute\n \\param[in] buf memory address containing the new value\n \\param[in] sizeInBytes size of buf buffer for verification (in bytes)\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for\n selected attribute\n \\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulDescSetAttribute( + matmulDesc: cublasLtMatmulDesc_t, + attr: cublasLtMatmulDescAttributes_t, + buf: *const ::std::os::raw::c_void, + sizeInBytes: usize, +) -> cublasStatus_t { + crate::matmul_desc_set_attribute(matmulDesc, attr, buf, 
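// End-to-end sketch (illustrative only): a plain single-precision
// D = alpha * A * B + beta * C built from the entry points declared in this file.
// Assumptions: CUDA_R_32F from the cudaDataType bindings, CUBLAS_STATUS_SUCCESS
// from cublasStatus_t, a null `algo` letting the library choose an algorithm, and
// a null cudaStream_t denoting the default stream; d_a/d_b/d_c/d_d are assumed to
// be valid device buffers of matching shapes. Status checks are elided for brevity.
unsafe fn sgemm_sketch(
    handle: cublasLtHandle_t,
    d_a: *const ::std::os::raw::c_void,
    d_b: *const ::std::os::raw::c_void,
    d_c: *const ::std::os::raw::c_void,
    d_d: *mut ::std::os::raw::c_void,
    m: u64,
    n: u64,
    k: u64,
) -> cublasStatus_t {
    let mut desc: cublasLtMatmulDesc_t = std::ptr::null_mut();
    cublasLtMatmulDescCreate(
        &mut desc,
        cublasComputeType_t::CUBLAS_COMPUTE_32F,
        cudaDataType_t::CUDA_R_32F,
    );
    // Column-major layouts with ld = rows; A is m x k, B is k x n, C and D are m x n.
    let mut a_desc: cublasLtMatrixLayout_t = std::ptr::null_mut();
    let mut b_desc: cublasLtMatrixLayout_t = std::ptr::null_mut();
    let mut c_desc: cublasLtMatrixLayout_t = std::ptr::null_mut();
    cublasLtMatrixLayoutCreate(&mut a_desc, cudaDataType_t::CUDA_R_32F, m, k, m as i64);
    cublasLtMatrixLayoutCreate(&mut b_desc, cudaDataType_t::CUDA_R_32F, k, n, k as i64);
    cublasLtMatrixLayoutCreate(&mut c_desc, cudaDataType_t::CUDA_R_32F, m, n, m as i64);
    // Host-side alpha/beta (CUBLASLT_POINTER_MODE_HOST is the documented default).
    let alpha: f32 = 1.0;
    let beta: f32 = 0.0;
    let status = cublasLtMatmul(
        handle,
        desc,
        &alpha as *const f32 as *const ::std::os::raw::c_void,
        d_a,
        a_desc,
        d_b,
        b_desc,
        &beta as *const f32 as *const ::std::os::raw::c_void,
        d_c,
        c_desc,
        d_d,
        c_desc, // D reuses C's layout in this sketch
        std::ptr::null(),     // no explicit algo
        std::ptr::null_mut(), // no workspace
        0,
        std::ptr::null_mut(), // default stream
    );
    for layout in [a_desc, b_desc, c_desc] {
        cublasLtMatrixLayoutDestroy(layout);
    }
    cublasLtMatmulDescDestroy(desc);
    status
}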
sizeInBytes) +} + +#[doc = " Get matmul operation descriptor attribute.\n\n \\param[in] matmulDesc The descriptor\n \\param[in] attr The attribute\n \\param[out] buf memory address containing the new value\n \\param[in] sizeInBytes size of buf buffer for verification (in bytes)\n \\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of\n bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero\n and buf is NULL or sizeInBytes doesn't match size of internal storage for\n selected attribute\n \\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulDescGetAttribute( + matmulDesc: cublasLtMatmulDesc_t, + attr: cublasLtMatmulDescAttributes_t, + buf: *mut ::std::os::raw::c_void, + sizeInBytes: usize, + sizeWritten: *mut usize, +) -> cublasStatus_t { + crate::matmul_desc_get_attribute(matmulDesc, attr, buf, sizeInBytes, sizeWritten) +} +impl cublasLtMatrixTransformDescAttributes_t { + #[doc = " Scale type, see cudaDataType. Inputs are converted to scale type for scaling and summation and results are then\n converted to output type to store in memory.\n\n int32_t"] + pub const CUBLASLT_MATRIX_TRANSFORM_DESC_SCALE_TYPE: cublasLtMatrixTransformDescAttributes_t = + cublasLtMatrixTransformDescAttributes_t(0); +} +impl cublasLtMatrixTransformDescAttributes_t { + #[doc = " Pointer mode of alpha and beta, see cublasLtPointerMode_t.\n\n int32_t, default: CUBLASLT_POINTER_MODE_HOST"] + pub const CUBLASLT_MATRIX_TRANSFORM_DESC_POINTER_MODE: cublasLtMatrixTransformDescAttributes_t = + cublasLtMatrixTransformDescAttributes_t(1); +} +impl cublasLtMatrixTransformDescAttributes_t { + #[doc = " Transform of matrix A, see cublasOperation_t.\n\n int32_t, default: CUBLAS_OP_N"] + pub const CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSA: cublasLtMatrixTransformDescAttributes_t = + cublasLtMatrixTransformDescAttributes_t(2); +} +impl cublasLtMatrixTransformDescAttributes_t { + #[doc = " Transform of matrix B, see cublasOperation_t.\n\n int32_t, default: CUBLAS_OP_N"] + pub const CUBLASLT_MATRIX_TRANSFORM_DESC_TRANSB: cublasLtMatrixTransformDescAttributes_t = + cublasLtMatrixTransformDescAttributes_t(3); +} +#[repr(transparent)] +#[doc = " Matrix transform descriptor attributes to define details of the operation."] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtMatrixTransformDescAttributes_t(pub ::std::os::raw::c_uint); + +#[doc = " Internal. 
Do not use directly."] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatrixTransformDescInit_internal( + transformDesc: cublasLtMatrixTransformDesc_t, + size: usize, + scaleType: cudaDataType, +) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Create new matrix transform operation descriptor.\n\n \\retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated\n \\retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatrixTransformDescCreate( + transformDesc: *mut cublasLtMatrixTransformDesc_t, + scaleType: cudaDataType, +) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Destroy matrix transform operation descriptor.\n\n \\retval CUBLAS_STATUS_SUCCESS if operation was successful"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatrixTransformDescDestroy( + transformDesc: cublasLtMatrixTransformDesc_t, +) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Set matrix transform operation descriptor attribute.\n\n \\param[in] transformDesc The descriptor\n \\param[in] attr The attribute\n \\param[in] buf memory address containing the new value\n \\param[in] sizeInBytes size of buf buffer for verification (in bytes)\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for\n selected attribute\n \\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatrixTransformDescSetAttribute( + transformDesc: cublasLtMatrixTransformDesc_t, + attr: cublasLtMatrixTransformDescAttributes_t, + buf: *const ::std::os::raw::c_void, + sizeInBytes: usize, +) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Get matrix transform operation descriptor attribute.\n\n \\param[in] transformDesc The descriptor\n \\param[in] attr The attribute\n \\param[out] buf memory address containing the new value\n \\param[in] sizeInBytes size of buf buffer for verification (in bytes)\n \\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. 
If sizeInBytes is non-zero: number\n of bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero\n and buf is NULL or sizeInBytes doesn't match size of internal storage for\n selected attribute\n \\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatrixTransformDescGetAttribute( + transformDesc: cublasLtMatrixTransformDesc_t, + attr: cublasLtMatrixTransformDescAttributes_t, + buf: *mut ::std::os::raw::c_void, + sizeInBytes: usize, + sizeWritten: *mut usize, +) -> cublasStatus_t { + crate::unsupported() +} +impl cublasLt3mMode_t { + pub const CUBLASLT_3M_MODE_DISALLOWED: cublasLt3mMode_t = cublasLt3mMode_t(0); +} +impl cublasLt3mMode_t { + pub const CUBLASLT_3M_MODE_ALLOWED: cublasLt3mMode_t = cublasLt3mMode_t(1); +} +#[repr(transparent)] +#[doc = " For computation with complex numbers, this enum allows to apply the Gauss Complexity reduction algorithm"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLt3mMode_t(pub ::std::os::raw::c_uint); +impl cublasLtReductionScheme_t { + #[doc = " No reduction scheme, dot-product shall be performed in one sequence."] + pub const CUBLASLT_REDUCTION_SCHEME_NONE: cublasLtReductionScheme_t = + cublasLtReductionScheme_t(0); +} +impl cublasLtReductionScheme_t { + #[doc = " Reduction is performed \"in place\" - using the output buffer (and output data type) and counters (in workspace) to\n guarantee the sequentiality."] + pub const CUBLASLT_REDUCTION_SCHEME_INPLACE: cublasLtReductionScheme_t = + cublasLtReductionScheme_t(1); +} +impl cublasLtReductionScheme_t { + #[doc = " Intermediate results are stored in compute type in the workspace and reduced in a separate step."] + pub const CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE: cublasLtReductionScheme_t = + cublasLtReductionScheme_t(2); +} +impl cublasLtReductionScheme_t { + #[doc = " Intermediate results are stored in output type in the workspace and reduced in a separate step."] + pub const CUBLASLT_REDUCTION_SCHEME_OUTPUT_TYPE: cublasLtReductionScheme_t = + cublasLtReductionScheme_t(4); +} +impl cublasLtReductionScheme_t { + #[doc = " Intermediate results are stored in output type in the workspace and reduced in a separate step."] + pub const CUBLASLT_REDUCTION_SCHEME_MASK: cublasLtReductionScheme_t = + cublasLtReductionScheme_t(7); +} +#[repr(transparent)] +#[doc = " Reduction scheme for portions of the dot-product calculated in parallel (a. k. a. 
\"split - K\")."] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtReductionScheme_t(pub ::std::os::raw::c_uint); +impl cublasLtEpilogue_t { + #[doc = " No special postprocessing, just scale and quantize results if necessary."] + pub const CUBLASLT_EPILOGUE_DEFAULT: cublasLtEpilogue_t = cublasLtEpilogue_t(1); +} +impl cublasLtEpilogue_t { + #[doc = " ReLu, apply ReLu point-wise transform to the results (x:=max(x, 0))."] + pub const CUBLASLT_EPILOGUE_RELU: cublasLtEpilogue_t = cublasLtEpilogue_t(2); +} +impl cublasLtEpilogue_t { + #[doc = " ReLu, apply ReLu point-wise transform to the results (x:=max(x, 0)).\n\n This epilogue mode produces an extra output, a ReLu bit-mask matrix,\n see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER."] + pub const CUBLASLT_EPILOGUE_RELU_AUX: cublasLtEpilogue_t = cublasLtEpilogue_t(130); +} +impl cublasLtEpilogue_t { + #[doc = " Bias, apply (broadcasted) Bias from bias vector. Bias vector length must match matrix D rows, it must be packed\n (stride between vector elements is 1). Bias vector is broadcasted to all columns and added before applying final\n postprocessing."] + pub const CUBLASLT_EPILOGUE_BIAS: cublasLtEpilogue_t = cublasLtEpilogue_t(4); +} +impl cublasLtEpilogue_t { + #[doc = " ReLu and Bias, apply Bias and then ReLu transform"] + pub const CUBLASLT_EPILOGUE_RELU_BIAS: cublasLtEpilogue_t = cublasLtEpilogue_t(6); +} +impl cublasLtEpilogue_t { + #[doc = " ReLu and Bias, apply Bias and then ReLu transform\n\n This epilogue mode produces an extra output, a ReLu bit-mask matrix,\n see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER."] + pub const CUBLASLT_EPILOGUE_RELU_AUX_BIAS: cublasLtEpilogue_t = cublasLtEpilogue_t(134); +} +impl cublasLtEpilogue_t { + #[doc = " ReLu and Bias, apply Bias and then ReLu transform\n\n This epilogue mode produces an extra output, a ReLu bit-mask matrix,\n see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER."] + pub const CUBLASLT_EPILOGUE_DRELU: cublasLtEpilogue_t = cublasLtEpilogue_t(136); +} +impl cublasLtEpilogue_t { + #[doc = " ReLu and Bias, apply Bias and then ReLu transform\n\n This epilogue mode produces an extra output, a ReLu bit-mask matrix,\n see CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER."] + pub const CUBLASLT_EPILOGUE_DRELU_BGRAD: cublasLtEpilogue_t = cublasLtEpilogue_t(152); +} +impl cublasLtEpilogue_t { + #[doc = " GELU, apply GELU point-wise transform to the results (x:=GELU(x))."] + pub const CUBLASLT_EPILOGUE_GELU: cublasLtEpilogue_t = cublasLtEpilogue_t(32); +} +impl cublasLtEpilogue_t { + #[doc = " GELU, apply GELU point-wise transform to the results (x:=GELU(x)).\n\n This epilogue mode outputs GELU input as a separate matrix (useful for training).\n See CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER."] + pub const CUBLASLT_EPILOGUE_GELU_AUX: cublasLtEpilogue_t = cublasLtEpilogue_t(160); +} +impl cublasLtEpilogue_t { + #[doc = " GELU and Bias, apply Bias and then GELU transform"] + pub const CUBLASLT_EPILOGUE_GELU_BIAS: cublasLtEpilogue_t = cublasLtEpilogue_t(36); +} +impl cublasLtEpilogue_t { + #[doc = " GELU and Bias, apply Bias and then GELU transform\n\n This epilogue mode outputs GELU input as a separate matrix (useful for training).\n See CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER."] + pub const CUBLASLT_EPILOGUE_GELU_AUX_BIAS: cublasLtEpilogue_t = cublasLtEpilogue_t(164); +} +impl cublasLtEpilogue_t { + #[doc = " GELU and Bias, apply Bias and then GELU transform\n\n This epilogue mode outputs GELU input as a separate matrix (useful for training).\n See CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER."] + 
pub const CUBLASLT_EPILOGUE_DGELU: cublasLtEpilogue_t = cublasLtEpilogue_t(192); +} +impl cublasLtEpilogue_t { + #[doc = " GELU and Bias, apply Bias and then GELU transform\n\n This epilogue mode outputs GELU input as a separate matrix (useful for training).\n See CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER."] + pub const CUBLASLT_EPILOGUE_DGELU_BGRAD: cublasLtEpilogue_t = cublasLtEpilogue_t(208); +} +impl cublasLtEpilogue_t { + #[doc = " Bias gradient based on the input matrix A.\n\n The bias size corresponds to the number of rows of the matrix D.\n The reduction happens over the GEMM's \"k\" dimension.\n\n Stores Bias gradient in the auxiliary output\n (see CUBLASLT_MATMUL_DESC_BIAS_POINTER)."] + pub const CUBLASLT_EPILOGUE_BGRADA: cublasLtEpilogue_t = cublasLtEpilogue_t(256); +} +impl cublasLtEpilogue_t { + #[doc = " Bias gradient based on the input matrix B.\n\n The bias size corresponds to the number of columns of the matrix D.\n The reduction happens over the GEMM's \"k\" dimension.\n\n Stores Bias gradient in the auxiliary output\n (see CUBLASLT_MATMUL_DESC_BIAS_POINTER)."] + pub const CUBLASLT_EPILOGUE_BGRADB: cublasLtEpilogue_t = cublasLtEpilogue_t(512); +} +#[repr(transparent)] +#[doc = " Postprocessing options for the epilogue"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtEpilogue_t(pub ::std::os::raw::c_uint); +impl cublasLtMatmulSearch_t { + #[doc = " ask heuristics for best algo for given usecase"] + pub const CUBLASLT_SEARCH_BEST_FIT: cublasLtMatmulSearch_t = cublasLtMatmulSearch_t(0); +} +impl cublasLtMatmulSearch_t { + #[doc = " only try to find best config for preconfigured algo id"] + pub const CUBLASLT_SEARCH_LIMITED_BY_ALGO_ID: cublasLtMatmulSearch_t = + cublasLtMatmulSearch_t(1); +} +impl cublasLtMatmulSearch_t { + #[doc = " reserved for future use"] + pub const CUBLASLT_SEARCH_RESERVED_02: cublasLtMatmulSearch_t = cublasLtMatmulSearch_t(2); +} +impl cublasLtMatmulSearch_t { + #[doc = " reserved for future use"] + pub const CUBLASLT_SEARCH_RESERVED_03: cublasLtMatmulSearch_t = cublasLtMatmulSearch_t(3); +} +impl cublasLtMatmulSearch_t { + #[doc = " reserved for future use"] + pub const CUBLASLT_SEARCH_RESERVED_04: cublasLtMatmulSearch_t = cublasLtMatmulSearch_t(4); +} +impl cublasLtMatmulSearch_t { + #[doc = " reserved for future use"] + pub const CUBLASLT_SEARCH_RESERVED_05: cublasLtMatmulSearch_t = cublasLtMatmulSearch_t(5); +} +#[repr(transparent)] +#[doc = " Matmul heuristic search mode"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtMatmulSearch_t(pub ::std::os::raw::c_uint); +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Search mode, see cublasLtMatmulSearch_t.\n\n uint32_t, default: CUBLASLT_SEARCH_BEST_FIT"] + pub const CUBLASLT_MATMUL_PREF_SEARCH_MODE: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(0); +} +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Maximum allowed workspace size in bytes.\n\n uint64_t, default: 0 - no workspace allowed"] + pub const CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(1); +} +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Math mode mask, see cublasMath_t.\n\n Only algorithms with CUBLASLT_ALGO_CAP_MATHMODE_IMPL that is not masked out by this attribute are allowed.\n\n uint32_t, default: 1 (allows both default and tensor op math)\n DEPRECATED, will be removed in a future release, see cublasLtNumericalImplFlags_t for replacement"] + pub const 
CUBLASLT_MATMUL_PREF_MATH_MODE_MASK: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(2); +} +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Reduction scheme mask, see cublasLtReductionScheme_t. Filters heuristic result to only include algo configs that\n use one of the required modes.\n\n E.g. mask value of 0x03 will allow only INPLACE and COMPUTE_TYPE reduction schemes.\n\n uint32_t, default: CUBLASLT_REDUCTION_SCHEME_MASK (allows all reduction schemes)"] + pub const CUBLASLT_MATMUL_PREF_REDUCTION_SCHEME_MASK: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(3); +} +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Gaussian mode mask, see cublasLt3mMode_t.\n\n Only algorithms with CUBLASLT_ALGO_CAP_GAUSSIAN_IMPL that is not masked out by this attribute are allowed.\n\n uint32_t, default: CUBLASLT_3M_MODE_ALLOWED (allows both gaussian and non-gaussian algorithms)\n DEPRECATED, will be removed in a future release, see cublasLtNumericalImplFlags_t for replacement"] + pub const CUBLASLT_MATMUL_PREF_GAUSSIAN_MODE_MASK: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(4); +} +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Minimum buffer alignment for matrix A (in bytes).\n\n Selecting a smaller value will exclude algorithms that can not work with matrix A that is not as strictly aligned\n as they need.\n\n uint32_t, default: 256"] + pub const CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_A_BYTES: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(5); +} +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Minimum buffer alignment for matrix B (in bytes).\n\n Selecting a smaller value will exclude algorithms that can not work with matrix B that is not as strictly aligned\n as they need.\n\n uint32_t, default: 256"] + pub const CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_B_BYTES: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(6); +} +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Minimum buffer alignment for matrix C (in bytes).\n\n Selecting a smaller value will exclude algorithms that can not work with matrix C that is not as strictly aligned\n as they need.\n\n uint32_t, default: 256"] + pub const CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_C_BYTES: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(7); +} +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Minimum buffer alignment for matrix D (in bytes).\n\n Selecting a smaller value will exclude algorithms that can not work with matrix D that is not as strictly aligned\n as they need.\n\n uint32_t, default: 256"] + pub const CUBLASLT_MATMUL_PREF_MIN_ALIGNMENT_D_BYTES: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(8); +} +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Maximum wave count.\n\n See cublasLtMatmulHeuristicResult_t::wavesCount.\n\n Selecting a non-zero value will exclude algorithms that report device utilization higher than specified.\n\n float, default: 0.0f"] + pub const CUBLASLT_MATMUL_PREF_MAX_WAVES_COUNT: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(9); +} +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Pointer mode mask, see cublasLtPointerModeMask_t. 
Filters heuristic result to only include algorithms that support\n all required modes.\n\n uint32_t, default: (CUBLASLT_POINTER_MODE_MASK_HOST | CUBLASLT_POINTER_MODE_MASK_DEVICE) (only allows algorithms\n that support both regular host and device pointers)"] + pub const CUBLASLT_MATMUL_PREF_POINTER_MODE_MASK: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(10); +} +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Epilogue selector mask, see cublasLtEpilogue_t. Filters heuristic result to only include algorithms that support\n all required operations.\n\n uint32_t, default: CUBLASLT_EPILOGUE_DEFAULT (only allows algorithms that support default epilogue)"] + pub const CUBLASLT_MATMUL_PREF_EPILOGUE_MASK: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(11); +} +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Numerical implementation details mask, see cublasLtNumericalImplFlags_t. Filters heuristic result to only include\n algorithms that use the allowed implementations.\n\n uint64_t, default: uint64_t(-1) (allow everything)"] + pub const CUBLASLT_MATMUL_PREF_IMPL_MASK: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(12); +} +impl cublasLtMatmulPreferenceAttributes_t { + #[doc = " Number of SMs to target for parallel execution. Optimizes heuristics for execution on a different number of SMs\n when user expects a concurrent stream to be using some of the device resources.\n\n Overrides the SM count target set in the matrix multiplication descriptor (see cublasLtMatmulDescAttributes_t).\n\n int32_t, default: 0 - use the number reported by the device.\n DEPRECATED, will be removed in a future release, see cublasLtMatmulDescAttributes_t for replacement"] + pub const CUBLASLT_MATMUL_PREF_SM_COUNT_TARGET: cublasLtMatmulPreferenceAttributes_t = + cublasLtMatmulPreferenceAttributes_t(13); +} +#[repr(transparent)] +#[doc = " Algo search preference to fine tune the heuristic function."] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtMatmulPreferenceAttributes_t(pub ::std::os::raw::c_uint); + +#[doc = " Internal. 
Do not use directly."] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulPreferenceInit_internal( + pref: cublasLtMatmulPreference_t, + size: usize, +) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Create new matmul heuristic search preference descriptor.\n\n \\retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated\n \\retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulPreferenceCreate( + pref: *mut cublasLtMatmulPreference_t, +) -> cublasStatus_t { + crate::matmul_preference_create(pref) +} + +#[doc = " Destroy matmul heuristic search preference descriptor.\n\n \\retval CUBLAS_STATUS_SUCCESS if operation was successful"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulPreferenceDestroy( + pref: cublasLtMatmulPreference_t, +) -> cublasStatus_t { + crate::matmul_preference_destroy(pref) +} + +#[doc = " Set matmul heuristic search preference descriptor attribute.\n\n \\param[in] pref The descriptor\n \\param[in] attr The attribute\n \\param[in] buf memory address containing the new value\n \\param[in] sizeInBytes size of buf buffer for verification (in bytes)\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for\n selected attribute\n \\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulPreferenceSetAttribute( + pref: cublasLtMatmulPreference_t, + attr: cublasLtMatmulPreferenceAttributes_t, + buf: *const ::std::os::raw::c_void, + sizeInBytes: usize, +) -> cublasStatus_t { + crate::matmul_preference_set_attribute(pref, attr, buf, sizeInBytes) +} + +#[doc = " Get matmul heuristic search preference descriptor attribute.\n\n \\param[in] pref The descriptor\n \\param[in] attr The attribute\n \\param[out] buf memory address containing the new value\n \\param[in] sizeInBytes size of buf buffer for verification (in bytes)\n \\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. 
If sizeInBytes is non-zero: number of\n bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero\n and buf is NULL or sizeInBytes doesn't match size of internal storage for\n selected attribute\n \\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulPreferenceGetAttribute( + pref: cublasLtMatmulPreference_t, + attr: cublasLtMatmulPreferenceAttributes_t, + buf: *mut ::std::os::raw::c_void, + sizeInBytes: usize, + sizeWritten: *mut usize, +) -> cublasStatus_t { + crate::unsupported() +} +#[doc = " Results structure used by cublasLtMatmulGetAlgo.\n\n Holds returned configured algo descriptor and its runtime properties."] +#[repr(C)] +#[derive(Copy, Clone)] +pub struct cublasLtMatmulHeuristicResult_t { + #[doc = " Matmul algorithm descriptor.\n\n Must be initialized with cublasLtMatmulAlgoInit() if preferences' CUBLASLT_MATMUL_PERF_SEARCH_MODE is set to\n CUBLASLT_SEARCH_LIMITED_BY_ALGO_ID"] + pub algo: cublasLtMatmulAlgo_t, + #[doc = " Actual size of workspace memory required."] + pub workspaceSize: usize, + #[doc = " Result status, other fields are only valid if after call to cublasLtMatmulAlgoGetHeuristic() this member is set to\n CUBLAS_STATUS_SUCCESS."] + pub state: cublasStatus_t, + #[doc = " Waves count - a device utilization metric.\n\n wavesCount value of 1.0f suggests that when kernel is launched it will fully occupy the GPU."] + pub wavesCount: f32, + pub reserved: [::std::os::raw::c_int; 4usize], +} + +#[doc = " Query cublasLt heuristic for algorithm appropriate for given use case.\n\n \\param[in] lightHandle Pointer to the allocated cuBLASLt handle for the cuBLASLt\n context. See cublasLtHandle_t.\n \\param[in] operationDesc Handle to the matrix multiplication descriptor.\n \\param[in] Adesc Handle to the layout descriptors for matrix A.\n \\param[in] Bdesc Handle to the layout descriptors for matrix B.\n \\param[in] Cdesc Handle to the layout descriptors for matrix C.\n \\param[in] Ddesc Handle to the layout descriptors for matrix D.\n \\param[in] preference Pointer to the structure holding the heuristic search\n preferences descriptor. 
See cublasLtMatrixLayout_t.\n \\param[in] requestedAlgoCount Size of heuristicResultsArray (in elements) and requested\n maximum number of algorithms to return.\n \\param[in, out] heuristicResultsArray Output algorithms and associated runtime characteristics,\n ordered in increasing estimated compute time.\n \\param[out] returnAlgoCount The number of heuristicResultsArray elements written.\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if requestedAlgoCount is less or equal to zero\n \\retval CUBLAS_STATUS_NOT_SUPPORTED if no heuristic function available for current configuration\n \\retval CUBLAS_STATUS_SUCCESS if query was successful, inspect\n heuristicResultsArray[0 to (returnAlgoCount - 1)].state\n for detail status of results"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulAlgoGetHeuristic( + lightHandle: cublasLtHandle_t, + operationDesc: cublasLtMatmulDesc_t, + Adesc: cublasLtMatrixLayout_t, + Bdesc: cublasLtMatrixLayout_t, + Cdesc: cublasLtMatrixLayout_t, + Ddesc: cublasLtMatrixLayout_t, + preference: cublasLtMatmulPreference_t, + requestedAlgoCount: ::std::os::raw::c_int, + heuristicResultsArray: *mut cublasLtMatmulHeuristicResult_t, + returnAlgoCount: *mut ::std::os::raw::c_int, +) -> cublasStatus_t { + crate::matmul_algo_get_heuristic( + lightHandle, + operationDesc, + Adesc, + Bdesc, + Cdesc, + Ddesc, + preference, + requestedAlgoCount, + heuristicResultsArray, + returnAlgoCount, + ) +} + +#[doc = " Routine to get all algo IDs that can potentially run\n\n \\param[in] int requestedAlgoCount requested number of algos (must be less or equal to size of algoIdsA\n (in elements)) \\param[out] algoIdsA array to write algoIds to \\param[out] returnAlgoCount number of algoIds\n actually written\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if requestedAlgoCount is less or equal to zero\n \\retval CUBLAS_STATUS_SUCCESS if query was successful, inspect returnAlgoCount to get actual number of IDs\n available"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulAlgoGetIds( + lightHandle: cublasLtHandle_t, + computeType: cublasComputeType_t, + scaleType: cudaDataType_t, + Atype: cudaDataType_t, + Btype: cudaDataType_t, + Ctype: cudaDataType_t, + Dtype: cudaDataType_t, + requestedAlgoCount: ::std::os::raw::c_int, + algoIdsArray: *mut ::std::os::raw::c_int, + returnAlgoCount: *mut ::std::os::raw::c_int, +) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Initialize algo structure\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if algo is NULL or algoId is outside of recognized range\n \\retval CUBLAS_STATUS_NOT_SUPPORTED if algoId is not supported for given combination of data types\n \\retval CUBLAS_STATUS_SUCCESS if the structure was successfully initialized"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulAlgoInit( + lightHandle: cublasLtHandle_t, + computeType: cublasComputeType_t, + scaleType: cudaDataType_t, + Atype: cudaDataType_t, + Btype: cudaDataType_t, + Ctype: cudaDataType_t, + Dtype: cudaDataType_t, + algoId: ::std::os::raw::c_int, + algo: *mut cublasLtMatmulAlgo_t, +) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Check configured algo descriptor for correctness and support on current device.\n\n Result includes required workspace size and calculated wave count.\n\n CUBLAS_STATUS_SUCCESS doesn't fully guarantee algo will run (will fail if e.g. 
buffers are not correctly aligned);\n but if cublasLtMatmulAlgoCheck fails, the algo will not run.\n\n \\param[in] algo algo configuration to check\n \\param[out] result result structure to report algo runtime characteristics; algo field is never updated\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if matrix layout descriptors or operation descriptor don't match algo\n descriptor\n \\retval CUBLAS_STATUS_NOT_SUPPORTED if algo configuration or data type combination is not currently supported on\n given device\n \\retval CUBLAS_STATUS_ARCH_MISMATCH if algo configuration cannot be run using the selected device\n \\retval CUBLAS_STATUS_SUCCESS if check was successful"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulAlgoCheck( + lightHandle: cublasLtHandle_t, + operationDesc: cublasLtMatmulDesc_t, + Adesc: cublasLtMatrixLayout_t, + Bdesc: cublasLtMatrixLayout_t, + Cdesc: cublasLtMatrixLayout_t, + Ddesc: cublasLtMatrixLayout_t, + algo: *const cublasLtMatmulAlgo_t, + result: *mut cublasLtMatmulHeuristicResult_t, +) -> cublasStatus_t { + crate::unsupported() +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " support for split K, see CUBLASLT_ALGO_CONFIG_SPLITK_NUM\n\n int32_t, 0 means no support, supported otherwise"] + pub const CUBLASLT_ALGO_CAP_SPLITK_SUPPORT: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(0); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " reduction scheme mask, see cublasLtReductionScheme_t; shows supported reduction schemes, if reduction scheme is\n not masked out it is supported.\n\n e.g. int isReductionSchemeComputeTypeSupported ? (reductionSchemeMask & CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE) ==\n CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE ? 1 : 0;\n\n uint32_t"] + pub const CUBLASLT_ALGO_CAP_REDUCTION_SCHEME_MASK: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(1); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " support for cta swizzling, see CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING\n\n uint32_t, 0 means no support, 1 means supported value of 1, other values are reserved"] + pub const CUBLASLT_ALGO_CAP_CTA_SWIZZLING_SUPPORT: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(2); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " support strided batch\n\n int32_t, 0 means no support, supported otherwise"] + pub const CUBLASLT_ALGO_CAP_STRIDED_BATCH_SUPPORT: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(3); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " support results out of place (D != C in D = alpha.A.B + beta.C)\n\n int32_t, 0 means no support, supported otherwise"] + pub const CUBLASLT_ALGO_CAP_OUT_OF_PLACE_RESULT_SUPPORT: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(4); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " syrk/herk support (on top of regular gemm)\n\n int32_t, 0 means no support, supported otherwise"] + pub const CUBLASLT_ALGO_CAP_UPLO_SUPPORT: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(5); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " tile ids possible to use, see cublasLtMatmulTile_t; if no tile ids are supported use\n CUBLASLT_MATMUL_TILE_UNDEFINED\n\n use cublasLtMatmulAlgoCapGetAttribute() with sizeInBytes=0 to query actual count\n\n array of uint32_t"] + pub const CUBLASLT_ALGO_CAP_TILE_IDS: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(6); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " custom option range is 
from 0 to CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX (inclusive), see\n CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION\n\n int32_t"] + pub const CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(7); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " whether algorithm is using regular compute or tensor operations\n\n int32_t 0 means regular compute, 1 means tensor operations;\n DEPRECATED"] + pub const CUBLASLT_ALGO_CAP_MATHMODE_IMPL: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(8); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " whether algorithm implements gaussian optimization of complex matrix multiplication, see cublasMath_t\n\n int32_t 0 means regular compute, 1 means gaussian;\n DEPRECATED"] + pub const CUBLASLT_ALGO_CAP_GAUSSIAN_IMPL: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(9); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " whether algorithm supports custom (not COL or ROW memory order), see cublasLtOrder_t\n\n int32_t 0 means only COL and ROW memory order is allowed, non-zero means that algo might have different\n requirements;"] + pub const CUBLASLT_ALGO_CAP_CUSTOM_MEMORY_ORDER: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(10); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " bitmask enumerating pointer modes algorithm supports\n\n uint32_t, see cublasLtPointerModeMask_t"] + pub const CUBLASLT_ALGO_CAP_POINTER_MODE_MASK: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(11); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " bitmask enumerating kinds of postprocessing algorithm supports in the epilogue\n\n uint32_t, see cublasLtEpilogue_t"] + pub const CUBLASLT_ALGO_CAP_EPILOGUE_MASK: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(12); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " stages ids possible to use, see cublasLtMatmulStages_t; if no stages ids are supported use\n CUBLASLT_MATMUL_STAGES_UNDEFINED\n\n use cublasLtMatmulAlgoCapGetAttribute() with sizeInBytes=0 to query actual count\n\n array of uint32_t"] + pub const CUBLASLT_ALGO_CAP_STAGES_IDS: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(13); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " support for nagative ld for all of the matrices\n\n int32_t 0 means no support, supported otherwise"] + pub const CUBLASLT_ALGO_CAP_LD_NEGATIVE: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(14); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " details about algorithm's implementation that affect it's numerical behavior\n\n uint64_t, see cublasLtNumericalImplFlags_t"] + pub const CUBLASLT_ALGO_CAP_NUMERICAL_IMPL_FLAGS: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(15); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " minimum alignment required for A matrix in bytes\n (required for buffer pointer, leading dimension, and possibly other strides defined for matrix memory order)\n\n uint32_t"] + pub const CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_A_BYTES: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(16); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " minimum alignment required for B matrix in bytes\n (required for buffer pointer, leading dimension, and possibly other strides defined for matrix memory order)\n\n uint32_t"] + pub const CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_B_BYTES: 
cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(17); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " minimum alignment required for C matrix in bytes\n (required for buffer pointer, leading dimension, and possibly other strides defined for matrix memory order)\n\n uint32_t"] + pub const CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_C_BYTES: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(18); +} +impl cublasLtMatmulAlgoCapAttributes_t { + #[doc = " minimum alignment required for D matrix in bytes\n (required for buffer pointer, leading dimension, and possibly other strides defined for matrix memory order)\n\n uint32_t"] + pub const CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_D_BYTES: cublasLtMatmulAlgoCapAttributes_t = + cublasLtMatmulAlgoCapAttributes_t(19); +} +#[repr(transparent)] +#[doc = " Capabilities Attributes that can be retrieved from an initialized Algo structure"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtMatmulAlgoCapAttributes_t(pub ::std::os::raw::c_uint); + +#[doc = " Get algo capability attribute.\n\n E.g. to get list of supported Tile IDs:\n cublasLtMatmulTile_t tiles[CUBLASLT_MATMUL_TILE_END];\n size_t num_tiles, size_written;\n if (cublasLtMatmulAlgoCapGetAttribute(algo, CUBLASLT_ALGO_CAP_TILE_IDS, tiles, sizeof(tiles), size_written) ==\n CUBLAS_STATUS_SUCCESS) { num_tiles = size_written / sizeof(tiles[0]);\n }\n\n \\param[in] algo The algo descriptor\n \\param[in] attr The attribute\n \\param[out] buf memory address containing the new value\n \\param[in] sizeInBytes size of buf buffer for verification (in bytes)\n \\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of\n bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero\n and buf is NULL or sizeInBytes doesn't match size of internal storage for\n selected attribute\n \\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulAlgoCapGetAttribute( + algo: *const cublasLtMatmulAlgo_t, + attr: cublasLtMatmulAlgoCapAttributes_t, + buf: *mut ::std::os::raw::c_void, + sizeInBytes: usize, + sizeWritten: *mut usize, +) -> cublasStatus_t { + crate::unsupported() +} +impl cublasLtMatmulAlgoConfigAttributes_t { + #[doc = " algorithm index, see cublasLtMatmulAlgoGetIds()\n\n readonly, set by cublasLtMatmulAlgoInit()\n int32_t"] + pub const CUBLASLT_ALGO_CONFIG_ID: cublasLtMatmulAlgoConfigAttributes_t = + cublasLtMatmulAlgoConfigAttributes_t(0); +} +impl cublasLtMatmulAlgoConfigAttributes_t { + #[doc = " tile id, see cublasLtMatmulTile_t\n\n uint32_t, default: CUBLASLT_MATMUL_TILE_UNDEFINED"] + pub const CUBLASLT_ALGO_CONFIG_TILE_ID: cublasLtMatmulAlgoConfigAttributes_t = + cublasLtMatmulAlgoConfigAttributes_t(1); +} +impl cublasLtMatmulAlgoConfigAttributes_t { + #[doc = " Number of K splits. If the number of K splits is greater than one, SPLITK_NUM parts\n of matrix multiplication will be computed in parallel. 
The results will be accumulated\n according to CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME\n\n int32_t, default: 1"] + pub const CUBLASLT_ALGO_CONFIG_SPLITK_NUM: cublasLtMatmulAlgoConfigAttributes_t = + cublasLtMatmulAlgoConfigAttributes_t(2); +} +impl cublasLtMatmulAlgoConfigAttributes_t { + #[doc = " reduction scheme, see cublasLtReductionScheme_t\n\n uint32_t, default: CUBLASLT_REDUCTION_SCHEME_NONE"] + pub const CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME: cublasLtMatmulAlgoConfigAttributes_t = + cublasLtMatmulAlgoConfigAttributes_t(3); +} +impl cublasLtMatmulAlgoConfigAttributes_t { + #[doc = " cta swizzling, change mapping from CUDA grid coordinates to parts of the matrices\n\n possible values: 0, 1, other values reserved\n\n uint32_t, default: 0"] + pub const CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING: cublasLtMatmulAlgoConfigAttributes_t = + cublasLtMatmulAlgoConfigAttributes_t(4); +} +impl cublasLtMatmulAlgoConfigAttributes_t { + #[doc = " custom option, each algorithm can support some custom options that don't fit description of the other config\n attributes, see CUBLASLT_ALGO_CAP_CUSTOM_OPTION_MAX to get accepted range for any specific case\n\n uint32_t, default: 0"] + pub const CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION: cublasLtMatmulAlgoConfigAttributes_t = + cublasLtMatmulAlgoConfigAttributes_t(5); +} +impl cublasLtMatmulAlgoConfigAttributes_t { + #[doc = " stages id, see cublasLtMatmulStages_t\n\n uint32_t, default: CUBLASLT_MATMUL_STAGES_UNDEFINED"] + pub const CUBLASLT_ALGO_CONFIG_STAGES_ID: cublasLtMatmulAlgoConfigAttributes_t = + cublasLtMatmulAlgoConfigAttributes_t(6); +} +impl cublasLtMatmulAlgoConfigAttributes_t { + #[doc = " inner shape id, see cublasLtMatmulInnerShape_t\n\n uint16_t, default: 0 (CUBLASLT_MATMUL_INNER_SHAPE_UNDEFINED)"] + pub const CUBLASLT_ALGO_CONFIG_INNER_SHAPE_ID: cublasLtMatmulAlgoConfigAttributes_t = + cublasLtMatmulAlgoConfigAttributes_t(7); +} +impl cublasLtMatmulAlgoConfigAttributes_t { + #[doc = " Thread Block Cluster shape id, see cublasLtClusterShape_t. Defines cluster size to use.\n\n uint16_t, default: 0 (CUBLASLT_CLUSTER_SHAPE_AUTO)"] + pub const CUBLASLT_ALGO_CONFIG_CLUSTER_SHAPE_ID: cublasLtMatmulAlgoConfigAttributes_t = + cublasLtMatmulAlgoConfigAttributes_t(8); +} +#[repr(transparent)] +#[doc = " Algo Configuration Attributes that can be set according to the Algo capabilities"] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct cublasLtMatmulAlgoConfigAttributes_t(pub ::std::os::raw::c_uint); + +#[doc = " Set algo configuration attribute.\n\n \\param[in] algo The algo descriptor\n \\param[in] attr The attribute\n \\param[in] buf memory address containing the new value\n \\param[in] sizeInBytes size of buf buffer for verification (in bytes)\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for\n selected attribute\n \\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulAlgoConfigSetAttribute( + algo: *mut cublasLtMatmulAlgo_t, + attr: cublasLtMatmulAlgoConfigAttributes_t, + buf: *const ::std::os::raw::c_void, + sizeInBytes: usize, +) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Get algo configuration attribute.\n\n \\param[in] algo The algo descriptor\n \\param[in] attr The attribute\n \\param[out] buf memory address containing the new value\n \\param[in] sizeInBytes size of buf buffer for verification (in bytes)\n \\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. 
If sizeInBytes is non-zero: number of\n bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero\n and buf is NULL or sizeInBytes doesn't match size of internal storage for\n selected attribute\n \\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtMatmulAlgoConfigGetAttribute( + algo: *const cublasLtMatmulAlgo_t, + attr: cublasLtMatmulAlgoConfigAttributes_t, + buf: *mut ::std::os::raw::c_void, + sizeInBytes: usize, + sizeWritten: *mut usize, +) -> cublasStatus_t { + crate::unsupported() +} +#[doc = " Experimental: Logger callback type."] +pub type cublasLtLoggerCallback_t = ::std::option::Option< + unsafe extern "C" fn( + logLevel: ::std::os::raw::c_int, + functionName: *const ::std::os::raw::c_char, + message: *const ::std::os::raw::c_char, + ), +>; + +#[doc = " Experimental: Logger callback setter.\n\n \\param[in] callback a user defined callback function to be called by the logger\n\n \\retval CUBLAS_STATUS_SUCCESS if callback was set successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtLoggerSetCallback( + callback: cublasLtLoggerCallback_t, +) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Experimental: Open log file.\n\n \\param[in] logFile log file path. if the log file does not exist, it will be created\n\n \\retval CUBLAS_STATUS_SUCCESS if log file was created successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtLoggerOpenFile( + logFile: *const ::std::os::raw::c_char, +) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Experimental: Log level setter.\n\n \\param[in] level log level, should be one of the following:\n 0. Off\n 1. Errors\n 2. Performance Trace\n 3. Performance Hints\n 4. Heuristics Trace\n 5. API Trace\n\n \\retval CUBLAS_STATUS_INVALID_VALUE if log level is not one of the above levels\n\n \\retval CUBLAS_STATUS_SUCCESS if log level was set successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtLoggerSetLevel( + level: ::std::os::raw::c_int, +) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Experimental: Log mask setter.\n\n \\param[in] mask log mask, should be a combination of the following masks:\n 0. Off\n 1. Errors\n 2. Performance Trace\n 4. Performance Hints\n 8. Heuristics Trace\n 16. API Trace\n\n \\retval CUBLAS_STATUS_SUCCESS if log mask was set successfully"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtLoggerSetMask(mask: ::std::os::raw::c_int) -> cublasStatus_t { + crate::unsupported() +} + +#[doc = " Experimental: Disable logging for the entire session.\n\n \\retval CUBLAS_STATUS_SUCCESS if disabled logging"] +#[no_mangle] +pub unsafe extern "system" fn cublasLtLoggerForceDisable() -> cublasStatus_t { + crate::unsupported() +} |
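// Illustrative usage sketch, not part of the rust-bindgen output above: it shows one
// way a caller might drive cublasLtMatmulDescCreate / cublasLtMatmulDescSetAttribute /
// cublasLtMatmulDescDestroy as declared in this file. The helper name is hypothetical,
// and the sketch assumes cublasStatus_t (with its CUBLAS_STATUS_SUCCESS constant and a
// PartialEq derive) and the opaque pointer alias cublasLtMatmulDesc_t are defined
// earlier in this file, in the same newtype style as the enums shown here.
pub unsafe fn example_create_fast_accum_desc(
    compute_type: cublasComputeType_t,
    scale_type: cudaDataType_t,
) -> Result<cublasLtMatmulDesc_t, cublasStatus_t> {
    let mut desc: cublasLtMatmulDesc_t = ::std::ptr::null_mut();
    let status = cublasLtMatmulDescCreate(&mut desc, compute_type, scale_type);
    if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS {
        return Err(status);
    }
    // CUBLASLT_MATMUL_DESC_FAST_ACCUM is documented above as an int8_t flag, so the
    // size check requires sizeInBytes to be exactly size_of::<i8>().
    let fast_accum: i8 = 1;
    let status = cublasLtMatmulDescSetAttribute(
        desc,
        cublasLtMatmulDescAttributes_t::CUBLASLT_MATMUL_DESC_FAST_ACCUM,
        &fast_accum as *const i8 as *const ::std::os::raw::c_void,
        ::std::mem::size_of::<i8>(),
    );
    if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS {
        cublasLtMatmulDescDestroy(desc);
        return Err(status);
    }
    Ok(desc)
}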
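// Illustrative usage sketch, not part of the rust-bindgen output above: the two-step
// "query the size, then read the value" pattern that the *GetAttribute documentation
// describes, applied to CUBLASLT_MATMUL_DESC_BIAS_DATA_TYPE (an int32_t per the
// documentation). The helper name is hypothetical; the same assumptions about
// cublasStatus_t as in the previous sketch apply.
pub unsafe fn example_read_bias_data_type(
    desc: cublasLtMatmulDesc_t,
) -> Result<i32, cublasStatus_t> {
    let attr = cublasLtMatmulDescAttributes_t::CUBLASLT_MATMUL_DESC_BIAS_DATA_TYPE;
    // First call: sizeInBytes == 0, so only sizeWritten is filled in with the number
    // of bytes needed to hold the full value (buf may be null in this mode).
    let mut needed: usize = 0;
    let status =
        cublasLtMatmulDescGetAttribute(desc, attr, ::std::ptr::null_mut(), 0, &mut needed);
    if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS {
        return Err(status);
    }
    debug_assert_eq!(needed, ::std::mem::size_of::<i32>());
    // Second call: pass a buffer of exactly the reported size to copy the value out.
    let mut value: i32 = 0;
    let mut written: usize = 0;
    let status = cublasLtMatmulDescGetAttribute(
        desc,
        attr,
        &mut value as *mut i32 as *mut ::std::os::raw::c_void,
        ::std::mem::size_of::<i32>(),
        &mut written,
    );
    if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS {
        return Err(status);
    }
    Ok(value)
}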
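// Illustrative only, not part of the rust-bindgen output above: the reduction-scheme
// mask described for CUBLASLT_MATMUL_PREF_REDUCTION_SCHEME_MASK can be composed from
// the scheme constants declared in this file; 0x03 allows only the INPLACE and
// COMPUTE_TYPE schemes, matching the example given in that attribute's documentation.
// The constant name is hypothetical.
pub const EXAMPLE_REDUCTION_SCHEME_MASK: u32 =
    cublasLtReductionScheme_t::CUBLASLT_REDUCTION_SCHEME_INPLACE.0
        | cublasLtReductionScheme_t::CUBLASLT_REDUCTION_SCHEME_COMPUTE_TYPE.0; // == 0x03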
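// Illustrative usage sketch, not part of the rust-bindgen output above: building a
// search preference, capping the workspace, and asking for a single heuristic result
// through cublasLtMatmulAlgoGetHeuristic. The helper name is hypothetical; the handle,
// operation descriptor, and layout descriptors are assumed to have been created with
// the corresponding *Create functions defined elsewhere in this file, and the same
// assumptions about cublasStatus_t as in the earlier sketches apply.
pub unsafe fn example_pick_algo(
    handle: cublasLtHandle_t,
    op: cublasLtMatmulDesc_t,
    a_desc: cublasLtMatrixLayout_t,
    b_desc: cublasLtMatrixLayout_t,
    c_desc: cublasLtMatrixLayout_t,
    d_desc: cublasLtMatrixLayout_t,
    workspace_limit: u64,
) -> Result<Option<cublasLtMatmulHeuristicResult_t>, cublasStatus_t> {
    let mut pref: cublasLtMatmulPreference_t = ::std::ptr::null_mut();
    let status = cublasLtMatmulPreferenceCreate(&mut pref);
    if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS {
        return Err(status);
    }
    // CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES is documented above as a uint64_t.
    let status = cublasLtMatmulPreferenceSetAttribute(
        pref,
        cublasLtMatmulPreferenceAttributes_t::CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES,
        &workspace_limit as *const u64 as *const ::std::os::raw::c_void,
        ::std::mem::size_of::<u64>(),
    );
    if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS {
        cublasLtMatmulPreferenceDestroy(pref);
        return Err(status);
    }
    // cublasLtMatmulHeuristicResult_t is plain data, so zero-initializing it is fine.
    let mut result: cublasLtMatmulHeuristicResult_t = ::std::mem::zeroed();
    let mut returned: ::std::os::raw::c_int = 0;
    let status = cublasLtMatmulAlgoGetHeuristic(
        handle, op, a_desc, b_desc, c_desc, d_desc, pref, 1, &mut result, &mut returned,
    );
    cublasLtMatmulPreferenceDestroy(pref);
    if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS {
        return Err(status);
    }
    // Per the documentation above, each returned entry's `state` field must also be
    // inspected before the algo is used.
    Ok(if returned > 0 { Some(result) } else { None })
}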
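// Illustrative usage sketch, not part of the rust-bindgen output above: wiring a Rust
// callback into the experimental logging hooks declared at the end of this file. The
// function names are hypothetical; note that in this shim cublasLtLoggerSetCallback
// and cublasLtLoggerSetLevel currently route to crate::unsupported(), so the sketch
// only shows the intended calling convention.
pub unsafe extern "C" fn example_log_to_stderr(
    level: ::std::os::raw::c_int,
    function_name: *const ::std::os::raw::c_char,
    message: *const ::std::os::raw::c_char,
) {
    // Guard against null pointers before turning the C strings into Rust strings.
    let func = if function_name.is_null() {
        ::std::borrow::Cow::from("<unknown>")
    } else {
        ::std::ffi::CStr::from_ptr(function_name).to_string_lossy()
    };
    let msg = if message.is_null() {
        ::std::borrow::Cow::from("")
    } else {
        ::std::ffi::CStr::from_ptr(message).to_string_lossy()
    };
    eprintln!("[cublasLt level {}] {}: {}", level, func, msg);
}

pub unsafe fn example_enable_error_logging() -> cublasStatus_t {
    let callback: cublasLtLoggerCallback_t = Some(example_log_to_stderr);
    let status = cublasLtLoggerSetCallback(callback);
    if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS {
        return status;
    }
    // Level 1 corresponds to "Errors" in the cublasLtLoggerSetLevel documentation above.
    cublasLtLoggerSetLevel(1)
}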