diff options
-rw-r--r-- | hip_runtime-sys/README | 2 | ||||
-rw-r--r-- | hip_runtime-sys/src/hip_runtime_api.rs | 1439 | ||||
-rw-r--r-- | zluda/src/cuda.rs | 126 | ||||
-rw-r--r-- | zluda/src/impl/context.rs | 374 | ||||
-rw-r--r-- | zluda/src/impl/device.rs | 160 | ||||
-rw-r--r-- | zluda/src/impl/export_table.rs | 138 | ||||
-rw-r--r-- | zluda/src/impl/function.rs | 258 | ||||
-rw-r--r-- | zluda/src/impl/memory.rs | 175 | ||||
-rw-r--r-- | zluda/src/impl/mod.rs | 273 | ||||
-rw-r--r-- | zluda/src/impl/module.rs | 435 | ||||
-rw-r--r-- | zluda/src/impl/ocl_ext.rs | 0 | ||||
-rw-r--r-- | zluda/src/impl/stream.rs | 253 |
12 files changed, 1431 insertions, 2202 deletions
diff --git a/hip_runtime-sys/README b/hip_runtime-sys/README index becea45..d1b2e3b 100644 --- a/hip_runtime-sys/README +++ b/hip_runtime-sys/README @@ -1,2 +1,2 @@ -bindgen include/hip_runtime_api.h -o src/hip_runtime_api.rs --no-layout-tests --size_t-is-usize --default-enum-style=newtype --whitelist-function "hip.*" --whitelist-type "hip.*" -- -I/opt/rocm/include +bindgen include/hip_runtime_api.h -o src/hip_runtime_api.rs --no-layout-tests --size_t-is-usize --default-enum-style=newtype --whitelist-function "hip.*" --whitelist-type "hip.*" -- -I/home/vosen/HIP/include -I/home/vosen/hipamd/include -I/opt/rocm/include sed -i 's/pub struct hipError_t/#[must_use]\npub struct hipError_t/g' src/hip_runtime_api.rs diff --git a/hip_runtime-sys/src/hip_runtime_api.rs b/hip_runtime-sys/src/hip_runtime_api.rs index b37b10b..b6765ce 100644 --- a/hip_runtime-sys/src/hip_runtime_api.rs +++ b/hip_runtime-sys/src/hip_runtime_api.rs @@ -809,243 +809,482 @@ impl hipError_t { #[must_use] pub struct hipError_t(pub ::std::os::raw::c_uint); impl hipDeviceAttribute_t { - #[doc = "< Maximum number of threads per block."] - pub const hipDeviceAttributeMaxThreadsPerBlock: hipDeviceAttribute_t = hipDeviceAttribute_t(0); + pub const hipDeviceAttributeCudaCompatibleBegin: hipDeviceAttribute_t = hipDeviceAttribute_t(0); } impl hipDeviceAttribute_t { - #[doc = "< Maximum x-dimension of a block."] - pub const hipDeviceAttributeMaxBlockDimX: hipDeviceAttribute_t = hipDeviceAttribute_t(1); + #[doc = "< Whether ECC support is enabled."] + pub const hipDeviceAttributeEccEnabled: hipDeviceAttribute_t = hipDeviceAttribute_t(0); } impl hipDeviceAttribute_t { - #[doc = "< Maximum y-dimension of a block."] - pub const hipDeviceAttributeMaxBlockDimY: hipDeviceAttribute_t = hipDeviceAttribute_t(2); + #[doc = "< Cuda only. The maximum size of the window policy in bytes."] + pub const hipDeviceAttributeAccessPolicyMaxWindowSize: hipDeviceAttribute_t = + hipDeviceAttribute_t(1); } impl hipDeviceAttribute_t { - #[doc = "< Maximum z-dimension of a block."] - pub const hipDeviceAttributeMaxBlockDimZ: hipDeviceAttribute_t = hipDeviceAttribute_t(3); + #[doc = "< Cuda only. Asynchronous engines number."] + pub const hipDeviceAttributeAsyncEngineCount: hipDeviceAttribute_t = hipDeviceAttribute_t(2); } impl hipDeviceAttribute_t { - #[doc = "< Maximum x-dimension of a grid."] - pub const hipDeviceAttributeMaxGridDimX: hipDeviceAttribute_t = hipDeviceAttribute_t(4); + #[doc = "< Whether host memory can be mapped into device address space"] + pub const hipDeviceAttributeCanMapHostMemory: hipDeviceAttribute_t = hipDeviceAttribute_t(3); } impl hipDeviceAttribute_t { - #[doc = "< Maximum y-dimension of a grid."] - pub const hipDeviceAttributeMaxGridDimY: hipDeviceAttribute_t = hipDeviceAttribute_t(5); + #[doc = "< Cuda only. Device can access host registered memory"] + #[doc = "< at the same virtual address as the CPU"] + pub const hipDeviceAttributeCanUseHostPointerForRegisteredMem: hipDeviceAttribute_t = + hipDeviceAttribute_t(4); } impl hipDeviceAttribute_t { - #[doc = "< Maximum z-dimension of a grid."] - pub const hipDeviceAttributeMaxGridDimZ: hipDeviceAttribute_t = hipDeviceAttribute_t(6); + #[doc = "< Peak clock frequency in kilohertz."] + pub const hipDeviceAttributeClockRate: hipDeviceAttribute_t = hipDeviceAttribute_t(5); } impl hipDeviceAttribute_t { - #[doc = "< Maximum shared memory available per block in"] - #[doc = "< bytes."] - pub const hipDeviceAttributeMaxSharedMemoryPerBlock: hipDeviceAttribute_t = + #[doc = "< Compute mode that device is currently in."] + pub const hipDeviceAttributeComputeMode: hipDeviceAttribute_t = hipDeviceAttribute_t(6); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Device supports Compute Preemption."] + pub const hipDeviceAttributeComputePreemptionSupported: hipDeviceAttribute_t = hipDeviceAttribute_t(7); } impl hipDeviceAttribute_t { - #[doc = "< Constant memory size in bytes."] - pub const hipDeviceAttributeTotalConstantMemory: hipDeviceAttribute_t = hipDeviceAttribute_t(8); + #[doc = "< Device can possibly execute multiple kernels concurrently."] + pub const hipDeviceAttributeConcurrentKernels: hipDeviceAttribute_t = hipDeviceAttribute_t(8); } impl hipDeviceAttribute_t { - #[doc = "< Warp size in threads."] - pub const hipDeviceAttributeWarpSize: hipDeviceAttribute_t = hipDeviceAttribute_t(9); + #[doc = "< Device can coherently access managed memory concurrently with the CPU"] + pub const hipDeviceAttributeConcurrentManagedAccess: hipDeviceAttribute_t = + hipDeviceAttribute_t(9); } impl hipDeviceAttribute_t { - #[doc = "< Maximum number of 32-bit registers available to a"] - #[doc = "< thread block. This number is shared by all thread"] - #[doc = "< blocks simultaneously resident on a"] - #[doc = "< multiprocessor."] - pub const hipDeviceAttributeMaxRegistersPerBlock: hipDeviceAttribute_t = - hipDeviceAttribute_t(10); + #[doc = "< Support cooperative launch"] + pub const hipDeviceAttributeCooperativeLaunch: hipDeviceAttribute_t = hipDeviceAttribute_t(10); } impl hipDeviceAttribute_t { - #[doc = "< Peak clock frequency in kilohertz."] - pub const hipDeviceAttributeClockRate: hipDeviceAttribute_t = hipDeviceAttribute_t(11); + #[doc = "< Support cooperative launch on multiple devices"] + pub const hipDeviceAttributeCooperativeMultiDeviceLaunch: hipDeviceAttribute_t = + hipDeviceAttribute_t(11); } impl hipDeviceAttribute_t { - #[doc = "< Peak memory clock frequency in kilohertz."] - pub const hipDeviceAttributeMemoryClockRate: hipDeviceAttribute_t = hipDeviceAttribute_t(12); + #[doc = "< Cuda only. Device can concurrently copy memory and execute a kernel."] + #[doc = "< Deprecated. Use instead asyncEngineCount."] + pub const hipDeviceAttributeDeviceOverlap: hipDeviceAttribute_t = hipDeviceAttribute_t(12); } impl hipDeviceAttribute_t { - #[doc = "< Global memory bus width in bits."] - pub const hipDeviceAttributeMemoryBusWidth: hipDeviceAttribute_t = hipDeviceAttribute_t(13); + #[doc = "< Host can directly access managed memory on"] + #[doc = "< the device without migration"] + pub const hipDeviceAttributeDirectManagedMemAccessFromHost: hipDeviceAttribute_t = + hipDeviceAttribute_t(13); } impl hipDeviceAttribute_t { - #[doc = "< Number of multiprocessors on the device."] - pub const hipDeviceAttributeMultiprocessorCount: hipDeviceAttribute_t = + #[doc = "< Cuda only. Device supports caching globals in L1"] + pub const hipDeviceAttributeGlobalL1CacheSupported: hipDeviceAttribute_t = hipDeviceAttribute_t(14); } impl hipDeviceAttribute_t { - #[doc = "< Compute mode that device is currently in."] - pub const hipDeviceAttributeComputeMode: hipDeviceAttribute_t = hipDeviceAttribute_t(15); + #[doc = "< Cuda only. Link between the device and the host supports native atomic operations"] + pub const hipDeviceAttributeHostNativeAtomicSupported: hipDeviceAttribute_t = + hipDeviceAttribute_t(15); } impl hipDeviceAttribute_t { - #[doc = "< Size of L2 cache in bytes. 0 if the device doesn't have L2"] - #[doc = "< cache."] - pub const hipDeviceAttributeL2CacheSize: hipDeviceAttribute_t = hipDeviceAttribute_t(16); + #[doc = "< Device is integrated GPU"] + pub const hipDeviceAttributeIntegrated: hipDeviceAttribute_t = hipDeviceAttribute_t(16); } impl hipDeviceAttribute_t { - #[doc = "< Maximum resident threads per"] - #[doc = "< multiprocessor."] - pub const hipDeviceAttributeMaxThreadsPerMultiProcessor: hipDeviceAttribute_t = - hipDeviceAttribute_t(17); + #[doc = "< Multiple GPU devices."] + pub const hipDeviceAttributeIsMultiGpuBoard: hipDeviceAttribute_t = hipDeviceAttribute_t(17); +} +impl hipDeviceAttribute_t { + #[doc = "< Run time limit for kernels executed on the device"] + pub const hipDeviceAttributeKernelExecTimeout: hipDeviceAttribute_t = hipDeviceAttribute_t(18); +} +impl hipDeviceAttribute_t { + #[doc = "< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache."] + pub const hipDeviceAttributeL2CacheSize: hipDeviceAttribute_t = hipDeviceAttribute_t(19); +} +impl hipDeviceAttribute_t { + #[doc = "< caching locals in L1 is supported"] + pub const hipDeviceAttributeLocalL1CacheSupported: hipDeviceAttribute_t = + hipDeviceAttribute_t(20); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms"] + pub const hipDeviceAttributeLuid: hipDeviceAttribute_t = hipDeviceAttribute_t(21); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms"] + pub const hipDeviceAttributeLuidDeviceNodeMask: hipDeviceAttribute_t = hipDeviceAttribute_t(22); } impl hipDeviceAttribute_t { #[doc = "< Major compute capability version number."] pub const hipDeviceAttributeComputeCapabilityMajor: hipDeviceAttribute_t = - hipDeviceAttribute_t(18); + hipDeviceAttribute_t(23); +} +impl hipDeviceAttribute_t { + #[doc = "< Device supports allocating managed memory on this system"] + pub const hipDeviceAttributeManagedMemory: hipDeviceAttribute_t = hipDeviceAttribute_t(24); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Max block size per multiprocessor"] + pub const hipDeviceAttributeMaxBlocksPerMultiProcessor: hipDeviceAttribute_t = + hipDeviceAttribute_t(25); +} +impl hipDeviceAttribute_t { + #[doc = "< Max block size in width."] + pub const hipDeviceAttributeMaxBlockDimX: hipDeviceAttribute_t = hipDeviceAttribute_t(26); +} +impl hipDeviceAttribute_t { + #[doc = "< Max block size in height."] + pub const hipDeviceAttributeMaxBlockDimY: hipDeviceAttribute_t = hipDeviceAttribute_t(27); +} +impl hipDeviceAttribute_t { + #[doc = "< Max block size in depth."] + pub const hipDeviceAttributeMaxBlockDimZ: hipDeviceAttribute_t = hipDeviceAttribute_t(28); +} +impl hipDeviceAttribute_t { + #[doc = "< Max grid size in width."] + pub const hipDeviceAttributeMaxGridDimX: hipDeviceAttribute_t = hipDeviceAttribute_t(29); +} +impl hipDeviceAttribute_t { + #[doc = "< Max grid size in height."] + pub const hipDeviceAttributeMaxGridDimY: hipDeviceAttribute_t = hipDeviceAttribute_t(30); +} +impl hipDeviceAttribute_t { + #[doc = "< Max grid size in depth."] + pub const hipDeviceAttributeMaxGridDimZ: hipDeviceAttribute_t = hipDeviceAttribute_t(31); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum size of 1D surface."] + pub const hipDeviceAttributeMaxSurface1D: hipDeviceAttribute_t = hipDeviceAttribute_t(32); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Maximum dimensions of 1D layered surface."] + pub const hipDeviceAttributeMaxSurface1DLayered: hipDeviceAttribute_t = + hipDeviceAttribute_t(33); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum dimension (width, height) of 2D surface."] + pub const hipDeviceAttributeMaxSurface2D: hipDeviceAttribute_t = hipDeviceAttribute_t(34); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Maximum dimensions of 2D layered surface."] + pub const hipDeviceAttributeMaxSurface2DLayered: hipDeviceAttribute_t = + hipDeviceAttribute_t(35); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum dimension (width, height, depth) of 3D surface."] + pub const hipDeviceAttributeMaxSurface3D: hipDeviceAttribute_t = hipDeviceAttribute_t(36); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Maximum dimensions of Cubemap surface."] + pub const hipDeviceAttributeMaxSurfaceCubemap: hipDeviceAttribute_t = hipDeviceAttribute_t(37); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Maximum dimension of Cubemap layered surface."] + pub const hipDeviceAttributeMaxSurfaceCubemapLayered: hipDeviceAttribute_t = + hipDeviceAttribute_t(38); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum size of 1D texture."] + pub const hipDeviceAttributeMaxTexture1DWidth: hipDeviceAttribute_t = hipDeviceAttribute_t(39); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Maximum dimensions of 1D layered texture."] + pub const hipDeviceAttributeMaxTexture1DLayered: hipDeviceAttribute_t = + hipDeviceAttribute_t(40); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum number of elements allocatable in a 1D linear texture."] + #[doc = "< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda."] + pub const hipDeviceAttributeMaxTexture1DLinear: hipDeviceAttribute_t = hipDeviceAttribute_t(41); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Maximum size of 1D mipmapped texture."] + pub const hipDeviceAttributeMaxTexture1DMipmap: hipDeviceAttribute_t = hipDeviceAttribute_t(42); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum dimension width of 2D texture."] + pub const hipDeviceAttributeMaxTexture2DWidth: hipDeviceAttribute_t = hipDeviceAttribute_t(43); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum dimension hight of 2D texture."] + pub const hipDeviceAttributeMaxTexture2DHeight: hipDeviceAttribute_t = hipDeviceAttribute_t(44); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Maximum dimensions of 2D texture if gather operations performed."] + pub const hipDeviceAttributeMaxTexture2DGather: hipDeviceAttribute_t = hipDeviceAttribute_t(45); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Maximum dimensions of 2D layered texture."] + pub const hipDeviceAttributeMaxTexture2DLayered: hipDeviceAttribute_t = + hipDeviceAttribute_t(46); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory."] + pub const hipDeviceAttributeMaxTexture2DLinear: hipDeviceAttribute_t = hipDeviceAttribute_t(47); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Maximum dimensions of 2D mipmapped texture."] + pub const hipDeviceAttributeMaxTexture2DMipmap: hipDeviceAttribute_t = hipDeviceAttribute_t(48); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum dimension width of 3D texture."] + pub const hipDeviceAttributeMaxTexture3DWidth: hipDeviceAttribute_t = hipDeviceAttribute_t(49); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum dimension height of 3D texture."] + pub const hipDeviceAttributeMaxTexture3DHeight: hipDeviceAttribute_t = hipDeviceAttribute_t(50); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum dimension depth of 3D texture."] + pub const hipDeviceAttributeMaxTexture3DDepth: hipDeviceAttribute_t = hipDeviceAttribute_t(51); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Maximum dimensions of alternate 3D texture."] + pub const hipDeviceAttributeMaxTexture3DAlt: hipDeviceAttribute_t = hipDeviceAttribute_t(52); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Maximum dimensions of Cubemap texture"] + pub const hipDeviceAttributeMaxTextureCubemap: hipDeviceAttribute_t = hipDeviceAttribute_t(53); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Maximum dimensions of Cubemap layered texture."] + pub const hipDeviceAttributeMaxTextureCubemapLayered: hipDeviceAttribute_t = + hipDeviceAttribute_t(54); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum dimension of a block"] + pub const hipDeviceAttributeMaxThreadsDim: hipDeviceAttribute_t = hipDeviceAttribute_t(55); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum number of threads per block."] + pub const hipDeviceAttributeMaxThreadsPerBlock: hipDeviceAttribute_t = hipDeviceAttribute_t(56); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum resident threads per multiprocessor."] + pub const hipDeviceAttributeMaxThreadsPerMultiProcessor: hipDeviceAttribute_t = + hipDeviceAttribute_t(57); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum pitch in bytes allowed by memory copies"] + pub const hipDeviceAttributeMaxPitch: hipDeviceAttribute_t = hipDeviceAttribute_t(58); +} +impl hipDeviceAttribute_t { + #[doc = "< Global memory bus width in bits."] + pub const hipDeviceAttributeMemoryBusWidth: hipDeviceAttribute_t = hipDeviceAttribute_t(59); +} +impl hipDeviceAttribute_t { + #[doc = "< Peak memory clock frequency in kilohertz."] + pub const hipDeviceAttributeMemoryClockRate: hipDeviceAttribute_t = hipDeviceAttribute_t(60); } impl hipDeviceAttribute_t { #[doc = "< Minor compute capability version number."] pub const hipDeviceAttributeComputeCapabilityMinor: hipDeviceAttribute_t = - hipDeviceAttribute_t(19); + hipDeviceAttribute_t(61); +} +impl hipDeviceAttribute_t { + #[doc = "< Cuda only. Unique ID of device group on the same multi-GPU board"] + pub const hipDeviceAttributeMultiGpuBoardGroupID: hipDeviceAttribute_t = + hipDeviceAttribute_t(62); } impl hipDeviceAttribute_t { - #[doc = "< Device can possibly execute multiple kernels"] - #[doc = "< concurrently."] - pub const hipDeviceAttributeConcurrentKernels: hipDeviceAttribute_t = hipDeviceAttribute_t(20); + #[doc = "< Number of multiprocessors on the device."] + pub const hipDeviceAttributeMultiprocessorCount: hipDeviceAttribute_t = + hipDeviceAttribute_t(63); +} +impl hipDeviceAttribute_t { + #[doc = "< Device name."] + pub const hipDeviceAttributeName: hipDeviceAttribute_t = hipDeviceAttribute_t(64); +} +impl hipDeviceAttribute_t { + #[doc = "< Device supports coherently accessing pageable memory"] + #[doc = "< without calling hipHostRegister on it"] + pub const hipDeviceAttributePageableMemoryAccess: hipDeviceAttribute_t = + hipDeviceAttribute_t(65); +} +impl hipDeviceAttribute_t { + #[doc = "< Device accesses pageable memory via the host's page tables"] + pub const hipDeviceAttributePageableMemoryAccessUsesHostPageTables: hipDeviceAttribute_t = + hipDeviceAttribute_t(66); } impl hipDeviceAttribute_t { #[doc = "< PCI Bus ID."] - pub const hipDeviceAttributePciBusId: hipDeviceAttribute_t = hipDeviceAttribute_t(21); + pub const hipDeviceAttributePciBusId: hipDeviceAttribute_t = hipDeviceAttribute_t(67); } impl hipDeviceAttribute_t { #[doc = "< PCI Device ID."] - pub const hipDeviceAttributePciDeviceId: hipDeviceAttribute_t = hipDeviceAttribute_t(22); + pub const hipDeviceAttributePciDeviceId: hipDeviceAttribute_t = hipDeviceAttribute_t(68); } impl hipDeviceAttribute_t { - #[doc = "< Maximum Shared Memory Per"] - #[doc = "< Multiprocessor."] - pub const hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: hipDeviceAttribute_t = - hipDeviceAttribute_t(23); + #[doc = "< PCI Domain ID."] + pub const hipDeviceAttributePciDomainID: hipDeviceAttribute_t = hipDeviceAttribute_t(69); } impl hipDeviceAttribute_t { - #[doc = "< Multiple GPU devices."] - pub const hipDeviceAttributeIsMultiGpuBoard: hipDeviceAttribute_t = hipDeviceAttribute_t(24); + #[doc = "< Cuda11 only. Maximum l2 persisting lines capacity in bytes"] + pub const hipDeviceAttributePersistingL2CacheMaxSize: hipDeviceAttribute_t = + hipDeviceAttribute_t(70); } impl hipDeviceAttribute_t { - #[doc = "< iGPU"] - pub const hipDeviceAttributeIntegrated: hipDeviceAttribute_t = hipDeviceAttribute_t(25); + #[doc = "< 32-bit registers available to a thread block. This number is shared"] + #[doc = "< by all thread blocks simultaneously resident on a multiprocessor."] + pub const hipDeviceAttributeMaxRegistersPerBlock: hipDeviceAttribute_t = + hipDeviceAttribute_t(71); } impl hipDeviceAttribute_t { - #[doc = "< Support cooperative launch"] - pub const hipDeviceAttributeCooperativeLaunch: hipDeviceAttribute_t = hipDeviceAttribute_t(26); + #[doc = "< 32-bit registers available per block."] + pub const hipDeviceAttributeMaxRegistersPerMultiprocessor: hipDeviceAttribute_t = + hipDeviceAttribute_t(72); } impl hipDeviceAttribute_t { - #[doc = "< Support cooperative launch on multiple devices"] - pub const hipDeviceAttributeCooperativeMultiDeviceLaunch: hipDeviceAttribute_t = - hipDeviceAttribute_t(27); + #[doc = "< Cuda11 only. Shared memory reserved by CUDA driver per block."] + pub const hipDeviceAttributeReservedSharedMemPerBlock: hipDeviceAttribute_t = + hipDeviceAttribute_t(73); } impl hipDeviceAttribute_t { - #[doc = "< Maximum number of elements in 1D images"] - pub const hipDeviceAttributeMaxTexture1DWidth: hipDeviceAttribute_t = hipDeviceAttribute_t(28); + #[doc = "< Maximum shared memory available per block in bytes."] + pub const hipDeviceAttributeMaxSharedMemoryPerBlock: hipDeviceAttribute_t = + hipDeviceAttribute_t(74); } impl hipDeviceAttribute_t { - #[doc = "< Maximum dimension width of 2D images in image elements"] - pub const hipDeviceAttributeMaxTexture2DWidth: hipDeviceAttribute_t = hipDeviceAttribute_t(29); + #[doc = "< Cuda only. Maximum shared memory per block usable by special opt in."] + pub const hipDeviceAttributeSharedMemPerBlockOptin: hipDeviceAttribute_t = + hipDeviceAttribute_t(75); } impl hipDeviceAttribute_t { - #[doc = "< Maximum dimension height of 2D images in image elements"] - pub const hipDeviceAttributeMaxTexture2DHeight: hipDeviceAttribute_t = hipDeviceAttribute_t(30); + #[doc = "< Cuda only. Shared memory available per multiprocessor."] + pub const hipDeviceAttributeSharedMemPerMultiprocessor: hipDeviceAttribute_t = + hipDeviceAttribute_t(76); } impl hipDeviceAttribute_t { - #[doc = "< Maximum dimension width of 3D images in image elements"] - pub const hipDeviceAttributeMaxTexture3DWidth: hipDeviceAttribute_t = hipDeviceAttribute_t(31); + #[doc = "< Cuda only. Performance ratio of single precision to double precision."] + pub const hipDeviceAttributeSingleToDoublePrecisionPerfRatio: hipDeviceAttribute_t = + hipDeviceAttribute_t(77); } impl hipDeviceAttribute_t { - #[doc = "< Maximum dimensions height of 3D images in image elements"] - pub const hipDeviceAttributeMaxTexture3DHeight: hipDeviceAttribute_t = hipDeviceAttribute_t(32); + #[doc = "< Cuda only. Whether to support stream priorities."] + pub const hipDeviceAttributeStreamPrioritiesSupported: hipDeviceAttribute_t = + hipDeviceAttribute_t(78); } impl hipDeviceAttribute_t { - #[doc = "< Maximum dimensions depth of 3D images in image elements"] - pub const hipDeviceAttributeMaxTexture3DDepth: hipDeviceAttribute_t = hipDeviceAttribute_t(33); + #[doc = "< Cuda only. Alignment requirement for surfaces"] + pub const hipDeviceAttributeSurfaceAlignment: hipDeviceAttribute_t = hipDeviceAttribute_t(79); } impl hipDeviceAttribute_t { - #[doc = "< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register"] - pub const hipDeviceAttributeHdpMemFlushCntl: hipDeviceAttribute_t = hipDeviceAttribute_t(34); + #[doc = "< Cuda only. Whether device is a Tesla device using TCC driver"] + pub const hipDeviceAttributeTccDriver: hipDeviceAttribute_t = hipDeviceAttribute_t(80); } impl hipDeviceAttribute_t { - #[doc = "< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register"] - pub const hipDeviceAttributeHdpRegFlushCntl: hipDeviceAttribute_t = hipDeviceAttribute_t(35); + #[doc = "< Alignment requirement for textures"] + pub const hipDeviceAttributeTextureAlignment: hipDeviceAttribute_t = hipDeviceAttribute_t(81); } impl hipDeviceAttribute_t { - #[doc = "< Maximum pitch in bytes allowed by memory copies"] - pub const hipDeviceAttributeMaxPitch: hipDeviceAttribute_t = hipDeviceAttribute_t(36); + #[doc = "< Pitch alignment requirement for 2D texture references bound to pitched memory;"] + pub const hipDeviceAttributeTexturePitchAlignment: hipDeviceAttribute_t = + hipDeviceAttribute_t(82); } impl hipDeviceAttribute_t { - #[doc = "<Alignment requirement for textures"] - pub const hipDeviceAttributeTextureAlignment: hipDeviceAttribute_t = hipDeviceAttribute_t(37); + #[doc = "< Constant memory size in bytes."] + pub const hipDeviceAttributeTotalConstantMemory: hipDeviceAttribute_t = + hipDeviceAttribute_t(83); } impl hipDeviceAttribute_t { - #[doc = "<Pitch alignment requirement for 2D texture references bound to pitched memory;"] - pub const hipDeviceAttributeTexturePitchAlignment: hipDeviceAttribute_t = - hipDeviceAttribute_t(38); + #[doc = "< Global memory available on devicice."] + pub const hipDeviceAttributeTotalGlobalMem: hipDeviceAttribute_t = hipDeviceAttribute_t(84); } impl hipDeviceAttribute_t { - #[doc = "<Run time limit for kernels executed on the device"] - pub const hipDeviceAttributeKernelExecTimeout: hipDeviceAttribute_t = hipDeviceAttribute_t(39); + #[doc = "< Cuda only. An unified address space shared with the host."] + pub const hipDeviceAttributeUnifiedAddressing: hipDeviceAttribute_t = hipDeviceAttribute_t(85); } impl hipDeviceAttribute_t { - #[doc = "<Device can map host memory into device address space"] - pub const hipDeviceAttributeCanMapHostMemory: hipDeviceAttribute_t = hipDeviceAttribute_t(40); + #[doc = "< Cuda only. Unique ID in 16 byte."] + pub const hipDeviceAttributeUuid: hipDeviceAttribute_t = hipDeviceAttribute_t(86); } impl hipDeviceAttribute_t { - #[doc = "<Device has ECC support enabled"] - pub const hipDeviceAttributeEccEnabled: hipDeviceAttribute_t = hipDeviceAttribute_t(41); + #[doc = "< Warp size in threads."] + pub const hipDeviceAttributeWarpSize: hipDeviceAttribute_t = hipDeviceAttribute_t(87); +} +impl hipDeviceAttribute_t { + pub const hipDeviceAttributeCudaCompatibleEnd: hipDeviceAttribute_t = + hipDeviceAttribute_t(9999); +} +impl hipDeviceAttribute_t { + pub const hipDeviceAttributeAmdSpecificBegin: hipDeviceAttribute_t = + hipDeviceAttribute_t(10000); +} +impl hipDeviceAttribute_t { + #[doc = "< Frequency in khz of the timer used by the device-side \"clock*\""] + pub const hipDeviceAttributeClockInstructionRate: hipDeviceAttribute_t = + hipDeviceAttribute_t(10000); +} +impl hipDeviceAttribute_t { + #[doc = "< Device architecture"] + pub const hipDeviceAttributeArch: hipDeviceAttribute_t = hipDeviceAttribute_t(10001); +} +impl hipDeviceAttribute_t { + #[doc = "< Maximum Shared Memory PerMultiprocessor."] + pub const hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: hipDeviceAttribute_t = + hipDeviceAttribute_t(10002); +} +impl hipDeviceAttribute_t { + #[doc = "< Device gcn architecture"] + pub const hipDeviceAttributeGcnArch: hipDeviceAttribute_t = hipDeviceAttribute_t(10003); +} +impl hipDeviceAttribute_t { + #[doc = "< Device gcnArch name in 256 bytes"] + pub const hipDeviceAttributeGcnArchName: hipDeviceAttribute_t = hipDeviceAttribute_t(10004); +} +impl hipDeviceAttribute_t { + #[doc = "< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register"] + pub const hipDeviceAttributeHdpMemFlushCntl: hipDeviceAttribute_t = hipDeviceAttribute_t(10005); +} +impl hipDeviceAttribute_t { + #[doc = "< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register"] + pub const hipDeviceAttributeHdpRegFlushCntl: hipDeviceAttribute_t = hipDeviceAttribute_t(10006); } impl hipDeviceAttribute_t { #[doc = "< Supports cooperative launch on multiple"] + #[doc = "< devices with unmatched functions"] pub const hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc: hipDeviceAttribute_t = - hipDeviceAttribute_t(42); + hipDeviceAttribute_t(10007); } impl hipDeviceAttribute_t { #[doc = "< Supports cooperative launch on multiple"] + #[doc = "< devices with unmatched grid dimensions"] pub const hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim: hipDeviceAttribute_t = - hipDeviceAttribute_t(43); + hipDeviceAttribute_t(10008); } impl hipDeviceAttribute_t { #[doc = "< Supports cooperative launch on multiple"] + #[doc = "< devices with unmatched block dimensions"] pub const hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim: hipDeviceAttribute_t = - hipDeviceAttribute_t(44); + hipDeviceAttribute_t(10009); } impl hipDeviceAttribute_t { #[doc = "< Supports cooperative launch on multiple"] + #[doc = "< devices with unmatched shared memories"] pub const hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem: hipDeviceAttribute_t = - hipDeviceAttribute_t(45); + hipDeviceAttribute_t(10010); } impl hipDeviceAttribute_t { - #[doc = "< Revision of the GPU in this device"] - pub const hipDeviceAttributeAsicRevision: hipDeviceAttribute_t = hipDeviceAttribute_t(46); + #[doc = "< Whether it is LargeBar"] + pub const hipDeviceAttributeIsLargeBar: hipDeviceAttribute_t = hipDeviceAttribute_t(10011); } impl hipDeviceAttribute_t { - #[doc = "< Device supports allocating managed memory on this system"] - pub const hipDeviceAttributeManagedMemory: hipDeviceAttribute_t = hipDeviceAttribute_t(47); -} -impl hipDeviceAttribute_t { - #[doc = "< Host can directly access managed memory on"] - pub const hipDeviceAttributeDirectManagedMemAccessFromHost: hipDeviceAttribute_t = - hipDeviceAttribute_t(48); -} -impl hipDeviceAttribute_t { - #[doc = "< Device can coherently access managed memory"] - pub const hipDeviceAttributeConcurrentManagedAccess: hipDeviceAttribute_t = - hipDeviceAttribute_t(49); + #[doc = "< Revision of the GPU in this device"] + pub const hipDeviceAttributeAsicRevision: hipDeviceAttribute_t = hipDeviceAttribute_t(10012); } impl hipDeviceAttribute_t { - #[doc = "< Device supports coherently accessing pageable memory"] - pub const hipDeviceAttributePageableMemoryAccess: hipDeviceAttribute_t = - hipDeviceAttribute_t(50); + #[doc = "< '1' if Device supports hipStreamWaitValue32() and"] + #[doc = "< hipStreamWaitValue64() , '0' otherwise."] + pub const hipDeviceAttributeCanUseStreamWaitValue: hipDeviceAttribute_t = + hipDeviceAttribute_t(10013); } impl hipDeviceAttribute_t { - #[doc = "< Device accesses pageable memory via"] - pub const hipDeviceAttributePageableMemoryAccessUsesHostPageTables: hipDeviceAttribute_t = - hipDeviceAttribute_t(51); + pub const hipDeviceAttributeAmdSpecificEnd: hipDeviceAttribute_t = hipDeviceAttribute_t(19999); } impl hipDeviceAttribute_t { - #[doc = "< '1' if Device supports hipStreamWaitValue32() and"] - #[doc = "< hipStreamWaitValue64() , '0' otherwise."] - pub const hipDeviceAttributeCanUseStreamWaitValue: hipDeviceAttribute_t = - hipDeviceAttribute_t(52); + pub const hipDeviceAttributeVendorSpecificBegin: hipDeviceAttribute_t = + hipDeviceAttribute_t(20000); } #[repr(transparent)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] @@ -1065,9 +1304,7 @@ impl hipComputeMode { #[repr(transparent)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub struct hipComputeMode(pub ::std::os::raw::c_uint); -pub type __int32_t = ::std::os::raw::c_int; pub type __uint32_t = ::std::os::raw::c_uint; -pub type __int64_t = ::std::os::raw::c_long; pub type __uint64_t = ::std::os::raw::c_ulong; pub type hipDeviceptr_t = *mut ::std::os::raw::c_void; impl hipChannelFormatKind { @@ -2021,6 +2258,9 @@ pub struct ihipEvent_t { } pub type hipEvent_t = *mut ihipEvent_t; impl hipLimit_t { + pub const hipLimitPrintfFifoSize: hipLimit_t = hipLimit_t(1); +} +impl hipLimit_t { pub const hipLimitMallocHeapSize: hipLimit_t = hipLimit_t(2); } #[repr(transparent)] @@ -2050,13 +2290,47 @@ impl hipMemoryAdvise { pub const hipMemAdviseSetAccessedBy: hipMemoryAdvise = hipMemoryAdvise(5); } impl hipMemoryAdvise { - #[doc = "< Let the Unified Memory subsystem decide on"] - #[doc = "< the page faulting policy for the specified device"] + #[doc = "< Let HIP to decide on the page faulting policy"] + #[doc = "< for the specified device"] pub const hipMemAdviseUnsetAccessedBy: hipMemoryAdvise = hipMemoryAdvise(6); } +impl hipMemoryAdvise { + #[doc = "< The default memory model is fine-grain. That allows"] + #[doc = "< coherent operations between host and device, while"] + #[doc = "< executing kernels. The coarse-grain can be used"] + #[doc = "< for data that only needs to be coherent at dispatch"] + #[doc = "< boundaries for better performance"] + pub const hipMemAdviseSetCoarseGrain: hipMemoryAdvise = hipMemoryAdvise(100); +} +impl hipMemoryAdvise { + #[doc = "< Restores cache coherency policy back to fine-grain"] + pub const hipMemAdviseUnsetCoarseGrain: hipMemoryAdvise = hipMemoryAdvise(101); +} #[repr(transparent)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub struct hipMemoryAdvise(pub ::std::os::raw::c_uint); +impl hipMemRangeCoherencyMode { + #[doc = "< Updates to memory with this attribute can be"] + #[doc = "< done coherently from all devices"] + pub const hipMemRangeCoherencyModeFineGrain: hipMemRangeCoherencyMode = + hipMemRangeCoherencyMode(0); +} +impl hipMemRangeCoherencyMode { + #[doc = "< Writes to memory with this attribute can be"] + #[doc = "< performed by a single device at a time"] + pub const hipMemRangeCoherencyModeCoarseGrain: hipMemRangeCoherencyMode = + hipMemRangeCoherencyMode(1); +} +impl hipMemRangeCoherencyMode { + #[doc = "< Memory region queried contains subregions with"] + #[doc = "< both hipMemRangeCoherencyModeFineGrain and"] + #[doc = "< hipMemRangeCoherencyModeCoarseGrain attributes"] + pub const hipMemRangeCoherencyModeIndeterminate: hipMemRangeCoherencyMode = + hipMemRangeCoherencyMode(2); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub struct hipMemRangeCoherencyMode(pub ::std::os::raw::c_uint); impl hipMemRangeAttribute { #[doc = "< Whether the range will mostly be read and"] #[doc = "< only occassionally be written to"] @@ -2067,15 +2341,21 @@ impl hipMemRangeAttribute { pub const hipMemRangeAttributePreferredLocation: hipMemRangeAttribute = hipMemRangeAttribute(2); } impl hipMemRangeAttribute { - #[doc = "< Memory range has cudaMemAdviseSetAccessedBy"] - #[doc = "< set for specified device"] + #[doc = "< Memory range has hipMemAdviseSetAccessedBy"] + #[doc = "< set for the specified device"] pub const hipMemRangeAttributeAccessedBy: hipMemRangeAttribute = hipMemRangeAttribute(3); } impl hipMemRangeAttribute { - #[doc = "< The last location to which the range was prefetched"] + #[doc = "< The last location to where the range was"] + #[doc = "< prefetched"] pub const hipMemRangeAttributeLastPrefetchLocation: hipMemRangeAttribute = hipMemRangeAttribute(4); } +impl hipMemRangeAttribute { + #[doc = "< Returns coherency mode"] + #[doc = "< @ref hipMemRangeCoherencyMode for the range"] + pub const hipMemRangeAttributeCoherencyMode: hipMemRangeAttribute = hipMemRangeAttribute(100); +} #[repr(transparent)] #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub struct hipMemRangeAttribute(pub ::std::os::raw::c_uint); @@ -2278,7 +2558,157 @@ pub struct hipExternalMemoryBufferDesc_st { } pub type hipExternalMemoryBufferDesc = hipExternalMemoryBufferDesc_st; pub type hipExternalMemory_t = *mut ::std::os::raw::c_void; +impl hipExternalSemaphoreHandleType_enum { + pub const hipExternalSemaphoreHandleTypeOpaqueFd: hipExternalSemaphoreHandleType_enum = + hipExternalSemaphoreHandleType_enum(1); +} +impl hipExternalSemaphoreHandleType_enum { + pub const hipExternalSemaphoreHandleTypeOpaqueWin32: hipExternalSemaphoreHandleType_enum = + hipExternalSemaphoreHandleType_enum(2); +} +impl hipExternalSemaphoreHandleType_enum { + pub const hipExternalSemaphoreHandleTypeOpaqueWin32Kmt: hipExternalSemaphoreHandleType_enum = + hipExternalSemaphoreHandleType_enum(3); +} +impl hipExternalSemaphoreHandleType_enum { + pub const hipExternalSemaphoreHandleTypeD3D12Fence: hipExternalSemaphoreHandleType_enum = + hipExternalSemaphoreHandleType_enum(4); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub struct hipExternalSemaphoreHandleType_enum(pub ::std::os::raw::c_uint); +pub use self::hipExternalSemaphoreHandleType_enum as hipExternalSemaphoreHandleType; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct hipExternalSemaphoreHandleDesc_st { + pub type_: hipExternalSemaphoreHandleType, + pub handle: hipExternalSemaphoreHandleDesc_st__bindgen_ty_1, + pub flags: ::std::os::raw::c_uint, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union hipExternalSemaphoreHandleDesc_st__bindgen_ty_1 { + pub fd: ::std::os::raw::c_int, + pub win32: hipExternalSemaphoreHandleDesc_st__bindgen_ty_1__bindgen_ty_1, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipExternalSemaphoreHandleDesc_st__bindgen_ty_1__bindgen_ty_1 { + pub handle: *mut ::std::os::raw::c_void, + pub name: *const ::std::os::raw::c_void, +} +pub type hipExternalSemaphoreHandleDesc = hipExternalSemaphoreHandleDesc_st; +pub type hipExternalSemaphore_t = *mut ::std::os::raw::c_void; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipExternalSemaphoreSignalParams_st { + pub params: hipExternalSemaphoreSignalParams_st__bindgen_ty_1, + pub flags: ::std::os::raw::c_uint, + pub reserved: [::std::os::raw::c_uint; 16usize], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipExternalSemaphoreSignalParams_st__bindgen_ty_1 { + pub fence: hipExternalSemaphoreSignalParams_st__bindgen_ty_1__bindgen_ty_1, + pub keyedMutex: hipExternalSemaphoreSignalParams_st__bindgen_ty_1__bindgen_ty_2, + pub reserved: [::std::os::raw::c_uint; 12usize], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipExternalSemaphoreSignalParams_st__bindgen_ty_1__bindgen_ty_1 { + pub value: ::std::os::raw::c_ulonglong, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipExternalSemaphoreSignalParams_st__bindgen_ty_1__bindgen_ty_2 { + pub key: ::std::os::raw::c_ulonglong, +} +pub type hipExternalSemaphoreSignalParams = hipExternalSemaphoreSignalParams_st; +#[doc = " External semaphore wait parameters, compatible with driver type"] +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipExternalSemaphoreWaitParams_st { + pub params: hipExternalSemaphoreWaitParams_st__bindgen_ty_1, + pub flags: ::std::os::raw::c_uint, + pub reserved: [::std::os::raw::c_uint; 16usize], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipExternalSemaphoreWaitParams_st__bindgen_ty_1 { + pub fence: hipExternalSemaphoreWaitParams_st__bindgen_ty_1__bindgen_ty_1, + pub keyedMutex: hipExternalSemaphoreWaitParams_st__bindgen_ty_1__bindgen_ty_2, + pub reserved: [::std::os::raw::c_uint; 10usize], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipExternalSemaphoreWaitParams_st__bindgen_ty_1__bindgen_ty_1 { + pub value: ::std::os::raw::c_ulonglong, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipExternalSemaphoreWaitParams_st__bindgen_ty_1__bindgen_ty_2 { + pub key: ::std::os::raw::c_ulonglong, + pub timeoutMs: ::std::os::raw::c_uint, +} +#[doc = " External semaphore wait parameters, compatible with driver type"] +pub type hipExternalSemaphoreWaitParams = hipExternalSemaphoreWaitParams_st; +impl hipGLDeviceList { + #[doc = "< All hip devices used by current OpenGL context."] + pub const hipGLDeviceListAll: hipGLDeviceList = hipGLDeviceList(1); +} +impl hipGLDeviceList { + #[doc = "< Hip devices used by current OpenGL context in current"] + #[doc = "< frame"] + pub const hipGLDeviceListCurrentFrame: hipGLDeviceList = hipGLDeviceList(2); +} +impl hipGLDeviceList { + #[doc = "< Hip devices used by current OpenGL context in next"] + #[doc = "< frame."] + pub const hipGLDeviceListNextFrame: hipGLDeviceList = hipGLDeviceList(3); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub struct hipGLDeviceList(pub ::std::os::raw::c_uint); +impl hipGraphicsRegisterFlags { + pub const hipGraphicsRegisterFlagsNone: hipGraphicsRegisterFlags = hipGraphicsRegisterFlags(0); +} +impl hipGraphicsRegisterFlags { + #[doc = "< HIP will not write to this registered resource"] + pub const hipGraphicsRegisterFlagsReadOnly: hipGraphicsRegisterFlags = + hipGraphicsRegisterFlags(1); +} +impl hipGraphicsRegisterFlags { + pub const hipGraphicsRegisterFlagsWriteDiscard: hipGraphicsRegisterFlags = + hipGraphicsRegisterFlags(2); +} +impl hipGraphicsRegisterFlags { + #[doc = "< HIP will bind this resource to a surface"] + pub const hipGraphicsRegisterFlagsSurfaceLoadStore: hipGraphicsRegisterFlags = + hipGraphicsRegisterFlags(4); +} +impl hipGraphicsRegisterFlags { + pub const hipGraphicsRegisterFlagsTextureGather: hipGraphicsRegisterFlags = + hipGraphicsRegisterFlags(8); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub struct hipGraphicsRegisterFlags(pub ::std::os::raw::c_uint); +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct _hipGraphicsResource { + _unused: [u8; 0], +} +pub type hipGraphicsResource = _hipGraphicsResource; +pub type hipGraphicsResource_t = *mut hipGraphicsResource; extern "C" { + #[doc = " @defgroup API HIP API"] + #[doc = " @{"] + #[doc = ""] + #[doc = " Defines the HIP API. See the individual sections for more information."] + #[doc = " @defgroup Driver Initialization and Version"] + #[doc = " @{"] + #[doc = " This section describes the initializtion and version functions of HIP runtime API."] + #[doc = ""] #[doc = " @brief Explicitly initializes the HIP runtime."] #[doc = ""] #[doc = " Most HIP APIs implicitly initialize the HIP runtime."] @@ -2396,6 +2826,10 @@ extern "C" { pub fn hipDeviceTotalMem(bytes: *mut usize, device: hipDevice_t) -> hipError_t; } extern "C" { + #[doc = " @}"] + #[doc = " @defgroup Device Device Management"] + #[doc = " @{"] + #[doc = " This section describes the device management functions of HIP runtime API."] #[doc = " @brief Waits on all active streams on current device"] #[doc = ""] #[doc = " When this command is invoked, the host thread gets blocked until all the commands associated"] @@ -2718,13 +3152,40 @@ extern "C" { pub fn hipIpcCloseMemHandle(devPtr: *mut ::std::os::raw::c_void) -> hipError_t; } extern "C" { + #[doc = " @brief Gets an opaque interprocess handle for an event."] + #[doc = ""] + #[doc = " This opaque handle may be copied into other processes and opened with cudaIpcOpenEventHandle."] + #[doc = " Then cudaEventRecord, cudaEventSynchronize, cudaStreamWaitEvent and cudaEventQuery may be used in"] + #[doc = " either process. Operations on the imported event after the exported event has been freed with hipEventDestroy"] + #[doc = " will result in undefined behavior."] + #[doc = ""] + #[doc = " @param[out] handle Pointer to cudaIpcEventHandle to return the opaque event handle"] + #[doc = " @param[in] event Event allocated with cudaEventInterprocess and cudaEventDisableTiming flags"] + #[doc = ""] + #[doc = " @returns #hipSuccess, #hipErrorInvalidConfiguration, #hipErrorInvalidValue"] + #[doc = ""] pub fn hipIpcGetEventHandle(handle: *mut hipIpcEventHandle_t, event: hipEvent_t) -> hipError_t; } extern "C" { + #[doc = " @brief Opens an interprocess event handles."] + #[doc = ""] + #[doc = " Opens an interprocess event handle exported from another process with cudaIpcGetEventHandle. The returned"] + #[doc = " hipEvent_t behaves like a locally created event with the hipEventDisableTiming flag specified. This event"] + #[doc = " need be freed with hipEventDestroy. Operations on the imported event after the exported event has been freed"] + #[doc = " with hipEventDestroy will result in undefined behavior. If the function is called within the same process where"] + #[doc = " handle is returned by hipIpcGetEventHandle, it will return hipErrorInvalidContext."] + #[doc = ""] + #[doc = " @param[out] event Pointer to hipEvent_t to return the event"] + #[doc = " @param[in] handle The opaque interprocess handle to open"] + #[doc = ""] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidContext"] + #[doc = ""] pub fn hipIpcOpenEventHandle(event: *mut hipEvent_t, handle: hipIpcEventHandle_t) -> hipError_t; } extern "C" { + #[doc = " @}"] + #[doc = ""] #[doc = " @defgroup Execution Execution Control"] #[doc = " @{"] #[doc = " This section describes the execution control functions of HIP runtime API."] @@ -2777,6 +3238,12 @@ extern "C" { ) -> hipError_t; } extern "C" { + #[doc = " @}"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = " @defgroup Error Error Handling"] + #[doc = " @{"] + #[doc = " This section describes the error handling functions of HIP runtime API."] #[doc = " @brief Return last error returned by any HIP runtime API call and resets the stored error code to"] #[doc = " #hipSuccess"] #[doc = ""] @@ -2975,6 +3442,9 @@ extern "C" { ) -> hipError_t; } extern "C" { + pub fn hipStreamGetCtx(stream: hipStream_t, pctx: *mut hipCtx_t) -> hipError_t; +} +extern "C" { #[doc = " @brief Return flags associated with this stream."] #[doc = ""] #[doc = " @param[in] stream stream to be queried"] @@ -3058,7 +3528,7 @@ pub type hipStreamCallback_t = ::std::option::Option< extern "C" { #[doc = " @brief Adds a callback to be called on the host after all currently enqueued"] #[doc = " items in the stream have completed. For each"] - #[doc = " cudaStreamAddCallback call, a callback will be executed exactly once."] + #[doc = " hipStreamAddCallback call, a callback will be executed exactly once."] #[doc = " The callback will block later work in the stream until it is finished."] #[doc = " @param[in] stream - Stream to add callback to"] #[doc = " @param[in] callback - The function to call once preceding stream operations are complete"] @@ -3077,7 +3547,13 @@ extern "C" { ) -> hipError_t; } extern "C" { - #[doc = " @brief Enqueues a wait command to the stream."] + #[doc = " @}"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = " @defgroup Stream Memory Operations"] + #[doc = " @{"] + #[doc = " This section describes Stream Memory Wait and Write functions of HIP runtime API."] + #[doc = " @brief Enqueues a wait command to the stream.[BETA]"] #[doc = ""] #[doc = " @param [in] stream - Stream identifier"] #[doc = " @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag"] @@ -3102,18 +3578,21 @@ extern "C" { #[doc = " @note Support for hipStreamWaitValue32 can be queried using 'hipDeviceGetAttribute()' and"] #[doc = " 'hipDeviceAttributeCanUseStreamWaitValue' flag."] #[doc = ""] + #[doc = " @beta This API is marked as beta, meaning, while this is feature complete,"] + #[doc = " it is still open to changes and may have outstanding issues."] + #[doc = ""] #[doc = " @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue64, hipStreamWriteValue64,"] #[doc = " hipStreamWriteValue32, hipDeviceGetAttribute"] pub fn hipStreamWaitValue32( stream: hipStream_t, ptr: *mut ::std::os::raw::c_void, - value: i32, + value: u32, flags: ::std::os::raw::c_uint, mask: u32, ) -> hipError_t; } extern "C" { - #[doc = " @brief Enqueues a wait command to the stream."] + #[doc = " @brief Enqueues a wait command to the stream.[BETA]"] #[doc = ""] #[doc = " @param [in] stream - Stream identifier"] #[doc = " @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag"] @@ -3138,18 +3617,21 @@ extern "C" { #[doc = " @note Support for hipStreamWaitValue64 can be queried using 'hipDeviceGetAttribute()' and"] #[doc = " 'hipDeviceAttributeCanUseStreamWaitValue' flag."] #[doc = ""] + #[doc = " @beta This API is marked as beta, meaning, while this is feature complete,"] + #[doc = " it is still open to changes and may have outstanding issues."] + #[doc = ""] #[doc = " @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue32, hipStreamWriteValue64,"] #[doc = " hipStreamWriteValue32, hipDeviceGetAttribute"] pub fn hipStreamWaitValue64( stream: hipStream_t, ptr: *mut ::std::os::raw::c_void, - value: i64, + value: u64, flags: ::std::os::raw::c_uint, mask: u64, ) -> hipError_t; } extern "C" { - #[doc = " @brief Enqueues a write command to the stream."] + #[doc = " @brief Enqueues a write command to the stream.[BETA]"] #[doc = ""] #[doc = " @param [in] stream - Stream identifier"] #[doc = " @param [in] ptr - Pointer to a GPU accessible memory object"] @@ -3161,17 +3643,20 @@ extern "C" { #[doc = " Enqueues a write command to the stream, write operation is performed after all earlier commands"] #[doc = " on this stream have completed the execution."] #[doc = ""] + #[doc = " @beta This API is marked as beta, meaning, while this is feature complete,"] + #[doc = " it is still open to changes and may have outstanding issues."] + #[doc = ""] #[doc = " @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32,"] #[doc = " hipStreamWaitValue64"] pub fn hipStreamWriteValue32( stream: hipStream_t, ptr: *mut ::std::os::raw::c_void, - value: i32, + value: u32, flags: ::std::os::raw::c_uint, ) -> hipError_t; } extern "C" { - #[doc = " @brief Enqueues a write command to the stream."] + #[doc = " @brief Enqueues a write command to the stream.[BETA]"] #[doc = ""] #[doc = " @param [in] stream - Stream identifier"] #[doc = " @param [in] ptr - Pointer to a GPU accessible memory object"] @@ -3183,22 +3668,30 @@ extern "C" { #[doc = " Enqueues a write command to the stream, write operation is performed after all earlier commands"] #[doc = " on this stream have completed the execution."] #[doc = ""] + #[doc = " @beta This API is marked as beta, meaning, while this is feature complete,"] + #[doc = " it is still open to changes and may have outstanding issues."] + #[doc = ""] #[doc = " @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32,"] #[doc = " hipStreamWaitValue64"] pub fn hipStreamWriteValue64( stream: hipStream_t, ptr: *mut ::std::os::raw::c_void, - value: i64, + value: u64, flags: ::std::os::raw::c_uint, ) -> hipError_t; } extern "C" { + #[doc = " @}"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = " @defgroup Event Event Management"] + #[doc = " @{"] + #[doc = " This section describes the event management functions of HIP runtime API."] #[doc = " @brief Create an event with the specified flags"] #[doc = ""] #[doc = " @param[in,out] event Returns the newly created event."] #[doc = " @param[in] flags Flags to control event behavior. Valid values are #hipEventDefault,"] #[doc = "#hipEventBlockingSync, #hipEventDisableTiming, #hipEventInterprocess"] - #[doc = ""] #[doc = " #hipEventDefault : Default flag. The event will use active synchronization and will support"] #[doc = "timing. Blocking synchronization provides lowest possible latency at the expense of dedicating a"] #[doc = "CPU to poll on the event."] @@ -3207,7 +3700,6 @@ extern "C" { #[doc = "for the synchroniation but can result in lower power and more resources for other CPU threads."] #[doc = " #hipEventDisableTiming : Disable recording of timing information. Events created with this flag"] #[doc = "would not record profiling data and provide best performance if used for synchronization."] - #[doc = ""] #[doc = " @warning On AMD platform, hipEventInterprocess support is under development. Use of this flag"] #[doc = "will return an error."] #[doc = ""] @@ -3323,6 +3815,18 @@ extern "C" { pub fn hipEventQuery(event: hipEvent_t) -> hipError_t; } extern "C" { + #[doc = " @}"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = " @defgroup Memory Memory Management"] + #[doc = " @{"] + #[doc = " This section describes the memory management functions of HIP runtime API."] + #[doc = " The following CUDA APIs are not currently supported:"] + #[doc = " - cudaMalloc3D"] + #[doc = " - cudaMalloc3DArray"] + #[doc = " - TODO - more 2D, 3D, array APIs here."] + #[doc = ""] + #[doc = ""] #[doc = " @brief Return attributes for the specified pointer"] #[doc = ""] #[doc = " @param[out] attributes for the specified pointer"] @@ -3337,6 +3841,66 @@ extern "C" { ) -> hipError_t; } extern "C" { + #[doc = " @brief Imports an external semaphore."] + #[doc = ""] + #[doc = " @param[out] extSem_out External semaphores to be waited on"] + #[doc = " @param[in] semHandleDesc Semaphore import handle descriptor"] + #[doc = ""] + #[doc = " @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue"] + #[doc = ""] + #[doc = " @see"] + pub fn hipImportExternalSemaphore( + extSem_out: *mut hipExternalSemaphore_t, + semHandleDesc: *const hipExternalSemaphoreHandleDesc, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Signals a set of external semaphore objects."] + #[doc = ""] + #[doc = " @param[in] extSem_out External semaphores to be waited on"] + #[doc = " @param[in] paramsArray Array of semaphore parameters"] + #[doc = " @param[in] numExtSems Number of semaphores to wait on"] + #[doc = " @param[in] stream Stream to enqueue the wait operations in"] + #[doc = ""] + #[doc = " @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue"] + #[doc = ""] + #[doc = " @see"] + pub fn hipSignalExternalSemaphoresAsync( + extSemArray: *const hipExternalSemaphore_t, + paramsArray: *const hipExternalSemaphoreSignalParams, + numExtSems: ::std::os::raw::c_uint, + stream: hipStream_t, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Waits on a set of external semaphore objects"] + #[doc = ""] + #[doc = " @param[in] extSem_out External semaphores to be waited on"] + #[doc = " @param[in] paramsArray Array of semaphore parameters"] + #[doc = " @param[in] numExtSems Number of semaphores to wait on"] + #[doc = " @param[in] stream Stream to enqueue the wait operations in"] + #[doc = ""] + #[doc = " @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue"] + #[doc = ""] + #[doc = " @see"] + pub fn hipWaitExternalSemaphoresAsync( + extSemArray: *const hipExternalSemaphore_t, + paramsArray: *const hipExternalSemaphoreWaitParams, + numExtSems: ::std::os::raw::c_uint, + stream: hipStream_t, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Destroys an external semaphore object and releases any references to the underlying resource. Any outstanding signals or waits must have completed before the semaphore is destroyed."] + #[doc = ""] + #[doc = " @param[in] extSem handle to an external memory object"] + #[doc = ""] + #[doc = " @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue"] + #[doc = ""] + #[doc = " @see"] + pub fn hipDestroyExternalSemaphore(extSem: hipExternalSemaphore_t) -> hipError_t; +} +extern "C" { #[doc = " @brief Imports an external memory object."] #[doc = ""] #[doc = " @param[out] extMem_out Returned handle to an external memory object"] @@ -3454,7 +4018,14 @@ extern "C" { ) -> hipError_t; } extern "C" { - #[doc = " @brief Allocates memory that will be automatically managed by AMD HMM."] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = " @addtogroup Memory Managed Memory"] + #[doc = " @{"] + #[doc = " @ingroup Memory"] + #[doc = " This section describes the managed memory management functions of HIP runtime API."] + #[doc = ""] + #[doc = " @brief Allocates memory that will be automatically managed by HIP."] #[doc = ""] #[doc = " @param [out] dev_ptr - pointer to allocated device memory"] #[doc = " @param [in] size - requested allocation size in bytes"] @@ -3469,7 +4040,7 @@ extern "C" { ) -> hipError_t; } extern "C" { - #[doc = " @brief Prefetches memory to the specified destination device using AMD HMM."] + #[doc = " @brief Prefetches memory to the specified destination device using HIP."] #[doc = ""] #[doc = " @param [in] dev_ptr pointer to be prefetched"] #[doc = " @param [in] count size in bytes for prefetching"] @@ -3485,7 +4056,7 @@ extern "C" { ) -> hipError_t; } extern "C" { - #[doc = " @brief Advise about the usage of a given memory range to AMD HMM."] + #[doc = " @brief Advise about the usage of a given memory range to HIP."] #[doc = ""] #[doc = " @param [in] dev_ptr pointer to memory to set the advice for"] #[doc = " @param [in] count size in bytes of the memory range"] @@ -3501,9 +4072,9 @@ extern "C" { ) -> hipError_t; } extern "C" { - #[doc = " @brief Query an attribute of a given memory range in AMD HMM."] + #[doc = " @brief Query an attribute of a given memory range in HIP."] #[doc = ""] - #[doc = " @param [in/out] data a pointer to a memory location where the result of each"] + #[doc = " @param [in,out] data a pointer to a memory location where the result of each"] #[doc = " attribute query will be written to"] #[doc = " @param [in] data_size the size of data"] #[doc = " @param [in] attribute the attribute to query"] @@ -3520,9 +4091,9 @@ extern "C" { ) -> hipError_t; } extern "C" { - #[doc = " @brief Query attributes of a given memory range in AMD HMM."] + #[doc = " @brief Query attributes of a given memory range in HIP."] #[doc = ""] - #[doc = " @param [in/out] data a two-dimensional array containing pointers to memory locations"] + #[doc = " @param [in,out] data a two-dimensional array containing pointers to memory locations"] #[doc = " where the result of each attribute query will be written to"] #[doc = " @param [in] data_sizes an array, containing the sizes of each result"] #[doc = " @param [in] attributes the attribute to query"] @@ -3542,24 +4113,25 @@ extern "C" { ) -> hipError_t; } extern "C" { - #[doc = " @brief Attach memory to a stream asynchronously in AMD HMM."] + #[doc = " @brief Attach memory to a stream asynchronously in HIP."] #[doc = ""] #[doc = " @param [in] stream - stream in which to enqueue the attach operation"] #[doc = " @param [in] dev_ptr - pointer to memory (must be a pointer to managed memory or"] #[doc = " to a valid host-accessible region of system-allocated memory)"] #[doc = " @param [in] length - length of memory (defaults to zero)"] - #[doc = " @param [in] flags - must be one of cudaMemAttachGlobal, cudaMemAttachHost or"] - #[doc = " cudaMemAttachSingle (defaults to cudaMemAttachSingle)"] + #[doc = " @param [in] flags - must be one of hipMemAttachGlobal, hipMemAttachHost or"] + #[doc = " hipMemAttachSingle (defaults to hipMemAttachSingle)"] #[doc = ""] #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] pub fn hipStreamAttachMemAsync( stream: hipStream_t, - dev_ptr: *mut hipDeviceptr_t, + dev_ptr: *mut ::std::os::raw::c_void, length: usize, flags: ::std::os::raw::c_uint, ) -> hipError_t; } extern "C" { + #[doc = " @}"] #[doc = " @brief Allocate device accessible page locked host memory [Deprecated]"] #[doc = ""] #[doc = " @param[out] ptr Pointer to the allocated host pinned memory"] @@ -3729,7 +4301,6 @@ extern "C" { #[doc = " @return #hipSuccess,"] #[doc = " #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with"] #[doc = "hipMalloc)"] - #[doc = ""] #[doc = " @deprecated use hipHostFree() instead"] pub fn hipFreeHost(ptr: *mut ::std::os::raw::c_void) -> hipError_t; } @@ -3923,6 +4494,18 @@ extern "C" { ) -> hipError_t; } extern "C" { + #[doc = " @brief Returns a global pointer from a module."] + #[doc = " Returns in *dptr and *bytes the pointer and size of the global of name name located in module hmod."] + #[doc = " If no variable of that name exists, it returns hipErrorNotFound. Both parameters dptr and bytes are optional."] + #[doc = " If one of them is NULL, it is ignored and hipSuccess is returned."] + #[doc = ""] + #[doc = " @param[out] dptr Returned global device pointer"] + #[doc = " @param[out] bytes Returned global size in bytes"] + #[doc = " @param[in] hmod Module to retrieve global from"] + #[doc = " @param[in] name Name of global to retrieve"] + #[doc = ""] + #[doc = " @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotFound, #hipErrorInvalidContext"] + #[doc = ""] pub fn hipModuleGetGlobal( dptr: *mut hipDeviceptr_t, bytes: *mut usize, @@ -4648,6 +5231,13 @@ extern "C" { pub fn hipDrvMemcpy3DAsync(pCopy: *const HIP_MEMCPY3D, stream: hipStream_t) -> hipError_t; } extern "C" { + #[doc = " @}"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = " @defgroup PeerToPeer PeerToPeer Device Memory Access"] + #[doc = " @{"] + #[doc = " @warning PeerToPeer support is experimental."] + #[doc = " This section describes the PeerToPeer device memory access functions of HIP runtime API."] #[doc = " @brief Determine if a device can access a peer's memory."] #[doc = ""] #[doc = " @param [out] canAccessPeer Returns the peer access capability (0 or 1)"] @@ -4756,6 +5346,17 @@ extern "C" { ) -> hipError_t; } extern "C" { + #[doc = " @}"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = " @defgroup Context Context Management"] + #[doc = " @{"] + #[doc = " This section describes the context management functions of HIP runtime API."] + #[doc = ""] + #[doc = " @addtogroup ContextD Context Management [Deprecated]"] + #[doc = " @{"] + #[doc = " @ingroup Context"] + #[doc = " This section describes the deprecated context management functions of HIP runtime API."] #[doc = " @brief Create a context and set it as current/ default context"] #[doc = ""] #[doc = " @param [out] ctx"] @@ -4974,6 +5575,7 @@ extern "C" { pub fn hipCtxDisablePeerAccess(peerCtx: hipCtx_t) -> hipError_t; } extern "C" { + #[doc = " @}"] #[doc = " @brief Get the state of the primary context."] #[doc = ""] #[doc = " @param [in] Device to get primary context flags for"] @@ -5042,6 +5644,12 @@ extern "C" { ) -> hipError_t; } extern "C" { + #[doc = " @}"] + #[doc = ""] + #[doc = " @defgroup Module Module Management"] + #[doc = " @{"] + #[doc = " This section describes the module management functions of HIP runtime API."] + #[doc = ""] #[doc = " @brief Loads code object from file into a hipModule_t"] #[doc = ""] #[doc = " @param [in] fname"] @@ -5217,7 +5825,7 @@ extern "C" { #[doc = " @brief Launches kernels on multiple devices where thread blocks can cooperate and"] #[doc = " synchronize as they execute."] #[doc = ""] - #[doc = " @param [in] hipLaunchParams List of launch parameters, one per device."] + #[doc = " @param [in] launchParamsList List of launch parameters, one per device."] #[doc = " @param [in] numDevices Size of the launchParamsList array."] #[doc = " @param [in] flags Flags to control launch behavior."] #[doc = ""] @@ -5245,6 +5853,12 @@ extern "C" { ) -> hipError_t; } extern "C" { + #[doc = " @}"] + #[doc = ""] + #[doc = " @defgroup Occupancy Occupancy"] + #[doc = " @{"] + #[doc = " This section describes the occupancy functions of HIP runtime API."] + #[doc = ""] #[doc = " @brief determine the grid and block sizes to achieves maximum occupancy for a kernel"] #[doc = ""] #[doc = " @param [out] gridSize minimum grid size for maximum potential occupancy"] @@ -5373,6 +5987,12 @@ extern "C" { pub fn hipProfilerStop() -> hipError_t; } extern "C" { + #[doc = " @}"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = " @defgroup Clang Launch API to support the triple-chevron syntax"] + #[doc = " @{"] + #[doc = " This section describes the API to support the triple-chevron syntax."] #[doc = " @brief Configure a kernel launch."] #[doc = ""] #[doc = " @param [in] gridDim grid dimension specified as multiple of blockDim."] @@ -5460,6 +6080,13 @@ extern "C" { ) -> hipError_t; } extern "C" { + #[doc = " @}"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = " @defgroup Texture Texture Management"] + #[doc = " @{"] + #[doc = " This section describes the texture management functions of HIP runtime API."] + #[doc = ""] #[doc = " @addtogroup TexturD Texture Management [Deprecated]"] #[doc = " @{"] #[doc = " @ingroup Texture"] @@ -5778,3 +6405,511 @@ extern "C" { extern "C" { pub fn hipGetStreamDeviceId(stream: hipStream_t) -> ::std::os::raw::c_int; } +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct ihipGraph { + _unused: [u8; 0], +} +#[doc = " An opaque value that represents a hip graph"] +pub type hipGraph_t = *mut ihipGraph; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipGraphNode { + _unused: [u8; 0], +} +#[doc = " An opaque value that represents a hip graph node"] +pub type hipGraphNode_t = *mut hipGraphNode; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipGraphExec { + _unused: [u8; 0], +} +#[doc = " An opaque value that represents a hip graph Exec"] +pub type hipGraphExec_t = *mut hipGraphExec; +impl hipGraphNodeType { + #[doc = "< GPU kernel node"] + pub const hipGraphNodeTypeKernel: hipGraphNodeType = hipGraphNodeType(1); +} +impl hipGraphNodeType { + #[doc = "< Memcpy 3D node"] + pub const hipGraphNodeTypeMemcpy: hipGraphNodeType = hipGraphNodeType(2); +} +impl hipGraphNodeType { + #[doc = "< Memset 1D node"] + pub const hipGraphNodeTypeMemset: hipGraphNodeType = hipGraphNodeType(3); +} +impl hipGraphNodeType { + #[doc = "< Host (executable) node"] + pub const hipGraphNodeTypeHost: hipGraphNodeType = hipGraphNodeType(4); +} +impl hipGraphNodeType { + #[doc = "< Node which executes an embedded graph"] + pub const hipGraphNodeTypeGraph: hipGraphNodeType = hipGraphNodeType(5); +} +impl hipGraphNodeType { + #[doc = "< Empty (no-op) node"] + pub const hipGraphNodeTypeEmpty: hipGraphNodeType = hipGraphNodeType(6); +} +impl hipGraphNodeType { + #[doc = "< External event wait node"] + pub const hipGraphNodeTypeWaitEvent: hipGraphNodeType = hipGraphNodeType(7); +} +impl hipGraphNodeType { + #[doc = "< External event record node"] + pub const hipGraphNodeTypeEventRecord: hipGraphNodeType = hipGraphNodeType(8); +} +impl hipGraphNodeType { + #[doc = "< Memcpy 1D node"] + pub const hipGraphNodeTypeMemcpy1D: hipGraphNodeType = hipGraphNodeType(9); +} +impl hipGraphNodeType { + #[doc = "< MemcpyFromSymbol node"] + pub const hipGraphNodeTypeMemcpyFromSymbol: hipGraphNodeType = hipGraphNodeType(10); +} +impl hipGraphNodeType { + #[doc = "< MemcpyToSymbol node"] + pub const hipGraphNodeTypeMemcpyToSymbol: hipGraphNodeType = hipGraphNodeType(11); +} +impl hipGraphNodeType { + pub const hipGraphNodeTypeCount: hipGraphNodeType = hipGraphNodeType(12); +} +#[repr(transparent)] +#[doc = " @brief hipGraphNodeType"] +#[doc = " @enum"] +#[doc = ""] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub struct hipGraphNodeType(pub ::std::os::raw::c_uint); +pub type hipHostFn_t = + ::std::option::Option<unsafe extern "C" fn(userData: *mut ::std::os::raw::c_void)>; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipHostNodeParams { + pub fn_: hipHostFn_t, + pub userData: *mut ::std::os::raw::c_void, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipKernelNodeParams { + pub blockDim: dim3, + pub extra: *mut *mut ::std::os::raw::c_void, + pub func: *mut ::std::os::raw::c_void, + pub gridDim: dim3, + pub kernelParams: *mut *mut ::std::os::raw::c_void, + pub sharedMemBytes: ::std::os::raw::c_uint, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct hipMemsetParams { + pub dst: *mut ::std::os::raw::c_void, + pub elementSize: ::std::os::raw::c_uint, + pub height: usize, + pub pitch: usize, + pub value: ::std::os::raw::c_uint, + pub width: usize, +} +impl hipGraphExecUpdateResult { + #[doc = "< The update succeeded"] + pub const hipGraphExecUpdateSuccess: hipGraphExecUpdateResult = hipGraphExecUpdateResult(0); +} +impl hipGraphExecUpdateResult { + #[doc = "< The update failed for an unexpected reason which is described"] + #[doc = "< in the return value of the function"] + pub const hipGraphExecUpdateError: hipGraphExecUpdateResult = hipGraphExecUpdateResult(1); +} +impl hipGraphExecUpdateResult { + #[doc = "< The update failed because the topology changed"] + pub const hipGraphExecUpdateErrorTopologyChanged: hipGraphExecUpdateResult = + hipGraphExecUpdateResult(2); +} +impl hipGraphExecUpdateResult { + #[doc = "< The update failed because a node type changed"] + pub const hipGraphExecUpdateErrorNodeTypeChanged: hipGraphExecUpdateResult = + hipGraphExecUpdateResult(3); +} +impl hipGraphExecUpdateResult { + pub const hipGraphExecUpdateErrorFunctionChanged: hipGraphExecUpdateResult = + hipGraphExecUpdateResult(4); +} +impl hipGraphExecUpdateResult { + pub const hipGraphExecUpdateErrorParametersChanged: hipGraphExecUpdateResult = + hipGraphExecUpdateResult(5); +} +impl hipGraphExecUpdateResult { + pub const hipGraphExecUpdateErrorNotSupported: hipGraphExecUpdateResult = + hipGraphExecUpdateResult(6); +} +impl hipGraphExecUpdateResult { + pub const hipGraphExecUpdateErrorUnsupportedFunctionChange: hipGraphExecUpdateResult = + hipGraphExecUpdateResult(7); +} +#[repr(transparent)] +#[doc = " @brief hipGraphExecUpdateResult"] +#[doc = " @enum"] +#[doc = ""] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub struct hipGraphExecUpdateResult(pub ::std::os::raw::c_uint); +impl hipStreamCaptureMode { + pub const hipStreamCaptureModeGlobal: hipStreamCaptureMode = hipStreamCaptureMode(0); +} +impl hipStreamCaptureMode { + pub const hipStreamCaptureModeThreadLocal: hipStreamCaptureMode = hipStreamCaptureMode(1); +} +impl hipStreamCaptureMode { + pub const hipStreamCaptureModeRelaxed: hipStreamCaptureMode = hipStreamCaptureMode(2); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub struct hipStreamCaptureMode(pub ::std::os::raw::c_uint); +impl hipStreamCaptureStatus { + #[doc = "< Stream is not capturing"] + pub const hipStreamCaptureStatusNone: hipStreamCaptureStatus = hipStreamCaptureStatus(0); +} +impl hipStreamCaptureStatus { + #[doc = "< Stream is actively capturing"] + pub const hipStreamCaptureStatusActive: hipStreamCaptureStatus = hipStreamCaptureStatus(1); +} +impl hipStreamCaptureStatus { + #[doc = "< Stream is part of a capture sequence that has been"] + #[doc = "< invalidated, but not terminated"] + pub const hipStreamCaptureStatusInvalidated: hipStreamCaptureStatus = hipStreamCaptureStatus(2); +} +#[repr(transparent)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub struct hipStreamCaptureStatus(pub ::std::os::raw::c_uint); +extern "C" { + pub fn hipStreamBeginCapture(stream: hipStream_t, mode: hipStreamCaptureMode) -> hipError_t; +} +extern "C" { + pub fn hipStreamEndCapture(stream: hipStream_t, pGraph: *mut hipGraph_t) -> hipError_t; +} +extern "C" { + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = "-------------------------------------------------------------------------------------------------"] + #[doc = " @defgroup Graph Graph Management"] + #[doc = " @{"] + #[doc = " This section describes the graph management functions of HIP runtime API."] + #[doc = " @brief Creates a graph"] + #[doc = ""] + #[doc = " @param [out] pGraph - pointer to graph to create."] + #[doc = " @param [in] flags - flags for graph creation, must be 0."] + #[doc = ""] + #[doc = " @returns #hipSuccess."] + #[doc = ""] + pub fn hipGraphCreate(pGraph: *mut hipGraph_t, flags: ::std::os::raw::c_uint) -> hipError_t; +} +extern "C" { + #[doc = " @brief Destroys a graph"] + #[doc = ""] + #[doc = " @param [in] graph - instance of graph to destroy."] + #[doc = ""] + #[doc = " @returns #hipSuccess."] + #[doc = ""] + pub fn hipGraphDestroy(graph: hipGraph_t) -> hipError_t; +} +extern "C" { + #[doc = " @brief Destroys an executable graph"] + #[doc = ""] + #[doc = " @param [in] pGraphExec - instance of executable graph to destry."] + #[doc = ""] + #[doc = " @returns #hipSuccess."] + #[doc = ""] + pub fn hipGraphExecDestroy(pGraphExec: hipGraphExec_t) -> hipError_t; +} +extern "C" { + #[doc = " @brief Creates an executable graph from a graph"] + #[doc = ""] + #[doc = " @param [out] pGraphExec - pointer to instantiated executable graph to create."] + #[doc = " @param [in] graph - instance of graph to instantiate."] + #[doc = " @param [out] pErrorNode - pointer to error node in case error occured in graph instantiation,"] + #[doc = " it could modify the correponding node."] + #[doc = " @param [out] pLogBuffer - pointer to log buffer."] + #[doc = " @param [in] bufferSize - the size of log buffer."] + #[doc = ""] + #[doc = " @returns #hipSuccess, #hipErrorOutOfMemory."] + #[doc = ""] + pub fn hipGraphInstantiate( + pGraphExec: *mut hipGraphExec_t, + graph: hipGraph_t, + pErrorNode: *mut hipGraphNode_t, + pLogBuffer: *mut ::std::os::raw::c_char, + bufferSize: usize, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief launches an executable graph in a stream"] + #[doc = ""] + #[doc = " @param [in] graphExec - instance of executable graph to launch."] + #[doc = " @param [in] stream - instance of stream in which to launch executable graph."] + #[doc = " @returns #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidHandle, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphLaunch(graphExec: hipGraphExec_t, stream: hipStream_t) -> hipError_t; +} +extern "C" { + #[doc = " @brief Creates a kernel execution node and adds it to a graph."] + #[doc = ""] + #[doc = " @param [out] pGraphNode - pointer to graph node to create."] + #[doc = " @param [in,out] graph - instance of graph to add the created node."] + #[doc = " @param [in] pDependencies - pointer to the dependencies on the kernel execution node."] + #[doc = " @param [in] numDependencies - the number of the dependencies."] + #[doc = " @param [in] pNodeParams - pointer to the parameters to the kernel execution node on the GPU."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDeviceFunction"] + #[doc = ""] + pub fn hipGraphAddKernelNode( + pGraphNode: *mut hipGraphNode_t, + graph: hipGraph_t, + pDependencies: *const hipGraphNode_t, + numDependencies: usize, + pNodeParams: *const hipKernelNodeParams, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Creates a memcpy node and adds it to a graph."] + #[doc = ""] + #[doc = " @param [out] pGraphNode - pointer to graph node to create."] + #[doc = " @param [in,out] graph - instance of graph to add the created node."] + #[doc = " @param [in] pDependencies - const pointer to the dependencies on the kernel execution node."] + #[doc = " @param [in] numDependencies - the number of the dependencies."] + #[doc = " @param [in] pCopyParams - const pointer to the parameters for the memory copy."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphAddMemcpyNode( + pGraphNode: *mut hipGraphNode_t, + graph: hipGraph_t, + pDependencies: *const hipGraphNode_t, + numDependencies: usize, + pCopyParams: *const hipMemcpy3DParms, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Creates a 1D memcpy node and adds it to a graph."] + #[doc = ""] + #[doc = " @param [out] pGraphNode - pointer to graph node to create."] + #[doc = " @param [in,out] graph - instance of the graph to add the created node."] + #[doc = " @param [in] pDependencies - const pointer to the dependencies on the kernel execution node."] + #[doc = " @param [in] numDependencies - the number of the dependencies."] + #[doc = " @param [in] dst - pointer to memory address to the destination."] + #[doc = " @param [in] src - pointer to memory address to the source."] + #[doc = " @param [in] count - the size of the memory to copy."] + #[doc = " @param [in] kind - the type of memory copy."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphAddMemcpyNode1D( + pGraphNode: *mut hipGraphNode_t, + graph: hipGraph_t, + pDependencies: *const hipGraphNode_t, + numDependencies: usize, + dst: *mut ::std::os::raw::c_void, + src: *const ::std::os::raw::c_void, + count: usize, + kind: hipMemcpyKind, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Creates a memset node and adds it to a graph."] + #[doc = ""] + #[doc = " @param [out] pGraphNode - pointer to the graph node to create."] + #[doc = " @param [in,out] graph - instance of the graph to add the created node."] + #[doc = " @param [in] pDependencies - const pointer to the dependencies on the kernel execution node."] + #[doc = " @param [in] numDependencies - the number of the dependencies."] + #[doc = " @param [in] pMemsetParams - const pointer to the parameters for the memory set."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphAddMemsetNode( + pGraphNode: *mut hipGraphNode_t, + graph: hipGraph_t, + pDependencies: *const hipGraphNode_t, + numDependencies: usize, + pMemsetParams: *const hipMemsetParams, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Returns graph nodes."] + #[doc = ""] + #[doc = " @param [in] graph - instance of graph to get the nodes."] + #[doc = " @param [out] nodes - pointer to the graph nodes."] + #[doc = " @param [out] numNodes - the number of graph nodes."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphGetNodes( + graph: hipGraph_t, + nodes: *mut hipGraphNode_t, + numNodes: *mut usize, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Returns graph's root nodes."] + #[doc = ""] + #[doc = " @param [in] graph - instance of the graph to get the nodes."] + #[doc = " @param [out] pRootNodes - pointer to the graph's root nodes."] + #[doc = " @param [out] pNumRootNodes - the number of graph's root nodes."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphGetRootNodes( + graph: hipGraph_t, + pRootNodes: *mut hipGraphNode_t, + pNumRootNodes: *mut usize, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Gets kernel node's parameters."] + #[doc = ""] + #[doc = " @param [in] node - instance of the node to get parameters from."] + #[doc = " @param [out] pNodeParams - pointer to the parameters"] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphKernelNodeGetParams( + node: hipGraphNode_t, + pNodeParams: *mut hipKernelNodeParams, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Sets a kernel node's parameters."] + #[doc = ""] + #[doc = " @param [in] node - instance of the node to set parameters to."] + #[doc = " @param [in] pNodeParams - const pointer to the parameters."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphKernelNodeSetParams( + node: hipGraphNode_t, + pNodeParams: *const hipKernelNodeParams, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Gets a memcpy node's parameters."] + #[doc = ""] + #[doc = " @param [in] node - instance of the node to get parameters from."] + #[doc = " @param [out] pNodeParams - pointer to the parameters."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphMemcpyNodeGetParams( + node: hipGraphNode_t, + pNodeParams: *mut hipMemcpy3DParms, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Sets a memcpy node's parameters."] + #[doc = ""] + #[doc = " @param [in] node - instance of the node to set parameters to."] + #[doc = " @param [in] pNodeParams - const pointer to the parameters."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphMemcpyNodeSetParams( + node: hipGraphNode_t, + pNodeParams: *const hipMemcpy3DParms, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Gets a memset node's parameters."] + #[doc = ""] + #[doc = " @param [in] node - instane of the node to get parameters from."] + #[doc = " @param [out] pNodeParams - pointer to the parameters."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphMemsetNodeGetParams( + node: hipGraphNode_t, + pNodeParams: *mut hipMemsetParams, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Sets a memset node's parameters."] + #[doc = ""] + #[doc = " @param [in] node - instance of the node to set parameters to."] + #[doc = " @param [out] pNodeParams - pointer to the parameters."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphMemsetNodeSetParams( + node: hipGraphNode_t, + pNodeParams: *const hipMemsetParams, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Sets the parameters for a kernel node in the given graphExec."] + #[doc = ""] + #[doc = " @param [in] hGraphExec - instance of the executable graph with the node."] + #[doc = " @param [in] node - instance of the node to set parameters to."] + #[doc = " @param [in] pNodeParams - const pointer to the kernel node parameters."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphExecKernelNodeSetParams( + hGraphExec: hipGraphExec_t, + node: hipGraphNode_t, + pNodeParams: *const hipKernelNodeParams, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Adds dependency edges to a graph."] + #[doc = ""] + #[doc = " @param [in] graph - instance of the graph to add dependencies."] + #[doc = " @param [in] from - pointer to the graph nodes with dependenties to add from."] + #[doc = " @param [in] to - pointer to the graph nodes to add dependenties to."] + #[doc = " @param [in] numDependencies - the number of dependencies to add."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphAddDependencies( + graph: hipGraph_t, + from: *const hipGraphNode_t, + to: *const hipGraphNode_t, + numDependencies: usize, + ) -> hipError_t; +} +extern "C" { + #[doc = " @brief Creates an empty node and adds it to a graph."] + #[doc = ""] + #[doc = " @param [out] pGraphNode - pointer to the graph node to create and add to the graph."] + #[doc = " @param [in,out] graph - instane of the graph the node is add to."] + #[doc = " @param [in] pDependencies - const pointer to the node dependenties."] + #[doc = " @param [in] numDependencies - the number of dependencies."] + #[doc = " @returns #hipSuccess, #hipErrorInvalidValue"] + #[doc = ""] + pub fn hipGraphAddEmptyNode( + pGraphNode: *mut hipGraphNode_t, + graph: hipGraph_t, + pDependencies: *const hipGraphNode_t, + numDependencies: usize, + ) -> hipError_t; +} +#[doc = "-------------------------------------------------------------------------------------------------"] +#[doc = "-------------------------------------------------------------------------------------------------"] +#[doc = " @defgroup GL Interop"] +#[doc = " @{"] +#[doc = " This section describes Stream Memory Wait and Write functions of HIP runtime API."] +pub type GLuint = ::std::os::raw::c_uint; +extern "C" { + pub fn hipGLGetDevices( + pHipDeviceCount: *mut ::std::os::raw::c_uint, + pHipDevices: *mut ::std::os::raw::c_int, + hipDeviceCount: ::std::os::raw::c_uint, + deviceList: hipGLDeviceList, + ) -> hipError_t; +} +extern "C" { + pub fn hipGraphicsGLRegisterBuffer( + resource: *mut *mut hipGraphicsResource, + buffer: GLuint, + flags: ::std::os::raw::c_uint, + ) -> hipError_t; +} +extern "C" { + pub fn hipGraphicsMapResources( + count: ::std::os::raw::c_int, + resources: *mut hipGraphicsResource_t, + stream: hipStream_t, + ) -> hipError_t; +} +extern "C" { + pub fn hipGraphicsResourceGetMappedPointer( + devPtr: *mut *mut ::std::os::raw::c_void, + size: *mut usize, + resource: hipGraphicsResource_t, + ) -> hipError_t; +} +extern "C" { + pub fn hipGraphicsUnmapResources( + count: ::std::os::raw::c_int, + resources: *mut hipGraphicsResource_t, + stream: hipStream_t, + ) -> hipError_t; +} +extern "C" { + pub fn hipGraphicsUnregisterResource(resource: hipGraphicsResource_t) -> hipError_t; +} diff --git a/zluda/src/cuda.rs b/zluda/src/cuda.rs index 7eec241..634c0df 100644 --- a/zluda/src/cuda.rs +++ b/zluda/src/cuda.rs @@ -2234,7 +2234,7 @@ pub extern "system" fn cuDeviceGetName( #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: CUdevice) -> CUresult { - r#impl::device::get_uuid(uuid, dev.decuda()).encuda() + r#impl::device::get_uuid(uuid, dev.0).encuda() } #[cfg_attr(not(test), no_mangle)] @@ -2243,7 +2243,7 @@ pub extern "system" fn cuDeviceGetLuid( deviceNodeMask: *mut ::std::os::raw::c_uint, dev: CUdevice, ) -> CUresult { - r#impl::device::get_luid(luid, deviceNodeMask, dev.decuda()).encuda() + r#impl::device::get_luid(luid, deviceNodeMask, dev.0).encuda() } #[cfg_attr(not(test), no_mangle)] @@ -2284,8 +2284,11 @@ pub extern "system" fn cuDeviceComputeCapability( } #[cfg_attr(not(test), no_mangle)] -pub extern "system" fn cuDevicePrimaryCtxRetain(pctx: *mut CUcontext, dev: CUdevice) -> CUresult { - r#impl::device::primary_ctx_retain(pctx.decuda(), dev.decuda()).encuda() +pub extern "system" fn cuDevicePrimaryCtxRetain( + pctx: *mut CUcontext, + CUdevice(dev): CUdevice, +) -> CUresult { + unsafe { hipDevicePrimaryCtxRetain(pctx as _, dev).into() } } #[cfg_attr(not(test), no_mangle)] @@ -2294,8 +2297,8 @@ pub extern "system" fn cuDevicePrimaryCtxRelease(dev: CUdevice) -> CUresult { } #[cfg_attr(not(test), no_mangle)] -pub extern "system" fn cuDevicePrimaryCtxRelease_v2(dev: CUdevice) -> CUresult { - r#impl::device::primary_ctx_release_v2(dev.decuda()) +pub extern "system" fn cuDevicePrimaryCtxRelease_v2(CUdevice(dev): CUdevice) -> CUresult { + unsafe { hipDevicePrimaryCtxRelease(dev).into() } } #[cfg_attr(not(test), no_mangle)] @@ -2316,11 +2319,11 @@ pub extern "system" fn cuDevicePrimaryCtxSetFlags_v2( #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuDevicePrimaryCtxGetState( - dev: CUdevice, + CUdevice(dev): CUdevice, flags: *mut ::std::os::raw::c_uint, active: *mut ::std::os::raw::c_int, ) -> CUresult { - r#impl::device::primary_ctx_get_state(dev.decuda(), flags, active).encuda() + unsafe { hipDevicePrimaryCtxGetState(dev, flags, active).into() } } #[cfg_attr(not(test), no_mangle)] @@ -2337,39 +2340,39 @@ pub extern "system" fn cuDevicePrimaryCtxReset_v2(dev: CUdevice) -> CUresult { pub extern "system" fn cuCtxCreate_v2( pctx: *mut CUcontext, flags: ::std::os::raw::c_uint, - dev: CUdevice, + CUdevice(dev): CUdevice, ) -> CUresult { - r#impl::context::create_v2(pctx.decuda(), flags, dev.decuda()).encuda() + unsafe { hipCtxCreate(pctx as _, flags, dev).into() } } #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult { - r#impl::context::destroy_v2(ctx.decuda()).encuda() + unsafe { hipCtxDestroy(ctx as _).into() } } #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuCtxPushCurrent_v2(ctx: CUcontext) -> CUresult { - r#impl::context::push_current_v2(ctx.decuda()) + unsafe { hipCtxPushCurrent(ctx as _).into() } } #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuCtxPopCurrent_v2(pctx: *mut CUcontext) -> CUresult { - r#impl::context::pop_current_v2(pctx.decuda()) + unsafe { hipCtxPopCurrent(pctx as _).into() } } #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuCtxSetCurrent(ctx: CUcontext) -> CUresult { - r#impl::context::set_current(ctx.decuda()) + unsafe { hipCtxSetCurrent(ctx as _).into() } } #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuCtxGetCurrent(pctx: *mut CUcontext) -> CUresult { - r#impl::context::get_current(pctx.decuda()).encuda() + unsafe { hipCtxGetCurrent(pctx as _).into() } } #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuCtxGetDevice(device: *mut CUdevice) -> CUresult { - r#impl::context::get_device(device.decuda()).encuda() + unsafe { hipCtxGetDevice(device as _).into() } } #[cfg_attr(not(test), no_mangle)] @@ -2379,7 +2382,7 @@ pub extern "system" fn cuCtxGetFlags(flags: *mut ::std::os::raw::c_uint) -> CUre #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuCtxSynchronize() -> CUresult { - r#impl::context::synchronize().encuda() + unsafe { hipCtxSynchronize().into() } } #[cfg_attr(not(test), no_mangle)] @@ -2417,7 +2420,7 @@ pub extern "system" fn cuCtxGetApiVersion( ctx: CUcontext, version: *mut ::std::os::raw::c_uint, ) -> CUresult { - r#impl::context::get_api_version(ctx.decuda(), version).encuda() + unsafe { hipCtxGetApiVersion(ctx as _, version as _).into() } } #[cfg_attr(not(test), no_mangle)] @@ -2438,12 +2441,12 @@ pub extern "system" fn cuCtxAttach( pctx: *mut CUcontext, flags: ::std::os::raw::c_uint, ) -> CUresult { - r#impl::context::attach(pctx.decuda(), flags).encuda() + r#impl::unimplemented() } #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuCtxDetach(ctx: CUcontext) -> CUresult { - r#impl::context::detach(ctx.decuda()).encuda() + r#impl::unimplemented() } #[cfg_attr(not(test), no_mangle)] @@ -2451,7 +2454,7 @@ pub extern "system" fn cuModuleLoad( module: *mut CUmodule, fname: *const ::std::os::raw::c_char, ) -> CUresult { - r#impl::module::load(module.decuda(), fname).encuda() + unsafe { hipModuleLoad(module as _, fname as _).into() } } #[cfg_attr(not(test), no_mangle)] @@ -2459,7 +2462,7 @@ pub extern "system" fn cuModuleLoadData( module: *mut CUmodule, image: *const ::std::os::raw::c_void, ) -> CUresult { - r#impl::module::load_data(module.decuda(), image).encuda() + unsafe { hipModuleLoadData(module as _, image as _).into() } } // TODO: parse jit options @@ -2471,7 +2474,16 @@ pub extern "system" fn cuModuleLoadDataEx( options: *mut CUjit_option, optionValues: *mut *mut ::std::os::raw::c_void, ) -> CUresult { - r#impl::module::load_data(module.decuda(), image).encuda() + unsafe { + hipModuleLoadDataEx( + module as _, + image as _, + numOptions, + options as _, + optionValues, + ) + .into() + } } #[cfg_attr(not(test), no_mangle)] @@ -2484,7 +2496,7 @@ pub extern "system" fn cuModuleLoadFatBinary( #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuModuleUnload(hmod: CUmodule) -> CUresult { - r#impl::module::unload(hmod.decuda()).encuda() + unsafe { hipModuleUnload(hmod as _).into() } } #[cfg_attr(not(test), no_mangle)] @@ -2493,7 +2505,7 @@ pub extern "system" fn cuModuleGetFunction( hmod: CUmodule, name: *const ::std::os::raw::c_char, ) -> CUresult { - r#impl::module::get_function(hfunc.decuda(), hmod.decuda(), name).encuda() + unsafe { hipModuleGetFunction(hfunc as _, hmod as _, name).into() } } #[cfg_attr(not(test), no_mangle)] @@ -2581,7 +2593,7 @@ pub extern "system" fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> C #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult { - r#impl::memory::alloc_v2(dptr.decuda(), bytesize).encuda() + unsafe { hipMalloc(dptr as _, bytesize).into() } } #[cfg_attr(not(test), no_mangle)] @@ -2597,7 +2609,7 @@ pub extern "system" fn cuMemAllocPitch_v2( #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult { - r#impl::memory::free_v2(dptr.decuda()).encuda() + unsafe { hipFree(dptr.0 as _).into() } } #[cfg_attr(not(test), no_mangle)] @@ -2757,7 +2769,7 @@ pub extern "system" fn cuMemcpyHtoD_v2( srcHost: *const ::std::os::raw::c_void, ByteCount: usize, ) -> CUresult { - r#impl::memory::copy_v2(dstDevice.decuda(), srcHost, ByteCount).encuda() + unsafe { hipMemcpyHtoD(dstDevice.0 as _, srcHost as _, ByteCount).into() } } // TODO: implement default stream semantics @@ -2767,7 +2779,7 @@ pub extern "system" fn cuMemcpyHtoD_v2_ptds( srcHost: *const ::std::os::raw::c_void, ByteCount: usize, ) -> CUresult { - r#impl::memory::copy_v2(dstDevice.decuda(), srcHost, ByteCount).encuda() + cuMemcpyHtoD_v2(dstDevice, srcHost, ByteCount) } #[cfg_attr(not(test), no_mangle)] @@ -2776,7 +2788,7 @@ pub extern "system" fn cuMemcpyDtoH_v2( srcDevice: CUdeviceptr, ByteCount: usize, ) -> CUresult { - r#impl::memory::copy_v2(dstHost, srcDevice.decuda(), ByteCount).encuda() + unsafe { hipMemcpyDtoH(dstHost as _, srcDevice.0 as _, ByteCount).into() } } // TODO: implement default stream semantics @@ -2786,7 +2798,7 @@ pub extern "system" fn cuMemcpyDtoH_v2_ptds( srcDevice: CUdeviceptr, ByteCount: usize, ) -> CUresult { - r#impl::memory::copy_v2(dstHost, srcDevice.decuda(), ByteCount).encuda() + cuMemcpyDtoH_v2(dstHost, srcDevice, ByteCount) } #[cfg_attr(not(test), no_mangle)] @@ -2973,7 +2985,7 @@ pub extern "system" fn cuMemsetD8_v2( uc: ::std::os::raw::c_uchar, N: usize, ) -> CUresult { - r#impl::memory::set_d8_v2(dstDevice.decuda(), uc, N).encuda() + unsafe { hipMemsetD8(dstDevice.0 as _, uc, N).into() } } // TODO: implement default stream semantics @@ -2983,7 +2995,7 @@ pub extern "system" fn cuMemsetD8_v2_ptds( uc: ::std::os::raw::c_uchar, N: usize, ) -> CUresult { - r#impl::memory::set_d8_v2(dstDevice.decuda(), uc, N).encuda() + cuMemsetD8_v2(dstDevice, uc, N) } #[cfg_attr(not(test), no_mangle)] @@ -3001,7 +3013,7 @@ pub extern "system" fn cuMemsetD32_v2( ui: ::std::os::raw::c_uint, N: usize, ) -> CUresult { - r#impl::memory::set_d32_v2(dstDevice.decuda(), ui, N).encuda() + unsafe { hipMemsetD32(dstDevice.0 as _, ui as _, N).into() } } // TODO: implement default stream semantics @@ -3011,7 +3023,7 @@ pub extern "system" fn cuMemsetD32_v2_ptds( ui: ::std::os::raw::c_uint, N: usize, ) -> CUresult { - r#impl::memory::set_d32_v2(dstDevice.decuda(), ui, N).encuda() + cuMemsetD32_v2(dstDevice, ui, N) } #[cfg_attr(not(test), no_mangle)] @@ -3359,7 +3371,7 @@ pub extern "system" fn cuStreamCreate( phStream: *mut CUstream, Flags: ::std::os::raw::c_uint, ) -> CUresult { - r#impl::stream::create(phStream.decuda(), Flags).encuda() + unsafe { hipStreamCreateWithFlags(phStream as _, Flags) }.into() } #[cfg_attr(not(test), no_mangle)] @@ -3389,13 +3401,13 @@ pub extern "system" fn cuStreamGetFlags( #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuStreamGetCtx(hStream: CUstream, pctx: *mut CUcontext) -> CUresult { - r#impl::stream::get_ctx(hStream.decuda(), pctx.decuda()).encuda() + unsafe { hipStreamGetCtx(hStream as _, pctx as _) }.into() } // TODO: implement default stream semantics #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuStreamGetCtx_ptsz(hStream: CUstream, pctx: *mut CUcontext) -> CUresult { - r#impl::stream::get_ctx(hStream.decuda(), pctx.decuda()).encuda() + cuStreamGetCtx(hStream, pctx) } #[cfg_attr(not(test), no_mangle)] @@ -3471,12 +3483,12 @@ pub extern "system" fn cuStreamQuery(hStream: CUstream) -> CUresult { #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuStreamSynchronize(hStream: CUstream) -> CUresult { - r#impl::stream::synchronize(hStream.decuda()).encuda() + unsafe { hipStreamSynchronize(hStream as _) }.into() } #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuStreamDestroy_v2(hStream: CUstream) -> CUresult { - r#impl::stream::destroy_v2(hStream.decuda()).encuda() + unsafe { hipStreamDestroy(hStream as _) }.into() } #[cfg_attr(not(test), no_mangle)] @@ -3659,7 +3671,7 @@ pub extern "system" fn cuFuncGetAttribute( attrib: CUfunction_attribute, hfunc: CUfunction, ) -> CUresult { - r#impl::function::get_attribute(pi, attrib, hfunc.decuda()).encuda() + r#impl::function::get_attribute(pi, attrib, hfunc).into() } #[cfg_attr(not(test), no_mangle)] @@ -3698,20 +3710,7 @@ pub extern "system" fn cuLaunchKernel( kernelParams: *mut *mut ::std::os::raw::c_void, extra: *mut *mut ::std::os::raw::c_void, ) -> CUresult { - r#impl::function::launch_kernel( - f.decuda(), - gridDimX, - gridDimY, - gridDimZ, - blockDimX, - blockDimY, - blockDimZ, - sharedMemBytes, - hStream.decuda(), - kernelParams, - extra, - ) - .encuda() + todo!() } // TODO: implement default stream semantics @@ -3729,20 +3728,7 @@ pub extern "system" fn cuLaunchKernel_ptsz( kernelParams: *mut *mut ::std::os::raw::c_void, extra: *mut *mut ::std::os::raw::c_void, ) -> CUresult { - r#impl::function::launch_kernel( - f.decuda(), - gridDimX, - gridDimY, - gridDimZ, - blockDimX, - blockDimY, - blockDimZ, - sharedMemBytes, - hStream.decuda(), - kernelParams, - extra, - ) - .encuda() + todo!() } #[cfg_attr(not(test), no_mangle)] @@ -3786,7 +3772,7 @@ pub extern "system" fn cuFuncSetBlockShape( y: ::std::os::raw::c_int, z: ::std::os::raw::c_int, ) -> CUresult { - r#impl::function::set_block_shape(hfunc.decuda(), x, y, z).encuda() + r#impl::unimplemented() } #[cfg_attr(not(test), no_mangle)] diff --git a/zluda/src/impl/context.rs b/zluda/src/impl/context.rs deleted file mode 100644 index ed3f90c..0000000 --- a/zluda/src/impl/context.rs +++ /dev/null @@ -1,374 +0,0 @@ -use super::{device, stream::Stream, stream::StreamData, HasLivenessCookie, LiveCheck}; -use super::{transmute_lifetime_mut, CUresult, GlobalState}; -use crate::{cuda::CUcontext, cuda_impl}; -use std::{cell::RefCell, num::NonZeroU32, os::raw::c_uint, ptr, sync::atomic::AtomicU32}; -use std::{ - collections::HashSet, - mem::{self}, -}; - -thread_local! { - pub static CONTEXT_STACK: RefCell<Vec<*mut Context>> = RefCell::new(Vec::new()); -} - -pub type Context = LiveCheck<ContextData>; - -impl HasLivenessCookie for ContextData { - #[cfg(target_pointer_width = "64")] - const COOKIE: usize = 0x5f0119560b643ffb; - - #[cfg(target_pointer_width = "32")] - const COOKIE: usize = 0x0b643ffb; - - const LIVENESS_FAIL: CUresult = CUresult::CUDA_ERROR_INVALID_CONTEXT; - - fn try_drop(&mut self) -> Result<(), CUresult> { - for stream in self.streams.iter() { - let stream = unsafe { &mut **stream }; - stream.context = ptr::null_mut(); - Stream::destroy_impl(unsafe { Stream::ptr_from_inner(stream) })?; - } - Ok(()) - } -} - -enum ContextRefCount { - Primary, - NonPrimary(NonZeroU32), -} - -impl ContextRefCount { - fn new(is_primary: bool) -> Self { - if is_primary { - ContextRefCount::Primary - } else { - ContextRefCount::NonPrimary(unsafe { NonZeroU32::new_unchecked(1) }) - } - } - - fn incr(&mut self) -> Result<(), CUresult> { - match self { - ContextRefCount::Primary => Ok(()), - ContextRefCount::NonPrimary(c) => { - let (new_count, overflow) = c.get().overflowing_add(1); - if overflow { - Err(CUresult::CUDA_ERROR_INVALID_VALUE) - } else { - *c = unsafe { NonZeroU32::new_unchecked(new_count) }; - Ok(()) - } - } - } - } - - #[must_use] - fn decr(&mut self) -> bool { - match self { - ContextRefCount::Primary => false, - ContextRefCount::NonPrimary(c) => { - if c.get() == 1 { - return true; - } - *c = unsafe { NonZeroU32::new_unchecked(c.get() - 1) }; - false - } - } - } -} - -pub struct ContextData { - pub flags: AtomicU32, - // This pointer is null only for a moment when constructing primary context - pub device: *mut device::Device, - ref_count: ContextRefCount, - pub default_stream: StreamData, - pub streams: HashSet<*mut StreamData>, - // All the fields below are here to support internal CUDA driver API - pub cuda_manager: *mut cuda_impl::rt::ContextStateManager, - pub cuda_state: *mut cuda_impl::rt::ContextState, - pub cuda_dtor_cb: Option< - extern "system" fn( - CUcontext, - *mut cuda_impl::rt::ContextStateManager, - *mut cuda_impl::rt::ContextState, - ), - >, -} - -impl ContextData { - pub fn new( - flags: c_uint, - is_primary: bool, - dev: *mut device::Device, - ) -> Result<Self, CUresult> { - let default_stream = StreamData::new_unitialized()?; - Ok(ContextData { - flags: AtomicU32::new(flags), - device: dev, - ref_count: ContextRefCount::new(is_primary), - default_stream, - streams: HashSet::new(), - cuda_manager: ptr::null_mut(), - cuda_state: ptr::null_mut(), - cuda_dtor_cb: None, - }) - } -} - -impl Context { - pub fn late_init(&mut self) { - let ctx_data: &'static mut _ = { - let this = self.as_option_mut().unwrap(); - let result = { unsafe { transmute_lifetime_mut(this) } }; - drop(this); - result - }; - { self.as_option_mut().unwrap() } - .default_stream - .late_init(ctx_data); - } -} - -pub fn create_v2( - pctx: *mut *mut Context, - flags: u32, - dev_idx: device::Index, -) -> Result<(), CUresult> { - if pctx == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - let mut ctx_box = GlobalState::lock_device(dev_idx, |dev| { - let dev_ptr = dev as *mut _; - let mut ctx_box = Box::new(LiveCheck::new(ContextData::new( - flags, - false, - dev_ptr as *mut _, - )?)); - ctx_box.late_init(); - Ok::<_, CUresult>(ctx_box) - })??; - let ctx_ref = ctx_box.as_mut() as *mut Context; - unsafe { *pctx = ctx_ref }; - mem::forget(ctx_box); - CONTEXT_STACK.with(|stack| stack.borrow_mut().push(ctx_ref)); - Ok(()) -} - -pub fn destroy_v2(ctx: *mut Context) -> Result<(), CUresult> { - if ctx == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - CONTEXT_STACK.with(|stack| { - let mut stack = stack.borrow_mut(); - let should_pop = match stack.last() { - Some(active_ctx) => *active_ctx == (ctx as *mut _), - None => false, - }; - if should_pop { - stack.pop(); - } - }); - GlobalState::lock(|_| Context::destroy_impl(ctx))? -} - -pub(crate) fn push_current_v2(pctx: *mut Context) -> CUresult { - if pctx == ptr::null_mut() { - return CUresult::CUDA_ERROR_INVALID_VALUE; - } - CONTEXT_STACK.with(|stack| stack.borrow_mut().push(pctx)); - CUresult::CUDA_SUCCESS -} - -pub fn pop_current_v2(pctx: *mut *mut Context) -> CUresult { - if pctx == ptr::null_mut() { - return CUresult::CUDA_ERROR_INVALID_VALUE; - } - let mut ctx = CONTEXT_STACK.with(|stack| stack.borrow_mut().pop()); - let ctx_ptr = match &mut ctx { - Some(ctx) => *ctx as *mut _, - None => return CUresult::CUDA_ERROR_INVALID_CONTEXT, - }; - unsafe { *pctx = ctx_ptr }; - CUresult::CUDA_SUCCESS -} - -pub fn get_current(pctx: *mut *mut Context) -> Result<(), CUresult> { - if pctx == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - let ctx = CONTEXT_STACK.with(|stack| match stack.borrow().last() { - Some(ctx) => *ctx as *mut _, - None => ptr::null_mut(), - }); - unsafe { *pctx = ctx }; - Ok(()) -} - -pub fn set_current(ctx: *mut Context) -> CUresult { - if ctx == ptr::null_mut() { - CONTEXT_STACK.with(|stack| stack.borrow_mut().pop()); - CUresult::CUDA_SUCCESS - } else { - CONTEXT_STACK.with(|stack| stack.borrow_mut().push(ctx)); - CUresult::CUDA_SUCCESS - } -} - -pub fn get_api_version(ctx: *mut Context, version: *mut u32) -> Result<(), CUresult> { - if ctx == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - GlobalState::lock(|_| { - unsafe { &*ctx }.as_result()?; - Ok::<_, CUresult>(()) - })??; - //TODO: query device for properties roughly matching CUDA API version - unsafe { *version = 1100 }; - Ok(()) -} - -pub fn get_device(dev: *mut device::Index) -> Result<(), CUresult> { - let dev_idx = GlobalState::lock_current_context(|ctx| unsafe { &*ctx.device }.index)?; - unsafe { *dev = dev_idx }; - Ok(()) -} - -pub fn attach(pctx: *mut *mut Context, _flags: c_uint) -> Result<(), CUresult> { - if pctx == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - let ctx = GlobalState::lock_current_context_unchecked(|unchecked_ctx| { - let ctx = unchecked_ctx.as_result_mut()?; - ctx.ref_count.incr()?; - Ok::<_, CUresult>(unchecked_ctx as *mut _) - })??; - unsafe { *pctx = ctx }; - Ok(()) -} - -pub fn detach(pctx: *mut Context) -> Result<(), CUresult> { - if pctx == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - GlobalState::lock_current_context_unchecked(|unchecked_ctx| { - let ctx = unchecked_ctx.as_result_mut()?; - if ctx.ref_count.decr() { - Context::destroy_impl(unchecked_ctx)?; - } - Ok::<_, CUresult>(()) - })? -} - -pub(crate) fn synchronize() -> Result<(), CUresult> { - GlobalState::lock_current_context(|ctx| { - ctx.default_stream.synchronize()?; - for stream in ctx.streams.iter().copied() { - unsafe { &mut *stream }.synchronize()?; - } - Ok(()) - })? -} - -#[cfg(test)] -mod test { - use super::super::test::CudaDriverFns; - use super::super::CUresult; - use std::{ffi::c_void, ptr}; - - cuda_driver_test!(destroy_leaves_zombie_context); - - fn destroy_leaves_zombie_context<T: CudaDriverFns>() { - assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); - let mut ctx1 = ptr::null_mut(); - let mut ctx2 = ptr::null_mut(); - let mut ctx3 = ptr::null_mut(); - assert_eq!(T::cuCtxCreate_v2(&mut ctx1, 0, 0), CUresult::CUDA_SUCCESS); - assert_eq!(T::cuCtxCreate_v2(&mut ctx2, 0, 0), CUresult::CUDA_SUCCESS); - assert_eq!(T::cuCtxCreate_v2(&mut ctx3, 0, 0), CUresult::CUDA_SUCCESS); - assert_eq!(T::cuCtxDestroy_v2(ctx2), CUresult::CUDA_SUCCESS); - let mut popped_ctx1 = ptr::null_mut(); - assert_eq!( - T::cuCtxPopCurrent_v2(&mut popped_ctx1), - CUresult::CUDA_SUCCESS - ); - assert_eq!(popped_ctx1, ctx3); - let mut popped_ctx2 = ptr::null_mut(); - assert_eq!( - T::cuCtxPopCurrent_v2(&mut popped_ctx2), - CUresult::CUDA_SUCCESS - ); - assert_eq!(popped_ctx2, ctx2); - let mut popped_ctx3 = ptr::null_mut(); - assert_eq!( - T::cuCtxPopCurrent_v2(&mut popped_ctx3), - CUresult::CUDA_SUCCESS - ); - assert_eq!(popped_ctx3, ctx1); - let mut temp = 0; - assert_eq!( - T::cuCtxGetApiVersion(ctx2, &mut temp), - CUresult::CUDA_ERROR_INVALID_CONTEXT - ); - assert_eq!( - T::cuCtxPopCurrent_v2(&mut ptr::null_mut()), - CUresult::CUDA_ERROR_INVALID_CONTEXT - ); - } - - cuda_driver_test!(empty_pop_fails); - - fn empty_pop_fails<T: CudaDriverFns>() { - assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); - let mut ctx = ptr::null_mut(); - assert_eq!( - T::cuCtxPopCurrent_v2(&mut ctx), - CUresult::CUDA_ERROR_INVALID_CONTEXT - ); - } - - cuda_driver_test!(destroy_pops_top_of_stack); - - fn destroy_pops_top_of_stack<T: CudaDriverFns>() { - assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); - let mut ctx1 = ptr::null_mut(); - let mut ctx2 = ptr::null_mut(); - assert_eq!(T::cuCtxCreate_v2(&mut ctx1, 0, 0), CUresult::CUDA_SUCCESS); - assert_eq!(T::cuCtxCreate_v2(&mut ctx2, 0, 0), CUresult::CUDA_SUCCESS); - assert_eq!(T::cuCtxDestroy_v2(ctx2), CUresult::CUDA_SUCCESS); - let mut popped_ctx1 = ptr::null_mut(); - assert_eq!( - T::cuCtxPopCurrent_v2(&mut popped_ctx1), - CUresult::CUDA_SUCCESS - ); - assert_eq!(popped_ctx1, ctx1); - let mut popped_ctx2 = ptr::null_mut(); - assert_eq!( - T::cuCtxPopCurrent_v2(&mut popped_ctx2), - CUresult::CUDA_ERROR_INVALID_CONTEXT - ); - } - - cuda_driver_test!(double_destroy_fails); - - fn double_destroy_fails<T: CudaDriverFns>() { - assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); - let mut ctx = ptr::null_mut(); - assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS); - assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS); - let destroy_result = T::cuCtxDestroy_v2(ctx); - // original CUDA impl returns randomly one or the other - assert!( - destroy_result == CUresult::CUDA_ERROR_INVALID_CONTEXT - || destroy_result == CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED - ); - } - - cuda_driver_test!(no_current_on_init); - - fn no_current_on_init<T: CudaDriverFns>() { - assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); - let mut ctx = 1 as *mut c_void; - assert_eq!(T::cuCtxGetCurrent(&mut ctx), CUresult::CUDA_SUCCESS); - assert_eq!(ctx, ptr::null_mut()); - } -} diff --git a/zluda/src/impl/device.rs b/zluda/src/impl/device.rs index 2ca7251..bf0545d 100644 --- a/zluda/src/impl/device.rs +++ b/zluda/src/impl/device.rs @@ -1,4 +1,4 @@ -use super::{context, transmute_lifetime, transmute_lifetime_mut, CUresult, GlobalState}; +use super::{transmute_lifetime, transmute_lifetime_mut, CUresult}; use crate::cuda; use cuda::{CUdevice_attribute, CUuuid_st}; use hip_runtime_sys::{ @@ -19,124 +19,6 @@ use std::{ const PROJECT_URL_SUFFIX_SHORT: &'static str = " [ZLUDA]"; const PROJECT_URL_SUFFIX_LONG: &'static str = " [github.com/vosen/ZLUDA]"; -#[repr(transparent)] -#[derive(Clone, Copy, Eq, PartialEq, Hash)] -pub struct Index(pub c_int); - -pub struct Device { - pub index: Index, - pub ocl_base: ocl_core::DeviceId, - pub default_queue: ocl_core::CommandQueue, - pub ocl_context: ocl_core::Context, - pub primary_context: context::Context, - pub allocations: HashSet<*mut c_void>, - pub is_amd: bool, - pub name: String, -} - -unsafe impl Send for Device {} - -impl Device { - pub fn new( - platform: ocl_core::PlatformId, - ocl_dev: ocl_core::DeviceId, - idx: usize, - is_amd: bool, - ) -> Result<Self, CUresult> { - let mut props = ocl_core::ContextProperties::new(); - props.set_platform(platform); - let ctx = ocl_core::create_context(Some(&props), &[ocl_dev], None, None)?; - let queue = ocl_core::create_command_queue(&ctx, ocl_dev, None)?; - let primary_context = - context::Context::new(context::ContextData::new(0, true, ptr::null_mut())?); - let props = ocl_core::get_device_info(ocl_dev, ocl_core::DeviceInfo::Name)?; - let name = if let ocl_core::DeviceInfoResult::Name(name) = props { - Ok(name) - } else { - Err(CUresult::CUDA_ERROR_UNKNOWN) - }?; - Ok(Self { - index: Index(idx as c_int), - ocl_base: ocl_dev, - default_queue: queue, - ocl_context: ctx, - primary_context, - allocations: HashSet::new(), - is_amd, - name, - }) - } - - pub fn late_init(&mut self) { - self.primary_context.as_option_mut().unwrap().device = self as *mut _; - } -} - -pub fn get_count(count: *mut c_int) -> Result<(), CUresult> { - let len = GlobalState::lock(|state| state.devices.len())?; - unsafe { *count = len as c_int }; - Ok(()) -} - -pub fn get(device: *mut Index, ordinal: c_int) -> Result<(), CUresult> { - if device == ptr::null_mut() || ordinal < 0 { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - let len = GlobalState::lock(|state| state.devices.len())?; - if ordinal < (len as i32) { - unsafe { *device = Index(ordinal) }; - Ok(()) - } else { - Err(CUresult::CUDA_ERROR_INVALID_VALUE) - } -} - -pub fn get_name(name: *mut c_char, len: i32, dev_idx: Index) -> Result<(), CUresult> { - if name == ptr::null_mut() || len < 0 { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - let name_string = GlobalState::lock_device(dev_idx, |dev| dev.name.clone())?; - let mut dst_null_pos = cmp::min((len - 1) as usize, name_string.len()); - unsafe { std::ptr::copy_nonoverlapping(name_string.as_ptr() as *const _, name, dst_null_pos) }; - if name_string.len() + PROJECT_URL_SUFFIX_LONG.len() < (len as usize) { - unsafe { - std::ptr::copy_nonoverlapping( - PROJECT_URL_SUFFIX_LONG.as_ptr(), - name.add(name_string.len()) as *mut _, - PROJECT_URL_SUFFIX_LONG.len(), - ) - }; - dst_null_pos += PROJECT_URL_SUFFIX_LONG.len(); - } else if name_string.len() + PROJECT_URL_SUFFIX_SHORT.len() < (len as usize) { - unsafe { - std::ptr::copy_nonoverlapping( - PROJECT_URL_SUFFIX_SHORT.as_ptr(), - name.add(name_string.len()) as *mut _, - PROJECT_URL_SUFFIX_SHORT.len(), - ) - }; - dst_null_pos += PROJECT_URL_SUFFIX_SHORT.len(); - } - unsafe { *(name.add(dst_null_pos)) = 0 }; - Ok(()) -} - -pub fn total_mem_v2(bytes: *mut usize, dev_idx: Index) -> Result<(), CUresult> { - if bytes == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - let mem_size = GlobalState::lock_device(dev_idx, |dev| { - let props = ocl_core::get_device_info(dev.ocl_base, ocl_core::DeviceInfo::GlobalMemSize)?; - if let ocl_core::DeviceInfoResult::GlobalMemSize(mem_size) = props { - Ok(mem_size) - } else { - Err(CUresult::CUDA_ERROR_UNKNOWN) - } - })??; - unsafe { *bytes = mem_size as usize }; - Ok(()) -} - #[allow(warnings)] trait hipDeviceAttribute_t_ext { const hipDeviceAttributeMaximumTexture1DWidth: hipDeviceAttribute_t = @@ -420,7 +302,7 @@ pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) - unsafe { hipDeviceGetAttribute(pi, hip_attrib, dev_idx) } } -pub fn get_uuid(uuid: *mut CUuuid_st, _: Index) -> Result<(), CUresult> { +pub fn get_uuid(uuid: *mut CUuuid_st, _dev_idx: c_int) -> Result<(), CUresult> { unsafe { *uuid = CUuuid_st { bytes: mem::zeroed(), @@ -433,45 +315,9 @@ pub fn get_uuid(uuid: *mut CUuuid_st, _: Index) -> Result<(), CUresult> { pub fn get_luid( luid: *mut c_char, dev_node_mask: *mut c_uint, - _dev_idx: Index, + _dev_idx: c_int, ) -> Result<(), CUresult> { unsafe { ptr::write_bytes(luid, 0u8, 8) }; unsafe { *dev_node_mask = 0 }; Ok(()) } - -pub fn primary_ctx_get_state( - dev_idx: Index, - flags: *mut u32, - active: *mut i32, -) -> Result<(), CUresult> { - let (is_active, flags_value) = GlobalState::lock_device(dev_idx, |dev| { - // This is safe because primary context can't be dropped - let ctx_ptr = &mut dev.primary_context as *mut _; - let flags_ptr = - (&unsafe { dev.primary_context.as_ref_unchecked() }.flags) as *const AtomicU32; - let is_active = context::CONTEXT_STACK - .with(|stack| stack.borrow().last().map(|x| *x)) - .map(|current| current == ctx_ptr) - .unwrap_or(false); - let flags_value = unsafe { &*flags_ptr }.load(Ordering::Relaxed); - Ok::<_, CUresult>((is_active, flags_value)) - })??; - unsafe { *active = if is_active { 1 } else { 0 } }; - unsafe { *flags = flags_value }; - Ok(()) -} - -pub fn primary_ctx_retain( - pctx: *mut *mut context::Context, - dev_idx: Index, -) -> Result<(), CUresult> { - let ctx_ptr = GlobalState::lock_device(dev_idx, |dev| &mut dev.primary_context as *mut _)?; - unsafe { *pctx = ctx_ptr }; - Ok(()) -} - -// TODO: allow for retain/reset/release of primary context -pub(crate) fn primary_ctx_release_v2(_dev_idx: Index) -> CUresult { - CUresult::CUDA_SUCCESS -} diff --git a/zluda/src/impl/export_table.rs b/zluda/src/impl/export_table.rs index 00df754..5734f05 100644 --- a/zluda/src/impl/export_table.rs +++ b/zluda/src/impl/export_table.rs @@ -1,3 +1,10 @@ +use hip_runtime_sys::{
+ hipCtxCreate, hipDevicePrimaryCtxGetState, hipDevicePrimaryCtxRelease,
+ hipDevicePrimaryCtxRetain, hipError_t,
+};
+
+use crate::r#impl;
+
use crate::cuda::CUresult;
use crate::r#impl::os;
use crate::{
@@ -5,7 +12,8 @@ use crate::{ cuda_impl,
};
-use super::{context, context::ContextData, device, module, Decuda, Encuda, GlobalState};
+use super::{device, Decuda, Encuda};
+use std::collections::HashMap;
use std::os::raw::{c_uint, c_ulong, c_ushort};
use std::{
ffi::{c_void, CStr},
@@ -125,16 +133,21 @@ static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [ ];
unsafe extern "system" fn cudart_interface_fn1(pctx: *mut CUcontext, dev: CUdevice) -> CUresult {
- cudart_interface_fn1_impl(pctx.decuda(), dev.decuda()).encuda()
+ cudart_interface_fn1_impl(pctx, dev.0).into()
}
-fn cudart_interface_fn1_impl(
- pctx: *mut *mut context::Context,
- dev: device::Index,
-) -> Result<(), CUresult> {
- let ctx_ptr = GlobalState::lock_device(dev, |d| &mut d.primary_context as *mut _)?;
- unsafe { *pctx = ctx_ptr };
- Ok(())
+fn cudart_interface_fn1_impl(pctx: *mut CUcontext, dev: c_int) -> hipError_t {
+ let mut hip_ctx = ptr::null_mut();
+ let err = unsafe { hipDevicePrimaryCtxRetain(&mut hip_ctx, dev) };
+ if err != hipError_t::hipSuccess {
+ return err;
+ }
+ let err = unsafe { hipDevicePrimaryCtxRelease(dev) };
+ if err != hipError_t::hipSuccess {
+ return err;
+ }
+ unsafe { *pctx = hip_ctx as _ };
+ hipError_t::hipSuccess
}
/*
@@ -219,7 +232,7 @@ unsafe extern "system" fn get_module_from_cubin( {
return CUresult::CUDA_ERROR_INVALID_VALUE;
}
- let result = result.decuda();
+ //let result = result.decuda();
let fatbin_header = (*fatbinc_wrapper).data;
if (*fatbin_header).magic != FATBIN_MAGIC || (*fatbin_header).version != FATBIN_VERSION {
return CUresult::CUDA_ERROR_INVALID_VALUE;
@@ -240,6 +253,8 @@ unsafe extern "system" fn get_module_from_cubin( },
Err(_) => continue,
};
+ todo!()
+ /*
let module = module::SpirvModule::new(kernel_text_string);
match module {
Ok(module) => {
@@ -251,6 +266,7 @@ unsafe extern "system" fn get_module_from_cubin( }
Err(_) => continue,
}
+ */
}
CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
}
@@ -359,12 +375,20 @@ unsafe extern "system" fn context_local_storage_ctor( ),
>,
) -> CUresult {
- context_local_storage_ctor_impl(cu_ctx.decuda(), mgr, ctx_state, dtor_cb).encuda()
+ context_local_storage_ctor_impl(cu_ctx, mgr, ctx_state, dtor_cb);
+ CUresult::CUDA_SUCCESS
+}
+
+struct ContextRuntimeData {
+ ctx_state: *mut cuda_impl::rt::ContextState,
+ state_mgr: *mut cuda_impl::rt::ContextStateManager,
}
+static mut PRIVATE_CONTEXT_RUNTIME_DATA: Option<HashMap<CUcontext, ContextRuntimeData>> = None;
+
fn context_local_storage_ctor_impl(
- cu_ctx: *mut context::Context,
- mgr: *mut cuda_impl::rt::ContextStateManager,
+ cu_ctx: CUcontext,
+ state_mgr: *mut cuda_impl::rt::ContextStateManager,
ctx_state: *mut cuda_impl::rt::ContextState,
dtor_cb: Option<
extern "system" fn(
@@ -373,12 +397,15 @@ fn context_local_storage_ctor_impl( *mut cuda_impl::rt::ContextState,
),
>,
-) -> Result<(), CUresult> {
- lock_context(cu_ctx, |ctx: &mut ContextData| {
- ctx.cuda_manager = mgr;
- ctx.cuda_state = ctx_state;
- ctx.cuda_dtor_cb = dtor_cb;
- })
+) {
+ let map = unsafe { PRIVATE_CONTEXT_RUNTIME_DATA.get_or_insert_with(|| HashMap::new()) };
+ map.insert(
+ cu_ctx,
+ ContextRuntimeData {
+ ctx_state,
+ state_mgr,
+ },
+ );
}
// some kind of dtor
@@ -391,34 +418,24 @@ unsafe extern "system" fn context_local_storage_get_state( cu_ctx: CUcontext,
state_mgr: *mut cuda_impl::rt::ContextStateManager,
) -> CUresult {
- context_local_storage_get_state_impl(ctx_state, cu_ctx.decuda(), state_mgr).encuda()
+ context_local_storage_get_state_impl(ctx_state, cu_ctx, state_mgr).encuda()
}
fn context_local_storage_get_state_impl(
ctx_state: *mut *mut cuda_impl::rt::ContextState,
- cu_ctx: *mut context::Context,
+ cu_ctx: CUcontext,
_: *mut cuda_impl::rt::ContextStateManager,
-) -> Result<(), CUresult> {
- let cuda_state = lock_context(cu_ctx, |ctx: &mut ContextData| ctx.cuda_state)?;
- if cuda_state == ptr::null_mut() {
- Err(CUresult::CUDA_ERROR_INVALID_VALUE)
- } else {
- unsafe { *ctx_state = cuda_state };
- Ok(())
- }
-}
-
-fn lock_context<T>(
- cu_ctx: *mut context::Context,
- fn_impl: impl FnOnce(&mut ContextData) -> T,
-) -> Result<T, CUresult> {
- if cu_ctx == ptr::null_mut() {
- GlobalState::lock_current_context(fn_impl)
- } else {
- GlobalState::lock(|_| {
- let ctx = unsafe { &mut *cu_ctx }.as_result_mut()?;
- Ok(fn_impl(ctx))
- })?
+) -> CUresult {
+ match unsafe {
+ PRIVATE_CONTEXT_RUNTIME_DATA
+ .as_ref()
+ .and_then(|map| map.get(&cu_ctx))
+ } {
+ Some(val) => {
+ unsafe { *ctx_state = val.ctx_state };
+ CUresult::CUDA_SUCCESS
+ }
+ None => CUresult::CUDA_ERROR_INVALID_VALUE,
}
}
@@ -446,7 +463,7 @@ extern "system" fn ctx_create_v2_bypass( flags: ::std::os::raw::c_uint,
dev: CUdevice,
) -> CUresult {
- context::create_v2(pctx.decuda(), flags, dev.decuda()).encuda()
+ unsafe { hipCtxCreate(pctx as _, flags, dev.0).into() }
}
const HEAP_ACCESS_GUID: CUuuid = CUuuid {
@@ -483,41 +500,10 @@ unsafe extern "system" fn heap_alloc( arg1: usize,
arg2: usize,
) -> CUresult {
- if halloc_ptr == ptr::null_mut() {
- return CUresult::CUDA_ERROR_INVALID_VALUE;
- }
- let halloc = GlobalState::lock(|global_state| {
- let halloc = os::heap_alloc(global_state.global_heap, mem::size_of::<HeapAllocRecord>())
- as *mut HeapAllocRecord;
- if halloc == ptr::null_mut() {
- return Err(CUresult::CUDA_ERROR_OUT_OF_MEMORY);
- }
- (*halloc).arg1 = arg1;
- (*halloc).arg2 = arg2;
- (*halloc)._unknown = 0;
- (*halloc).global_heap = global_state.global_heap;
- Ok(halloc)
- });
- match halloc {
- Ok(Ok(halloc)) => {
- *halloc_ptr = halloc;
- CUresult::CUDA_SUCCESS
- }
- Err(err) | Ok(Err(err)) => err,
- }
+ r#impl::unimplemented()
}
// TODO: reverse and implement for Linux
unsafe extern "system" fn heap_free(halloc: *mut HeapAllocRecord, arg1: *mut usize) -> CUresult {
- if halloc == ptr::null_mut() {
- return CUresult::CUDA_ERROR_INVALID_VALUE;
- }
- if arg1 != ptr::null_mut() {
- *arg1 = (*halloc).arg2;
- }
- GlobalState::lock(|global_state| {
- os::heap_free(global_state.global_heap, halloc as *mut _);
- ()
- })
- .encuda()
+ r#impl::unimplemented()
}
diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs index 1f756ee..8470620 100644 --- a/zluda/src/impl/function.rs +++ b/zluda/src/impl/function.rs @@ -1,244 +1,28 @@ -use ocl_core::DeviceId; +use hip_runtime_sys::{hipError_t, hipFuncGetAttributes}; -use super::{stream::Stream, CUresult, GlobalState, HasLivenessCookie, LiveCheck}; -use crate::cuda::CUfunction_attribute; +use super::{CUresult, HasLivenessCookie, LiveCheck}; +use crate::cuda::{CUfunction, CUfunction_attribute}; use ::std::os::raw::{c_uint, c_void}; -use std::{hint, mem, ptr}; - -const CU_LAUNCH_PARAM_END: *mut c_void = 0 as *mut _; -const CU_LAUNCH_PARAM_BUFFER_POINTER: *mut c_void = 1 as *mut _; -const CU_LAUNCH_PARAM_BUFFER_SIZE: *mut c_void = 2 as *mut _; - -pub type Function = LiveCheck<FunctionData>; - -impl HasLivenessCookie for FunctionData { - #[cfg(target_pointer_width = "64")] - const COOKIE: usize = 0x5e2ab14d5840678e; - - #[cfg(target_pointer_width = "32")] - const COOKIE: usize = 0x33e6a1e6; - - const LIVENESS_FAIL: CUresult = CUresult::CUDA_ERROR_INVALID_HANDLE; - - fn try_drop(&mut self) -> Result<(), CUresult> { - Ok(()) - } -} - -pub struct FunctionData { - pub base: ocl_core::Kernel, - pub device: ocl_core::DeviceId, - pub arg_size: Vec<(usize, bool)>, - pub use_shared_mem: bool, - pub legacy_args: LegacyArguments, -} - -pub struct LegacyArguments { - block_shape: Option<(i32, i32, i32)>, -} - -impl LegacyArguments { - pub fn new() -> Self { - LegacyArguments { block_shape: None } - } - - #[allow(dead_code)] - pub fn is_initialized(&self) -> bool { - self.block_shape.is_some() - } - - pub fn reset(&mut self) { - self.block_shape = None; - } -} - -unsafe fn set_arg( - kernel: &ocl_core::Kernel, - arg_index: usize, - arg_size: usize, - arg_value: *const c_void, - is_mem: bool, -) -> Result<(), CUresult> { - if is_mem { - let error = 0; - unsafe { - ocl_core::ffi::clSetKernelArgSVMPointer( - kernel.as_ptr(), - arg_index as u32, - *(arg_value as *const _), - ) - }; - if error != 0 { - panic!("clSetKernelArgSVMPointer"); - } - } else { - unsafe { - ocl_core::set_kernel_arg( - kernel, - arg_index as u32, - ocl_core::ArgVal::from_raw(arg_size, arg_value, is_mem), - )?; - }; - } - Ok(()) -} - -pub fn launch_kernel( - f: *mut Function, - grid_dim_x: c_uint, - grid_dim_y: c_uint, - grid_dim_z: c_uint, - block_dim_x: c_uint, - block_dim_y: c_uint, - block_dim_z: c_uint, - shared_mem_bytes: c_uint, - hstream: *mut Stream, - kernel_params: *mut *mut c_void, - extra: *mut *mut c_void, -) -> Result<(), CUresult> { - if f == ptr::null_mut() - || (kernel_params == ptr::null_mut() && extra == ptr::null_mut()) - || (kernel_params != ptr::null_mut() && extra != ptr::null_mut()) - { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - GlobalState::lock_stream(hstream, |stream_data| { - let dev = unsafe { &mut *(*stream_data.context).device }; - let queue = stream_data.cmd_list.as_ref().unwrap(); - let func: &mut FunctionData = unsafe { &mut *f }.as_result_mut()?; - if kernel_params != ptr::null_mut() { - for (i, &(arg_size, is_mem)) in func.arg_size.iter().enumerate() { - unsafe { set_arg(&func.base, i, arg_size, *kernel_params.add(i), is_mem)? }; - } - } else { - let mut offset = 0; - let mut buffer_ptr = None; - let mut buffer_size = None; - loop { - match unsafe { *extra.add(offset) } { - CU_LAUNCH_PARAM_END => break, - CU_LAUNCH_PARAM_BUFFER_POINTER => { - buffer_ptr = Some(unsafe { *extra.add(offset + 1) as *mut u8 }); - } - CU_LAUNCH_PARAM_BUFFER_SIZE => { - buffer_size = Some(unsafe { *(*extra.add(offset + 1) as *mut usize) }); - } - _ => return Err(CUresult::CUDA_ERROR_INVALID_VALUE), - } - offset += 2; - } - match (buffer_size, buffer_ptr) { - (Some(buffer_size), Some(buffer_ptr)) => { - let sum_of_kernel_argument_sizes = - func.arg_size.iter().fold(0, |offset, &(size_of_arg, _)| { - size_of_arg + round_up_to_multiple(offset, size_of_arg) - }); - if buffer_size < sum_of_kernel_argument_sizes { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - let mut offset = 0; - for (i, &(arg_size, is_mem)) in func.arg_size.iter().enumerate() { - let buffer_offset = round_up_to_multiple(offset, arg_size); - unsafe { - set_arg( - &func.base, - i, - arg_size, - buffer_ptr.add(buffer_offset) as *const _, - is_mem, - )? - }; - offset = buffer_offset + arg_size; - } - } - _ => return Err(CUresult::CUDA_ERROR_INVALID_VALUE), - } - } - if func.use_shared_mem { - unsafe { - set_arg( - &func.base, - func.arg_size.len(), - shared_mem_bytes as usize, - ptr::null(), - false, - )? - }; - } - let buffers = dev.allocations.iter().copied().collect::<Vec<_>>(); - let err = unsafe { - ocl_core::ffi::clSetKernelExecInfo( - func.base.as_ptr(), - ocl_core::ffi::CL_KERNEL_EXEC_INFO_SVM_PTRS, - buffers.len() * mem::size_of::<*mut c_void>(), - buffers.as_ptr() as *const _, - ) - }; - assert_eq!(err, 0); - let global_dims = [ - (block_dim_x * grid_dim_x) as usize, - (block_dim_y * grid_dim_y) as usize, - (block_dim_z * grid_dim_z) as usize, - ]; - unsafe { - ocl_core::enqueue_kernel::<&mut ocl_core::Event, ocl_core::Event>( - queue, - &func.base, - 3, - None, - &global_dims, - Some([ - block_dim_x as usize, - block_dim_y as usize, - block_dim_z as usize, - ]), - None, - None, - )? - }; - Ok::<_, CUresult>(()) - })? -} - -fn round_up_to_multiple(x: usize, multiple: usize) -> usize { - ((x + multiple - 1) / multiple) * multiple -} +use std::{mem, ptr}; pub(crate) fn get_attribute( pi: *mut i32, - attrib: CUfunction_attribute, - func: *mut Function, -) -> Result<(), CUresult> { + cu_attrib: CUfunction_attribute, + func: CUfunction, +) -> hipError_t { if pi == ptr::null_mut() || func == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - match attrib { - CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => { - let max_threads = GlobalState::lock_function(func, |func| { - if let ocl_core::KernelWorkGroupInfoResult::WorkGroupSize(size) = - ocl_core::get_kernel_work_group_info( - &func.base, - &func.device, - ocl_core::KernelWorkGroupInfo::WorkGroupSize, - )? - { - Ok(size) - } else { - Err(CUresult::CUDA_ERROR_UNKNOWN) - } - })??; - unsafe { *pi = max_threads as i32 }; - Ok(()) - } - _ => Err(CUresult::CUDA_ERROR_NOT_SUPPORTED), - } -} - -pub(crate) fn set_block_shape(func: *mut Function, x: i32, y: i32, z: i32) -> Result<(), CUresult> { - if func == ptr::null_mut() || x < 0 || y < 0 || z < 0 { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - GlobalState::lock_function(func, |func| { - func.legacy_args.block_shape = Some((x, y, z)); - }) + return hipError_t::hipErrorInvalidValue; + } + let mut hip_attrib = unsafe { mem::zeroed() }; + let err = unsafe { hipFuncGetAttributes(&mut hip_attrib, func as _) }; + if err != hipError_t::hipSuccess { + return err; + } + let value = match cu_attrib { + CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => hip_attrib.maxThreadsPerBlock, + CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES => hip_attrib.sharedSizeBytes as i32, + _ => return hipError_t::hipErrorInvalidValue, + }; + unsafe { *pi = value }; + hipError_t::hipSuccess } diff --git a/zluda/src/impl/memory.rs b/zluda/src/impl/memory.rs deleted file mode 100644 index 92cc4b2..0000000 --- a/zluda/src/impl/memory.rs +++ /dev/null @@ -1,175 +0,0 @@ -use super::{
- stream::{self, CU_STREAM_LEGACY},
- CUresult, GlobalState,
-};
-use std::{
- ffi::c_void,
- mem::{self, size_of},
- ptr,
-};
-
-pub fn alloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> Result<(), CUresult> {
- let ptr = GlobalState::lock_stream(CU_STREAM_LEGACY, |stream_data| {
- let dev = unsafe { &mut *(*stream_data.context).device };
- let queue = stream_data.cmd_list.as_ref().unwrap();
- let ptr = unsafe {
- ocl_core::ffi::clSVMAlloc(
- dev.ocl_context.as_ptr(),
- ocl_core::ffi::CL_MEM_READ_WRITE,
- bytesize,
- 0,
- )
- };
- // CUDA does the same thing and e.g. GeekBench relies on this behavior
- let mut event = ptr::null_mut();
- let err = unsafe {
- ocl_core::ffi::clEnqueueSVMMemFill(
- queue.as_ptr(),
- ptr,
- &0u8 as *const u8 as *const c_void,
- 1,
- bytesize,
- 0,
- ptr::null(),
- &mut event,
- )
- };
- assert_eq!(err, 0);
- let err = unsafe { ocl_core::ffi::clWaitForEvents(1, &mut event) };
- assert_eq!(err, 0);
- dev.allocations.insert(ptr);
- Ok::<_, CUresult>(ptr)
- })??;
- unsafe { *dptr = ptr };
- Ok(())
-}
-
-pub fn copy_v2(dst: *mut c_void, src: *const c_void, bytesize: usize) -> Result<(), CUresult> {
- GlobalState::lock_stream(stream::CU_STREAM_LEGACY, |stream_data| {
- let dev = unsafe { &*(*stream_data.context).device };
- let queue = stream_data.cmd_list.as_ref().unwrap();
- let err = unsafe {
- ocl_core::ffi::clEnqueueSVMMemcpy(
- queue.as_ptr(),
- 1,
- dst,
- src,
- bytesize,
- 0,
- ptr::null(),
- ptr::null_mut(),
- )
- };
- assert_eq!(err, 0);
- Ok(())
- })?
-}
-
-pub fn free_v2(ptr: *mut c_void) -> Result<(), CUresult> {
- GlobalState::lock_current_context(|ctx| {
- let dev = unsafe { &mut *ctx.device };
- unsafe { ocl_core::ffi::clSVMFree(dev.ocl_context.as_ptr(), ptr) };
- dev.allocations.remove(&ptr);
- Ok(())
- })?
-}
-
-pub(crate) fn set_d32_v2(dst: *mut c_void, mut ui: u32, n: usize) -> Result<(), CUresult> {
- GlobalState::lock_stream(stream::CU_STREAM_LEGACY, move |stream_data| {
- let dev = unsafe { &*(*stream_data.context).device };
- let queue = stream_data.cmd_list.as_ref().unwrap();
- let pattern_size = mem::size_of_val(&ui);
- let mut event = ptr::null_mut();
- let err = unsafe {
- ocl_core::ffi::clEnqueueSVMMemFill(
- queue.as_ptr(),
- dst,
- &ui as *const _ as *const _,
- pattern_size,
- pattern_size * n,
- 0,
- ptr::null(),
- &mut event,
- )
- };
- assert_eq!(err, 0);
- let err = unsafe { ocl_core::ffi::clWaitForEvents(1, &mut event) };
- assert_eq!(err, 0);
- Ok(())
- })?
-}
-
-pub(crate) fn set_d8_v2(dst: *mut c_void, mut uc: u8, n: usize) -> Result<(), CUresult> {
- GlobalState::lock_stream(stream::CU_STREAM_LEGACY, move |stream_data| {
- let dev = unsafe { &*(*stream_data.context).device };
- let queue = stream_data.cmd_list.as_ref().unwrap();
- let pattern_size = mem::size_of_val(&uc);
- let mut event = ptr::null_mut();
- let err = unsafe {
- ocl_core::ffi::clEnqueueSVMMemFill(
- queue.as_ptr(),
- dst,
- &uc as *const _ as *const _,
- pattern_size,
- pattern_size * n,
- 0,
- ptr::null(),
- &mut event,
- )
- };
- assert_eq!(err, 0);
- let err = unsafe { ocl_core::ffi::clWaitForEvents(1, &mut event) };
- assert_eq!(err, 0);
- Ok(())
- })?
-}
-
-#[cfg(test)]
-mod test {
- use super::super::test::CudaDriverFns;
- use super::super::CUresult;
- use std::ptr;
-
- cuda_driver_test!(alloc_without_ctx);
-
- fn alloc_without_ctx<T: CudaDriverFns>() {
- assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
- let mut mem = ptr::null_mut();
- assert_eq!(
- T::cuMemAlloc_v2(&mut mem, std::mem::size_of::<usize>()),
- CUresult::CUDA_ERROR_INVALID_CONTEXT
- );
- assert_eq!(mem, ptr::null_mut());
- }
-
- cuda_driver_test!(alloc_with_ctx);
-
- fn alloc_with_ctx<T: CudaDriverFns>() {
- assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
- let mut ctx = ptr::null_mut();
- assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS);
- let mut mem = ptr::null_mut();
- assert_eq!(
- T::cuMemAlloc_v2(&mut mem, std::mem::size_of::<usize>()),
- CUresult::CUDA_SUCCESS
- );
- assert_ne!(mem, ptr::null_mut());
- assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS);
- }
-
- cuda_driver_test!(free_without_ctx);
-
- fn free_without_ctx<T: CudaDriverFns>() {
- assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
- let mut ctx = ptr::null_mut();
- assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS);
- let mut mem = ptr::null_mut();
- assert_eq!(
- T::cuMemAlloc_v2(&mut mem, std::mem::size_of::<usize>()),
- CUresult::CUDA_SUCCESS
- );
- assert_ne!(mem, ptr::null_mut());
- assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS);
- assert_eq!(T::cuMemFree_v2(mem), CUresult::CUDA_ERROR_INVALID_VALUE);
- }
-}
diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs index bb32937..09908bb 100644 --- a/zluda/src/impl/mod.rs +++ b/zluda/src/impl/mod.rs @@ -1,7 +1,4 @@ -use crate::{ - cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUmod_st, CUresult, CUstream_st}, - r#impl::device::Device, -}; +use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUmod_st, CUresult, CUstream_st}; use std::{ ffi::c_void, mem::{self, ManuallyDrop}, @@ -14,16 +11,12 @@ use std::{ #[cfg(test)] #[macro_use] pub mod test; -pub mod context; pub mod device; pub mod export_table; pub mod function; -pub mod memory; -pub mod module; #[cfg_attr(windows, path = "os_win.rs")] #[cfg_attr(not(windows), path = "os_unix.rs")] pub(crate) mod os; -pub mod stream; #[cfg(debug_assertions)] pub fn unimplemented() -> CUresult { @@ -187,244 +180,6 @@ impl<T1: Encuda<To = CUresult>, T2: Encuda<To = CUresult>> Encuda for Result<T1, } } -lazy_static! { - static ref GLOBAL_STATE: Mutex<Option<GlobalState>> = Mutex::new(None); -} - -struct GlobalState { - devices: Vec<Device>, - global_heap: *mut c_void, -} - -unsafe impl Send for GlobalState {} - -impl GlobalState { - fn lock<T>(f: impl FnOnce(&mut GlobalState) -> T) -> Result<T, CUresult> { - let mut mutex = GLOBAL_STATE - .lock() - .unwrap_or_else(|poison| poison.into_inner()); - let global_state = mutex.as_mut().ok_or(CUresult::CUDA_ERROR_ILLEGAL_STATE)?; - Ok(f(global_state)) - } - - fn lock_device<T>( - device::Index(dev_idx): device::Index, - f: impl FnOnce(&'static mut device::Device) -> T, - ) -> Result<T, CUresult> { - if dev_idx < 0 { - return Err(CUresult::CUDA_ERROR_INVALID_DEVICE); - } - Self::lock(|global_state| { - if dev_idx >= global_state.devices.len() as c_int { - Err(CUresult::CUDA_ERROR_INVALID_DEVICE) - } else { - Ok(f(unsafe { - transmute_lifetime_mut(&mut global_state.devices[dev_idx as usize]) - })) - } - })? - } - - fn lock_current_context<F: FnOnce(&mut context::ContextData) -> R, R>( - f: F, - ) -> Result<R, CUresult> { - Self::lock_current_context_unchecked(|ctx| Ok(f(ctx.as_result_mut()?)))? - } - - fn lock_current_context_unchecked<F: FnOnce(&mut context::Context) -> R, R>( - f: F, - ) -> Result<R, CUresult> { - context::CONTEXT_STACK.with(|stack| { - stack - .borrow_mut() - .last_mut() - .ok_or(CUresult::CUDA_ERROR_INVALID_CONTEXT) - .map(|ctx| GlobalState::lock(|_| f(unsafe { &mut **ctx })))? - }) - } - - fn lock_stream<T>( - stream: *mut stream::Stream, - f: impl FnOnce(&mut stream::StreamData) -> T, - ) -> Result<T, CUresult> { - if stream == ptr::null_mut() - || stream == stream::CU_STREAM_LEGACY - || stream == stream::CU_STREAM_PER_THREAD - { - Self::lock_current_context(|ctx| Ok(f(&mut ctx.default_stream)))? - } else { - Self::lock(|_| { - let stream = unsafe { &mut *stream }.as_result_mut()?; - Ok(f(stream)) - })? - } - } - - fn lock_function<T>( - func: *mut function::Function, - f: impl FnOnce(&mut function::FunctionData) -> T, - ) -> Result<T, CUresult> { - if func == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_INVALID_HANDLE); - } - Self::lock(|_| { - let func = unsafe { &mut *func }.as_result_mut()?; - Ok(f(func)) - })? - } -} - -pub fn init() -> Result<(), CUresult> { - eprintln!("{:?}", unsafe { hip_runtime_sys::hipInit(0) }); - let mut global_state = GLOBAL_STATE - .lock() - .map_err(|_| CUresult::CUDA_ERROR_UNKNOWN)?; - if global_state.is_some() { - return Ok(()); - } - let platforms = ocl_core::get_platform_ids()?; - let mut devices = platforms - .iter() - .filter_map(|plat| { - let devices = - ocl_core::get_device_ids(plat, Some(ocl_core::DeviceType::GPU), None).ok()?; - for dev in devices { - let vendor = ocl_core::get_device_info(dev, ocl_core::DeviceInfo::VendorId).ok()?; - let is_amd = match vendor { - ocl_core::DeviceInfoResult::VendorId(0x8086) => false, - ocl_core::DeviceInfoResult::VendorId(0x1002) => true, - _ => continue, - }; - let dev_type = ocl_core::get_device_info(dev, ocl_core::DeviceInfo::Type).ok()?; - if let ocl_core::DeviceInfoResult::Type(ocl_core::DeviceType::GPU) = dev_type { - return Some((plat.clone(), dev, is_amd)); - } - } - None - }) - .enumerate() - .map(|(idx, (platform, device, is_amd))| device::Device::new(platform, device, idx, is_amd)) - .collect::<Result<Vec<_>, _>>()?; - for d in devices.iter_mut() { - d.late_init(); - d.primary_context.late_init(); - } - let global_heap = unsafe { os::heap_create() }; - if global_heap == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_OUT_OF_MEMORY); - } - *global_state = Some(GlobalState { - devices, - global_heap, - }); - drop(global_state); - Ok(()) -} - -macro_rules! stringify_curesult { - ($x:ident => [ $($variant:ident),+ ]) => { - match $x { - $( - CUresult::$variant => Some(concat!(stringify!($variant), "\0")), - )+ - _ => None - } - } -} - -pub(crate) fn get_error_string(error: CUresult, str: *mut *const i8) -> CUresult { - if str == ptr::null_mut() { - return CUresult::CUDA_ERROR_INVALID_VALUE; - } - let text = stringify_curesult!( - error => [ - CUDA_SUCCESS, - CUDA_ERROR_INVALID_VALUE, - CUDA_ERROR_OUT_OF_MEMORY, - CUDA_ERROR_NOT_INITIALIZED, - CUDA_ERROR_DEINITIALIZED, - CUDA_ERROR_PROFILER_DISABLED, - CUDA_ERROR_PROFILER_NOT_INITIALIZED, - CUDA_ERROR_PROFILER_ALREADY_STARTED, - CUDA_ERROR_PROFILER_ALREADY_STOPPED, - CUDA_ERROR_NO_DEVICE, - CUDA_ERROR_INVALID_DEVICE, - CUDA_ERROR_INVALID_IMAGE, - CUDA_ERROR_INVALID_CONTEXT, - CUDA_ERROR_CONTEXT_ALREADY_CURRENT, - CUDA_ERROR_MAP_FAILED, - CUDA_ERROR_UNMAP_FAILED, - CUDA_ERROR_ARRAY_IS_MAPPED, - CUDA_ERROR_ALREADY_MAPPED, - CUDA_ERROR_NO_BINARY_FOR_GPU, - CUDA_ERROR_ALREADY_ACQUIRED, - CUDA_ERROR_NOT_MAPPED, - CUDA_ERROR_NOT_MAPPED_AS_ARRAY, - CUDA_ERROR_NOT_MAPPED_AS_POINTER, - CUDA_ERROR_ECC_UNCORRECTABLE, - CUDA_ERROR_UNSUPPORTED_LIMIT, - CUDA_ERROR_CONTEXT_ALREADY_IN_USE, - CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, - CUDA_ERROR_INVALID_PTX, - CUDA_ERROR_INVALID_GRAPHICS_CONTEXT, - CUDA_ERROR_NVLINK_UNCORRECTABLE, - CUDA_ERROR_JIT_COMPILER_NOT_FOUND, - CUDA_ERROR_INVALID_SOURCE, - CUDA_ERROR_FILE_NOT_FOUND, - CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, - CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, - CUDA_ERROR_OPERATING_SYSTEM, - CUDA_ERROR_INVALID_HANDLE, - CUDA_ERROR_ILLEGAL_STATE, - CUDA_ERROR_NOT_FOUND, - CUDA_ERROR_NOT_READY, - CUDA_ERROR_ILLEGAL_ADDRESS, - CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, - CUDA_ERROR_LAUNCH_TIMEOUT, - CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, - CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, - CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, - CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE, - CUDA_ERROR_CONTEXT_IS_DESTROYED, - CUDA_ERROR_ASSERT, - CUDA_ERROR_TOO_MANY_PEERS, - CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED, - CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED, - CUDA_ERROR_HARDWARE_STACK_ERROR, - CUDA_ERROR_ILLEGAL_INSTRUCTION, - CUDA_ERROR_MISALIGNED_ADDRESS, - CUDA_ERROR_INVALID_ADDRESS_SPACE, - CUDA_ERROR_INVALID_PC, - CUDA_ERROR_LAUNCH_FAILED, - CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE, - CUDA_ERROR_NOT_PERMITTED, - CUDA_ERROR_NOT_SUPPORTED, - CUDA_ERROR_SYSTEM_NOT_READY, - CUDA_ERROR_SYSTEM_DRIVER_MISMATCH, - CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE, - CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED, - CUDA_ERROR_STREAM_CAPTURE_INVALIDATED, - CUDA_ERROR_STREAM_CAPTURE_MERGE, - CUDA_ERROR_STREAM_CAPTURE_UNMATCHED, - CUDA_ERROR_STREAM_CAPTURE_UNJOINED, - CUDA_ERROR_STREAM_CAPTURE_ISOLATION, - CUDA_ERROR_STREAM_CAPTURE_IMPLICIT, - CUDA_ERROR_CAPTURED_EVENT, - CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD, - CUDA_ERROR_TIMEOUT, - CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE, - CUDA_ERROR_UNKNOWN - ] - ); - match text { - Some(text) => { - unsafe { *str = text.as_ptr() as *const _ }; - CUresult::CUDA_SUCCESS - } - None => CUresult::CUDA_ERROR_INVALID_VALUE, - } -} - unsafe fn transmute_lifetime<'a, 'b, T: ?Sized>(t: &'a T) -> &'b T { mem::transmute(t) } @@ -437,20 +192,6 @@ pub fn driver_get_version() -> c_int { i32::max_value() } -impl<'a> CudaRepr for CUctx_st { - type Impl = context::Context; -} - -impl<'a> CudaRepr for CUdevice { - type Impl = device::Index; -} - -impl Decuda<device::Index> for CUdevice { - fn decuda(self) -> device::Index { - device::Index(self.0) - } -} - impl<'a> CudaRepr for CUdeviceptr { type Impl = *mut c_void; } @@ -460,15 +201,3 @@ impl Decuda<*mut c_void> for CUdeviceptr { self.0 as *mut _ } } - -impl<'a> CudaRepr for CUmod_st { - type Impl = module::Module; -} - -impl<'a> CudaRepr for CUfunc_st { - type Impl = function::Function; -} - -impl<'a> CudaRepr for CUstream_st { - type Impl = stream::Stream; -} diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs deleted file mode 100644 index c1d7ffc..0000000 --- a/zluda/src/impl/module.rs +++ /dev/null @@ -1,435 +0,0 @@ -use std::{ - borrow::Cow, - collections::hash_map, - collections::HashMap, - ffi::c_void, - ffi::CStr, - ffi::CString, - fs::File, - io::{self, Read, Seek, SeekFrom, Write}, - mem, - os::raw::{c_char, c_int, c_uint}, - path::PathBuf, - process::{Command, Stdio}, - ptr, slice, -}; - -const CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL: u32 = 0x4200; -const CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL: u32 = 0x4201; -const CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL: u32 = 0x4202; - -use super::{ - device, - function::Function, - function::{FunctionData, LegacyArguments}, - CUresult, GlobalState, HasLivenessCookie, LiveCheck, -}; -use ptx; -use tempfile::NamedTempFile; - -pub type Module = LiveCheck<ModuleData>; - -impl HasLivenessCookie for ModuleData { - #[cfg(target_pointer_width = "64")] - const COOKIE: usize = 0xf1313bd46505f98a; - - #[cfg(target_pointer_width = "32")] - const COOKIE: usize = 0xbdbe3f15; - - const LIVENESS_FAIL: CUresult = CUresult::CUDA_ERROR_INVALID_HANDLE; - - fn try_drop(&mut self) -> Result<(), CUresult> { - Ok(()) - } -} - -pub struct ModuleData { - pub spirv: SpirvModule, - // This should be a Vec<>, but I'm feeling lazy - pub device_binaries: HashMap<device::Index, CompiledModule>, -} - -pub struct SpirvModule { - pub binaries: Vec<u32>, - pub kernel_info: HashMap<String, ptx::KernelInfo>, - pub should_link_ptx_impl: Option<(&'static [u8], &'static [u8])>, - pub build_options: CString, -} - -pub struct CompiledModule { - pub base: ocl_core::Program, - pub kernels: HashMap<CString, Box<Function>>, -} - -impl<L, T, E> From<ptx::ParseError<L, T, E>> for CUresult { - fn from(_: ptx::ParseError<L, T, E>) -> Self { - CUresult::CUDA_ERROR_INVALID_PTX - } -} - -impl From<ptx::TranslateError> for CUresult { - fn from(_: ptx::TranslateError) -> Self { - CUresult::CUDA_ERROR_INVALID_PTX - } -} - -impl SpirvModule { - pub fn new_raw<'a>(text: *const c_char) -> Result<Self, CUresult> { - let u8_text = unsafe { CStr::from_ptr(text) }; - let ptx_text = u8_text - .to_str() - .map_err(|_| CUresult::CUDA_ERROR_INVALID_PTX)?; - Self::new(ptx_text) - } - - pub fn new<'a>(ptx_text: &str) -> Result<Self, CUresult> { - let mut errors = Vec::new(); - let ast = ptx::ModuleParser::new().parse(&mut errors, ptx_text)?; - let spirv_module = ptx::to_spirv_module(ast)?; - Ok(SpirvModule { - binaries: spirv_module.assemble(), - kernel_info: spirv_module.kernel_info, - should_link_ptx_impl: spirv_module.should_link_ptx_impl, - build_options: spirv_module.build_options, - }) - } - - const LLVM_SPIRV: &'static str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv"; - const AMDGPU: &'static str = "/opt/amdgpu-pro/"; - const AMDGPU_TARGET: &'static str = "amdgcn-amd-amdhsa"; - const AMDGPU_BITCODE: [&'static str; 8] = [ - "opencl.bc", - "ocml.bc", - "ockl.bc", - "oclc_correctly_rounded_sqrt_off.bc", - "oclc_daz_opt_on.bc", - "oclc_finite_only_off.bc", - "oclc_unsafe_math_off.bc", - "oclc_wavefrontsize64_off.bc", - ]; - const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_"; - - fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> { - let generic_paths = Self::AMDGPU_BITCODE.iter().map(|x| { - let mut path = PathBuf::from(Self::AMDGPU); - path.push("amdgcn"); - path.push("bitcode"); - path.push(x); - path - }); - let suffix = if let Some(suffix_idx) = device_name.find(':') { - suffix_idx - } else { - device_name.len() - }; - let mut additional_path = PathBuf::from(Self::AMDGPU); - additional_path.push("amdgcn"); - additional_path.push("bitcode"); - additional_path.push(format!( - "{}{}{}", - Self::AMDGPU_BITCODE_DEVICE_PREFIX, - &device_name[3..suffix], - ".bc" - )); - generic_paths.chain(std::iter::once(additional_path)) - } - - #[cfg(not(target_os = "linux"))] - fn compile_amd( - device_name: &str, - spirv_il: &[u8], - ptx_lib: Option<(&'static [u8], &'static [u8])>, - ) -> io::Result<Vec<u8>> { - unimplemented!() - } - - #[cfg(target_os = "linux")] - fn compile_amd( - device_name: &str, - spirv_il: &[u8], - ptx_lib: Option<(&'static [u8], &'static [u8])>, - ) -> io::Result<Vec<u8>> { - use std::env; - let dir = tempfile::tempdir()?; - let mut spirv = NamedTempFile::new_in(&dir)?; - let llvm = NamedTempFile::new_in(&dir)?; - spirv.write_all(spirv_il)?; - let llvm_spirv_path = match env::var("LLVM_SPIRV") { - Ok(path) => Cow::Owned(path), - Err(_) => Cow::Borrowed(Self::LLVM_SPIRV), - }; - let to_llvm_cmd = Command::new(&*llvm_spirv_path) - .arg("-r") - .arg("-o") - .arg(llvm.path()) - .arg(spirv.path()) - .status()?; - assert!(to_llvm_cmd.success()); - let linked_binary = NamedTempFile::new_in(&dir)?; - let mut llvm_link = PathBuf::from(Self::AMDGPU); - llvm_link.push("bin"); - llvm_link.push("llvm-link"); - let mut linker_cmd = Command::new(&llvm_link); - linker_cmd - .arg("--only-needed") - .arg("-o") - .arg(linked_binary.path()) - .arg(llvm.path()) - .args(Self::get_bitcode_paths(device_name)); - if cfg!(debug_assertions) { - linker_cmd.arg("-v"); - } - let status = linker_cmd.status()?; - assert!(status.success()); - let mut ptx_lib_bitcode = NamedTempFile::new_in(&dir)?; - let compiled_binary = NamedTempFile::new_in(&dir)?; - let mut cland_exe = PathBuf::from(Self::AMDGPU); - cland_exe.push("bin"); - cland_exe.push("clang"); - let mut compiler_cmd = Command::new(&cland_exe); - compiler_cmd - .arg(format!("-mcpu={}", device_name)) - .arg("-nogpulib") - .arg("-mno-wavefrontsize64") - .arg("-O3") - .arg("-Xlinker") - .arg("--no-undefined") - .arg("-target") - .arg(Self::AMDGPU_TARGET) - .arg("-o") - .arg(compiled_binary.path()) - .arg("-x") - .arg("ir") - .arg(linked_binary.path()); - if let Some((_, bitcode)) = ptx_lib { - ptx_lib_bitcode.write_all(bitcode)?; - compiler_cmd.arg(ptx_lib_bitcode.path()); - }; - if cfg!(debug_assertions) { - compiler_cmd.arg("-v"); - } - let status = compiler_cmd.status()?; - assert!(status.success()); - let mut result = Vec::new(); - let compiled_bin_path = compiled_binary.path(); - let mut compiled_binary = File::open(compiled_bin_path)?; - compiled_binary.read_to_end(&mut result)?; - let mut persistent = PathBuf::from("/tmp/zluda"); - std::fs::create_dir_all(&persistent)?; - persistent.push(compiled_bin_path.file_name().unwrap()); - std::fs::copy(compiled_bin_path, persistent)?; - Ok(result) - } - - fn compile_intel<'a>( - ctx: &ocl_core::Context, - dev: &ocl_core::DeviceId, - byte_il: &'a [u8], - build_options: &CString, - ptx_lib: Option<(&'static [u8], &'static [u8])>, - ) -> ocl_core::Result<ocl_core::Program> { - let main_module = ocl_core::create_program_with_il(ctx, byte_il, None)?; - Ok(match ptx_lib { - None => { - ocl_core::build_program(&main_module, Some(&[dev]), build_options, None, None)?; - main_module - } - Some((ptx_impl_intel, _)) => { - let ptx_impl_prog = ocl_core::create_program_with_il(ctx, ptx_impl_intel, None)?; - ocl_core::compile_program( - &main_module, - Some(&[dev]), - build_options, - &[], - &[], - None, - None, - None, - )?; - ocl_core::compile_program( - &ptx_impl_prog, - Some(&[dev]), - build_options, - &[], - &[], - None, - None, - None, - )?; - ocl_core::link_program( - ctx, - Some(&[dev]), - build_options, - &[&main_module, &ptx_impl_prog], - None, - None, - None, - )? - } - }) - } - - pub fn compile<'a>( - &self, - ctx: &ocl_core::Context, - dev: &ocl_core::DeviceId, - device_name: &str, - is_amd: bool, - ) -> Result<ocl_core::Program, CUresult> { - let byte_il = unsafe { - slice::from_raw_parts( - self.binaries.as_ptr() as *const u8, - self.binaries.len() * mem::size_of::<u32>(), - ) - }; - let ocl_program = if is_amd { - let binary_prog = - Self::compile_amd(device_name, byte_il, self.should_link_ptx_impl).unwrap(); - let device = dev.as_raw(); - let binary_len = binary_prog.len(); - let binary = binary_prog.as_ptr(); - let mut binary_status = 0; - let mut errcode_ret = 0; - let raw_program = unsafe { - ocl_core::ffi::clCreateProgramWithBinary( - ctx.as_ptr(), - 1, - &device, - &binary_len, - &binary, - &mut binary_status, - &mut errcode_ret, - ) - }; - assert_eq!(binary_status, 0, "clCreateProgramWithBinary"); - assert_eq!(errcode_ret, 0, "clCreateProgramWithBinary"); - let ocl_program = unsafe { ocl_core::Program::from_raw_create_ptr(raw_program) }; - ocl_core::build_program( - &ocl_program, - Some(&[dev]), - &CString::new("").unwrap(), - None, - None, - )?; - ocl_program - } else { - Self::compile_amd("gfx1011:xnack-", byte_il, self.should_link_ptx_impl).unwrap(); - Self::compile_intel( - ctx, - dev, - byte_il, - &self.build_options, - self.should_link_ptx_impl, - )? - }; - Ok(ocl_program) - } -} - -pub fn get_function( - hfunc: *mut *mut Function, - hmod: *mut Module, - name: *const c_char, -) -> Result<(), CUresult> { - if hfunc == ptr::null_mut() || hmod == ptr::null_mut() || name == ptr::null() { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - let name = unsafe { CStr::from_ptr(name) }.to_owned(); - let function: *mut Function = GlobalState::lock_current_context(|ctx| { - let module = unsafe { &mut *hmod }.as_result_mut()?; - let device = unsafe { &mut *ctx.device }; - let compiled_module = match module.device_binaries.entry(device.index) { - hash_map::Entry::Occupied(entry) => entry.into_mut(), - hash_map::Entry::Vacant(entry) => { - let new_module = CompiledModule { - base: module.spirv.compile( - &device.ocl_context, - &device.ocl_base, - &device.name, - device.is_amd, - )?, - kernels: HashMap::new(), - }; - entry.insert(new_module) - } - }; - let kernel = match compiled_module.kernels.entry(name) { - hash_map::Entry::Occupied(entry) => entry.into_mut().as_mut(), - hash_map::Entry::Vacant(entry) => { - let kernel_info = module - .spirv - .kernel_info - .get(unsafe { - std::str::from_utf8_unchecked(entry.key().as_c_str().to_bytes()) - }) - .ok_or(CUresult::CUDA_ERROR_NOT_FOUND)?; - let kernel = ocl_core::create_kernel( - &compiled_module.base, - &entry.key().as_c_str().to_string_lossy(), - )?; - entry.insert(Box::new(Function::new(FunctionData { - base: kernel, - device: device.ocl_base.clone(), - arg_size: kernel_info.arguments_sizes.clone(), - use_shared_mem: kernel_info.uses_shared_mem, - legacy_args: LegacyArguments::new(), - }))) - } - }; - Ok::<_, CUresult>(kernel as *mut _) - })??; - unsafe { *hfunc = function }; - Ok(()) -} - -pub(crate) fn load_data(pmod: *mut *mut Module, image: *const c_void) -> Result<(), CUresult> { - let spirv_data = SpirvModule::new_raw(image as *const _)?; - load_data_impl(pmod, spirv_data) -} - -pub fn load_data_impl(pmod: *mut *mut Module, spirv_data: SpirvModule) -> Result<(), CUresult> { - let module = GlobalState::lock_current_context(|ctx| { - let device = unsafe { &mut *ctx.device }; - let l0_module = spirv_data.compile( - &device.ocl_context, - &device.ocl_base, - &device.name, - device.is_amd, - )?; - let mut device_binaries = HashMap::new(); - let compiled_module = CompiledModule { - base: l0_module, - kernels: HashMap::new(), - }; - device_binaries.insert(device.index, compiled_module); - let module_data = ModuleData { - spirv: spirv_data, - device_binaries, - }; - Ok::<_, CUresult>(module_data) - })??; - let module_ptr = Box::into_raw(Box::new(Module::new(module))); - unsafe { *pmod = module_ptr }; - Ok(()) -} - -pub(crate) fn unload(module: *mut Module) -> Result<(), CUresult> { - if module == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - GlobalState::lock(|_| Module::destroy_impl(module))? -} - -pub(crate) fn load(pmod: *mut *mut Module, fname: *const i8) -> Result<(), CUresult> { - if pmod == ptr::null_mut() || fname == ptr::null() { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - let path = unsafe { CStr::from_ptr(fname) }; - let path_utf8 = path - .to_str() - .map_err(|_| CUresult::CUDA_ERROR_INVALID_VALUE)?; - let file = std::fs::read(path_utf8).map_err(|_| CUresult::CUDA_ERROR_FILE_NOT_FOUND)?; - let module_text = std::str::from_utf8(&file).map_err(|_| CUresult::CUDA_ERROR_INVALID_PTX)?; - let spirv_data = SpirvModule::new(module_text)?; - load_data_impl(pmod, spirv_data) -} diff --git a/zluda/src/impl/ocl_ext.rs b/zluda/src/impl/ocl_ext.rs deleted file mode 100644 index e69de29..0000000 --- a/zluda/src/impl/ocl_ext.rs +++ /dev/null diff --git a/zluda/src/impl/stream.rs b/zluda/src/impl/stream.rs deleted file mode 100644 index 0231cd8..0000000 --- a/zluda/src/impl/stream.rs +++ /dev/null @@ -1,253 +0,0 @@ -use super::{ - context::{Context, ContextData}, - CUresult, GlobalState, -}; -use std::{collections::VecDeque, mem, ptr}; - -use super::{HasLivenessCookie, LiveCheck}; - -pub type Stream = LiveCheck<StreamData>; - -pub const CU_STREAM_LEGACY: *mut Stream = 1 as *mut _; -pub const CU_STREAM_PER_THREAD: *mut Stream = 2 as *mut _; - -impl HasLivenessCookie for StreamData { - #[cfg(target_pointer_width = "64")] - const COOKIE: usize = 0x512097354de18d35; - - #[cfg(target_pointer_width = "32")] - const COOKIE: usize = 0x77d5cc0b; - - const LIVENESS_FAIL: CUresult = CUresult::CUDA_ERROR_INVALID_HANDLE; - - fn try_drop(&mut self) -> Result<(), CUresult> { - if self.context != ptr::null_mut() { - let context = unsafe { &mut *self.context }; - if !context.streams.remove(&(self as *mut _)) { - return Err(CUresult::CUDA_ERROR_UNKNOWN); - } - } - Ok(()) - } -} - -pub struct StreamData { - pub context: *mut ContextData, - // Immediate CommandList - pub cmd_list: Option<ocl_core::CommandQueue>, -} - -impl StreamData { - pub fn new_unitialized() -> Result<Self, CUresult> { - Ok(StreamData { - context: ptr::null_mut(), - cmd_list: None, - }) - } - - pub fn new(ctx: &mut ContextData) -> Result<Self, CUresult> { - let ocl_ctx = &unsafe { &*ctx.device }.ocl_context; - let device = unsafe { &*ctx.device }.ocl_base; - Ok(StreamData { - context: ctx as *mut _, - cmd_list: Some(ocl_core::create_command_queue::< - &ocl_core::Context, - ocl_core::DeviceId, - >(ocl_ctx, device, None)?), - }) - } - - pub fn late_init(&mut self, ctx: &mut ContextData) { - let ocl_ctx = &unsafe { &*ctx.device }.ocl_context; - let device = unsafe { &*ctx.device }.ocl_base; - self.context = ctx as *mut _; - self.cmd_list = Some( - ocl_core::create_command_queue::<&ocl_core::Context, ocl_core::DeviceId>( - ocl_ctx, device, None, - ) - .unwrap(), - ); - } - - pub fn synchronize(&mut self) -> Result<(), CUresult> { - ocl_core::finish(self.cmd_list.as_ref().unwrap())?; - Ok(()) - } -} - -pub(crate) fn get_ctx(hstream: *mut Stream, pctx: *mut *mut Context) -> Result<(), CUresult> { - if pctx == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - let ctx_ptr = GlobalState::lock_stream(hstream, |stream| stream.context)?; - if ctx_ptr == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED); - } - unsafe { *pctx = Context::ptr_from_inner(ctx_ptr) }; - Ok(()) -} - -pub(crate) fn create(phstream: *mut *mut Stream, _flags: u32) -> Result<(), CUresult> { - let stream_ptr = GlobalState::lock_current_context(|ctx| { - let mut stream_box = Box::new(Stream::new(StreamData::new(ctx)?)); - let stream_ptr = stream_box.as_mut().as_option_mut().unwrap() as *mut _; - if !ctx.streams.insert(stream_ptr) { - return Err(CUresult::CUDA_ERROR_UNKNOWN); - } - mem::forget(stream_box); - Ok::<_, CUresult>(stream_ptr) - })??; - unsafe { *phstream = Stream::ptr_from_inner(stream_ptr) }; - Ok(()) -} - -pub(crate) fn destroy_v2(pstream: *mut Stream) -> Result<(), CUresult> { - if pstream == ptr::null_mut() || pstream == CU_STREAM_LEGACY || pstream == CU_STREAM_PER_THREAD - { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); - } - GlobalState::lock(|_| Stream::destroy_impl(pstream))? -} - -pub(crate) fn synchronize(pstream: *mut Stream) -> Result<(), CUresult> { - GlobalState::lock_stream(pstream, |stream_data| Ok(stream_data.synchronize()?))? -} - -#[cfg(test)] -mod test { - use crate::cuda::CUstream; - - use super::super::test::CudaDriverFns; - use super::super::CUresult; - use std::{ptr, thread}; - - const CU_STREAM_LEGACY: CUstream = 1 as *mut _; - const CU_STREAM_PER_THREAD: CUstream = 2 as *mut _; - - cuda_driver_test!(default_stream_uses_current_ctx_legacy); - cuda_driver_test!(default_stream_uses_current_ctx_ptsd); - - fn default_stream_uses_current_ctx_legacy<T: CudaDriverFns>() { - default_stream_uses_current_ctx_impl::<T>(CU_STREAM_LEGACY); - } - - fn default_stream_uses_current_ctx_ptsd<T: CudaDriverFns>() { - default_stream_uses_current_ctx_impl::<T>(CU_STREAM_PER_THREAD); - } - - fn default_stream_uses_current_ctx_impl<T: CudaDriverFns>(stream: CUstream) { - assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); - let mut ctx1 = ptr::null_mut(); - assert_eq!(T::cuCtxCreate_v2(&mut ctx1, 0, 0), CUresult::CUDA_SUCCESS); - let mut stream_ctx1 = ptr::null_mut(); - assert_eq!( - T::cuStreamGetCtx(stream, &mut stream_ctx1), - CUresult::CUDA_SUCCESS - ); - assert_eq!(ctx1, stream_ctx1); - let mut ctx2 = ptr::null_mut(); - assert_eq!(T::cuCtxCreate_v2(&mut ctx2, 0, 0), CUresult::CUDA_SUCCESS); - assert_ne!(ctx1, ctx2); - let mut stream_ctx2 = ptr::null_mut(); - assert_eq!( - T::cuStreamGetCtx(stream, &mut stream_ctx2), - CUresult::CUDA_SUCCESS - ); - assert_eq!(ctx2, stream_ctx2); - // Cleanup - assert_eq!(T::cuCtxDestroy_v2(ctx1), CUresult::CUDA_SUCCESS); - assert_eq!(T::cuCtxDestroy_v2(ctx2), CUresult::CUDA_SUCCESS); - } - - cuda_driver_test!(stream_context_destroyed); - - fn stream_context_destroyed<T: CudaDriverFns>() { - assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); - let mut ctx = ptr::null_mut(); - assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS); - let mut stream = ptr::null_mut(); - assert_eq!(T::cuStreamCreate(&mut stream, 0), CUresult::CUDA_SUCCESS); - let mut stream_ctx1 = ptr::null_mut(); - assert_eq!( - T::cuStreamGetCtx(stream, &mut stream_ctx1), - CUresult::CUDA_SUCCESS - ); - assert_eq!(stream_ctx1, ctx); - assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS); - let mut stream_ctx2 = ptr::null_mut(); - // When a context gets destroyed, its streams are also destroyed - let cuda_result = T::cuStreamGetCtx(stream, &mut stream_ctx2); - assert!( - cuda_result == CUresult::CUDA_ERROR_INVALID_HANDLE - || cuda_result == CUresult::CUDA_ERROR_INVALID_CONTEXT - || cuda_result == CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED - ); - assert_eq!( - T::cuStreamDestroy_v2(stream), - CUresult::CUDA_ERROR_INVALID_HANDLE - ); - // Check if creating another context is possible - let mut ctx2 = ptr::null_mut(); - assert_eq!(T::cuCtxCreate_v2(&mut ctx2, 0, 0), CUresult::CUDA_SUCCESS); - // Cleanup - assert_eq!(T::cuCtxDestroy_v2(ctx2), CUresult::CUDA_SUCCESS); - } - - cuda_driver_test!(stream_moves_context_to_another_thread); - - fn stream_moves_context_to_another_thread<T: CudaDriverFns>() { - assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); - let mut ctx = ptr::null_mut(); - assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS); - let mut stream = ptr::null_mut(); - assert_eq!(T::cuStreamCreate(&mut stream, 0), CUresult::CUDA_SUCCESS); - let mut stream_ctx1 = ptr::null_mut(); - assert_eq!( - T::cuStreamGetCtx(stream, &mut stream_ctx1), - CUresult::CUDA_SUCCESS - ); - assert_eq!(stream_ctx1, ctx); - let stream_ptr = stream as usize; - let stream_ctx_on_thread = thread::spawn(move || { - let mut stream_ctx2 = ptr::null_mut(); - assert_eq!( - T::cuStreamGetCtx(stream_ptr as *mut _, &mut stream_ctx2), - CUresult::CUDA_SUCCESS - ); - stream_ctx2 as usize - }) - .join() - .unwrap(); - assert_eq!(stream_ctx1, stream_ctx_on_thread as *mut _); - // Cleanup - assert_eq!(T::cuStreamDestroy_v2(stream), CUresult::CUDA_SUCCESS); - assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS); - } - - cuda_driver_test!(can_destroy_stream); - - fn can_destroy_stream<T: CudaDriverFns>() { - assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); - let mut ctx = ptr::null_mut(); - assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS); - let mut stream = ptr::null_mut(); - assert_eq!(T::cuStreamCreate(&mut stream, 0), CUresult::CUDA_SUCCESS); - assert_eq!(T::cuStreamDestroy_v2(stream), CUresult::CUDA_SUCCESS); - // Cleanup - assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS); - } - - cuda_driver_test!(cant_destroy_default_stream); - - fn cant_destroy_default_stream<T: CudaDriverFns>() { - assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); - let mut ctx = ptr::null_mut(); - assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS); - assert_ne!( - T::cuStreamDestroy_v2(super::CU_STREAM_LEGACY as *mut _), - CUresult::CUDA_SUCCESS - ); - // Cleanup - assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS); - } -} |