diff options
author | Andrzej Janik <[email protected]> | 2021-08-27 23:04:52 +0000 |
---|---|---|
committer | Andrzej Janik <[email protected]> | 2021-08-27 23:04:52 +0000 |
commit | 5ec18f14a1b616e69521921972276a78d1957706 (patch) | |
tree | 4f07085a6c2dc8041e2d4705d2812a2859e0872b | |
parent | 4ae7feb93af81f42e53ece3b0a81b8112a731ee3 (diff) | |
download | ZLUDA-5ec18f14a1b616e69521921972276a78d1957706.tar.gz ZLUDA-5ec18f14a1b616e69521921972276a78d1957706.zip |
HIP conversion part #1
-rw-r--r-- | hip_runtime-sys/build.rs | 2 | ||||
-rw-r--r-- | zluda/Cargo.toml | 4 | ||||
-rw-r--r-- | zluda/src/cuda.rs | 28 | ||||
-rw-r--r-- | zluda/src/impl/device.rs | 384 | ||||
-rw-r--r-- | zluda/src/impl/mod.rs | 10 |
5 files changed, 298 insertions, 130 deletions
diff --git a/hip_runtime-sys/build.rs b/hip_runtime-sys/build.rs index 53511c7..e497e06 100644 --- a/hip_runtime-sys/build.rs +++ b/hip_runtime-sys/build.rs @@ -2,6 +2,6 @@ use std::env::VarError; fn main() -> Result<(), VarError> { println!("cargo:rustc-link-lib=dylib=amdhip64"); - println!("cargo:rustc-link-search=/opt/rocm/lib/"); + println!("cargo:rustc-link-search=native=/opt/rocm/lib/"); Ok(()) } diff --git a/zluda/Cargo.toml b/zluda/Cargo.toml index 1e7d650..9d242c5 100644 --- a/zluda/Cargo.toml +++ b/zluda/Cargo.toml @@ -14,6 +14,7 @@ lazy_static = "1.4" num_enum = "0.4" lz4-sys = "1.9" tempfile = "3" +paste = "1.0" [dependencies.ocl-core] version = "0.11" @@ -23,5 +24,4 @@ features = ["opencl_version_1_2", "opencl_version_2_0", "opencl_version_2_1"] winapi = { version = "0.3", features = ["heapapi", "std"] } [dev-dependencies] -cuda-driver-sys = "0.3.0" -paste = "1.0"
\ No newline at end of file +cuda-driver-sys = "0.3.0"
\ No newline at end of file diff --git a/zluda/src/cuda.rs b/zluda/src/cuda.rs index 1bf10fd..7eec241 100644 --- a/zluda/src/cuda.rs +++ b/zluda/src/cuda.rs @@ -1,3 +1,5 @@ +use hip_runtime_sys::*; + use super::r#impl; use super::r#impl::{Decuda, Encuda}; @@ -2183,10 +2185,11 @@ pub use self::CUgraphExecUpdateResult_enum as CUgraphExecUpdateResult; #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuGetErrorString( - error: CUresult, + CUresult(e): CUresult, pStr: *mut *const ::std::os::raw::c_char, ) -> CUresult { - r#impl::get_error_string(error, pStr).encuda() + unsafe { *pStr = hipGetErrorString(hipError_t(e)) }; + CUresult::CUDA_SUCCESS } #[cfg_attr(not(test), no_mangle)] @@ -2199,13 +2202,12 @@ pub extern "system" fn cuGetErrorName( #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuInit(Flags: ::std::os::raw::c_uint) -> CUresult { - r#impl::init().encuda() + unsafe { hipInit(Flags).into() } } #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuDriverGetVersion(driverVersion: *mut ::std::os::raw::c_int) -> CUresult { - unsafe { *driverVersion = r#impl::driver_get_version() }; - CUresult::CUDA_SUCCESS + unsafe { hipDriverGetVersion(driverVersion).into() } } #[cfg_attr(not(test), no_mangle)] @@ -2213,21 +2215,21 @@ pub extern "system" fn cuDeviceGet( device: *mut CUdevice, ordinal: ::std::os::raw::c_int, ) -> CUresult { - r#impl::device::get(device.decuda(), ordinal).encuda() + unsafe { hipDeviceGet(device as _, ordinal).into() } } #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuDeviceGetCount(count: *mut ::std::os::raw::c_int) -> CUresult { - r#impl::device::get_count(count).encuda() + unsafe { hipGetDeviceCount(count).into() } } #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuDeviceGetName( name: *mut ::std::os::raw::c_char, len: ::std::os::raw::c_int, - dev: CUdevice, + CUdevice(dev): CUdevice, ) -> CUresult { - r#impl::device::get_name(name, len, dev.decuda()).encuda() + unsafe { hipDeviceGetName(name, len, dev).into() } } #[cfg_attr(not(test), no_mangle)] @@ -2245,17 +2247,17 @@ pub extern "system" fn cuDeviceGetLuid( } #[cfg_attr(not(test), no_mangle)] -pub extern "system" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: CUdevice) -> CUresult { - r#impl::device::total_mem_v2(bytes, dev.decuda()).encuda() +pub extern "system" fn cuDeviceTotalMem_v2(bytes: *mut usize, CUdevice(dev): CUdevice) -> CUresult { + unsafe { hipDeviceTotalMem(bytes, dev).into() } } #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuDeviceGetAttribute( pi: *mut ::std::os::raw::c_int, attrib: CUdevice_attribute, - dev: CUdevice, + CUdevice(dev): CUdevice, ) -> CUresult { - r#impl::device::get_attribute(pi, attrib, dev.decuda()).encuda() + r#impl::device::get_attribute(pi, attrib, dev).into() } #[cfg_attr(not(test), no_mangle)] diff --git a/zluda/src/impl/device.rs b/zluda/src/impl/device.rs index 5fdb24b..2ca7251 100644 --- a/zluda/src/impl/device.rs +++ b/zluda/src/impl/device.rs @@ -1,7 +1,11 @@ use super::{context, transmute_lifetime, transmute_lifetime_mut, CUresult, GlobalState}; use crate::cuda; use cuda::{CUdevice_attribute, CUuuid_st}; +use hip_runtime_sys::{ + hipDeviceAttribute_t, hipDeviceGetAttribute, hipError_t, hipGetDeviceProperties, +}; use ocl_core::{ClDeviceIdPtr, ContextProperties, DeviceType}; +use paste::paste; use std::{ cmp, collections::HashSet, @@ -133,138 +137,290 @@ pub fn total_mem_v2(bytes: *mut usize, dev_idx: Index) -> Result<(), CUresult> { Ok(()) } -impl CUdevice_attribute { - fn get_static_value(self) -> Option<i32> { - match self { - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP => Some(1), - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT => Some(1), - // TODO: go back to this once we have more funcitonality implemented - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => Some(8), - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR => Some(0), - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY => Some(1), - _ => None, +#[allow(warnings)] +trait hipDeviceAttribute_t_ext { + const hipDeviceAttributeMaximumTexture1DWidth: hipDeviceAttribute_t = + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth; + const hipDeviceAttributeMaximumTexture2DWidth: hipDeviceAttribute_t = + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth; + const hipDeviceAttributeMaximumTexture2DHeight: hipDeviceAttribute_t = + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight; + const hipDeviceAttributeMaximumTexture3DWidth: hipDeviceAttribute_t = + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DWidth; + const hipDeviceAttributeMaximumTexture3DHeight: hipDeviceAttribute_t = + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DHeight; + const hipDeviceAttributeMaximumTexture3DDepth: hipDeviceAttribute_t = + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DDepth; + const hipDeviceAttributeGlobalMemoryBusWidth: hipDeviceAttribute_t = + hipDeviceAttribute_t::hipDeviceAttributeMemoryBusWidth; + const hipDeviceAttributeMaxThreadsPerMultiprocessor: hipDeviceAttribute_t = + hipDeviceAttribute_t::hipDeviceAttributeMaxThreadsPerMultiProcessor; + const hipDeviceAttributeAsyncEngineCount: hipDeviceAttribute_t = + hipDeviceAttribute_t::hipDeviceAttributeConcurrentKernels; +} + +impl hipDeviceAttribute_t_ext for hipDeviceAttribute_t {} + +macro_rules! remap_attribute { + ($attrib:expr => $([ $($word:expr)* ]),*,) => { + match $attrib { + $( + paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE $(_ $word:upper)* >] } => { + paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $($word:camel)* >] } + } + )* + _ => return hipError_t::hipErrorInvalidValue } } } -pub fn get_attribute( - pi: *mut i32, - attrib: CUdevice_attribute, - dev_idx: Index, -) -> Result<(), CUresult> { +pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) -> hipError_t { if pi == ptr::null_mut() { - return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + return hipError_t::hipErrorInvalidValue; } - if let Some(value) = attrib.get_static_value() { - unsafe { *pi = value }; - return Ok(()); - } - let value = match attrib { - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_INTEGRATED => { - GlobalState::lock_device(dev_idx, |dev| if dev.is_amd { 0i32 } else { 1i32 })? + //let mut props = unsafe { mem::zeroed() }; + let hip_attrib = match attrib { + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED => { + unsafe { *pi = 1 }; + return hipError_t::hipSuccess; } - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT => 1, - // Streaming Multiprocessor corresponds roughly to a sub-slice (thread group can't cross either) - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT => { - GlobalState::lock_device(dev_idx, |dev| { - let props = - ocl_core::get_device_info(dev.ocl_base, ocl_core::DeviceInfo::MaxComputeUnits)?; - if let ocl_core::DeviceInfoResult::MaxComputeUnits(count) = props { - Ok(count as i32) - } else { - Err(CUresult::CUDA_ERROR_UNKNOWN) - } - })?? + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_TCC_DRIVER + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID => { + unsafe { *pi = 0 }; + return hipError_t::hipSuccess; } - // I honestly don't know how to answer this query - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR => { - GlobalState::lock_device(dev_idx, |dev| { - if !dev.is_amd { - 7 // correct for GEN9 - } else { - 4i32 * 32 // probably correct for RDNA - } - })? + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => { + unsafe { *pi = 8 }; + return hipError_t::hipSuccess; } - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK => { - GlobalState::lock_device(dev_idx, |dev| { - let props = ocl_core::get_device_info( - dev.ocl_base, - ocl_core::DeviceInfo::MaxWorkGroupSize, - )?; - if let ocl_core::DeviceInfoResult::MaxWorkGroupSize(size) = props { - Ok(size as i32) - } else { - Err(CUresult::CUDA_ERROR_UNKNOWN) - } - })?? + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR => { + unsafe { *pi = 0 }; + return hipError_t::hipSuccess; } - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X => { - GlobalState::lock_device(dev_idx, |dev| { - let props = ocl_core::get_device_info( - dev.ocl_base, - ocl_core::DeviceInfo::MaxWorkItemSizes, - )?; - if let ocl_core::DeviceInfoResult::MaxWorkItemSizes(sizes) = props { - Ok(sizes) - } else { - Err(CUresult::CUDA_ERROR_UNKNOWN) - } - })??[0] as i32 + // we assume that arrayed texts have the same limits + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth } - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y => { - GlobalState::lock_device(dev_idx, |dev| { - let props = ocl_core::get_device_info( - dev.ocl_base, - ocl_core::DeviceInfo::MaxWorkItemSizes, - )?; - if let ocl_core::DeviceInfoResult::MaxWorkItemSizes(sizes) = props { - Ok(sizes) - } else { - Err(CUresult::CUDA_ERROR_UNKNOWN) - } - })??[1] as i32 + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight } - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z => { - GlobalState::lock_device(dev_idx, |dev| { - let props = ocl_core::get_device_info( - dev.ocl_base, - ocl_core::DeviceInfo::MaxWorkItemSizes, - )?; - if let ocl_core::DeviceInfoResult::MaxWorkItemSizes(sizes) = props { - Ok(sizes) - } else { - Err(CUresult::CUDA_ERROR_UNKNOWN) - } - })??[2] as i32 + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth } - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK => { - GlobalState::lock_device(dev_idx, |dev| { - let props = - ocl_core::get_device_info(dev.ocl_base, ocl_core::DeviceInfo::LocalMemSize)?; - if let ocl_core::DeviceInfoResult::LocalMemSize(size) = props { - Ok(size) - } else { - Err(CUresult::CUDA_ERROR_UNKNOWN) - } - })?? as i32 + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth } - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_WARP_SIZE => 32, - _ => { - // TODO: support more attributes for CUDA runtime - /* - return Err(l0::Error( - l0::sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, - )) - */ - 0 + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight + } + // we treat surface the same as texture + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT => { + hipDeviceAttribute_t::hipDeviceAttributeTextureAlignment + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DWidth + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DHeight + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DDepth + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH => { + hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth + } + // Totally made up + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS + | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS => { + unsafe { *pi = u16::MAX as i32 }; + return hipError_t::hipSuccess; + } + // linear sizes + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH => { + let mut prop = unsafe { mem::zeroed() }; + let err = unsafe { hipGetDeviceProperties(&mut prop, dev_idx) }; + if err != hipError_t::hipSuccess { + return err; + } + unsafe { *pi = prop.maxTexture1DLinear }; + return hipError_t::hipSuccess; + } + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID => { + let mut prop = unsafe { mem::zeroed() }; + let err = unsafe { hipGetDeviceProperties(&mut prop, dev_idx) }; + if err != hipError_t::hipSuccess { + return err; + } + unsafe { *pi = prop.pciDomainID }; + return hipError_t::hipSuccess; + } + attrib => remap_attribute! { + attrib => + [MAX THREADS PER BLOCK], + [MAX BLOCK DIM X], + [MAX BLOCK DIM Y], + [MAX BLOCK DIM Z], + [MAX GRID DIM X], + [MAX GRID DIM Y], + [MAX GRID DIM Z], + [MAX SHARED MEMORY PER BLOCK], + [TOTAL CONSTANT MEMORY], + [WARP SIZE], + [MAX PITCH], + [MAX REGISTERS PER BLOCK], + [CLOCK RATE], + [TEXTURE ALIGNMENT], + //[GPU OVERLAP], + [MULTIPROCESSOR COUNT], + [KERNEL EXEC TIMEOUT], + [INTEGRATED], + [CAN MAP HOST MEMORY], + [COMPUTE MODE], + [MAXIMUM TEXTURE1D WIDTH], + [MAXIMUM TEXTURE2D WIDTH], + [MAXIMUM TEXTURE2D HEIGHT], + [MAXIMUM TEXTURE3D WIDTH], + [MAXIMUM TEXTURE3D HEIGHT], + [MAXIMUM TEXTURE3D DEPTH], + //[MAXIMUM TEXTURE2D LAYERED WIDTH], + //[MAXIMUM TEXTURE2D LAYERED HEIGHT], + //[MAXIMUM TEXTURE2D LAYERED LAYERS], + //[MAXIMUM TEXTURE2D ARRAY WIDTH], + //[MAXIMUM TEXTURE2D ARRAY HEIGHT], + //[MAXIMUM TEXTURE2D ARRAY NUMSLICES], + //[SURFACE ALIGNMENT], + [CONCURRENT KERNELS], + [ECC ENABLED], + [PCI BUS ID], + [PCI DEVICE ID], + //[TCC DRIVER], + [MEMORY CLOCK RATE], + [GLOBAL MEMORY BUS WIDTH], + [L2 CACHE SIZE], + [MAX THREADS PER MULTIPROCESSOR], + [ASYNC ENGINE COUNT], + //[UNIFIED ADDRESSING], + //[MAXIMUM TEXTURE1D LAYERED WIDTH], + //[MAXIMUM TEXTURE1D LAYERED LAYERS], + //[CAN TEX2D GATHER], + //[MAXIMUM TEXTURE2D GATHER WIDTH], + //[MAXIMUM TEXTURE2D GATHER HEIGHT], + //[MAXIMUM TEXTURE3D WIDTH ALTERNATE], + //[MAXIMUM TEXTURE3D HEIGHT ALTERNATE], + //[MAXIMUM TEXTURE3D DEPTH ALTERNATE], + //[PCI DOMAIN ID], + [TEXTURE PITCH ALIGNMENT], + //[MAXIMUM TEXTURECUBEMAP WIDTH], + //[MAXIMUM TEXTURECUBEMAP LAYERED WIDTH], + //[MAXIMUM TEXTURECUBEMAP LAYERED LAYERS], + //[MAXIMUM SURFACE1D WIDTH], + //[MAXIMUM SURFACE2D WIDTH], + //[MAXIMUM SURFACE2D HEIGHT], + //[MAXIMUM SURFACE3D WIDTH], + //[MAXIMUM SURFACE3D HEIGHT], + //[MAXIMUM SURFACE3D DEPTH], + //[MAXIMUM SURFACE1D LAYERED WIDTH], + //[MAXIMUM SURFACE1D LAYERED LAYERS], + //[MAXIMUM SURFACE2D LAYERED WIDTH], + //[MAXIMUM SURFACE2D LAYERED HEIGHT], + //[MAXIMUM SURFACE2D LAYERED LAYERS], + //[MAXIMUM SURFACECUBEMAP WIDTH], + //[MAXIMUM SURFACECUBEMAP LAYERED WIDTH], + //[MAXIMUM SURFACECUBEMAP LAYERED LAYERS], + //[MAXIMUM TEXTURE1D LINEAR WIDTH], + //[MAXIMUM TEXTURE2D LINEAR WIDTH], + //[MAXIMUM TEXTURE2D LINEAR HEIGHT], + //[MAXIMUM TEXTURE2D LINEAR PITCH], + //[MAXIMUM TEXTURE2D MIPMAPPED WIDTH], + //[MAXIMUM TEXTURE2D MIPMAPPED HEIGHT], + //[COMPUTE CAPABILITY MAJOR], + //[COMPUTE CAPABILITY MINOR], + //[MAXIMUM TEXTURE1D MIPMAPPED WIDTH], + //[STREAM PRIORITIES SUPPORTED], + //[GLOBAL L1 CACHE SUPPORTED], + //[LOCAL L1 CACHE SUPPORTED], + [MAX SHARED MEMORY PER MULTIPROCESSOR], + //[MAX REGISTERS PER MULTIPROCESSOR], + [MANAGED MEMORY], + //[MULTI GPU BOARD], + //[MULTI GPU BOARD GROUP ID], + //[HOST NATIVE ATOMIC SUPPORTED], + //[SINGLE TO DOUBLE PRECISION PERF RATIO], + [PAGEABLE MEMORY ACCESS], + [CONCURRENT MANAGED ACCESS], + //[COMPUTE PREEMPTION SUPPORTED], + //[CAN USE HOST POINTER FOR REGISTERED MEM], + //[CAN USE STREAM MEM OPS], + //[CAN USE 64 BIT STREAM MEM OPS], + //[CAN USE STREAM WAIT VALUE NOR], + [COOPERATIVE LAUNCH], + [COOPERATIVE MULTI DEVICE LAUNCH], + //[MAX SHARED MEMORY PER BLOCK OPTIN], + //[CAN FLUSH REMOTE WRITES], + //[HOST REGISTER SUPPORTED], + [PAGEABLE MEMORY ACCESS USES HOST PAGE TABLES], + [DIRECT MANAGED MEM ACCESS FROM HOST], + //[VIRTUAL ADDRESS MANAGEMENT SUPPORTED], + //[VIRTUAL MEMORY MANAGEMENT SUPPORTED], + //[HANDLE TYPE POSIX FILE DESCRIPTOR SUPPORTED], + //[HANDLE TYPE WIN32 HANDLE SUPPORTED], + //[HANDLE TYPE WIN32 KMT HANDLE SUPPORTED], + //[MAX BLOCKS PER MULTIPROCESSOR], + //[GENERIC COMPRESSION SUPPORTED], + //[MAX PERSISTING L2 CACHE SIZE], + //[MAX ACCESS POLICY WINDOW SIZE], + //[GPU DIRECT RDMA WITH CUDA VMM SUPPORTED], + //[RESERVED SHARED MEMORY PER BLOCK], + //[SPARSE CUDA ARRAY SUPPORTED], + //[READ ONLY HOST REGISTER SUPPORTED], + //[TIMELINE SEMAPHORE INTEROP SUPPORTED], + //[MEMORY POOLS SUPPORTED], + }, }; - unsafe { *pi = value }; - Ok(()) + unsafe { hipDeviceGetAttribute(pi, hip_attrib, dev_idx) } } -pub fn get_uuid(uuid: *mut CUuuid_st, dev_idx: Index) -> Result<(), CUresult> { +pub fn get_uuid(uuid: *mut CUuuid_st, _: Index) -> Result<(), CUresult> { unsafe { *uuid = CUuuid_st { bytes: mem::zeroed(), diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs index f157002..bb32937 100644 --- a/zluda/src/impl/mod.rs +++ b/zluda/src/impl/mod.rs @@ -148,6 +148,16 @@ impl From<ocl_core::Error> for CUresult { } } +impl From<hip_runtime_sys::hipError_t> for CUresult { + fn from(result: hip_runtime_sys::hipError_t) -> Self { + match result { + hip_runtime_sys::hipError_t::hipErrorRuntimeMemory + | hip_runtime_sys::hipError_t::hipErrorRuntimeOther => CUresult::CUDA_ERROR_UNKNOWN, + hip_runtime_sys::hipError_t(e) => CUresult(e), + } + } +} + pub trait Encuda { type To: Sized; fn encuda(self: Self) -> Self::To; |