diff options
-rw-r--r-- | zluda/Cargo.toml | 2 | ||||
-rw-r--r-- | zluda/src/impl/context.rs | 4 | ||||
-rw-r--r-- | zluda/src/impl/device.rs | 4 | ||||
-rw-r--r-- | zluda/src/impl/mod.rs | 101 | ||||
-rw-r--r-- | zluda/src/impl/module.rs | 280 | ||||
-rw-r--r-- | zluda/src/lib.rs | 9 |
6 files changed, 152 insertions, 248 deletions
diff --git a/zluda/Cargo.toml b/zluda/Cargo.toml index ab87b6c..0a4c406 100644 --- a/zluda/Cargo.toml +++ b/zluda/Cargo.toml @@ -9,6 +9,8 @@ name = "nvcuda" crate-type = ["cdylib"] [dependencies] +comgr = { path = "../comgr" } +ptx_parser = { path = "../ptx_parser" } ptx = { path = "../ptx" } cuda_types = { path = "../cuda_types" } cuda_base = { path = "../cuda_base" } diff --git a/zluda/src/impl/context.rs b/zluda/src/impl/context.rs index 61cb92e..d1a135f 100644 --- a/zluda/src/impl/context.rs +++ b/zluda/src/impl/context.rs @@ -7,3 +7,7 @@ pub(crate) unsafe fn get_limit(pvalue: *mut usize, limit: hipLimit_t) -> hipErro pub(crate) fn set_limit(limit: hipLimit_t, value: usize) -> hipError_t { unsafe { hipDeviceSetLimit(limit, value) } } + +pub(crate) fn synchronize() -> hipError_t { + unsafe { hipDeviceSynchronize() } +} diff --git a/zluda/src/impl/device.rs b/zluda/src/impl/device.rs index 9782aad..a2a56c9 100644 --- a/zluda/src/impl/device.rs +++ b/zluda/src/impl/device.rs @@ -300,6 +300,10 @@ pub(crate) fn get_properties(prop: &mut cuda_types::CUdevprop, dev: hipDevice_t) Ok(()) } +pub(crate) fn get_count(count: &mut ::core::ffi::c_int) -> hipError_t { + unsafe { hipGetDeviceCount(count) } +} + fn clamp_usize(x: usize) -> i32 { usize::min(x, i32::MAX as usize) as i32 } diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs index b2e8fc7..0400006 100644 --- a/zluda/src/impl/mod.rs +++ b/zluda/src/impl/mod.rs @@ -1,8 +1,10 @@ use cuda_types::*; use hip_runtime_sys::*; +use std::mem::{self, ManuallyDrop}; pub(super) mod context; pub(super) mod device; +pub(super) mod module; #[cfg(debug_assertions)] pub(crate) fn unimplemented() -> CUresult { @@ -66,9 +68,38 @@ macro_rules! from_cuda_transmute { }; } +macro_rules! from_cuda_object { + ($($type_:ty),*) => { + $( + impl<'a> FromCuda<'a, <$type_ as ZludaObject>::CudaHandle> for <$type_ as ZludaObject>::CudaHandle { + fn from_cuda(handle: &'a <$type_ as ZludaObject>::CudaHandle) -> Result<<$type_ as ZludaObject>::CudaHandle, CUerror> { + Ok(*handle) + } + } + + impl<'a> FromCuda<'a, *mut <$type_ as ZludaObject>::CudaHandle> for &'a mut <$type_ as ZludaObject>::CudaHandle { + fn from_cuda(handle: &'a *mut <$type_ as ZludaObject>::CudaHandle) -> Result<&'a mut <$type_ as ZludaObject>::CudaHandle, CUerror> { + match unsafe { handle.as_mut() } { + Some(x) => Ok(x), + None => Err(CUerror::INVALID_VALUE), + } + } + } + + impl<'a> FromCuda<'a, <$type_ as ZludaObject>::CudaHandle> for &'a $type_ { + fn from_cuda(handle: &'a <$type_ as ZludaObject>::CudaHandle) -> Result<&'a $type_, CUerror> { + Ok(as_ref(handle).as_result()?) + } + } + )* + }; +} + from_cuda_nop!( *mut i8, *mut usize, + *const std::ffi::c_void, + *const ::core::ffi::c_char, i32, u32, usize, @@ -77,8 +108,10 @@ from_cuda_nop!( ); from_cuda_transmute!( CUdevice => hipDevice_t, - CUuuid => hipUUID + CUuuid => hipUUID, + CUfunction => hipFunction_t ); +from_cuda_object!(module::Module); impl<'a> FromCuda<'a, CUlimit> for hipLimit_t { fn from_cuda(limit: &'a CUlimit) -> Result<Self, CUerror> { @@ -91,6 +124,72 @@ impl<'a> FromCuda<'a, CUlimit> for hipLimit_t { } } +pub(crate) trait ZludaObject: Sized + Send + Sync { + const COOKIE: usize; + const LIVENESS_FAIL: CUerror = cuda_types::CUerror::INVALID_VALUE; + + type CudaHandle: Sized; + + fn drop_checked(&mut self) -> CUresult; + + fn wrap(self) -> Self::CudaHandle { + unsafe { mem::transmute_copy(&LiveCheck::wrap(self)) } + } +} + +#[repr(C)] +pub(crate) struct LiveCheck<T: ZludaObject> { + cookie: usize, + data: ManuallyDrop<T>, +} + +impl<T: ZludaObject> LiveCheck<T> { + fn wrap(data: T) -> *mut Self { + Box::into_raw(Box::new(LiveCheck { + cookie: T::COOKIE, + data: ManuallyDrop::new(data), + })) + } + + fn as_result(&self) -> Result<&T, CUerror> { + if self.cookie == T::COOKIE { + Ok(&self.data) + } else { + Err(T::LIVENESS_FAIL) + } + } + + // This looks like nonsense, but it's not. There are two cases: + // Err(CUerror) -> meaning that the object is invalid, this pointer does not point into valid memory + // Ok(maybe_error) -> meaning that the object is valid, we dropped everything, but there *might* + // an error in the underlying runtime that we want to propagate + #[must_use] + fn drop_checked(&mut self) -> Result<Result<(), CUerror>, CUerror> { + if self.cookie == T::COOKIE { + self.cookie = 0; + let result = self.data.drop_checked(); + unsafe { ManuallyDrop::drop(&mut self.data) }; + Ok(result) + } else { + Err(T::LIVENESS_FAIL) + } + } +} + +pub fn as_ref<'a, T: ZludaObject>( + handle: &'a T::CudaHandle, +) -> &'a ManuallyDrop<Box<LiveCheck<T>>> { + unsafe { mem::transmute(handle) } +} + +pub fn drop_checked<T: ZludaObject>(handle: T::CudaHandle) -> Result<(), CUerror> { + let mut wrapped_object: ManuallyDrop<Box<LiveCheck<T>>> = + unsafe { mem::transmute_copy(&handle) }; + let underlying_error = LiveCheck::drop_checked(&mut wrapped_object)?; + unsafe { ManuallyDrop::drop(&mut wrapped_object) }; + underlying_error +} + pub(crate) fn init(flags: ::core::ffi::c_uint) -> hipError_t { unsafe { hipInit(flags) } } diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs index 24fa88a..8b19c1b 100644 --- a/zluda/src/impl/module.rs +++ b/zluda/src/impl/module.rs @@ -1,261 +1,53 @@ -use std::borrow::Cow; -use std::collections::HashMap; -use std::ffi::{CStr, CString}; -use std::fs::File; -use std::io::{self, Read, Write}; -use std::ops::Add; -use std::os::raw::c_char; -use std::path::{Path, PathBuf}; -use std::process::Command; -use std::{env, fs, iter, mem, ptr, slice}; +use super::ZludaObject; +use cuda_types::*; +use hip_runtime_sys::*; +use std::{ffi::CStr, mem}; -use hip_runtime_sys::{ - hipCtxGetCurrent, hipCtxGetDevice, hipDeviceGetAttribute, hipDeviceGetName, hipDeviceProp_t, - hipError_t, hipGetDeviceProperties, hipGetStreamDeviceId, hipModuleLoadData, -}; -use tempfile::NamedTempFile; - -use crate::cuda::CUmodule; -use crate::hip_call; - -pub struct SpirvModule { - pub binaries: Vec<u32>, - pub kernel_info: HashMap<String, ptx::KernelInfo>, - pub should_link_ptx_impl: Option<(&'static [u8], &'static [u8])>, - pub build_options: CString, +pub(crate) struct Module { + base: hipModule_t, } -impl SpirvModule { - pub fn new_raw<'a>(text: *const c_char) -> Result<Self, hipError_t> { - let u8_text = unsafe { CStr::from_ptr(text) }; - let ptx_text = u8_text - .to_str() - .map_err(|_| hipError_t::hipErrorInvalidImage)?; - Self::new(ptx_text) - } +impl ZludaObject for Module { + const COOKIE: usize = 0xe9138bd040487d4a; - pub fn new<'a>(ptx_text: &str) -> Result<Self, hipError_t> { - let mut errors = Vec::new(); - let ast = ptx::ModuleParser::new() - .parse(&mut errors, ptx_text) - .map_err(|_| hipError_t::hipErrorInvalidImage)?; - if errors.len() > 0 { - return Err(hipError_t::hipErrorInvalidImage); - } - let spirv_module = - ptx::to_spirv_module(ast).map_err(|_| hipError_t::hipErrorInvalidImage)?; - Ok(SpirvModule { - binaries: spirv_module.assemble(), - kernel_info: spirv_module.kernel_info, - should_link_ptx_impl: spirv_module.should_link_ptx_impl, - build_options: spirv_module.build_options, - }) - } -} + type CudaHandle = CUmodule; -pub(crate) fn load(module: *mut CUmodule, fname: *const i8) -> Result<(), hipError_t> { - let file_name = unsafe { CStr::from_ptr(fname) } - .to_str() - .map_err(|_| hipError_t::hipErrorInvalidValue)?; - let mut file = File::open(file_name).map_err(|_| hipError_t::hipErrorFileNotFound)?; - let mut file_buffer = Vec::new(); - file.read_to_end(&mut file_buffer) - .map_err(|_| hipError_t::hipErrorUnknown)?; - let result = load_data(module, file_buffer.as_ptr() as _); - drop(file_buffer); - result -} - -pub(crate) fn load_data( - module: *mut CUmodule, - image: *const std::ffi::c_void, -) -> Result<(), hipError_t> { - if image == ptr::null() { - return Err(hipError_t::hipErrorInvalidValue); - } - if unsafe { *(image as *const u32) } == 0x464c457f { - return match unsafe { hipModuleLoadData(module as _, image) } { - hipError_t::hipSuccess => Ok(()), - e => Err(e), - }; + fn drop_checked(&mut self) -> CUresult { + unsafe { hipModuleUnload(self.base) }?; + Ok(()) } - let spirv_data = SpirvModule::new_raw(image as *const _)?; - load_data_impl(module, spirv_data) } -pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<(), hipError_t> { +pub(crate) fn load_data(module: &mut CUmodule, image: *const std::ffi::c_void) -> CUresult { + let text = unsafe { CStr::from_ptr(image.cast()) } + .to_str() + .map_err(|_| CUerror::INVALID_VALUE)?; + let ast = ptx_parser::parse_module_checked(text).map_err(|_| CUerror::NO_BINARY_FOR_GPU)?; + let llvm_module = ptx::to_llvm_module(ast).map_err(|_| CUerror::UNKNOWN)?; let mut dev = 0; - hip_call! { hipCtxGetDevice(&mut dev) }; + unsafe { hipCtxGetDevice(&mut dev) }?; let mut props = unsafe { mem::zeroed() }; - hip_call! { hipGetDeviceProperties(&mut props, dev) }; - let arch_binary = compile_amd( - &props, - iter::once(&spirv_data.binaries[..]), - spirv_data.should_link_ptx_impl, + unsafe { hipGetDevicePropertiesR0600(&mut props, dev) }?; + let elf_module = comgr::compile_bitcode( + unsafe { CStr::from_ptr(props.gcnArchName.as_ptr()) }, + &*llvm_module.llvm_ir, + llvm_module.linked_bitcode(), ) - .map_err(|_| hipError_t::hipErrorUnknown)?; - hip_call! { hipModuleLoadData(pmod as _, arch_binary.as_ptr() as _) }; + .map_err(|_| CUerror::UNKNOWN)?; + let mut hip_module = unsafe { mem::zeroed() }; + unsafe { hipModuleLoadData(&mut hip_module, elf_module.as_ptr().cast()) }?; + *module = Module { base: hip_module }.wrap(); Ok(()) } -const LLVM_SPIRV: &'static str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv"; -const AMDGPU: &'static str = "/opt/rocm/"; -const AMDGPU_TARGET: &'static str = "amdgcn-amd-amdhsa"; -const AMDGPU_BITCODE: [&'static str; 8] = [ - "opencl.bc", - "ocml.bc", - "ockl.bc", - "oclc_correctly_rounded_sqrt_off.bc", - "oclc_daz_opt_on.bc", - "oclc_finite_only_off.bc", - "oclc_unsafe_math_off.bc", - "oclc_wavefrontsize64_off.bc", -]; -const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_"; - -pub(crate) fn compile_amd<'a>( - device_pros: &hipDeviceProp_t, - spirv_il: impl Iterator<Item = &'a [u32]>, - ptx_lib: Option<(&'static [u8], &'static [u8])>, -) -> io::Result<Vec<u8>> { - let null_terminator = device_pros - .gcnArchName - .iter() - .position(|&x| x == 0) - .unwrap(); - let gcn_arch_slice = unsafe { - slice::from_raw_parts(device_pros.gcnArchName.as_ptr() as _, null_terminator + 1) - }; - let device_name = - if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) { - name - } else { - return Err(io::Error::new(io::ErrorKind::Other, "")); - }; - let dir = tempfile::tempdir()?; - let llvm_spirv_path = match env::var("LLVM_SPIRV") { - Ok(path) => Cow::Owned(path), - Err(_) => Cow::Borrowed(LLVM_SPIRV), - }; - let llvm_files = spirv_il - .map(|spirv| { - let mut spirv_file = NamedTempFile::new_in(&dir)?; - let spirv_u8 = unsafe { - slice::from_raw_parts( - spirv.as_ptr() as *const u8, - spirv.len() * mem::size_of::<u32>(), - ) - }; - spirv_file.write_all(spirv_u8)?; - if cfg!(debug_assertions) { - persist_file(spirv_file.path())?; - } - let llvm = NamedTempFile::new_in(&dir)?; - let to_llvm_cmd = Command::new(&*llvm_spirv_path) - //.arg("--spirv-debug") - .arg("-r") - .arg("-o") - .arg(llvm.path()) - .arg(spirv_file.path()) - .status()?; - assert!(to_llvm_cmd.success()); - if cfg!(debug_assertions) { - persist_file(llvm.path())?; - } - Ok::<_, io::Error>(llvm) - }) - .collect::<Result<Vec<_>, _>>()?; - let linked_binary = NamedTempFile::new_in(&dir)?; - let mut llvm_link = PathBuf::from(AMDGPU); - llvm_link.push("llvm"); - llvm_link.push("bin"); - llvm_link.push("llvm-link"); - let mut linker_cmd = Command::new(&llvm_link); - linker_cmd - .arg("-o") - .arg(linked_binary.path()) - .args(llvm_files.iter().map(|f| f.path())) - .args(get_bitcode_paths(device_name)); - if cfg!(debug_assertions) { - linker_cmd.arg("-v"); - } - let status = linker_cmd.status()?; - assert!(status.success()); - if cfg!(debug_assertions) { - persist_file(linked_binary.path())?; - } - let mut ptx_lib_bitcode = NamedTempFile::new_in(&dir)?; - let compiled_binary = NamedTempFile::new_in(&dir)?; - let mut clang_exe = PathBuf::from(AMDGPU); - clang_exe.push("llvm"); - clang_exe.push("bin"); - clang_exe.push("clang"); - let mut compiler_cmd = Command::new(&clang_exe); - compiler_cmd - .arg(format!("-mcpu={}", device_name)) - .arg("-ffp-contract=off") - .arg("-nogpulib") - .arg("-mno-wavefrontsize64") - .arg("-O3") - .arg("-Xclang") - .arg("-O3") - .arg("-Xlinker") - .arg("--no-undefined") - .arg("-target") - .arg(AMDGPU_TARGET) - .arg("-o") - .arg(compiled_binary.path()) - .arg("-x") - .arg("ir") - .arg(linked_binary.path()); - if let Some((_, bitcode)) = ptx_lib { - ptx_lib_bitcode.write_all(bitcode)?; - compiler_cmd.arg(ptx_lib_bitcode.path()); - }; - if cfg!(debug_assertions) { - compiler_cmd.arg("-v"); - } - let status = compiler_cmd.status()?; - assert!(status.success()); - let mut result = Vec::new(); - let compiled_bin_path = compiled_binary.path(); - let mut compiled_binary = File::open(compiled_bin_path)?; - compiled_binary.read_to_end(&mut result)?; - if cfg!(debug_assertions) { - persist_file(compiled_bin_path)?; - } - Ok(result) -} - -fn persist_file(path: &Path) -> io::Result<()> { - let mut persistent = PathBuf::from("/tmp/zluda"); - std::fs::create_dir_all(&persistent)?; - persistent.push(path.file_name().unwrap()); - std::fs::copy(path, persistent)?; - Ok(()) +pub(crate) fn unload(hmod: CUmodule) -> CUresult { + super::drop_checked::<Module>(hmod) } -fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> { - let generic_paths = AMDGPU_BITCODE.iter().map(|x| { - let mut path = PathBuf::from(AMDGPU); - path.push("amdgcn"); - path.push("bitcode"); - path.push(x); - path - }); - let suffix = if let Some(suffix_idx) = device_name.find(':') { - suffix_idx - } else { - device_name.len() - }; - let mut additional_path = PathBuf::from(AMDGPU); - additional_path.push("amdgcn"); - additional_path.push("bitcode"); - additional_path.push(format!( - "{}{}{}", - AMDGPU_BITCODE_DEVICE_PREFIX, - &device_name[3..suffix], - ".bc" - )); - generic_paths.chain(std::iter::once(additional_path)) +pub(crate) fn get_function( + hfunc: &mut hipFunction_t, + hmod: &Module, + name: *const ::core::ffi::c_char, +) -> hipError_t { + unsafe { hipModuleGetFunction(hfunc, hmod.base, name) } } diff --git a/zluda/src/lib.rs b/zluda/src/lib.rs index 942c7e4..12d6ce0 100644 --- a/zluda/src/lib.rs +++ b/zluda/src/lib.rs @@ -27,16 +27,16 @@ macro_rules! implemented { }; } - -use cuda_base::cuda_function_declarations; -cuda_function_declarations!( +cuda_base::cuda_function_declarations!( unimplemented, implemented <= [ cuCtxGetLimit, cuCtxSetLimit, + cuCtxSynchronize, cuDeviceComputeCapability, cuDeviceGet, cuDeviceGetAttribute, + cuDeviceGetCount, cuDeviceGetLuid, cuDeviceGetName, cuDeviceGetProperties, @@ -44,5 +44,8 @@ cuda_function_declarations!( cuDeviceGetUuid_v2, cuDeviceTotalMem_v2, cuInit, + cuModuleGetFunction, + cuModuleLoadData, + cuModuleUnload, ] );
\ No newline at end of file |