aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Andrzej Janik <[email protected]> 2024-11-22 18:57:19 +0100
committer Andrzej Janik <[email protected]> 2024-11-22 18:57:19 +0100
commit 9f677e23c022955d552f2d530488ef51a95f0d6c (patch)
tree fe1e5c51594e03e365bebaa51bc3dc66ccefaf1f
parent 3ec7bffdc5706b4302e5b39411354f09dec02421 (diff)
download ZLUDA-9f677e23c022955d552f2d530488ef51a95f0d6c.tar.gz
ZLUDA-9f677e23c022955d552f2d530488ef51a95f0d6c.zip
Add basic cuModule*, add handful of missing stuff
-rw-r--r-- zluda/Cargo.toml 2
-rw-r--r-- zluda/src/impl/context.rs 4
-rw-r--r-- zluda/src/impl/device.rs 4
-rw-r--r-- zluda/src/impl/mod.rs 101
-rw-r--r-- zluda/src/impl/module.rs 280
-rw-r--r-- zluda/src/lib.rs 9
6 files changed, 152 insertions, 248 deletions
diff --git a/zluda/Cargo.toml b/zluda/Cargo.toml
index ab87b6c..0a4c406 100644
--- a/zluda/Cargo.toml
+++ b/zluda/Cargo.toml
@@ -9,6 +9,8 @@ name = "nvcuda"
crate-type = ["cdylib"]
[dependencies]
+comgr = { path = "../comgr" }
+ptx_parser = { path = "../ptx_parser" }
ptx = { path = "../ptx" }
cuda_types = { path = "../cuda_types" }
cuda_base = { path = "../cuda_base" }
diff --git a/zluda/src/impl/context.rs b/zluda/src/impl/context.rs
index 61cb92e..d1a135f 100644
--- a/zluda/src/impl/context.rs
+++ b/zluda/src/impl/context.rs
@@ -7,3 +7,7 @@ pub(crate) unsafe fn get_limit(pvalue: *mut usize, limit: hipLimit_t) -> hipErro
pub(crate) fn set_limit(limit: hipLimit_t, value: usize) -> hipError_t {
unsafe { hipDeviceSetLimit(limit, value) }
}
+
+pub(crate) fn synchronize() -> hipError_t {
+ unsafe { hipDeviceSynchronize() }
+}
diff --git a/zluda/src/impl/device.rs b/zluda/src/impl/device.rs
index 9782aad..a2a56c9 100644
--- a/zluda/src/impl/device.rs
+++ b/zluda/src/impl/device.rs
@@ -300,6 +300,10 @@ pub(crate) fn get_properties(prop: &mut cuda_types::CUdevprop, dev: hipDevice_t)
Ok(())
}
+pub(crate) fn get_count(count: &mut ::core::ffi::c_int) -> hipError_t {
+ unsafe { hipGetDeviceCount(count) }
+}
+
fn clamp_usize(x: usize) -> i32 {
usize::min(x, i32::MAX as usize) as i32
}
diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs
index b2e8fc7..0400006 100644
--- a/zluda/src/impl/mod.rs
+++ b/zluda/src/impl/mod.rs
@@ -1,8 +1,10 @@
use cuda_types::*;
use hip_runtime_sys::*;
+use std::mem::{self, ManuallyDrop};
pub(super) mod context;
pub(super) mod device;
+pub(super) mod module;
#[cfg(debug_assertions)]
pub(crate) fn unimplemented() -> CUresult {
@@ -66,9 +68,38 @@ macro_rules! from_cuda_transmute {
};
}
+macro_rules! from_cuda_object {
+ ($($type_:ty),*) => {
+ $(
+ impl<'a> FromCuda<'a, <$type_ as ZludaObject>::CudaHandle> for <$type_ as ZludaObject>::CudaHandle {
+ fn from_cuda(handle: &'a <$type_ as ZludaObject>::CudaHandle) -> Result<<$type_ as ZludaObject>::CudaHandle, CUerror> {
+ Ok(*handle)
+ }
+ }
+
+ impl<'a> FromCuda<'a, *mut <$type_ as ZludaObject>::CudaHandle> for &'a mut <$type_ as ZludaObject>::CudaHandle {
+ fn from_cuda(handle: &'a *mut <$type_ as ZludaObject>::CudaHandle) -> Result<&'a mut <$type_ as ZludaObject>::CudaHandle, CUerror> {
+ match unsafe { handle.as_mut() } {
+ Some(x) => Ok(x),
+ None => Err(CUerror::INVALID_VALUE),
+ }
+ }
+ }
+
+ impl<'a> FromCuda<'a, <$type_ as ZludaObject>::CudaHandle> for &'a $type_ {
+ fn from_cuda(handle: &'a <$type_ as ZludaObject>::CudaHandle) -> Result<&'a $type_, CUerror> {
+ Ok(as_ref(handle).as_result()?)
+ }
+ }
+ )*
+ };
+}
+
from_cuda_nop!(
*mut i8,
*mut usize,
+ *const std::ffi::c_void,
+ *const ::core::ffi::c_char,
i32,
u32,
usize,
@@ -77,8 +108,10 @@ from_cuda_nop!(
);
from_cuda_transmute!(
CUdevice => hipDevice_t,
- CUuuid => hipUUID
+ CUuuid => hipUUID,
+ CUfunction => hipFunction_t
);
+from_cuda_object!(module::Module);
impl<'a> FromCuda<'a, CUlimit> for hipLimit_t {
fn from_cuda(limit: &'a CUlimit) -> Result<Self, CUerror> {
@@ -91,6 +124,72 @@ impl<'a> FromCuda<'a, CUlimit> for hipLimit_t {
}
}
+pub(crate) trait ZludaObject: Sized + Send + Sync {
+ const COOKIE: usize;
+ const LIVENESS_FAIL: CUerror = cuda_types::CUerror::INVALID_VALUE;
+
+ type CudaHandle: Sized;
+
+ fn drop_checked(&mut self) -> CUresult;
+
+ fn wrap(self) -> Self::CudaHandle {
+ unsafe { mem::transmute_copy(&LiveCheck::wrap(self)) }
+ }
+}
+
+#[repr(C)]
+pub(crate) struct LiveCheck<T: ZludaObject> {
+ cookie: usize,
+ data: ManuallyDrop<T>,
+}
+
+impl<T: ZludaObject> LiveCheck<T> {
+ fn wrap(data: T) -> *mut Self {
+ Box::into_raw(Box::new(LiveCheck {
+ cookie: T::COOKIE,
+ data: ManuallyDrop::new(data),
+ }))
+ }
+
+ fn as_result(&self) -> Result<&T, CUerror> {
+ if self.cookie == T::COOKIE {
+ Ok(&self.data)
+ } else {
+ Err(T::LIVENESS_FAIL)
+ }
+ }
+
+ // This looks like nonsense, but it's not. There are two cases:
+ // Err(CUerror) -> meaning that the object is invalid, this pointer does not point into valid memory
+ // Ok(maybe_error) -> meaning that the object is valid, we dropped everything, but there *might*
+ // be an error in the underlying runtime that we want to propagate
+ #[must_use]
+ fn drop_checked(&mut self) -> Result<Result<(), CUerror>, CUerror> {
+ if self.cookie == T::COOKIE {
+ self.cookie = 0;
+ let result = self.data.drop_checked();
+ unsafe { ManuallyDrop::drop(&mut self.data) };
+ Ok(result)
+ } else {
+ Err(T::LIVENESS_FAIL)
+ }
+ }
+}
+
+pub fn as_ref<'a, T: ZludaObject>(
+ handle: &'a T::CudaHandle,
+) -> &'a ManuallyDrop<Box<LiveCheck<T>>> {
+ unsafe { mem::transmute(handle) }
+}
+
+pub fn drop_checked<T: ZludaObject>(handle: T::CudaHandle) -> Result<(), CUerror> {
+ let mut wrapped_object: ManuallyDrop<Box<LiveCheck<T>>> =
+ unsafe { mem::transmute_copy(&handle) };
+ let underlying_error = LiveCheck::drop_checked(&mut wrapped_object)?;
+ unsafe { ManuallyDrop::drop(&mut wrapped_object) };
+ underlying_error
+}
+
pub(crate) fn init(flags: ::core::ffi::c_uint) -> hipError_t {
unsafe { hipInit(flags) }
}
diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs
index 24fa88a..8b19c1b 100644
--- a/zluda/src/impl/module.rs
+++ b/zluda/src/impl/module.rs
@@ -1,261 +1,53 @@
-use std::borrow::Cow;
-use std::collections::HashMap;
-use std::ffi::{CStr, CString};
-use std::fs::File;
-use std::io::{self, Read, Write};
-use std::ops::Add;
-use std::os::raw::c_char;
-use std::path::{Path, PathBuf};
-use std::process::Command;
-use std::{env, fs, iter, mem, ptr, slice};
+use super::ZludaObject;
+use cuda_types::*;
+use hip_runtime_sys::*;
+use std::{ffi::CStr, mem};
-use hip_runtime_sys::{
- hipCtxGetCurrent, hipCtxGetDevice, hipDeviceGetAttribute, hipDeviceGetName, hipDeviceProp_t,
- hipError_t, hipGetDeviceProperties, hipGetStreamDeviceId, hipModuleLoadData,
-};
-use tempfile::NamedTempFile;
-
-use crate::cuda::CUmodule;
-use crate::hip_call;
-
-pub struct SpirvModule {
- pub binaries: Vec<u32>,
- pub kernel_info: HashMap<String, ptx::KernelInfo>,
- pub should_link_ptx_impl: Option<(&'static [u8], &'static [u8])>,
- pub build_options: CString,
+pub(crate) struct Module {
+ base: hipModule_t,
}
-impl SpirvModule {
- pub fn new_raw<'a>(text: *const c_char) -> Result<Self, hipError_t> {
- let u8_text = unsafe { CStr::from_ptr(text) };
- let ptx_text = u8_text
- .to_str()
- .map_err(|_| hipError_t::hipErrorInvalidImage)?;
- Self::new(ptx_text)
- }
+impl ZludaObject for Module {
+ const COOKIE: usize = 0xe9138bd040487d4a;
- pub fn new<'a>(ptx_text: &str) -> Result<Self, hipError_t> {
- let mut errors = Vec::new();
- let ast = ptx::ModuleParser::new()
- .parse(&mut errors, ptx_text)
- .map_err(|_| hipError_t::hipErrorInvalidImage)?;
- if errors.len() > 0 {
- return Err(hipError_t::hipErrorInvalidImage);
- }
- let spirv_module =
- ptx::to_spirv_module(ast).map_err(|_| hipError_t::hipErrorInvalidImage)?;
- Ok(SpirvModule {
- binaries: spirv_module.assemble(),
- kernel_info: spirv_module.kernel_info,
- should_link_ptx_impl: spirv_module.should_link_ptx_impl,
- build_options: spirv_module.build_options,
- })
- }
-}
+ type CudaHandle = CUmodule;
-pub(crate) fn load(module: *mut CUmodule, fname: *const i8) -> Result<(), hipError_t> {
- let file_name = unsafe { CStr::from_ptr(fname) }
- .to_str()
- .map_err(|_| hipError_t::hipErrorInvalidValue)?;
- let mut file = File::open(file_name).map_err(|_| hipError_t::hipErrorFileNotFound)?;
- let mut file_buffer = Vec::new();
- file.read_to_end(&mut file_buffer)
- .map_err(|_| hipError_t::hipErrorUnknown)?;
- let result = load_data(module, file_buffer.as_ptr() as _);
- drop(file_buffer);
- result
-}
-
-pub(crate) fn load_data(
- module: *mut CUmodule,
- image: *const std::ffi::c_void,
-) -> Result<(), hipError_t> {
- if image == ptr::null() {
- return Err(hipError_t::hipErrorInvalidValue);
- }
- if unsafe { *(image as *const u32) } == 0x464c457f {
- return match unsafe { hipModuleLoadData(module as _, image) } {
- hipError_t::hipSuccess => Ok(()),
- e => Err(e),
- };
+ fn drop_checked(&mut self) -> CUresult {
+ unsafe { hipModuleUnload(self.base) }?;
+ Ok(())
}
- let spirv_data = SpirvModule::new_raw(image as *const _)?;
- load_data_impl(module, spirv_data)
}
-pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<(), hipError_t> {
+pub(crate) fn load_data(module: &mut CUmodule, image: *const std::ffi::c_void) -> CUresult {
+ let text = unsafe { CStr::from_ptr(image.cast()) }
+ .to_str()
+ .map_err(|_| CUerror::INVALID_VALUE)?;
+ let ast = ptx_parser::parse_module_checked(text).map_err(|_| CUerror::NO_BINARY_FOR_GPU)?;
+ let llvm_module = ptx::to_llvm_module(ast).map_err(|_| CUerror::UNKNOWN)?;
let mut dev = 0;
- hip_call! { hipCtxGetDevice(&mut dev) };
+ unsafe { hipCtxGetDevice(&mut dev) }?;
let mut props = unsafe { mem::zeroed() };
- hip_call! { hipGetDeviceProperties(&mut props, dev) };
- let arch_binary = compile_amd(
- &props,
- iter::once(&spirv_data.binaries[..]),
- spirv_data.should_link_ptx_impl,
+ unsafe { hipGetDevicePropertiesR0600(&mut props, dev) }?;
+ let elf_module = comgr::compile_bitcode(
+ unsafe { CStr::from_ptr(props.gcnArchName.as_ptr()) },
+ &*llvm_module.llvm_ir,
+ llvm_module.linked_bitcode(),
)
- .map_err(|_| hipError_t::hipErrorUnknown)?;
- hip_call! { hipModuleLoadData(pmod as _, arch_binary.as_ptr() as _) };
+ .map_err(|_| CUerror::UNKNOWN)?;
+ let mut hip_module = unsafe { mem::zeroed() };
+ unsafe { hipModuleLoadData(&mut hip_module, elf_module.as_ptr().cast()) }?;
+ *module = Module { base: hip_module }.wrap();
Ok(())
}
-const LLVM_SPIRV: &'static str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv";
-const AMDGPU: &'static str = "/opt/rocm/";
-const AMDGPU_TARGET: &'static str = "amdgcn-amd-amdhsa";
-const AMDGPU_BITCODE: [&'static str; 8] = [
- "opencl.bc",
- "ocml.bc",
- "ockl.bc",
- "oclc_correctly_rounded_sqrt_off.bc",
- "oclc_daz_opt_on.bc",
- "oclc_finite_only_off.bc",
- "oclc_unsafe_math_off.bc",
- "oclc_wavefrontsize64_off.bc",
-];
-const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_";
-
-pub(crate) fn compile_amd<'a>(
- device_pros: &hipDeviceProp_t,
- spirv_il: impl Iterator<Item = &'a [u32]>,
- ptx_lib: Option<(&'static [u8], &'static [u8])>,
-) -> io::Result<Vec<u8>> {
- let null_terminator = device_pros
- .gcnArchName
- .iter()
- .position(|&x| x == 0)
- .unwrap();
- let gcn_arch_slice = unsafe {
- slice::from_raw_parts(device_pros.gcnArchName.as_ptr() as _, null_terminator + 1)
- };
- let device_name =
- if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
- name
- } else {
- return Err(io::Error::new(io::ErrorKind::Other, ""));
- };
- let dir = tempfile::tempdir()?;
- let llvm_spirv_path = match env::var("LLVM_SPIRV") {
- Ok(path) => Cow::Owned(path),
- Err(_) => Cow::Borrowed(LLVM_SPIRV),
- };
- let llvm_files = spirv_il
- .map(|spirv| {
- let mut spirv_file = NamedTempFile::new_in(&dir)?;
- let spirv_u8 = unsafe {
- slice::from_raw_parts(
- spirv.as_ptr() as *const u8,
- spirv.len() * mem::size_of::<u32>(),
- )
- };
- spirv_file.write_all(spirv_u8)?;
- if cfg!(debug_assertions) {
- persist_file(spirv_file.path())?;
- }
- let llvm = NamedTempFile::new_in(&dir)?;
- let to_llvm_cmd = Command::new(&*llvm_spirv_path)
- //.arg("--spirv-debug")
- .arg("-r")
- .arg("-o")
- .arg(llvm.path())
- .arg(spirv_file.path())
- .status()?;
- assert!(to_llvm_cmd.success());
- if cfg!(debug_assertions) {
- persist_file(llvm.path())?;
- }
- Ok::<_, io::Error>(llvm)
- })
- .collect::<Result<Vec<_>, _>>()?;
- let linked_binary = NamedTempFile::new_in(&dir)?;
- let mut llvm_link = PathBuf::from(AMDGPU);
- llvm_link.push("llvm");
- llvm_link.push("bin");
- llvm_link.push("llvm-link");
- let mut linker_cmd = Command::new(&llvm_link);
- linker_cmd
- .arg("-o")
- .arg(linked_binary.path())
- .args(llvm_files.iter().map(|f| f.path()))
- .args(get_bitcode_paths(device_name));
- if cfg!(debug_assertions) {
- linker_cmd.arg("-v");
- }
- let status = linker_cmd.status()?;
- assert!(status.success());
- if cfg!(debug_assertions) {
- persist_file(linked_binary.path())?;
- }
- let mut ptx_lib_bitcode = NamedTempFile::new_in(&dir)?;
- let compiled_binary = NamedTempFile::new_in(&dir)?;
- let mut clang_exe = PathBuf::from(AMDGPU);
- clang_exe.push("llvm");
- clang_exe.push("bin");
- clang_exe.push("clang");
- let mut compiler_cmd = Command::new(&clang_exe);
- compiler_cmd
- .arg(format!("-mcpu={}", device_name))
- .arg("-ffp-contract=off")
- .arg("-nogpulib")
- .arg("-mno-wavefrontsize64")
- .arg("-O3")
- .arg("-Xclang")
- .arg("-O3")
- .arg("-Xlinker")
- .arg("--no-undefined")
- .arg("-target")
- .arg(AMDGPU_TARGET)
- .arg("-o")
- .arg(compiled_binary.path())
- .arg("-x")
- .arg("ir")
- .arg(linked_binary.path());
- if let Some((_, bitcode)) = ptx_lib {
- ptx_lib_bitcode.write_all(bitcode)?;
- compiler_cmd.arg(ptx_lib_bitcode.path());
- };
- if cfg!(debug_assertions) {
- compiler_cmd.arg("-v");
- }
- let status = compiler_cmd.status()?;
- assert!(status.success());
- let mut result = Vec::new();
- let compiled_bin_path = compiled_binary.path();
- let mut compiled_binary = File::open(compiled_bin_path)?;
- compiled_binary.read_to_end(&mut result)?;
- if cfg!(debug_assertions) {
- persist_file(compiled_bin_path)?;
- }
- Ok(result)
-}
-
-fn persist_file(path: &Path) -> io::Result<()> {
- let mut persistent = PathBuf::from("/tmp/zluda");
- std::fs::create_dir_all(&persistent)?;
- persistent.push(path.file_name().unwrap());
- std::fs::copy(path, persistent)?;
- Ok(())
+pub(crate) fn unload(hmod: CUmodule) -> CUresult {
+ super::drop_checked::<Module>(hmod)
}
-fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> {
- let generic_paths = AMDGPU_BITCODE.iter().map(|x| {
- let mut path = PathBuf::from(AMDGPU);
- path.push("amdgcn");
- path.push("bitcode");
- path.push(x);
- path
- });
- let suffix = if let Some(suffix_idx) = device_name.find(':') {
- suffix_idx
- } else {
- device_name.len()
- };
- let mut additional_path = PathBuf::from(AMDGPU);
- additional_path.push("amdgcn");
- additional_path.push("bitcode");
- additional_path.push(format!(
- "{}{}{}",
- AMDGPU_BITCODE_DEVICE_PREFIX,
- &device_name[3..suffix],
- ".bc"
- ));
- generic_paths.chain(std::iter::once(additional_path))
+pub(crate) fn get_function(
+ hfunc: &mut hipFunction_t,
+ hmod: &Module,
+ name: *const ::core::ffi::c_char,
+) -> hipError_t {
+ unsafe { hipModuleGetFunction(hfunc, hmod.base, name) }
}
diff --git a/zluda/src/lib.rs b/zluda/src/lib.rs
index 942c7e4..12d6ce0 100644
--- a/zluda/src/lib.rs
+++ b/zluda/src/lib.rs
@@ -27,16 +27,16 @@ macro_rules! implemented {
};
}
-
-use cuda_base::cuda_function_declarations;
-cuda_function_declarations!(
+cuda_base::cuda_function_declarations!(
unimplemented,
implemented <= [
cuCtxGetLimit,
cuCtxSetLimit,
+ cuCtxSynchronize,
cuDeviceComputeCapability,
cuDeviceGet,
cuDeviceGetAttribute,
+ cuDeviceGetCount,
cuDeviceGetLuid,
cuDeviceGetName,
cuDeviceGetProperties,
@@ -44,5 +44,8 @@ cuda_function_declarations!(
cuDeviceGetUuid_v2,
cuDeviceTotalMem_v2,
cuInit,
+ cuModuleGetFunction,
+ cuModuleLoadData,
+ cuModuleUnload,
]
);
\ No newline at end of file