diff options
author | Andrzej Janik <[email protected]> | 2021-09-13 17:59:40 +0000 |
---|---|---|
committer | Andrzej Janik <[email protected]> | 2021-09-13 17:59:40 +0000 |
commit | dbb6f09ffa7e9848d4934acc48001b1777f20473 (patch) | |
tree | c92d0de10cbd0f3621664803187ffcc6df18931d | |
parent | e248a2c9a9ebe77902d14eb45706b1a5cc95f9a2 (diff) | |
download | ZLUDA-dbb6f09ffa7e9848d4934acc48001b1777f20473.tar.gz ZLUDA-dbb6f09ffa7e9848d4934acc48001b1777f20473.zip |
Continue HIP conversion
-rw-r--r-- | ptx/src/test/spirv_run/mod.rs | 28 | ||||
-rw-r--r-- | zluda/src/cuda.rs | 8 | ||||
-rw-r--r-- | zluda/src/impl/device.rs | 4 | ||||
-rw-r--r-- | zluda/src/impl/function.rs | 16 | ||||
-rw-r--r-- | zluda/src/impl/module.rs | 68 |
5 files changed, 74 insertions, 50 deletions
diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index 1bb6ab7..0330d3f 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -270,17 +270,7 @@ fn run_spirv<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + D hip_call! { hipStreamCreate(&mut stream) };
let mut dev_props = unsafe { mem::zeroed() };
hip_call! { hipGetDeviceProperties(&mut dev_props, dev) };
- let nul_terminator = dev_props.gcnArchName.iter().position(|&x| x == 0).unwrap();
- let gcn_arch_slice = unsafe {
- slice::from_raw_parts(dev_props.gcnArchName.as_ptr() as _, nul_terminator + 1)
- };
- let dev_name =
- if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
- name
- } else {
- return Err(hipError_t::hipErrorUnknown);
- };
- let elf_module = compile_amd(dev_name, &*spirv, module.should_link_ptx_impl)
+ let elf_module = compile_amd(&dev_props, &*spirv, module.should_link_ptx_impl)
.map_err(|_| hipError_t::hipErrorUnknown)?;
let mut module = ptr::null_mut();
hip_call! { hipModuleLoadData(&mut module, elf_module.as_ptr() as _) };
@@ -576,10 +566,24 @@ const AMDGPU_BITCODE: [&'static str; 8] = [ const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_";
fn compile_amd(
- device_name: &str,
+ device_pros: &hip::hipDeviceProp_t,
spirv_il: &[u32],
ptx_lib: Option<(&'static [u8], &'static [u8])>,
) -> io::Result<Vec<u8>> {
+ let null_terminator = device_pros
+ .gcnArchName
+ .iter()
+ .position(|&x| x == 0)
+ .unwrap();
+ let gcn_arch_slice = unsafe {
+ slice::from_raw_parts(device_pros.gcnArchName.as_ptr() as _, null_terminator + 1)
+ };
+ let device_name =
+ if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
+ name
+ } else {
+ return Err(io::Error::new(io::ErrorKind::Other, ""));
+ };
let dir = tempfile::tempdir()?;
let mut spirv = NamedTempFile::new_in(&dir)?;
let llvm = NamedTempFile::new_in(&dir)?;
diff --git a/zluda/src/cuda.rs b/zluda/src/cuda.rs index 68e5a80..9be70b5 100644 --- a/zluda/src/cuda.rs +++ b/zluda/src/cuda.rs @@ -2207,7 +2207,10 @@ pub extern "system" fn cuInit(Flags: ::std::os::raw::c_uint) -> CUresult { #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuDriverGetVersion(driverVersion: *mut ::std::os::raw::c_int) -> CUresult { - unsafe { hipDriverGetVersion(driverVersion).into() } + // GeekBench checks this value + // TODO: encode something more sensible + unsafe { *driverVersion = r#impl::driver_get_version() }; + CUresult::CUDA_SUCCESS } #[cfg_attr(not(test), no_mangle)] @@ -2382,7 +2385,8 @@ pub extern "system" fn cuCtxGetFlags(flags: *mut ::std::os::raw::c_uint) -> CUre #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuCtxSynchronize() -> CUresult { - unsafe { hipCtxSynchronize().into() } + // hipCtxSynchronize is not implemented + unsafe { hipDeviceSynchronize().into() } } #[cfg_attr(not(test), no_mangle)] diff --git a/zluda/src/impl/device.rs b/zluda/src/impl/device.rs index bf0545d..f234f0b 100644 --- a/zluda/src/impl/device.rs +++ b/zluda/src/impl/device.rs @@ -62,6 +62,10 @@ pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) - } //let mut props = unsafe { mem::zeroed() }; let hip_attrib = match attrib { + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT => { + unsafe { *pi = 1 }; + return hipError_t::hipSuccess; + } CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING | CUdevice_attribute::CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs index c5ea964..7f35bb4 100644 --- a/zluda/src/impl/function.rs +++ b/zluda/src/impl/function.rs @@ -1,4 +1,4 @@ -use hip_runtime_sys::{hipError_t, hipFuncGetAttributes, hipLaunchKernel, hipModuleLaunchKernel}; +use hip_runtime_sys::{hipError_t, hipFuncAttribute, hipFuncGetAttribute, hipFuncGetAttributes, hipFunction_attribute, hipLaunchKernel, hipModuleLaunchKernel}; use super::{CUresult, HasLivenessCookie, LiveCheck}; use crate::cuda::{CUfunction, CUfunction_attribute, CUstream}; @@ -13,20 +13,14 @@ pub(crate) fn get_attribute( if pi == ptr::null_mut() || func == ptr::null_mut() { return hipError_t::hipErrorInvalidValue; } - let mut hip_attrib = unsafe { mem::zeroed() }; - let err = unsafe { hipFuncGetAttributes(&mut hip_attrib, func as _) }; - if err != hipError_t::hipSuccess { - return err; - } - let value = match cu_attrib { + let attrib = match cu_attrib { CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => { - hip_attrib.maxThreadsPerBlock + hipFunction_attribute::HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK } CUfunction_attribute::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES => { - hip_attrib.sharedSizeBytes as i32 + hipFunction_attribute::HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES } _ => return hipError_t::hipErrorInvalidValue, }; - unsafe { *pi = value }; - hipError_t::hipSuccess + unsafe { hipFuncGetAttribute(pi, attrib, func as _) } } diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs index 463312c..6bd9a40 100644 --- a/zluda/src/impl/module.rs +++ b/zluda/src/impl/module.rs @@ -5,13 +5,13 @@ use std::fs::File; use std::io::{self, Read, Write}; use std::ops::Add; use std::os::raw::c_char; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::process::Command; -use std::{fs, mem, ptr, slice}; +use std::{env, fs, mem, ptr, slice}; use hip_runtime_sys::{ - hipCtxGetCurrent, hipCtxGetDevice, hipDeviceGetAttribute, hipDeviceGetName, hipError_t, - hipGetDeviceProperties, hipGetStreamDeviceId, hipModuleLoadData, + hipCtxGetCurrent, hipCtxGetDevice, hipDeviceGetAttribute, hipDeviceGetName, hipDeviceProp_t, + hipError_t, hipGetDeviceProperties, hipGetStreamDeviceId, hipModuleLoadData, }; use tempfile::NamedTempFile; @@ -85,18 +85,8 @@ pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<() } let mut props = unsafe { mem::zeroed() }; let err = unsafe { hipGetDeviceProperties(&mut props, dev) }; - if err != hipError_t::hipSuccess { - return Err(err); - } - let gcn_arch_slice = - unsafe { slice::from_raw_parts(props.gcnArchName.as_ptr() as _, props.gcnArchName.len()) }; - let name = if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) { - name - } else { - return Err(hipError_t::hipErrorUnknown); - }; let arch_binary = compile_amd( - name, + &props, &spirv_data.binaries[..], spirv_data.should_link_ptx_impl, ) @@ -109,7 +99,7 @@ pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<() } const LLVM_SPIRV: &'static str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv"; -const AMDGPU: &'static str = "/opt/amdgpu-pro/"; +const AMDGPU: &'static str = "/opt/rocm/"; const AMDGPU_TARGET: &'static str = "amdgcn-amd-amdhsa"; const AMDGPU_BITCODE: [&'static str; 8] = [ "opencl.bc", @@ -124,11 +114,24 @@ const AMDGPU_BITCODE: [&'static str; 8] = [ const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_"; fn compile_amd( - device_name: &str, + device_pros: &hipDeviceProp_t, spirv_il: &[u32], ptx_lib: Option<(&'static [u8], &'static [u8])>, ) -> io::Result<Vec<u8>> { - use std::env; + let null_terminator = device_pros + .gcnArchName + .iter() + .position(|&x| x == 0) + .unwrap(); + let gcn_arch_slice = unsafe { + slice::from_raw_parts(device_pros.gcnArchName.as_ptr() as _, null_terminator + 1) + }; + let device_name = + if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) { + name + } else { + return Err(io::Error::new(io::ErrorKind::Other, "")); + }; let dir = tempfile::tempdir()?; let mut spirv = NamedTempFile::new_in(&dir)?; let llvm = NamedTempFile::new_in(&dir)?; @@ -150,8 +153,12 @@ fn compile_amd( .arg(spirv.path()) .status()?; assert!(to_llvm_cmd.success()); + if cfg!(debug_assertions) { + persist_file(llvm.path())?; + } let linked_binary = NamedTempFile::new_in(&dir)?; let mut llvm_link = PathBuf::from(AMDGPU); + llvm_link.push("llvm"); llvm_link.push("bin"); llvm_link.push("llvm-link"); let mut linker_cmd = Command::new(&llvm_link); @@ -166,12 +173,16 @@ fn compile_amd( } let status = linker_cmd.status()?; assert!(status.success()); + if cfg!(debug_assertions) { + persist_file(linked_binary.path())?; + } let mut ptx_lib_bitcode = NamedTempFile::new_in(&dir)?; let compiled_binary = NamedTempFile::new_in(&dir)?; - let mut cland_exe = PathBuf::from(AMDGPU); - cland_exe.push("bin"); - cland_exe.push("clang"); - let mut compiler_cmd = Command::new(&cland_exe); + let mut clang_exe = PathBuf::from(AMDGPU); + clang_exe.push("llvm"); + clang_exe.push("bin"); + clang_exe.push("clang"); + let mut compiler_cmd = Command::new(&clang_exe); compiler_cmd .arg(format!("-mcpu={}", device_name)) .arg("-nogpulib") @@ -199,11 +210,18 @@ fn compile_amd( let compiled_bin_path = compiled_binary.path(); let mut compiled_binary = File::open(compiled_bin_path)?; compiled_binary.read_to_end(&mut result)?; + if cfg!(debug_assertions) { + persist_file(compiled_bin_path)?; + } + Ok(result) +} + +fn persist_file(path: &Path) -> io::Result<()> { let mut persistent = PathBuf::from("/tmp/zluda"); std::fs::create_dir_all(&persistent)?; - persistent.push(compiled_bin_path.file_name().unwrap()); - std::fs::copy(compiled_bin_path, persistent)?; - Ok(result) + persistent.push(path.file_name().unwrap()); + std::fs::copy(path, persistent)?; + Ok(()) } fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> { |