about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Andrzej Janik <[email protected]> 2021-08-08 14:26:56 +0200
committer: Andrzej Janik <[email protected]> 2021-08-08 14:26:56 +0200
commit: e2432d0df15e626de6a2b2d9d7ae5295f0fac999 (patch)
tree: 457f2884726f43ff7f2fd0b5d7874bf5d05a1082
parent: 043172bd9b425a687f72c52d2fb006a897fd621d (diff)
download: ZLUDA-e2432d0df15e626de6a2b2d9d7ae5295f0fac999.tar.gz
download: ZLUDA-e2432d0df15e626de6a2b2d9d7ae5295f0fac999.zip
Improve AMD compatibility
-rw-r--r-- zluda/src/impl/function.rs 5
-rw-r--r-- zluda/src/impl/module.rs 41
2 files changed, 14 insertions, 32 deletions
diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs
index a438699..1f756ee 100644
--- a/zluda/src/impl/function.rs
+++ b/zluda/src/impl/function.rs
@@ -27,6 +27,7 @@ impl HasLivenessCookie for FunctionData {
pub struct FunctionData {
pub base: ocl_core::Kernel,
+ pub device: ocl_core::DeviceId,
pub arg_size: Vec<(usize, bool)>,
pub use_shared_mem: bool,
pub legacy_args: LegacyArguments,
@@ -215,9 +216,9 @@ pub(crate) fn get_attribute(
CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => {
let max_threads = GlobalState::lock_function(func, |func| {
if let ocl_core::KernelWorkGroupInfoResult::WorkGroupSize(size) =
- ocl_core::get_kernel_work_group_info::<()>(
+ ocl_core::get_kernel_work_group_info(
&func.base,
- (),
+ &func.device,
ocl_core::KernelWorkGroupInfo::WorkGroupSize,
)?
{
diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs
index c586494..c1d7ffc 100644
--- a/zluda/src/impl/module.rs
+++ b/zluda/src/impl/module.rs
@@ -290,7 +290,7 @@ impl SpirvModule {
let binary = binary_prog.as_ptr();
let mut binary_status = 0;
let mut errcode_ret = 0;
- let program = unsafe {
+ let raw_program = unsafe {
ocl_core::ffi::clCreateProgramWithBinary(
ctx.as_ptr(),
1,
@@ -303,7 +303,15 @@ impl SpirvModule {
};
assert_eq!(binary_status, 0, "clCreateProgramWithBinary");
assert_eq!(errcode_ret, 0, "clCreateProgramWithBinary");
- unsafe { ocl_core::Program::from_raw_create_ptr(program) }
+ let ocl_program = unsafe { ocl_core::Program::from_raw_create_ptr(raw_program) };
+ ocl_core::build_program(
+ &ocl_program,
+ Some(&[dev]),
+ &CString::new("").unwrap(),
+ None,
+ None,
+ )?;
+ ocl_program
} else {
Self::compile_amd("gfx1011:xnack-", byte_il, self.should_link_ptx_impl).unwrap();
Self::compile_intel(
@@ -359,36 +367,9 @@ pub fn get_function(
&compiled_module.base,
&entry.key().as_c_str().to_string_lossy(),
)?;
- let true_b: ocl_core::ffi::cl_bool = 1;
- let err = unsafe {
- ocl_core::ffi::clSetKernelExecInfo(
- kernel.as_ptr(),
- CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL,
- mem::size_of::<ocl_core::ffi::cl_bool>(),
- &true_b as *const _ as *const _,
- )
- };
- assert_eq!(err, 0);
- let err = unsafe {
- ocl_core::ffi::clSetKernelExecInfo(
- kernel.as_ptr(),
- CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL,
- mem::size_of::<ocl_core::ffi::cl_bool>(),
- &true_b as *const _ as *const _,
- )
- };
- assert_eq!(err, 0);
- let err = unsafe {
- ocl_core::ffi::clSetKernelExecInfo(
- kernel.as_ptr(),
- CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL,
- mem::size_of::<ocl_core::ffi::cl_bool>(),
- &true_b as *const _ as *const _,
- )
- };
- assert_eq!(err, 0);
entry.insert(Box::new(Function::new(FunctionData {
base: kernel,
+ device: device.ocl_base.clone(),
arg_size: kernel_info.arguments_sizes.clone(),
use_shared_mem: kernel_info.uses_shared_mem,
legacy_args: LegacyArguments::new(),