diff options
Diffstat (limited to 'zluda')
-rw-r--r-- | zluda/src/impl/function.rs | 5 | ||||
-rw-r--r-- | zluda/src/impl/module.rs | 41 |
2 files changed, 14 insertions, 32 deletions
diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs index a438699..1f756ee 100644 --- a/zluda/src/impl/function.rs +++ b/zluda/src/impl/function.rs @@ -27,6 +27,7 @@ impl HasLivenessCookie for FunctionData { pub struct FunctionData { pub base: ocl_core::Kernel, + pub device: ocl_core::DeviceId, pub arg_size: Vec<(usize, bool)>, pub use_shared_mem: bool, pub legacy_args: LegacyArguments, @@ -215,9 +216,9 @@ pub(crate) fn get_attribute( CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => { let max_threads = GlobalState::lock_function(func, |func| { if let ocl_core::KernelWorkGroupInfoResult::WorkGroupSize(size) = - ocl_core::get_kernel_work_group_info::<()>( + ocl_core::get_kernel_work_group_info( &func.base, - (), + &func.device, ocl_core::KernelWorkGroupInfo::WorkGroupSize, )? { diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs index c586494..c1d7ffc 100644 --- a/zluda/src/impl/module.rs +++ b/zluda/src/impl/module.rs @@ -290,7 +290,7 @@ impl SpirvModule { let binary = binary_prog.as_ptr(); let mut binary_status = 0; let mut errcode_ret = 0; - let program = unsafe { + let raw_program = unsafe { ocl_core::ffi::clCreateProgramWithBinary( ctx.as_ptr(), 1, @@ -303,7 +303,15 @@ impl SpirvModule { }; assert_eq!(binary_status, 0, "clCreateProgramWithBinary"); assert_eq!(errcode_ret, 0, "clCreateProgramWithBinary"); - unsafe { ocl_core::Program::from_raw_create_ptr(program) } + let ocl_program = unsafe { ocl_core::Program::from_raw_create_ptr(raw_program) }; + ocl_core::build_program( + &ocl_program, + Some(&[dev]), + &CString::new("").unwrap(), + None, + None, + )?; + ocl_program } else { Self::compile_amd("gfx1011:xnack-", byte_il, self.should_link_ptx_impl).unwrap(); Self::compile_intel( @@ -359,36 +367,9 @@ pub fn get_function( &compiled_module.base, &entry.key().as_c_str().to_string_lossy(), )?; - let true_b: ocl_core::ffi::cl_bool = 1; - let err = unsafe { - ocl_core::ffi::clSetKernelExecInfo( - kernel.as_ptr(), - CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, - mem::size_of::<ocl_core::ffi::cl_bool>(), - &true_b as *const _ as *const _, - ) - }; - assert_eq!(err, 0); - let err = unsafe { - ocl_core::ffi::clSetKernelExecInfo( - kernel.as_ptr(), - CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, - mem::size_of::<ocl_core::ffi::cl_bool>(), - &true_b as *const _ as *const _, - ) - }; - assert_eq!(err, 0); - let err = unsafe { - ocl_core::ffi::clSetKernelExecInfo( - kernel.as_ptr(), - CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, - mem::size_of::<ocl_core::ffi::cl_bool>(), - &true_b as *const _ as *const _, - ) - }; - assert_eq!(err, 0); entry.insert(Box::new(Function::new(FunctionData { base: kernel, + device: device.ocl_base.clone(), arg_size: kernel_info.arguments_sizes.clone(), use_shared_mem: kernel_info.uses_shared_mem, legacy_args: LegacyArguments::new(), |