diff options
author | Andrzej Janik <[email protected]> | 2021-07-21 01:46:50 +0200 |
---|---|---|
committer | Andrzej Janik <[email protected]> | 2021-07-21 01:46:50 +0200 |
commit | 3d2024bf62db6f56c5cc00fea6c41f9d6bde1271 (patch) | |
tree | 7453a13ce8664146860a556a724b057fae35f02a | |
parent | 58fb8a234c338ace4a205c9847df740841701bde (diff) | |
download | ZLUDA-3d2024bf62db6f56c5cc00fea6c41f9d6bde1271.tar.gz ZLUDA-3d2024bf62db6f56c5cc00fea6c41f9d6bde1271.zip |
Start converting to OpenCL
-rw-r--r-- | .cargo/config.toml | 5 | ||||
-rw-r--r-- | zluda/Cargo.toml | 4 | ||||
-rw-r--r-- | zluda/src/impl/context.rs | 4 | ||||
-rw-r--r-- | zluda/src/impl/device.rs | 84 | ||||
-rw-r--r-- | zluda/src/impl/function.rs | 89 | ||||
-rw-r--r-- | zluda/src/impl/memory.rs | 4 | ||||
-rw-r--r-- | zluda/src/impl/mod.rs | 44 | ||||
-rw-r--r-- | zluda/src/impl/module.rs | 113 | ||||
-rw-r--r-- | zluda/src/impl/stream.rs | 2 |
9 files changed, 220 insertions, 129 deletions
diff --git a/.cargo/config.toml b/.cargo/config.toml index cd7ce74..6833199 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,2 +1,5 @@ +[build]
+rustflags = ["-C", "target-cpu=haswell"]
+
[target."x86_64-pc-windows-gnu"]
-rustflags = ["-C", "link-self-contained=y"]
+rustflags = ["-C", "link-self-contained=y", "-C", "target-cpu=haswell"]
diff --git a/zluda/Cargo.toml b/zluda/Cargo.toml index c7d8cc9..cfb1a50 100644 --- a/zluda/Cargo.toml +++ b/zluda/Cargo.toml @@ -15,6 +15,10 @@ lazy_static = "1.4" num_enum = "0.4" lz4-sys = "1.9" +[dependencies.ocl-core] +version = "0.11" +features = ["opencl_version_1_2", "opencl_version_2_0", "opencl_version_2_1"] + [target.'cfg(windows)'.dependencies] winapi = { version = "0.3", features = ["heapapi", "std"] } diff --git a/zluda/src/impl/context.rs b/zluda/src/impl/context.rs index 8d7a465..18a2bd6 100644 --- a/zluda/src/impl/context.rs +++ b/zluda/src/impl/context.rs @@ -137,11 +137,11 @@ pub fn create_v2( let mut ctx_box = GlobalState::lock_device(dev_idx, |dev| { let dev_ptr = dev as *mut _; let mut ctx_box = Box::new(LiveCheck::new(ContextData::new( - &dev.l0_context, + &dev.ocl_context, dev.base, flags, false, - dev.host_event_pool.get(dev.base, &dev.l0_context)?, + dev.host_event_pool.get(dev.base, &dev.ocl_context)?, dev_ptr as *mut _, )?)); ctx_box.late_init(); diff --git a/zluda/src/impl/device.rs b/zluda/src/impl/device.rs index e686f27..c356bda 100644 --- a/zluda/src/impl/device.rs +++ b/zluda/src/impl/device.rs @@ -1,6 +1,7 @@ use super::{context, transmute_lifetime, transmute_lifetime_mut, CUresult, GlobalState}; use crate::cuda; use cuda::{CUdevice_attribute, CUuuid_st}; +use ocl_core::DeviceType; use std::{ cmp, mem, os::raw::{c_char, c_int, c_uint}, @@ -18,11 +19,10 @@ pub struct Index(pub c_int); pub struct Device { pub index: Index, pub base: l0::Device, - pub default_queue: l0::CommandQueue<'static>, - pub l0_context: l0::Context, + pub ocl_base: ocl_core::DeviceId, + pub default_queue: ocl_core::CommandQueue, + pub ocl_context: ocl_core::Context, pub primary_context: context::Context, - pub device_event_pool: DynamicEventPool, - pub host_event_pool: DynamicEventPool, properties: Option<Box<l0::sys::ze_device_properties_t>>, image_properties: Option<Box<l0::sys::ze_device_image_properties_t>>, memory_properties: Option<Vec<l0::sys::ze_device_memory_properties_t>>, @@ -32,41 +32,22 @@ pub struct Device { unsafe impl Send for Device {} impl Device { - // Unsafe because it does not fully initalize primary_context - // and we transmute lifetimes left and right - unsafe fn new(drv: &l0::Driver, l0_dev: l0::Device, idx: usize) -> Result<Self, CUresult> { - let ctx = l0::Context::new(*drv, Some(&[l0_dev]))?; - let queue = l0::CommandQueue::new(mem::transmute(&ctx), l0_dev)?; - let mut host_event_pool = DynamicEventPool::new( - l0_dev, - transmute_lifetime(&ctx), - l0::sys::ze_event_pool_flags_t::ZE_EVENT_POOL_FLAG_HOST_VISIBLE, - l0::sys::ze_event_scope_flags_t::ZE_EVENT_SCOPE_FLAG_HOST, - )?; - let host_event = - transmute_lifetime_mut(&mut host_event_pool).get(l0_dev, transmute_lifetime(&ctx))?; - let primary_context = context::Context::new(context::ContextData::new( - transmute_lifetime(&ctx), - l0_dev, - 0, - true, - host_event, - ptr::null_mut(), - )?); - let device_event_pool = DynamicEventPool::new( - l0_dev, - transmute_lifetime(&ctx), - l0::sys::ze_event_pool_flags_t(0), - l0::sys::ze_event_scope_flags_t(0), - )?; + pub fn new( + drv: &l0::Driver, + l0_dev: l0::Device, + ocl_dev: ocl_core::DeviceId, + idx: usize, + ) -> Result<Self, CUresult> { + let ctx = ocl_core::create_context(None, &[ocl_dev], None, None)?; + let queue = ocl_core::create_command_queue(&ctx, ocl_dev, None)?; + let primary_context = context::Context::new(context::ContextData::new()); Ok(Self { index: Index(idx as c_int), base: l0_dev, + ocl_base: ocl_dev, default_queue: queue, - l0_context: ctx, - primary_context: primary_context, - device_event_pool, - host_event_pool, + ocl_context: ctx, + primary_context, properties: None, image_properties: None, memory_properties: None, @@ -111,10 +92,6 @@ impl Device { Ok(self.compute_properties.get_or_insert(Box::new(props))) } - pub fn late_init(&mut self) { - self.primary_context.as_option_mut().unwrap().device = self as *mut _; - } - fn get_max_simd(&mut self) -> l0::Result<u32> { let props = self.get_compute_properties()?; Ok(*props.subGroupSizes[0..props.numSubGroupSizes as usize] @@ -124,20 +101,6 @@ impl Device { } } -pub fn init(driver: &l0::Driver) -> Result<Vec<Device>, CUresult> { - let ze_devices = driver.devices()?; - let mut devices = ze_devices - .into_iter() - .enumerate() - .map(|(idx, d)| unsafe { Device::new(driver, d, idx) }) - .collect::<Result<Vec<_>, _>>()?; - for dev in devices.iter_mut() { - dev.late_init(); - dev.primary_context.late_init(); - } - Ok(devices) -} - pub fn get_count(count: *mut c_int) -> Result<(), CUresult> { let len = GlobalState::lock(|state| state.devices.len())?; unsafe { *count = len as c_int }; @@ -215,8 +178,6 @@ impl CUdevice_attribute { match self { CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP => Some(1), CUdevice_attribute::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT => Some(1), - // TODO: fix this for DG1 - CUdevice_attribute::CU_DEVICE_ATTRIBUTE_INTEGRATED => Some(1), // TODO: go back to this once we have more funcitonality implemented CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => Some(8), CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR => Some(0), @@ -239,6 +200,19 @@ pub fn get_attribute( return Ok(()); } let value = match attrib { + CUdevice_attribute::CU_DEVICE_ATTRIBUTE_INTEGRATED => { + GlobalState::lock_device(dev_idx, |dev| { + let props = dev.get_properties()?; + if (props.flags + & l0::sys::ze_device_property_flags_t::ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) + == l0::sys::ze_device_property_flags_t::ZE_DEVICE_PROPERTY_FLAG_INTEGRATED + { + Ok(1) + } else { + Ok(0) + } + })?? + } CUdevice_attribute::CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT => { GlobalState::lock_device(dev_idx, |dev| { let props = dev.get_properties()?; diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs index 2a35512..2658d27 100644 --- a/zluda/src/impl/function.rs +++ b/zluda/src/impl/function.rs @@ -1,3 +1,5 @@ +use ocl_core::DeviceId; + use super::{stream::Stream, CUresult, GlobalState, HasLivenessCookie, LiveCheck}; use crate::cuda::CUfunction_attribute; use ::std::os::raw::{c_uint, c_void}; @@ -24,10 +26,9 @@ impl HasLivenessCookie for FunctionData { } pub struct FunctionData { - pub base: l0::Kernel<'static>, + pub base: ocl_core::Kernel, pub arg_size: Vec<usize>, pub use_shared_mem: bool, - pub properties: Option<Box<l0::sys::ze_kernel_properties_t>>, pub legacy_args: LegacyArguments, } @@ -50,18 +51,6 @@ impl LegacyArguments { } } -impl FunctionData { - fn get_properties(&mut self) -> Result<&l0::sys::ze_kernel_properties_t, l0::sys::ze_result_t> { - if let None = self.properties { - self.properties = Some(self.base.get_properties()?) - } - match self.properties { - Some(ref props) => Ok(props.as_ref()), - None => unsafe { hint::unreachable_unchecked() }, - } - } -} - pub fn launch_kernel( f: *mut Function, grid_dim_x: c_uint, @@ -81,13 +70,16 @@ pub fn launch_kernel( { return Err(CUresult::CUDA_ERROR_INVALID_VALUE); } - GlobalState::lock_enqueue(hstream, |cmd_list, signal, wait| { + GlobalState::lock_enqueue(hstream, |queue| { let func: &mut FunctionData = unsafe { &mut *f }.as_result_mut()?; if kernel_params != ptr::null_mut() { for (i, arg_size) in func.arg_size.iter().enumerate() { unsafe { - func.base - .set_arg_raw(i as u32, *arg_size, *kernel_params.add(i))? + ocl_core::set_kernel_arg( + &func.base, + i as u32, + ocl_core::ArgVal::from_raw(*arg_size, *kernel_params.add(i), false), + )?; }; } } else { @@ -120,11 +112,15 @@ pub fn launch_kernel( for (i, arg_size) in func.arg_size.iter().enumerate() { let buffer_offset = round_up_to_multiple(offset, *arg_size); unsafe { - func.base.set_arg_raw( + ocl_core::set_kernel_arg( + &func.base, i as u32, - *arg_size, - buffer_ptr.add(buffer_offset) as *const _, - )? + ocl_core::ArgVal::from_raw( + *arg_size, + buffer_ptr.add(buffer_offset) as *const _, + false, + ), + )?; }; offset = buffer_offset + *arg_size; } @@ -134,24 +130,34 @@ pub fn launch_kernel( } if func.use_shared_mem { unsafe { - func.base.set_arg_raw( + ocl_core::set_kernel_arg( + &func.base, func.arg_size.len() as u32, - shared_mem_bytes as usize, - ptr::null(), - )? + ocl_core::ArgVal::from_raw(shared_mem_bytes as usize, ptr::null(), false), + )?; }; } - func.base - .set_group_size(block_dim_x, block_dim_y, block_dim_z)?; - func.legacy_args.reset(); + let global_dims = [ + (block_dim_x * grid_dim_x) as usize, + (block_dim_y * grid_dim_y) as usize, + (block_dim_z * grid_dim_z) as usize, + ]; unsafe { - cmd_list.append_launch_kernel( - &mut func.base, - &[grid_dim_x, grid_dim_y, grid_dim_z], - Some(signal), - wait, - )?; - } + ocl_core::enqueue_kernel::<&mut ocl_core::Event, ocl_core::Event>( + queue, + &func.base, + 3, + None, + &global_dims, + Some([ + block_dim_x as usize, + block_dim_y as usize, + block_dim_z as usize, + ]), + None, + None, + )? + }; Ok::<_, CUresult>(()) }) } @@ -171,8 +177,17 @@ pub(crate) fn get_attribute( match attrib { CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => { let max_threads = GlobalState::lock_function(func, |func| { - let props = func.get_properties()?; - Ok::<_, CUresult>(props.maxSubgroupSize * props.maxNumSubgroups) + if let ocl_core::KernelWorkGroupInfoResult::WorkGroupSize(size) = + ocl_core::get_kernel_work_group_info::<ocl_core::DeviceId>( + &func.base, + unsafe { ocl_core::DeviceId::null() }, + ocl_core::KernelWorkGroupInfo::WorkGroupSize, + )? + { + Ok(size) + } else { + Err(CUresult::CUDA_ERROR_UNKNOWN) + } })??; unsafe { *pi = max_threads as i32 }; Ok(()) diff --git a/zluda/src/impl/memory.rs b/zluda/src/impl/memory.rs index 56821d1..5919690 100644 --- a/zluda/src/impl/memory.rs +++ b/zluda/src/impl/memory.rs @@ -4,7 +4,7 @@ use std::{ffi::c_void, mem}; pub fn alloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> Result<(), CUresult> {
let ptr = GlobalState::lock_current_context(|ctx| {
let dev = unsafe { &mut *ctx.device };
- Ok::<_, CUresult>(dev.l0_context.mem_alloc_device(bytesize, 0, dev.base)?)
+ Ok::<_, CUresult>(dev.ocl_context.mem_alloc_device(bytesize, 0, dev.base)?)
})??;
unsafe { *dptr = ptr };
Ok(())
@@ -20,7 +20,7 @@ pub fn copy_v2(dst: *mut c_void, src: *const c_void, bytesize: usize) -> Result< pub fn free_v2(ptr: *mut c_void) -> Result<(), CUresult> {
GlobalState::lock_current_context(|ctx| {
let dev = unsafe { &mut *ctx.device };
- Ok::<_, CUresult>(dev.l0_context.mem_free(ptr)?)
+ Ok::<_, CUresult>(dev.ocl_context.mem_free(ptr)?)
})
.map_err(|_| CUresult::CUDA_ERROR_INVALID_VALUE)?
}
diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs index c3df815..d410554 100644 --- a/zluda/src/impl/mod.rs +++ b/zluda/src/impl/mod.rs @@ -164,6 +164,14 @@ impl<T> From<TryLockError<T>> for CUresult { } } +impl From<ocl_core::Error> for CUresult { + fn from(result: ocl_core::Error) -> Self { + match result { + _ => CUresult::CUDA_ERROR_UNKNOWN, + } + } +} + pub trait Encuda { type To: Sized; fn encuda(self: Self) -> Self::To; @@ -207,6 +215,7 @@ lazy_static! { struct GlobalState { devices: Vec<Device>, global_heap: *mut c_void, + platform: ocl_core::PlatformId, } unsafe impl Send for GlobalState {} @@ -275,15 +284,11 @@ impl GlobalState { fn lock_enqueue( stream: *mut stream::Stream, - f: impl FnOnce( - &l0::CommandList, - &l0::Event<'static>, - &[&l0::Event<'static>], - ) -> Result<(), CUresult>, + f: impl FnOnce(&ocl_core::CommandQueue) -> Result<(), CUresult>, ) -> Result<(), CUresult> { Self::lock_stream(stream, |stream_data| { let l0_dev = unsafe { (*(*stream_data.context).device).base }; - let l0_ctx = unsafe { &mut (*(*stream_data.context).device).l0_context }; + let l0_ctx = unsafe { &mut (*(*stream_data.context).device).ocl_context }; let cmd_list = unsafe { transmute_lifetime(&stream_data.cmd_list) }; // TODO: make new_marker drop-safe let (new_event, new_marker) = stream_data.get_event(l0_dev, l0_ctx)?; @@ -325,10 +330,34 @@ pub fn init() -> Result<(), CUresult> { return Ok(()); } l0::init()?; + let platforms = ocl_core::get_platform_ids()?; + let (platform, device) = platforms + .iter() + .find_map(|plat| { + let devices = + ocl_core::get_device_ids(plat, Some(ocl_core::DeviceType::GPU), None).ok()?; + for dev in devices { + let vendor = ocl_core::get_device_info(dev, ocl_core::DeviceInfo::VendorId).ok()?; + if let ocl_core::DeviceInfoResult::VendorId(0x8086) = vendor { + let dev_type = + ocl_core::get_device_info(dev, ocl_core::DeviceInfo::Type).ok()?; + if let ocl_core::DeviceInfoResult::Type(ocl_core::DeviceType::GPU) = dev_type { + return Some((plat.clone(), dev)); + } + } + } + None + }) + .ok_or(CUresult::CUDA_ERROR_UNKNOWN)?; let drivers = l0::Driver::get()?; let devices = match drivers.into_iter().find(is_intel_gpu_driver) { None => return Err(CUresult::CUDA_ERROR_UNKNOWN), - Some(driver) => device::init(&driver)?, + Some(driver) => driver + .devices()? + .into_iter() + .enumerate() + .map(|(idx, l0_dev)| device::Device::new(&driver, l0_dev, device, idx).unwrap()) + .collect::<Vec<_>>(), }; let global_heap = unsafe { os::heap_create() }; if global_heap == ptr::null_mut() { @@ -337,6 +366,7 @@ pub fn init() -> Result<(), CUresult> { *global_state = Some(GlobalState { devices, global_heap, + platform, }); drop(global_state); Ok(()) diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs index 6268904..88d85c4 100644 --- a/zluda/src/impl/module.rs +++ b/zluda/src/impl/module.rs @@ -1,8 +1,18 @@ use std::{ - collections::hash_map, collections::HashMap, ffi::c_void, ffi::CStr, ffi::CString, mem, - os::raw::c_char, ptr, slice, + collections::hash_map, + collections::HashMap, + ffi::c_void, + ffi::CStr, + ffi::CString, + mem, + os::raw::{c_char, c_int, c_uint}, + ptr, slice, }; +const CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL: u32 = 0x4200; +const CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL: u32 = 0x4201; +const CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL: u32 = 0x4202; + use super::{ device, function::Function, @@ -41,7 +51,7 @@ pub struct SpirvModule { } pub struct CompiledModule { - pub base: l0::Module<'static>, + pub base: ocl_core::Program, pub kernels: HashMap<CString, Box<Function>>, } @@ -80,28 +90,57 @@ impl SpirvModule { pub fn compile<'a>( &self, - ctx: &'a l0::Context, - dev: l0::Device, - ) -> Result<l0::Module<'a>, CUresult> { + ctx: &ocl_core::Context, + dev: &ocl_core::DeviceId, + ) -> Result<ocl_core::Program, CUresult> { let byte_il = unsafe { slice::from_raw_parts( self.binaries.as_ptr() as *const u8, self.binaries.len() * mem::size_of::<u32>(), ) }; - let l0_module = match self.should_link_ptx_impl { - None => l0::Module::build_spirv(ctx, dev, byte_il, Some(self.build_options.as_c_str())), + let main_module = ocl_core::create_program_with_il(ctx, byte_il, None)?; + match self.should_link_ptx_impl { + None => { + ocl_core::compile_program( + &main_module, + Some(&[dev]), + &self.build_options, + &[], + &[], + None, + None, + None, + )?; + } Some(ptx_impl) => { - l0::Module::build_link_spirv( + let ptx_impl_prog = ocl_core::create_program_with_il(ctx, ptx_impl, None)?; + ocl_core::build_program( + &main_module, + Some(&[dev]), + &self.build_options, + None, + None, + )?; + ocl_core::build_program( + &ptx_impl_prog, + Some(&[dev]), + &self.build_options, + None, + None, + )?; + ocl_core::link_program( ctx, - dev, - &[ptx_impl, byte_il], - Some(self.build_options.as_c_str()), - ) - .0 + Some(&[dev]), + &self.build_options, + &[&main_module, &ptx_impl_prog], + None, + None, + None, + )?; } }; - Ok(l0_module?) + Ok(main_module) } } @@ -121,7 +160,9 @@ pub fn get_function( hash_map::Entry::Occupied(entry) => entry.into_mut(), hash_map::Entry::Vacant(entry) => { let new_module = CompiledModule { - base: module.spirv.compile(&mut device.l0_context, device.base)?, + base: module + .spirv + .compile(&device.ocl_context, &device.ocl_base)?, kernels: HashMap::new(), }; entry.insert(new_module) @@ -137,18 +178,42 @@ pub fn get_function( std::str::from_utf8_unchecked(entry.key().as_c_str().to_bytes()) }) .ok_or(CUresult::CUDA_ERROR_NOT_FOUND)?; - let kernel = - l0::Kernel::new_resident(&compiled_module.base, entry.key().as_c_str())?; - kernel.set_indirect_access( - l0::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE - | l0::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST - | l0::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED + let kernel = ocl_core::create_kernel( + &compiled_module.base, + &entry.key().as_c_str().to_string_lossy(), )?; + let true_b: ocl_core::ffi::cl_bool = 1; + let err = unsafe { + ocl_core::ffi::clSetKernelExecInfo( + kernel.as_ptr(), + CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, + mem::size_of::<ocl_core::ffi::cl_bool>(), + &true_b as *const _ as *const _, + ) + }; + assert_eq!(err, 0); + let err = unsafe { + ocl_core::ffi::clSetKernelExecInfo( + kernel.as_ptr(), + CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, + mem::size_of::<ocl_core::ffi::cl_bool>(), + &true_b as *const _ as *const _, + ) + }; + assert_eq!(err, 0); + let err = unsafe { + ocl_core::ffi::clSetKernelExecInfo( + kernel.as_ptr(), + CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, + mem::size_of::<ocl_core::ffi::cl_bool>(), + &true_b as *const _ as *const _, + ) + }; + assert_eq!(err, 0); entry.insert(Box::new(Function::new(FunctionData { base: kernel, arg_size: kernel_info.arguments_sizes.clone(), use_shared_mem: kernel_info.uses_shared_mem, - properties: None, legacy_args: LegacyArguments::new(), }))) } @@ -167,7 +232,7 @@ pub(crate) fn load_data(pmod: *mut *mut Module, image: *const c_void) -> Result< pub fn load_data_impl(pmod: *mut *mut Module, spirv_data: SpirvModule) -> Result<(), CUresult> { let module = GlobalState::lock_current_context(|ctx| { let device = unsafe { &mut *ctx.device }; - let l0_module = spirv_data.compile(&device.l0_context, device.base)?; + let l0_module = spirv_data.compile(&device.ocl_context, &device.ocl_base)?; let mut device_binaries = HashMap::new(); let compiled_module = CompiledModule { base: l0_module, diff --git a/zluda/src/impl/stream.rs b/zluda/src/impl/stream.rs index 1de422b..f3910d6 100644 --- a/zluda/src/impl/stream.rs +++ b/zluda/src/impl/stream.rs @@ -56,7 +56,7 @@ impl StreamData { }) } pub fn new(ctx: &mut ContextData) -> Result<Self, CUresult> { - let l0_ctx = &mut unsafe { &mut *ctx.device }.l0_context; + let l0_ctx = &mut unsafe { &mut *ctx.device }.ocl_context; let device = unsafe { &*ctx.device }.base; let synchronization_event = unsafe { &mut *ctx.device } .host_event_pool |