diff options
-rw-r--r-- | ptx/src/translate.rs | 9 | ||||
-rw-r--r-- | zluda/src/impl/context.rs | 21 | ||||
-rw-r--r-- | zluda/src/impl/device.rs | 185 | ||||
-rw-r--r-- | zluda/src/impl/function.rs | 46 | ||||
-rw-r--r-- | zluda/src/impl/memory.rs | 77 | ||||
-rw-r--r-- | zluda/src/impl/mod.rs | 18 | ||||
-rw-r--r-- | zluda/src/impl/module.rs | 20 | ||||
-rw-r--r-- | zluda/src/impl/ocl_ext.rs | 0 | ||||
-rw-r--r-- | zluda/src/impl/stream.rs | 117 |
9 files changed, 315 insertions, 178 deletions
diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 2885f52..c236438 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -415,7 +415,7 @@ impl Module { }
pub struct KernelInfo {
- pub arguments_sizes: Vec<usize>,
+ pub arguments_sizes: Vec<(usize, bool)>,
pub uses_shared_mem: bool,
}
@@ -1024,7 +1024,12 @@ fn emit_function_header<'a>( let args_lens = func_decl
.input_arguments
.iter()
- .map(|param| param.v_type.size_of())
+ .map(|param| {
+ (
+ param.v_type.size_of(),
+ matches!(param.v_type, ast::Type::Pointer(..)),
+ )
+ })
.collect();
kernel_info.insert(
name.to_string(),
diff --git a/zluda/src/impl/context.rs b/zluda/src/impl/context.rs index 18a2bd6..e8de477 100644 --- a/zluda/src/impl/context.rs +++ b/zluda/src/impl/context.rs @@ -1,5 +1,5 @@ use super::{device, stream::Stream, stream::StreamData, HasLivenessCookie, LiveCheck}; -use super::{CUresult, GlobalState}; +use super::{transmute_lifetime_mut, CUresult, GlobalState}; use crate::{cuda::CUcontext, cuda_impl}; use l0::sys::ze_result_t; use std::{cell::RefCell, num::NonZeroU32, os::raw::c_uint, ptr, sync::atomic::AtomicU32}; @@ -98,14 +98,11 @@ pub struct ContextData { impl ContextData { pub fn new( - l0_ctx: &'static l0::Context, - l0_dev: l0::Device, flags: c_uint, is_primary: bool, - host_event: (l0::Event<'static>, u64), dev: *mut device::Device, ) -> Result<Self, CUresult> { - let default_stream = StreamData::new_unitialized(l0_ctx, l0_dev, host_event)?; + let default_stream = StreamData::new_unitialized()?; Ok(ContextData { flags: AtomicU32::new(flags), device: dev, @@ -121,8 +118,15 @@ impl ContextData { impl Context { pub fn late_init(&mut self) { - let ctx_data = self.as_option_mut().unwrap(); - ctx_data.default_stream.context = ctx_data as *mut _; + let ctx_data: &'static mut _ = { + let this = self.as_option_mut().unwrap(); + let result = { unsafe { transmute_lifetime_mut(this) } }; + drop(this); + result + }; + { self.as_option_mut().unwrap() } + .default_stream + .late_init(ctx_data); } } @@ -137,11 +141,8 @@ pub fn create_v2( let mut ctx_box = GlobalState::lock_device(dev_idx, |dev| { let dev_ptr = dev as *mut _; let mut ctx_box = Box::new(LiveCheck::new(ContextData::new( - &dev.ocl_context, - dev.base, flags, false, - dev.host_event_pool.get(dev.base, &dev.ocl_context)?, dev_ptr as *mut _, )?)); ctx_box.late_init(); diff --git a/zluda/src/impl/device.rs b/zluda/src/impl/device.rs index c356bda..3b43c49 100644 --- a/zluda/src/impl/device.rs +++ b/zluda/src/impl/device.rs @@ -1,9 +1,11 @@ use super::{context, transmute_lifetime, transmute_lifetime_mut, CUresult, GlobalState}; use crate::cuda; use cuda::{CUdevice_attribute, CUuuid_st}; -use ocl_core::DeviceType; +use ocl_core::{ClDeviceIdPtr, ContextProperties, DeviceType}; use std::{ - cmp, mem, + cmp, + ffi::c_void, + mem, os::raw::{c_char, c_int, c_uint}, ptr, sync::atomic::{AtomicU32, Ordering}, @@ -22,6 +24,7 @@ pub struct Device { pub ocl_base: ocl_core::DeviceId, pub default_queue: ocl_core::CommandQueue, pub ocl_context: ocl_core::Context, + pub(crate) ocl_ext: OpenCLExtensions, pub primary_context: context::Context, properties: Option<Box<l0::sys::ze_device_properties_t>>, image_properties: Option<Box<l0::sys::ze_device_image_properties_t>>, @@ -29,19 +32,185 @@ pub struct Device { compute_properties: Option<Box<l0::sys::ze_device_compute_properties_t>>, } +type cl_mem_properties_intel = ocl_core::ffi::cl_bitfield; + +pub(crate) struct OpenCLExtensions { + pub clDeviceMemAllocINTEL: unsafe extern "system" fn( + ocl_core::ffi::cl_context, + ocl_core::ffi::cl_device_id, + *const cl_mem_properties_intel, + usize, + ocl_core::ffi::cl_uint, + *mut ocl_core::ffi::cl_int, + ) -> *mut c_void, + pub clEnqueueMemcpyINTEL: unsafe extern "system" fn( + ocl_core::ffi::cl_command_queue, + ocl_core::ffi::cl_bool, + *mut c_void, + *const c_void, + usize, + ocl_core::ffi::cl_uint, + *const ocl_core::ffi::cl_event, + *mut ocl_core::ffi::cl_event, + ) -> ocl_core::ffi::cl_int, + pub clMemBlockingFreeINTEL: + unsafe extern "system" fn(ocl_core::ffi::cl_context, *mut c_void) -> ocl_core::ffi::cl_int, + pub clEnqueueMemFillINTEL: unsafe extern "system" fn( + ocl_core::ffi::cl_command_queue, + *mut c_void, + *const c_void, + usize, + usize, + ocl_core::ffi::cl_uint, + *const ocl_core::ffi::cl_event, + *mut ocl_core::ffi::cl_event, + ) -> ocl_core::ffi::cl_int, +} + +impl OpenCLExtensions { + fn new(plat: &ocl_core::PlatformId) -> Result<Self, CUresult> { + let clDeviceMemAllocINTEL = unsafe { + ocl_core::get_extension_function_address_for_platform( + plat, + "clDeviceMemAllocINTEL", + None, + )? + }; + let clEnqueueMemcpyINTEL = unsafe { + ocl_core::get_extension_function_address_for_platform( + plat, + "clEnqueueMemcpyINTEL", + None, + )? + }; + let clMemBlockingFreeINTEL = unsafe { + ocl_core::get_extension_function_address_for_platform( + plat, + "clMemBlockingFreeINTEL", + None, + )? + }; + let clEnqueueMemFillINTEL = unsafe { + ocl_core::get_extension_function_address_for_platform( + plat, + "clEnqueueMemFillINTEL", + None, + )? + }; + Ok(Self { + clDeviceMemAllocINTEL: unsafe { mem::transmute(clDeviceMemAllocINTEL) }, + clEnqueueMemcpyINTEL: unsafe { mem::transmute(clEnqueueMemcpyINTEL) }, + clMemBlockingFreeINTEL: unsafe { mem::transmute(clMemBlockingFreeINTEL) }, + clEnqueueMemFillINTEL: unsafe { mem::transmute(clEnqueueMemFillINTEL) }, + }) + } + + pub unsafe fn device_mem_alloc( + &self, + ctx: &ocl_core::Context, + device: &ocl_core::DeviceId, + size: usize, + alignment: ocl_core::ffi::cl_uint, + ) -> Result<*mut c_void, CUresult> { + let mut error = 0; + let result = (self.clDeviceMemAllocINTEL)( + ctx.as_ptr(), + device.as_ptr(), + ptr::null(), + size, + alignment, + &mut error, + ); + if error == 0 { + Ok(result) + } else { + Err(CUresult::CUDA_ERROR_UNKNOWN) + } + } + + pub unsafe fn enqueue_memcpy( + &self, + queue: &ocl_core::CommandQueue, + blocking: bool, + dst: *mut c_void, + src: *const c_void, + size: usize, + ) -> Result<(), CUresult> { + let error = (self.clEnqueueMemcpyINTEL)( + queue.as_ptr(), + if blocking { 1 } else { 0 }, + dst, + src, + size, + 0, + ptr::null(), + ptr::null_mut(), + ); + if error == 0 { + Ok(()) + } else { + Err(CUresult::CUDA_ERROR_UNKNOWN) + } + } + + pub unsafe fn mem_blocking_free( + &self, + ctx: &ocl_core::Context, + mem_ptr: *mut c_void, + ) -> Result<(), CUresult> { + let error = (self.clMemBlockingFreeINTEL)(ctx.as_ptr(), mem_ptr); + if error == 0 { + Ok(()) + } else { + Err(CUresult::CUDA_ERROR_UNKNOWN) + } + } + + pub unsafe fn enqueue_memfill( + &self, + queue: &ocl_core::CommandQueue, + dst: *mut c_void, + pattern: *const c_void, + patternSize: usize, + size: usize, + ) -> Result<ocl_core::Event, CUresult> { + let mut signal: ocl_core::ffi::cl_event = ptr::null_mut(); + let error = (self.clEnqueueMemFillINTEL)( + queue.as_ptr(), + dst, + pattern, + patternSize, + size, + 0, + ptr::null(), + &mut signal, + ); + if error == 0 { + Ok(ocl_core::Event::from_raw(signal)) + } else { + Err(CUresult::CUDA_ERROR_UNKNOWN) + } + } +} + unsafe impl Send for Device {} impl Device { pub fn new( - drv: &l0::Driver, l0_dev: l0::Device, + platform: ocl_core::PlatformId, ocl_dev: ocl_core::DeviceId, idx: usize, ) -> Result<Self, CUresult> { - let ctx = ocl_core::create_context(None, &[ocl_dev], None, None)?; + let ocl_ext = OpenCLExtensions::new(&platform)?; + let mut props = ocl_core::ContextProperties::new(); + props.set_platform(platform); + let ctx = ocl_core::create_context(Some(&props), &[ocl_dev], None, None)?; let queue = ocl_core::create_command_queue(&ctx, ocl_dev, None)?; - let primary_context = context::Context::new(context::ContextData::new()); + let primary_context = + context::Context::new(context::ContextData::new(0, true, ptr::null_mut())?); Ok(Self { + ocl_ext, index: Index(idx as c_int), base: l0_dev, ocl_base: ocl_dev, @@ -55,6 +224,10 @@ impl Device { }) } + pub fn late_init(&mut self) { + self.primary_context.as_option_mut().unwrap().device = self as *mut _; + } + fn get_properties<'a>(&'a mut self) -> l0::Result<&'a l0::sys::ze_device_properties_t> { if let Some(ref prop) = self.properties { return Ok(prop); @@ -207,7 +380,7 @@ pub fn get_attribute( & l0::sys::ze_device_property_flags_t::ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) == l0::sys::ze_device_property_flags_t::ZE_DEVICE_PROPERTY_FLAG_INTEGRATED { - Ok(1) + Ok::<_, CUresult>(1) } else { Ok(0) } diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs index 2658d27..05f864b 100644 --- a/zluda/src/impl/function.rs +++ b/zluda/src/impl/function.rs @@ -27,7 +27,7 @@ impl HasLivenessCookie for FunctionData { pub struct FunctionData { pub base: ocl_core::Kernel, - pub arg_size: Vec<usize>, + pub arg_size: Vec<(usize, bool)>, pub use_shared_mem: bool, pub legacy_args: LegacyArguments, } @@ -73,14 +73,28 @@ pub fn launch_kernel( GlobalState::lock_enqueue(hstream, |queue| { let func: &mut FunctionData = unsafe { &mut *f }.as_result_mut()?; if kernel_params != ptr::null_mut() { - for (i, arg_size) in func.arg_size.iter().enumerate() { - unsafe { - ocl_core::set_kernel_arg( - &func.base, - i as u32, - ocl_core::ArgVal::from_raw(*arg_size, *kernel_params.add(i), false), - )?; - }; + for (i, &(arg_size, is_mem)) in func.arg_size.iter().enumerate() { + if is_mem { + let error = 0; + unsafe { + ocl_core::ffi::clSetKernelArgSVMPointer( + func.base.as_ptr(), + i as u32, + *(*kernel_params.add(i) as *const _), + ) + }; + if error != 0 { + panic!("clSetKernelArgSVMPointer"); + } + } else { + unsafe { + ocl_core::set_kernel_arg( + &func.base, + i as u32, + ocl_core::ArgVal::from_raw(arg_size, *kernel_params.add(i), is_mem), + )?; + }; + } } } else { let mut offset = 0; @@ -102,27 +116,27 @@ pub fn launch_kernel( match (buffer_size, buffer_ptr) { (Some(buffer_size), Some(buffer_ptr)) => { let sum_of_kernel_argument_sizes = - func.arg_size.iter().fold(0, |offset, size_of_arg| { - size_of_arg + round_up_to_multiple(offset, *size_of_arg) + func.arg_size.iter().fold(0, |offset, &(size_of_arg, _)| { + size_of_arg + round_up_to_multiple(offset, size_of_arg) }); if buffer_size < sum_of_kernel_argument_sizes { return Err(CUresult::CUDA_ERROR_INVALID_VALUE); } let mut offset = 0; - for (i, arg_size) in func.arg_size.iter().enumerate() { - let buffer_offset = round_up_to_multiple(offset, *arg_size); + for (i, &(arg_size, is_mem)) in func.arg_size.iter().enumerate() { + let buffer_offset = round_up_to_multiple(offset, arg_size); unsafe { ocl_core::set_kernel_arg( &func.base, i as u32, ocl_core::ArgVal::from_raw( - *arg_size, + arg_size, buffer_ptr.add(buffer_offset) as *const _, - false, + is_mem, ), )?; }; - offset = buffer_offset + *arg_size; + offset = buffer_offset + arg_size; } } _ => return Err(CUresult::CUDA_ERROR_INVALID_VALUE), diff --git a/zluda/src/impl/memory.rs b/zluda/src/impl/memory.rs index 5919690..3e96a8c 100644 --- a/zluda/src/impl/memory.rs +++ b/zluda/src/impl/memory.rs @@ -1,60 +1,77 @@ use super::{stream, CUresult, GlobalState};
-use std::{ffi::c_void, mem};
+use std::{
+ ffi::c_void,
+ mem::{self, size_of},
+};
pub fn alloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> Result<(), CUresult> {
let ptr = GlobalState::lock_current_context(|ctx| {
let dev = unsafe { &mut *ctx.device };
- Ok::<_, CUresult>(dev.ocl_context.mem_alloc_device(bytesize, 0, dev.base)?)
+ Ok::<_, CUresult>(unsafe {
+ dev.ocl_ext
+ .device_mem_alloc(&dev.ocl_context, &dev.ocl_base, bytesize, 0)?
+ })
})??;
unsafe { *dptr = ptr };
Ok(())
}
pub fn copy_v2(dst: *mut c_void, src: *const c_void, bytesize: usize) -> Result<(), CUresult> {
- GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| {
- unsafe { cmd_list.append_memory_copy_raw(dst, src, bytesize, Some(signal), wait)? };
+ GlobalState::lock_stream(stream::CU_STREAM_LEGACY, |stream_data| {
+ let dev = unsafe { &*(*stream_data.context).device };
+ let queue = stream_data.cmd_list.as_ref().unwrap();
+ unsafe {
+ dev.ocl_ext
+ .enqueue_memcpy(queue, true, dst, src, bytesize)?
+ };
Ok(())
- })
+ })?
}
pub fn free_v2(ptr: *mut c_void) -> Result<(), CUresult> {
GlobalState::lock_current_context(|ctx| {
let dev = unsafe { &mut *ctx.device };
- Ok::<_, CUresult>(dev.ocl_context.mem_free(ptr)?)
- })
- .map_err(|_| CUresult::CUDA_ERROR_INVALID_VALUE)?
+ unsafe { dev.ocl_ext.mem_blocking_free(&dev.ocl_context, ptr)? };
+ Ok(())
+ })?
}
pub(crate) fn set_d32_v2(dst: *mut c_void, mut ui: u32, n: usize) -> Result<(), CUresult> {
- GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| {
- unsafe {
- cmd_list.append_memory_fill_raw(
+ GlobalState::lock_stream(stream::CU_STREAM_LEGACY, move |stream_data| {
+ let dev = unsafe { &*(*stream_data.context).device };
+ let queue = stream_data.cmd_list.as_ref().unwrap();
+ let pattern_size = mem::size_of_val(&ui);
+ let event = unsafe {
+ dev.ocl_ext.enqueue_memfill(
+ queue,
dst,
- &mut ui as *mut _ as *mut _,
- mem::size_of::<u32>(),
- mem::size_of::<u32>() * n,
- Some(signal),
- wait,
- )
- }?;
+ &ui as *const _ as *const _,
+ pattern_size,
+ pattern_size * n,
+ )?
+ };
+ ocl_core::wait_for_event(&event)?;
Ok(())
- })
+ })?
}
pub(crate) fn set_d8_v2(dst: *mut c_void, mut uc: u8, n: usize) -> Result<(), CUresult> {
- GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| {
- unsafe {
- cmd_list.append_memory_fill_raw(
+ GlobalState::lock_stream(stream::CU_STREAM_LEGACY, move |stream_data| {
+ let dev = unsafe { &*(*stream_data.context).device };
+ let queue = stream_data.cmd_list.as_ref().unwrap();
+ let pattern_size = mem::size_of_val(&uc);
+ let event = unsafe {
+ dev.ocl_ext.enqueue_memfill(
+ queue,
dst,
- &mut uc as *mut _ as *mut _,
- mem::size_of::<u8>(),
- mem::size_of::<u8>() * n,
- Some(signal),
- wait,
- )
- }?;
+ &uc as *const _ as *const _,
+ pattern_size,
+ pattern_size * n,
+ )?
+ };
+ ocl_core::wait_for_event(&event)?;
Ok(())
- })
+ })?
}
#[cfg(test)]
diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs index d410554..4b7a761 100644 --- a/zluda/src/impl/mod.rs +++ b/zluda/src/impl/mod.rs @@ -290,15 +290,7 @@ impl GlobalState { let l0_dev = unsafe { (*(*stream_data.context).device).base }; let l0_ctx = unsafe { &mut (*(*stream_data.context).device).ocl_context }; let cmd_list = unsafe { transmute_lifetime(&stream_data.cmd_list) }; - // TODO: make new_marker drop-safe - let (new_event, new_marker) = stream_data.get_event(l0_dev, l0_ctx)?; - stream_data.try_reuse_finished_events()?; - let prev_event = stream_data.get_last_event(); - let prev_event_array = prev_event.map(|e| [e]); - let empty = []; - let prev_event_slice = prev_event_array.as_ref().map_or(&empty[..], |arr| &arr[..]); - f(cmd_list, &new_event, prev_event_slice)?; - stream_data.push_event((new_event, new_marker)); + f(&stream_data.cmd_list.as_ref().unwrap())?; Ok(()) })? } @@ -350,15 +342,19 @@ pub fn init() -> Result<(), CUresult> { }) .ok_or(CUresult::CUDA_ERROR_UNKNOWN)?; let drivers = l0::Driver::get()?; - let devices = match drivers.into_iter().find(is_intel_gpu_driver) { + let mut devices = match drivers.into_iter().find(is_intel_gpu_driver) { None => return Err(CUresult::CUDA_ERROR_UNKNOWN), Some(driver) => driver .devices()? .into_iter() .enumerate() - .map(|(idx, l0_dev)| device::Device::new(&driver, l0_dev, device, idx).unwrap()) + .map(|(idx, l0_dev)| device::Device::new(l0_dev, platform, device, idx).unwrap()) .collect::<Vec<_>>(), }; + for d in devices.iter_mut() { + d.late_init(); + d.primary_context.late_init(); + } let global_heap = unsafe { os::heap_create() }; if global_heap == ptr::null_mut() { return Err(CUresult::CUDA_ERROR_OUT_OF_MEMORY); diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs index 88d85c4..a1fa9dd 100644 --- a/zluda/src/impl/module.rs +++ b/zluda/src/impl/module.rs @@ -100,32 +100,36 @@ impl SpirvModule { ) }; let main_module = ocl_core::create_program_with_il(ctx, byte_il, None)?; - match self.should_link_ptx_impl { + let main_module = match self.should_link_ptx_impl { None => { - ocl_core::compile_program( + ocl_core::build_program( &main_module, Some(&[dev]), &self.build_options, - &[], - &[], - None, None, None, )?; + main_module } Some(ptx_impl) => { let ptx_impl_prog = ocl_core::create_program_with_il(ctx, ptx_impl, None)?; - ocl_core::build_program( + ocl_core::compile_program( &main_module, Some(&[dev]), &self.build_options, + &[], + &[], + None, None, None, )?; - ocl_core::build_program( + ocl_core::compile_program( &ptx_impl_prog, Some(&[dev]), &self.build_options, + &[], + &[], + None, None, None, )?; @@ -137,7 +141,7 @@ impl SpirvModule { None, None, None, - )?; + )? } }; Ok(main_module) diff --git a/zluda/src/impl/ocl_ext.rs b/zluda/src/impl/ocl_ext.rs new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/zluda/src/impl/ocl_ext.rs diff --git a/zluda/src/impl/stream.rs b/zluda/src/impl/stream.rs index f3910d6..0231cd8 100644 --- a/zluda/src/impl/stream.rs +++ b/zluda/src/impl/stream.rs @@ -34,118 +34,45 @@ impl HasLivenessCookie for StreamData { pub struct StreamData { pub context: *mut ContextData, // Immediate CommandList - pub cmd_list: l0::CommandList<'static>, - pub busy_events: VecDeque<(l0::Event<'static>, u64)>, - // This could be a Vec, but I'd rather reuse earliest enqueued event not the one recently enqueued - pub free_events: VecDeque<(l0::Event<'static>, u64)>, - pub synchronization_event: (l0::Event<'static>, u64), + pub cmd_list: Option<ocl_core::CommandQueue>, } impl StreamData { - pub fn new_unitialized( - ctx: &'static l0::Context, - device: l0::Device, - host_event: (l0::Event<'static>, u64), - ) -> Result<Self, CUresult> { + pub fn new_unitialized() -> Result<Self, CUresult> { Ok(StreamData { context: ptr::null_mut(), - cmd_list: l0::CommandList::new_immediate(ctx, device)?, - busy_events: VecDeque::new(), - free_events: VecDeque::new(), - synchronization_event: host_event, + cmd_list: None, }) } + pub fn new(ctx: &mut ContextData) -> Result<Self, CUresult> { - let l0_ctx = &mut unsafe { &mut *ctx.device }.ocl_context; - let device = unsafe { &*ctx.device }.base; - let synchronization_event = unsafe { &mut *ctx.device } - .host_event_pool - .get(device, l0_ctx)?; + let ocl_ctx = &unsafe { &*ctx.device }.ocl_context; + let device = unsafe { &*ctx.device }.ocl_base; Ok(StreamData { context: ctx as *mut _, - cmd_list: l0::CommandList::new_immediate(l0_ctx, device)?, - busy_events: VecDeque::new(), - free_events: VecDeque::new(), - synchronization_event, + cmd_list: Some(ocl_core::create_command_queue::< + &ocl_core::Context, + ocl_core::DeviceId, + >(ocl_ctx, device, None)?), }) } - pub fn try_reuse_finished_events(&mut self) -> l0::Result<()> { - loop { - match self.busy_events.get(0) { - None => return Ok(()), - Some((ev, _)) => { - if ev.is_ready()? { - let (ev, marker) = self.busy_events.pop_front().unwrap(); - ev.host_reset()?; - self.free_events.push_back((ev, marker)); - } else { - return Ok(()); - } - } - } - } - } - - pub fn reuse_all_finished_events(&mut self) -> l0::Result<()> { - self.free_events.reserve(self.busy_events.len()); - for (ev, marker) in self.busy_events.drain(..) { - ev.host_reset()?; - self.free_events.push_back((ev, marker)); - } - Ok(()) - } - - pub fn get_last_event(&self) -> Option<&l0::Event<'static>> { - self.busy_events.iter().next_back().map(|(ev, _)| ev) - } - - pub fn push_event(&mut self, ev: (l0::Event<'static>, u64)) { - self.busy_events.push_back(ev); + pub fn late_init(&mut self, ctx: &mut ContextData) { + let ocl_ctx = &unsafe { &*ctx.device }.ocl_context; + let device = unsafe { &*ctx.device }.ocl_base; + self.context = ctx as *mut _; + self.cmd_list = Some( + ocl_core::create_command_queue::<&ocl_core::Context, ocl_core::DeviceId>( + ocl_ctx, device, None, + ) + .unwrap(), + ); } - pub fn synchronize(&mut self) -> l0::Result<()> { - let empty = []; - let busy_event_arr = self.busy_events.back().map(|(ev, _)| [ev]); - let wait_events = busy_event_arr.as_ref().map_or(&empty[..], |arr| &arr[..]); - unsafe { - self.cmd_list - .append_barrier(Some(&self.synchronization_event.0), wait_events)? - }; - self.synchronization_event - .0 - .host_synchronize(u64::max_value())?; - self.synchronization_event.0.host_reset()?; - self.reuse_all_finished_events()?; + pub fn synchronize(&mut self) -> Result<(), CUresult> { + ocl_core::finish(self.cmd_list.as_ref().unwrap())?; Ok(()) } - - pub fn get_event( - &mut self, - l0_dev: l0::Device, - l0_ctx: &'static l0::Context, - ) -> l0::Result<(l0::Event<'static>, u64)> { - self.free_events - .pop_front() - .map(|x| Ok(x)) - .unwrap_or_else(|| { - let event_pool = unsafe { &mut (*(*self.context).device).device_event_pool }; - event_pool.get(l0_dev, l0_ctx) - }) - } -} - -impl Drop for StreamData { - fn drop(&mut self) { - if self.context == ptr::null_mut() { - return; - } - for (_, marker) in self.busy_events.iter().chain(self.free_events.iter()) { - let device_event_pool = unsafe { &mut (*(*self.context).device).device_event_pool }; - device_event_pool.mark_as_free(*marker); - } - unsafe { (&mut *self.context).streams.remove(&(&mut *self as *mut _)) }; - } } pub(crate) fn get_ctx(hstream: *mut Stream, pctx: *mut *mut Context) -> Result<(), CUresult> { |