aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--ptx/src/translate.rs9
-rw-r--r--zluda/src/impl/context.rs21
-rw-r--r--zluda/src/impl/device.rs185
-rw-r--r--zluda/src/impl/function.rs46
-rw-r--r--zluda/src/impl/memory.rs77
-rw-r--r--zluda/src/impl/mod.rs18
-rw-r--r--zluda/src/impl/module.rs20
-rw-r--r--zluda/src/impl/ocl_ext.rs0
-rw-r--r--zluda/src/impl/stream.rs117
9 files changed, 315 insertions, 178 deletions
diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs
index 2885f52..c236438 100644
--- a/ptx/src/translate.rs
+++ b/ptx/src/translate.rs
@@ -415,7 +415,7 @@ impl Module {
}
pub struct KernelInfo {
- pub arguments_sizes: Vec<usize>,
+ pub arguments_sizes: Vec<(usize, bool)>,
pub uses_shared_mem: bool,
}
@@ -1024,7 +1024,12 @@ fn emit_function_header<'a>(
let args_lens = func_decl
.input_arguments
.iter()
- .map(|param| param.v_type.size_of())
+ .map(|param| {
+ (
+ param.v_type.size_of(),
+ matches!(param.v_type, ast::Type::Pointer(..)),
+ )
+ })
.collect();
kernel_info.insert(
name.to_string(),
diff --git a/zluda/src/impl/context.rs b/zluda/src/impl/context.rs
index 18a2bd6..e8de477 100644
--- a/zluda/src/impl/context.rs
+++ b/zluda/src/impl/context.rs
@@ -1,5 +1,5 @@
use super::{device, stream::Stream, stream::StreamData, HasLivenessCookie, LiveCheck};
-use super::{CUresult, GlobalState};
+use super::{transmute_lifetime_mut, CUresult, GlobalState};
use crate::{cuda::CUcontext, cuda_impl};
use l0::sys::ze_result_t;
use std::{cell::RefCell, num::NonZeroU32, os::raw::c_uint, ptr, sync::atomic::AtomicU32};
@@ -98,14 +98,11 @@ pub struct ContextData {
impl ContextData {
pub fn new(
- l0_ctx: &'static l0::Context,
- l0_dev: l0::Device,
flags: c_uint,
is_primary: bool,
- host_event: (l0::Event<'static>, u64),
dev: *mut device::Device,
) -> Result<Self, CUresult> {
- let default_stream = StreamData::new_unitialized(l0_ctx, l0_dev, host_event)?;
+ let default_stream = StreamData::new_unitialized()?;
Ok(ContextData {
flags: AtomicU32::new(flags),
device: dev,
@@ -121,8 +118,15 @@ impl ContextData {
impl Context {
pub fn late_init(&mut self) {
- let ctx_data = self.as_option_mut().unwrap();
- ctx_data.default_stream.context = ctx_data as *mut _;
+ let ctx_data: &'static mut _ = {
+ let this = self.as_option_mut().unwrap();
+ let result = { unsafe { transmute_lifetime_mut(this) } };
+ drop(this);
+ result
+ };
+ { self.as_option_mut().unwrap() }
+ .default_stream
+ .late_init(ctx_data);
}
}
@@ -137,11 +141,8 @@ pub fn create_v2(
let mut ctx_box = GlobalState::lock_device(dev_idx, |dev| {
let dev_ptr = dev as *mut _;
let mut ctx_box = Box::new(LiveCheck::new(ContextData::new(
- &dev.ocl_context,
- dev.base,
flags,
false,
- dev.host_event_pool.get(dev.base, &dev.ocl_context)?,
dev_ptr as *mut _,
)?));
ctx_box.late_init();
diff --git a/zluda/src/impl/device.rs b/zluda/src/impl/device.rs
index c356bda..3b43c49 100644
--- a/zluda/src/impl/device.rs
+++ b/zluda/src/impl/device.rs
@@ -1,9 +1,11 @@
use super::{context, transmute_lifetime, transmute_lifetime_mut, CUresult, GlobalState};
use crate::cuda;
use cuda::{CUdevice_attribute, CUuuid_st};
-use ocl_core::DeviceType;
+use ocl_core::{ClDeviceIdPtr, ContextProperties, DeviceType};
use std::{
- cmp, mem,
+ cmp,
+ ffi::c_void,
+ mem,
os::raw::{c_char, c_int, c_uint},
ptr,
sync::atomic::{AtomicU32, Ordering},
@@ -22,6 +24,7 @@ pub struct Device {
pub ocl_base: ocl_core::DeviceId,
pub default_queue: ocl_core::CommandQueue,
pub ocl_context: ocl_core::Context,
+ pub(crate) ocl_ext: OpenCLExtensions,
pub primary_context: context::Context,
properties: Option<Box<l0::sys::ze_device_properties_t>>,
image_properties: Option<Box<l0::sys::ze_device_image_properties_t>>,
@@ -29,19 +32,185 @@ pub struct Device {
compute_properties: Option<Box<l0::sys::ze_device_compute_properties_t>>,
}
+type cl_mem_properties_intel = ocl_core::ffi::cl_bitfield;
+
+pub(crate) struct OpenCLExtensions {
+ pub clDeviceMemAllocINTEL: unsafe extern "system" fn(
+ ocl_core::ffi::cl_context,
+ ocl_core::ffi::cl_device_id,
+ *const cl_mem_properties_intel,
+ usize,
+ ocl_core::ffi::cl_uint,
+ *mut ocl_core::ffi::cl_int,
+ ) -> *mut c_void,
+ pub clEnqueueMemcpyINTEL: unsafe extern "system" fn(
+ ocl_core::ffi::cl_command_queue,
+ ocl_core::ffi::cl_bool,
+ *mut c_void,
+ *const c_void,
+ usize,
+ ocl_core::ffi::cl_uint,
+ *const ocl_core::ffi::cl_event,
+ *mut ocl_core::ffi::cl_event,
+ ) -> ocl_core::ffi::cl_int,
+ pub clMemBlockingFreeINTEL:
+ unsafe extern "system" fn(ocl_core::ffi::cl_context, *mut c_void) -> ocl_core::ffi::cl_int,
+ pub clEnqueueMemFillINTEL: unsafe extern "system" fn(
+ ocl_core::ffi::cl_command_queue,
+ *mut c_void,
+ *const c_void,
+ usize,
+ usize,
+ ocl_core::ffi::cl_uint,
+ *const ocl_core::ffi::cl_event,
+ *mut ocl_core::ffi::cl_event,
+ ) -> ocl_core::ffi::cl_int,
+}
+
+impl OpenCLExtensions {
+ fn new(plat: &ocl_core::PlatformId) -> Result<Self, CUresult> {
+ let clDeviceMemAllocINTEL = unsafe {
+ ocl_core::get_extension_function_address_for_platform(
+ plat,
+ "clDeviceMemAllocINTEL",
+ None,
+ )?
+ };
+ let clEnqueueMemcpyINTEL = unsafe {
+ ocl_core::get_extension_function_address_for_platform(
+ plat,
+ "clEnqueueMemcpyINTEL",
+ None,
+ )?
+ };
+ let clMemBlockingFreeINTEL = unsafe {
+ ocl_core::get_extension_function_address_for_platform(
+ plat,
+ "clMemBlockingFreeINTEL",
+ None,
+ )?
+ };
+ let clEnqueueMemFillINTEL = unsafe {
+ ocl_core::get_extension_function_address_for_platform(
+ plat,
+ "clEnqueueMemFillINTEL",
+ None,
+ )?
+ };
+ Ok(Self {
+ clDeviceMemAllocINTEL: unsafe { mem::transmute(clDeviceMemAllocINTEL) },
+ clEnqueueMemcpyINTEL: unsafe { mem::transmute(clEnqueueMemcpyINTEL) },
+ clMemBlockingFreeINTEL: unsafe { mem::transmute(clMemBlockingFreeINTEL) },
+ clEnqueueMemFillINTEL: unsafe { mem::transmute(clEnqueueMemFillINTEL) },
+ })
+ }
+
+ pub unsafe fn device_mem_alloc(
+ &self,
+ ctx: &ocl_core::Context,
+ device: &ocl_core::DeviceId,
+ size: usize,
+ alignment: ocl_core::ffi::cl_uint,
+ ) -> Result<*mut c_void, CUresult> {
+ let mut error = 0;
+ let result = (self.clDeviceMemAllocINTEL)(
+ ctx.as_ptr(),
+ device.as_ptr(),
+ ptr::null(),
+ size,
+ alignment,
+ &mut error,
+ );
+ if error == 0 {
+ Ok(result)
+ } else {
+ Err(CUresult::CUDA_ERROR_UNKNOWN)
+ }
+ }
+
+ pub unsafe fn enqueue_memcpy(
+ &self,
+ queue: &ocl_core::CommandQueue,
+ blocking: bool,
+ dst: *mut c_void,
+ src: *const c_void,
+ size: usize,
+ ) -> Result<(), CUresult> {
+ let error = (self.clEnqueueMemcpyINTEL)(
+ queue.as_ptr(),
+ if blocking { 1 } else { 0 },
+ dst,
+ src,
+ size,
+ 0,
+ ptr::null(),
+ ptr::null_mut(),
+ );
+ if error == 0 {
+ Ok(())
+ } else {
+ Err(CUresult::CUDA_ERROR_UNKNOWN)
+ }
+ }
+
+ pub unsafe fn mem_blocking_free(
+ &self,
+ ctx: &ocl_core::Context,
+ mem_ptr: *mut c_void,
+ ) -> Result<(), CUresult> {
+ let error = (self.clMemBlockingFreeINTEL)(ctx.as_ptr(), mem_ptr);
+ if error == 0 {
+ Ok(())
+ } else {
+ Err(CUresult::CUDA_ERROR_UNKNOWN)
+ }
+ }
+
+ pub unsafe fn enqueue_memfill(
+ &self,
+ queue: &ocl_core::CommandQueue,
+ dst: *mut c_void,
+ pattern: *const c_void,
+ patternSize: usize,
+ size: usize,
+ ) -> Result<ocl_core::Event, CUresult> {
+ let mut signal: ocl_core::ffi::cl_event = ptr::null_mut();
+ let error = (self.clEnqueueMemFillINTEL)(
+ queue.as_ptr(),
+ dst,
+ pattern,
+ patternSize,
+ size,
+ 0,
+ ptr::null(),
+ &mut signal,
+ );
+ if error == 0 {
+ Ok(ocl_core::Event::from_raw(signal))
+ } else {
+ Err(CUresult::CUDA_ERROR_UNKNOWN)
+ }
+ }
+}
+
unsafe impl Send for Device {}
impl Device {
pub fn new(
- drv: &l0::Driver,
l0_dev: l0::Device,
+ platform: ocl_core::PlatformId,
ocl_dev: ocl_core::DeviceId,
idx: usize,
) -> Result<Self, CUresult> {
- let ctx = ocl_core::create_context(None, &[ocl_dev], None, None)?;
+ let ocl_ext = OpenCLExtensions::new(&platform)?;
+ let mut props = ocl_core::ContextProperties::new();
+ props.set_platform(platform);
+ let ctx = ocl_core::create_context(Some(&props), &[ocl_dev], None, None)?;
let queue = ocl_core::create_command_queue(&ctx, ocl_dev, None)?;
- let primary_context = context::Context::new(context::ContextData::new());
+ let primary_context =
+ context::Context::new(context::ContextData::new(0, true, ptr::null_mut())?);
Ok(Self {
+ ocl_ext,
index: Index(idx as c_int),
base: l0_dev,
ocl_base: ocl_dev,
@@ -55,6 +224,10 @@ impl Device {
})
}
+ pub fn late_init(&mut self) {
+ self.primary_context.as_option_mut().unwrap().device = self as *mut _;
+ }
+
fn get_properties<'a>(&'a mut self) -> l0::Result<&'a l0::sys::ze_device_properties_t> {
if let Some(ref prop) = self.properties {
return Ok(prop);
@@ -207,7 +380,7 @@ pub fn get_attribute(
& l0::sys::ze_device_property_flags_t::ZE_DEVICE_PROPERTY_FLAG_INTEGRATED)
== l0::sys::ze_device_property_flags_t::ZE_DEVICE_PROPERTY_FLAG_INTEGRATED
{
- Ok(1)
+ Ok::<_, CUresult>(1)
} else {
Ok(0)
}
diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs
index 2658d27..05f864b 100644
--- a/zluda/src/impl/function.rs
+++ b/zluda/src/impl/function.rs
@@ -27,7 +27,7 @@ impl HasLivenessCookie for FunctionData {
pub struct FunctionData {
pub base: ocl_core::Kernel,
- pub arg_size: Vec<usize>,
+ pub arg_size: Vec<(usize, bool)>,
pub use_shared_mem: bool,
pub legacy_args: LegacyArguments,
}
@@ -73,14 +73,28 @@ pub fn launch_kernel(
GlobalState::lock_enqueue(hstream, |queue| {
let func: &mut FunctionData = unsafe { &mut *f }.as_result_mut()?;
if kernel_params != ptr::null_mut() {
- for (i, arg_size) in func.arg_size.iter().enumerate() {
- unsafe {
- ocl_core::set_kernel_arg(
- &func.base,
- i as u32,
- ocl_core::ArgVal::from_raw(*arg_size, *kernel_params.add(i), false),
- )?;
- };
+ for (i, &(arg_size, is_mem)) in func.arg_size.iter().enumerate() {
+ if is_mem {
+ let error = 0;
+ unsafe {
+ ocl_core::ffi::clSetKernelArgSVMPointer(
+ func.base.as_ptr(),
+ i as u32,
+ *(*kernel_params.add(i) as *const _),
+ )
+ };
+ if error != 0 {
+ panic!("clSetKernelArgSVMPointer");
+ }
+ } else {
+ unsafe {
+ ocl_core::set_kernel_arg(
+ &func.base,
+ i as u32,
+ ocl_core::ArgVal::from_raw(arg_size, *kernel_params.add(i), is_mem),
+ )?;
+ };
+ }
}
} else {
let mut offset = 0;
@@ -102,27 +116,27 @@ pub fn launch_kernel(
match (buffer_size, buffer_ptr) {
(Some(buffer_size), Some(buffer_ptr)) => {
let sum_of_kernel_argument_sizes =
- func.arg_size.iter().fold(0, |offset, size_of_arg| {
- size_of_arg + round_up_to_multiple(offset, *size_of_arg)
+ func.arg_size.iter().fold(0, |offset, &(size_of_arg, _)| {
+ size_of_arg + round_up_to_multiple(offset, size_of_arg)
});
if buffer_size < sum_of_kernel_argument_sizes {
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
}
let mut offset = 0;
- for (i, arg_size) in func.arg_size.iter().enumerate() {
- let buffer_offset = round_up_to_multiple(offset, *arg_size);
+ for (i, &(arg_size, is_mem)) in func.arg_size.iter().enumerate() {
+ let buffer_offset = round_up_to_multiple(offset, arg_size);
unsafe {
ocl_core::set_kernel_arg(
&func.base,
i as u32,
ocl_core::ArgVal::from_raw(
- *arg_size,
+ arg_size,
buffer_ptr.add(buffer_offset) as *const _,
- false,
+ is_mem,
),
)?;
};
- offset = buffer_offset + *arg_size;
+ offset = buffer_offset + arg_size;
}
}
_ => return Err(CUresult::CUDA_ERROR_INVALID_VALUE),
diff --git a/zluda/src/impl/memory.rs b/zluda/src/impl/memory.rs
index 5919690..3e96a8c 100644
--- a/zluda/src/impl/memory.rs
+++ b/zluda/src/impl/memory.rs
@@ -1,60 +1,77 @@
use super::{stream, CUresult, GlobalState};
-use std::{ffi::c_void, mem};
+use std::{
+ ffi::c_void,
+ mem::{self, size_of},
+};
pub fn alloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> Result<(), CUresult> {
let ptr = GlobalState::lock_current_context(|ctx| {
let dev = unsafe { &mut *ctx.device };
- Ok::<_, CUresult>(dev.ocl_context.mem_alloc_device(bytesize, 0, dev.base)?)
+ Ok::<_, CUresult>(unsafe {
+ dev.ocl_ext
+ .device_mem_alloc(&dev.ocl_context, &dev.ocl_base, bytesize, 0)?
+ })
})??;
unsafe { *dptr = ptr };
Ok(())
}
pub fn copy_v2(dst: *mut c_void, src: *const c_void, bytesize: usize) -> Result<(), CUresult> {
- GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| {
- unsafe { cmd_list.append_memory_copy_raw(dst, src, bytesize, Some(signal), wait)? };
+ GlobalState::lock_stream(stream::CU_STREAM_LEGACY, |stream_data| {
+ let dev = unsafe { &*(*stream_data.context).device };
+ let queue = stream_data.cmd_list.as_ref().unwrap();
+ unsafe {
+ dev.ocl_ext
+ .enqueue_memcpy(queue, true, dst, src, bytesize)?
+ };
Ok(())
- })
+ })?
}
pub fn free_v2(ptr: *mut c_void) -> Result<(), CUresult> {
GlobalState::lock_current_context(|ctx| {
let dev = unsafe { &mut *ctx.device };
- Ok::<_, CUresult>(dev.ocl_context.mem_free(ptr)?)
- })
- .map_err(|_| CUresult::CUDA_ERROR_INVALID_VALUE)?
+ unsafe { dev.ocl_ext.mem_blocking_free(&dev.ocl_context, ptr)? };
+ Ok(())
+ })?
}
pub(crate) fn set_d32_v2(dst: *mut c_void, mut ui: u32, n: usize) -> Result<(), CUresult> {
- GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| {
- unsafe {
- cmd_list.append_memory_fill_raw(
+ GlobalState::lock_stream(stream::CU_STREAM_LEGACY, move |stream_data| {
+ let dev = unsafe { &*(*stream_data.context).device };
+ let queue = stream_data.cmd_list.as_ref().unwrap();
+ let pattern_size = mem::size_of_val(&ui);
+ let event = unsafe {
+ dev.ocl_ext.enqueue_memfill(
+ queue,
dst,
- &mut ui as *mut _ as *mut _,
- mem::size_of::<u32>(),
- mem::size_of::<u32>() * n,
- Some(signal),
- wait,
- )
- }?;
+ &ui as *const _ as *const _,
+ pattern_size,
+ pattern_size * n,
+ )?
+ };
+ ocl_core::wait_for_event(&event)?;
Ok(())
- })
+ })?
}
pub(crate) fn set_d8_v2(dst: *mut c_void, mut uc: u8, n: usize) -> Result<(), CUresult> {
- GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| {
- unsafe {
- cmd_list.append_memory_fill_raw(
+ GlobalState::lock_stream(stream::CU_STREAM_LEGACY, move |stream_data| {
+ let dev = unsafe { &*(*stream_data.context).device };
+ let queue = stream_data.cmd_list.as_ref().unwrap();
+ let pattern_size = mem::size_of_val(&uc);
+ let event = unsafe {
+ dev.ocl_ext.enqueue_memfill(
+ queue,
dst,
- &mut uc as *mut _ as *mut _,
- mem::size_of::<u8>(),
- mem::size_of::<u8>() * n,
- Some(signal),
- wait,
- )
- }?;
+ &uc as *const _ as *const _,
+ pattern_size,
+ pattern_size * n,
+ )?
+ };
+ ocl_core::wait_for_event(&event)?;
Ok(())
- })
+ })?
}
#[cfg(test)]
diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs
index d410554..4b7a761 100644
--- a/zluda/src/impl/mod.rs
+++ b/zluda/src/impl/mod.rs
@@ -290,15 +290,7 @@ impl GlobalState {
let l0_dev = unsafe { (*(*stream_data.context).device).base };
let l0_ctx = unsafe { &mut (*(*stream_data.context).device).ocl_context };
let cmd_list = unsafe { transmute_lifetime(&stream_data.cmd_list) };
- // TODO: make new_marker drop-safe
- let (new_event, new_marker) = stream_data.get_event(l0_dev, l0_ctx)?;
- stream_data.try_reuse_finished_events()?;
- let prev_event = stream_data.get_last_event();
- let prev_event_array = prev_event.map(|e| [e]);
- let empty = [];
- let prev_event_slice = prev_event_array.as_ref().map_or(&empty[..], |arr| &arr[..]);
- f(cmd_list, &new_event, prev_event_slice)?;
- stream_data.push_event((new_event, new_marker));
+ f(&stream_data.cmd_list.as_ref().unwrap())?;
Ok(())
})?
}
@@ -350,15 +342,19 @@ pub fn init() -> Result<(), CUresult> {
})
.ok_or(CUresult::CUDA_ERROR_UNKNOWN)?;
let drivers = l0::Driver::get()?;
- let devices = match drivers.into_iter().find(is_intel_gpu_driver) {
+ let mut devices = match drivers.into_iter().find(is_intel_gpu_driver) {
None => return Err(CUresult::CUDA_ERROR_UNKNOWN),
Some(driver) => driver
.devices()?
.into_iter()
.enumerate()
- .map(|(idx, l0_dev)| device::Device::new(&driver, l0_dev, device, idx).unwrap())
+ .map(|(idx, l0_dev)| device::Device::new(l0_dev, platform, device, idx).unwrap())
.collect::<Vec<_>>(),
};
+ for d in devices.iter_mut() {
+ d.late_init();
+ d.primary_context.late_init();
+ }
let global_heap = unsafe { os::heap_create() };
if global_heap == ptr::null_mut() {
return Err(CUresult::CUDA_ERROR_OUT_OF_MEMORY);
diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs
index 88d85c4..a1fa9dd 100644
--- a/zluda/src/impl/module.rs
+++ b/zluda/src/impl/module.rs
@@ -100,32 +100,36 @@ impl SpirvModule {
)
};
let main_module = ocl_core::create_program_with_il(ctx, byte_il, None)?;
- match self.should_link_ptx_impl {
+ let main_module = match self.should_link_ptx_impl {
None => {
- ocl_core::compile_program(
+ ocl_core::build_program(
&main_module,
Some(&[dev]),
&self.build_options,
- &[],
- &[],
- None,
None,
None,
)?;
+ main_module
}
Some(ptx_impl) => {
let ptx_impl_prog = ocl_core::create_program_with_il(ctx, ptx_impl, None)?;
- ocl_core::build_program(
+ ocl_core::compile_program(
&main_module,
Some(&[dev]),
&self.build_options,
+ &[],
+ &[],
+ None,
None,
None,
)?;
- ocl_core::build_program(
+ ocl_core::compile_program(
&ptx_impl_prog,
Some(&[dev]),
&self.build_options,
+ &[],
+ &[],
+ None,
None,
None,
)?;
@@ -137,7 +141,7 @@ impl SpirvModule {
None,
None,
None,
- )?;
+ )?
}
};
Ok(main_module)
diff --git a/zluda/src/impl/ocl_ext.rs b/zluda/src/impl/ocl_ext.rs
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/zluda/src/impl/ocl_ext.rs
diff --git a/zluda/src/impl/stream.rs b/zluda/src/impl/stream.rs
index f3910d6..0231cd8 100644
--- a/zluda/src/impl/stream.rs
+++ b/zluda/src/impl/stream.rs
@@ -34,118 +34,45 @@ impl HasLivenessCookie for StreamData {
pub struct StreamData {
pub context: *mut ContextData,
// Immediate CommandList
- pub cmd_list: l0::CommandList<'static>,
- pub busy_events: VecDeque<(l0::Event<'static>, u64)>,
- // This could be a Vec, but I'd rather reuse earliest enqueued event not the one recently enqueued
- pub free_events: VecDeque<(l0::Event<'static>, u64)>,
- pub synchronization_event: (l0::Event<'static>, u64),
+ pub cmd_list: Option<ocl_core::CommandQueue>,
}
impl StreamData {
- pub fn new_unitialized(
- ctx: &'static l0::Context,
- device: l0::Device,
- host_event: (l0::Event<'static>, u64),
- ) -> Result<Self, CUresult> {
+ pub fn new_unitialized() -> Result<Self, CUresult> {
Ok(StreamData {
context: ptr::null_mut(),
- cmd_list: l0::CommandList::new_immediate(ctx, device)?,
- busy_events: VecDeque::new(),
- free_events: VecDeque::new(),
- synchronization_event: host_event,
+ cmd_list: None,
})
}
+
pub fn new(ctx: &mut ContextData) -> Result<Self, CUresult> {
- let l0_ctx = &mut unsafe { &mut *ctx.device }.ocl_context;
- let device = unsafe { &*ctx.device }.base;
- let synchronization_event = unsafe { &mut *ctx.device }
- .host_event_pool
- .get(device, l0_ctx)?;
+ let ocl_ctx = &unsafe { &*ctx.device }.ocl_context;
+ let device = unsafe { &*ctx.device }.ocl_base;
Ok(StreamData {
context: ctx as *mut _,
- cmd_list: l0::CommandList::new_immediate(l0_ctx, device)?,
- busy_events: VecDeque::new(),
- free_events: VecDeque::new(),
- synchronization_event,
+ cmd_list: Some(ocl_core::create_command_queue::<
+ &ocl_core::Context,
+ ocl_core::DeviceId,
+ >(ocl_ctx, device, None)?),
})
}
- pub fn try_reuse_finished_events(&mut self) -> l0::Result<()> {
- loop {
- match self.busy_events.get(0) {
- None => return Ok(()),
- Some((ev, _)) => {
- if ev.is_ready()? {
- let (ev, marker) = self.busy_events.pop_front().unwrap();
- ev.host_reset()?;
- self.free_events.push_back((ev, marker));
- } else {
- return Ok(());
- }
- }
- }
- }
- }
-
- pub fn reuse_all_finished_events(&mut self) -> l0::Result<()> {
- self.free_events.reserve(self.busy_events.len());
- for (ev, marker) in self.busy_events.drain(..) {
- ev.host_reset()?;
- self.free_events.push_back((ev, marker));
- }
- Ok(())
- }
-
- pub fn get_last_event(&self) -> Option<&l0::Event<'static>> {
- self.busy_events.iter().next_back().map(|(ev, _)| ev)
- }
-
- pub fn push_event(&mut self, ev: (l0::Event<'static>, u64)) {
- self.busy_events.push_back(ev);
+ pub fn late_init(&mut self, ctx: &mut ContextData) {
+ let ocl_ctx = &unsafe { &*ctx.device }.ocl_context;
+ let device = unsafe { &*ctx.device }.ocl_base;
+ self.context = ctx as *mut _;
+ self.cmd_list = Some(
+ ocl_core::create_command_queue::<&ocl_core::Context, ocl_core::DeviceId>(
+ ocl_ctx, device, None,
+ )
+ .unwrap(),
+ );
}
- pub fn synchronize(&mut self) -> l0::Result<()> {
- let empty = [];
- let busy_event_arr = self.busy_events.back().map(|(ev, _)| [ev]);
- let wait_events = busy_event_arr.as_ref().map_or(&empty[..], |arr| &arr[..]);
- unsafe {
- self.cmd_list
- .append_barrier(Some(&self.synchronization_event.0), wait_events)?
- };
- self.synchronization_event
- .0
- .host_synchronize(u64::max_value())?;
- self.synchronization_event.0.host_reset()?;
- self.reuse_all_finished_events()?;
+ pub fn synchronize(&mut self) -> Result<(), CUresult> {
+ ocl_core::finish(self.cmd_list.as_ref().unwrap())?;
Ok(())
}
-
- pub fn get_event(
- &mut self,
- l0_dev: l0::Device,
- l0_ctx: &'static l0::Context,
- ) -> l0::Result<(l0::Event<'static>, u64)> {
- self.free_events
- .pop_front()
- .map(|x| Ok(x))
- .unwrap_or_else(|| {
- let event_pool = unsafe { &mut (*(*self.context).device).device_event_pool };
- event_pool.get(l0_dev, l0_ctx)
- })
- }
-}
-
-impl Drop for StreamData {
- fn drop(&mut self) {
- if self.context == ptr::null_mut() {
- return;
- }
- for (_, marker) in self.busy_events.iter().chain(self.free_events.iter()) {
- let device_event_pool = unsafe { &mut (*(*self.context).device).device_event_pool };
- device_event_pool.mark_as_free(*marker);
- }
- unsafe { (&mut *self.context).streams.remove(&(&mut *self as *mut _)) };
- }
}
pub(crate) fn get_ctx(hstream: *mut Stream, pctx: *mut *mut Context) -> Result<(), CUresult> {