about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Andrzej Janik <[email protected]> 2021-08-04 19:34:56 +0200
committer Andrzej Janik <[email protected]> 2021-08-04 19:34:56 +0200
commit becda3152408de759f81fe82f629e4c53c1f551d (patch)
tree a82abee2723de031b58969b182e3883e1ab5ece4
parent 638786b0ec179fcd7dde985cba5c8257a07a9c19 (diff)
download ZLUDA-becda3152408de759f81fe82f629e4c53c1f551d.tar.gz
ZLUDA-becda3152408de759f81fe82f629e4c53c1f551d.zip
Convert OpenCL host code to SVM
-rw-r--r-- zluda/src/impl/device.rs 167
-rw-r--r-- zluda/src/impl/function.rs 18
-rw-r--r-- zluda/src/impl/memory.rs 81
3 files changed, 77 insertions, 189 deletions
diff --git a/zluda/src/impl/device.rs b/zluda/src/impl/device.rs
index e886eb9..16ff41b 100644
--- a/zluda/src/impl/device.rs
+++ b/zluda/src/impl/device.rs
@@ -4,6 +4,7 @@ use cuda::{CUdevice_attribute, CUuuid_st};
use ocl_core::{ClDeviceIdPtr, ContextProperties, DeviceType};
use std::{
cmp,
+ collections::HashSet,
ffi::c_void,
mem,
os::raw::{c_char, c_int, c_uint},
@@ -24,175 +25,14 @@ pub struct Device {
pub ocl_base: ocl_core::DeviceId,
pub default_queue: ocl_core::CommandQueue,
pub ocl_context: ocl_core::Context,
- pub(crate) ocl_ext: OpenCLExtensions,
pub primary_context: context::Context,
+ pub allocations: HashSet<*mut c_void>,
properties: Option<Box<l0::sys::ze_device_properties_t>>,
image_properties: Option<Box<l0::sys::ze_device_image_properties_t>>,
memory_properties: Option<Vec<l0::sys::ze_device_memory_properties_t>>,
compute_properties: Option<Box<l0::sys::ze_device_compute_properties_t>>,
}
-type cl_mem_properties_intel = ocl_core::ffi::cl_bitfield;
-
-pub(crate) struct OpenCLExtensions {
- pub clDeviceMemAllocINTEL: unsafe extern "system" fn(
- ocl_core::ffi::cl_context,
- ocl_core::ffi::cl_device_id,
- *const cl_mem_properties_intel,
- usize,
- ocl_core::ffi::cl_uint,
- *mut ocl_core::ffi::cl_int,
- ) -> *mut c_void,
- pub clEnqueueMemcpyINTEL: unsafe extern "system" fn(
- ocl_core::ffi::cl_command_queue,
- ocl_core::ffi::cl_bool,
- *mut c_void,
- *const c_void,
- usize,
- ocl_core::ffi::cl_uint,
- *const ocl_core::ffi::cl_event,
- *mut ocl_core::ffi::cl_event,
- ) -> ocl_core::ffi::cl_int,
- pub clMemBlockingFreeINTEL:
- unsafe extern "system" fn(ocl_core::ffi::cl_context, *mut c_void) -> ocl_core::ffi::cl_int,
- pub clEnqueueMemFillINTEL: unsafe extern "system" fn(
- ocl_core::ffi::cl_command_queue,
- *mut c_void,
- *const c_void,
- usize,
- usize,
- ocl_core::ffi::cl_uint,
- *const ocl_core::ffi::cl_event,
- *mut ocl_core::ffi::cl_event,
- ) -> ocl_core::ffi::cl_int,
-}
-
-impl OpenCLExtensions {
- fn new(plat: &ocl_core::PlatformId) -> Result<Self, CUresult> {
- let clDeviceMemAllocINTEL = unsafe {
- ocl_core::get_extension_function_address_for_platform(
- plat,
- "clDeviceMemAllocINTEL",
- None,
- )?
- };
- let clEnqueueMemcpyINTEL = unsafe {
- ocl_core::get_extension_function_address_for_platform(
- plat,
- "clEnqueueMemcpyINTEL",
- None,
- )?
- };
- let clMemBlockingFreeINTEL = unsafe {
- ocl_core::get_extension_function_address_for_platform(
- plat,
- "clMemBlockingFreeINTEL",
- None,
- )?
- };
- let clEnqueueMemFillINTEL = unsafe {
- ocl_core::get_extension_function_address_for_platform(
- plat,
- "clEnqueueMemFillINTEL",
- None,
- )?
- };
- Ok(Self {
- clDeviceMemAllocINTEL: unsafe { mem::transmute(clDeviceMemAllocINTEL) },
- clEnqueueMemcpyINTEL: unsafe { mem::transmute(clEnqueueMemcpyINTEL) },
- clMemBlockingFreeINTEL: unsafe { mem::transmute(clMemBlockingFreeINTEL) },
- clEnqueueMemFillINTEL: unsafe { mem::transmute(clEnqueueMemFillINTEL) },
- })
- }
-
- pub unsafe fn device_mem_alloc(
- &self,
- ctx: &ocl_core::Context,
- device: &ocl_core::DeviceId,
- size: usize,
- alignment: ocl_core::ffi::cl_uint,
- ) -> Result<*mut c_void, CUresult> {
- let mut error = 0;
- let result = (self.clDeviceMemAllocINTEL)(
- ctx.as_ptr(),
- device.as_ptr(),
- ptr::null(),
- size,
- alignment,
- &mut error,
- );
- if error == 0 {
- Ok(result)
- } else {
- Err(CUresult::CUDA_ERROR_UNKNOWN)
- }
- }
-
- pub unsafe fn enqueue_memcpy(
- &self,
- queue: &ocl_core::CommandQueue,
- blocking: bool,
- dst: *mut c_void,
- src: *const c_void,
- size: usize,
- ) -> Result<(), CUresult> {
- let error = (self.clEnqueueMemcpyINTEL)(
- queue.as_ptr(),
- if blocking { 1 } else { 0 },
- dst,
- src,
- size,
- 0,
- ptr::null(),
- ptr::null_mut(),
- );
- if error == 0 {
- Ok(())
- } else {
- Err(CUresult::CUDA_ERROR_UNKNOWN)
- }
- }
-
- pub unsafe fn mem_blocking_free(
- &self,
- ctx: &ocl_core::Context,
- mem_ptr: *mut c_void,
- ) -> Result<(), CUresult> {
- let error = (self.clMemBlockingFreeINTEL)(ctx.as_ptr(), mem_ptr);
- if error == 0 {
- Ok(())
- } else {
- Err(CUresult::CUDA_ERROR_UNKNOWN)
- }
- }
-
- pub unsafe fn enqueue_memfill(
- &self,
- queue: &ocl_core::CommandQueue,
- dst: *mut c_void,
- pattern: *const c_void,
- patternSize: usize,
- size: usize,
- ) -> Result<ocl_core::Event, CUresult> {
- let mut signal: ocl_core::ffi::cl_event = ptr::null_mut();
- let error = (self.clEnqueueMemFillINTEL)(
- queue.as_ptr(),
- dst,
- pattern,
- patternSize,
- size,
- 0,
- ptr::null(),
- &mut signal,
- );
- if error == 0 {
- Ok(ocl_core::Event::from_raw(signal))
- } else {
- Err(CUresult::CUDA_ERROR_UNKNOWN)
- }
- }
-}
-
unsafe impl Send for Device {}
impl Device {
@@ -202,7 +42,6 @@ impl Device {
ocl_dev: ocl_core::DeviceId,
idx: usize,
) -> Result<Self, CUresult> {
- let ocl_ext = OpenCLExtensions::new(&platform)?;
let mut props = ocl_core::ContextProperties::new();
props.set_platform(platform);
let ctx = ocl_core::create_context(Some(&props), &[ocl_dev], None, None)?;
@@ -210,13 +49,13 @@ impl Device {
let primary_context =
context::Context::new(context::ContextData::new(0, true, ptr::null_mut())?);
Ok(Self {
- ocl_ext,
index: Index(idx as c_int),
base: l0_dev,
ocl_base: ocl_dev,
default_queue: queue,
ocl_context: ctx,
primary_context,
+ allocations: HashSet::new(),
properties: None,
image_properties: None,
memory_properties: None,
diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs
index 548936f..a438699 100644
--- a/zluda/src/impl/function.rs
+++ b/zluda/src/impl/function.rs
@@ -3,7 +3,7 @@ use ocl_core::DeviceId;
use super::{stream::Stream, CUresult, GlobalState, HasLivenessCookie, LiveCheck};
use crate::cuda::CUfunction_attribute;
use ::std::os::raw::{c_uint, c_void};
-use std::{hint, ptr};
+use std::{hint, mem, ptr};
const CU_LAUNCH_PARAM_END: *mut c_void = 0 as *mut _;
const CU_LAUNCH_PARAM_BUFFER_POINTER: *mut c_void = 1 as *mut _;
@@ -101,7 +101,9 @@ pub fn launch_kernel(
{
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
}
- GlobalState::lock_enqueue(hstream, |queue| {
+ GlobalState::lock_stream(hstream, |stream_data| {
+ let dev = unsafe { &mut *(*stream_data.context).device };
+ let queue = stream_data.cmd_list.as_ref().unwrap();
let func: &mut FunctionData = unsafe { &mut *f }.as_result_mut()?;
if kernel_params != ptr::null_mut() {
for (i, &(arg_size, is_mem)) in func.arg_size.iter().enumerate() {
@@ -162,6 +164,16 @@ pub fn launch_kernel(
)?
};
}
+ let buffers = dev.allocations.iter().copied().collect::<Vec<_>>();
+ let err = unsafe {
+ ocl_core::ffi::clSetKernelExecInfo(
+ func.base.as_ptr(),
+ ocl_core::ffi::CL_KERNEL_EXEC_INFO_SVM_PTRS,
+ buffers.len() * mem::size_of::<*mut c_void>(),
+ buffers.as_ptr() as *const _,
+ )
+ };
+ assert_eq!(err, 0);
let global_dims = [
(block_dim_x * grid_dim_x) as usize,
(block_dim_y * grid_dim_y) as usize,
@@ -184,7 +196,7 @@ pub fn launch_kernel(
)?
};
Ok::<_, CUresult>(())
- })
+ })?
}
fn round_up_to_multiple(x: usize, multiple: usize) -> usize {
diff --git a/zluda/src/impl/memory.rs b/zluda/src/impl/memory.rs
index 7293ca6..e4abeda 100644
--- a/zluda/src/impl/memory.rs
+++ b/zluda/src/impl/memory.rs
@@ -5,27 +5,39 @@ use super::{
use std::{
ffi::c_void,
mem::{self, size_of},
+ ptr,
};
pub fn alloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> Result<(), CUresult> {
let ptr = GlobalState::lock_stream(CU_STREAM_LEGACY, |stream_data| {
- let dev = unsafe { &*(*stream_data.context).device };
+ let dev = unsafe { &mut *(*stream_data.context).device };
let queue = stream_data.cmd_list.as_ref().unwrap();
let ptr = unsafe {
- dev.ocl_ext
- .device_mem_alloc(&dev.ocl_context, &dev.ocl_base, bytesize, 0)?
+ ocl_core::ffi::clSVMAlloc(
+ dev.ocl_context.as_ptr(),
+ ocl_core::ffi::CL_MEM_READ_WRITE,
+ bytesize,
+ 0,
+ )
};
// CUDA does the same thing and e.g. GeekBench relies on this behavior
- let event = unsafe {
- dev.ocl_ext.enqueue_memfill(
- queue,
+ let mut event = ptr::null_mut();
+ let err = unsafe {
+ ocl_core::ffi::clEnqueueSVMMemFill(
+ queue.as_ptr(),
ptr,
&0u8 as *const u8 as *const c_void,
1,
bytesize,
- )?
+ 0,
+ ptr::null(),
+ &mut event,
+ )
};
- ocl_core::wait_for_event(&event)?;
+ assert_eq!(err, 0);
+ let err = unsafe { ocl_core::ffi::clWaitForEvents(1, &mut event) };
+ assert_eq!(err, 0);
+ dev.allocations.insert(ptr);
Ok::<_, CUresult>(ptr)
})??;
unsafe { *dptr = ptr };
@@ -36,10 +48,22 @@ pub fn copy_v2(dst: *mut c_void, src: *const c_void, bytesize: usize) -> Result<
GlobalState::lock_stream(stream::CU_STREAM_LEGACY, |stream_data| {
let dev = unsafe { &*(*stream_data.context).device };
let queue = stream_data.cmd_list.as_ref().unwrap();
- unsafe {
- dev.ocl_ext
- .enqueue_memcpy(queue, true, dst, src, bytesize)?
+ let mut event = ptr::null_mut();
+ let err = unsafe {
+ ocl_core::ffi::clEnqueueSVMMemcpy(
+ queue.as_ptr(),
+ 1,
+ dst,
+ src,
+ bytesize,
+ 0,
+ ptr::null(),
+ &mut event,
+ )
};
+ assert_eq!(err, 0);
+ let err = unsafe { ocl_core::ffi::clWaitForEvents(1, &mut event) };
+ assert_eq!(err, 0);
Ok(())
})?
}
@@ -47,7 +71,8 @@ pub fn copy_v2(dst: *mut c_void, src: *const c_void, bytesize: usize) -> Result<
pub fn free_v2(ptr: *mut c_void) -> Result<(), CUresult> {
GlobalState::lock_current_context(|ctx| {
let dev = unsafe { &mut *ctx.device };
- unsafe { dev.ocl_ext.mem_blocking_free(&dev.ocl_context, ptr)? };
+ unsafe { ocl_core::ffi::clSVMFree(dev.ocl_context.as_ptr(), ptr) };
+ dev.allocations.remove(&ptr);
Ok(())
})?
}
@@ -57,16 +82,22 @@ pub(crate) fn set_d32_v2(dst: *mut c_void, mut ui: u32, n: usize) -> Result<(),
let dev = unsafe { &*(*stream_data.context).device };
let queue = stream_data.cmd_list.as_ref().unwrap();
let pattern_size = mem::size_of_val(&ui);
- let event = unsafe {
- dev.ocl_ext.enqueue_memfill(
- queue,
+ let mut event = ptr::null_mut();
+ let err = unsafe {
+ ocl_core::ffi::clEnqueueSVMMemFill(
+ queue.as_ptr(),
dst,
&ui as *const _ as *const _,
pattern_size,
pattern_size * n,
- )?
+ 0,
+ ptr::null(),
+ &mut event,
+ )
};
- ocl_core::wait_for_event(&event)?;
+ assert_eq!(err, 0);
+ let err = unsafe { ocl_core::ffi::clWaitForEvents(1, &mut event) };
+ assert_eq!(err, 0);
Ok(())
})?
}
@@ -76,16 +107,22 @@ pub(crate) fn set_d8_v2(dst: *mut c_void, mut uc: u8, n: usize) -> Result<(), CU
let dev = unsafe { &*(*stream_data.context).device };
let queue = stream_data.cmd_list.as_ref().unwrap();
let pattern_size = mem::size_of_val(&uc);
- let event = unsafe {
- dev.ocl_ext.enqueue_memfill(
- queue,
+ let mut event = ptr::null_mut();
+ let err = unsafe {
+ ocl_core::ffi::clEnqueueSVMMemFill(
+ queue.as_ptr(),
dst,
&uc as *const _ as *const _,
pattern_size,
pattern_size * n,
- )?
+ 0,
+ ptr::null(),
+ &mut event,
+ )
};
- ocl_core::wait_for_event(&event)?;
+ assert_eq!(err, 0);
+ let err = unsafe { ocl_core::ffi::clWaitForEvents(1, &mut event) };
+ assert_eq!(err, 0);
Ok(())
})?
}