diff options
author | Andrzej Janik <[email protected]> | 2021-06-27 21:55:08 +0200 |
---|---|---|
committer | Andrzej Janik <[email protected]> | 2021-06-27 21:55:08 +0200 |
commit | 196242b4104eae77249cb342d749f95c37d6ffec (patch) | |
tree | 0ef4f93152d6c434f47e2d03321c9240c190a7cb /zluda | |
parent | 55fbe1abb523fa04cc3017696dfdd8b459dd9edf (diff) | |
download | ZLUDA-196242b4104eae77249cb342d749f95c37d6ffec.tar.gz ZLUDA-196242b4104eae77249cb342d749f95c37d6ffec.zip |
Revert "Fix offset calculation in kernel launch"
This reverts commit d7d38256e0580c3cfb649a641d9ed62c6ff0fc20.
Diffstat (limited to 'zluda')
-rw-r--r-- | zluda/src/impl/function.rs | 18 |
1 files changed, 8 insertions, 10 deletions
```diff
diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs
index 638d08a..4f2006c 100644
--- a/zluda/src/impl/function.rs
+++ b/zluda/src/impl/function.rs
@@ -110,24 +110,23 @@ pub fn launch_kernel(
             match (buffer_size, buffer_ptr) {
                 (Some(buffer_size), Some(buffer_ptr)) => {
                     let sum_of_kernel_argument_sizes =
-                        func.arg_size
-                            .iter()
-                            .fold(0, |sum_of_arg_sizes, size_of_arg| {
-                                sum_of_arg_sizes + align_to_usize(*size_of_arg)
-                            });
+                        func.arg_size.iter().fold(0, |offset, size_of_arg| {
+                            size_of_arg + round_up_to_multiple(offset, *size_of_arg)
+                        });
                     if buffer_size != sum_of_kernel_argument_sizes {
                         return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
                     }
                     let mut offset = 0;
                     for (i, arg_size) in func.arg_size.iter().enumerate() {
+                        let buffer_offset = round_up_to_multiple(offset, *arg_size);
                         unsafe {
                             func.base.set_arg_raw(
                                 i as u32,
                                 *arg_size,
-                                buffer_ptr.add(offset) as *const _,
+                                buffer_ptr.add(buffer_offset) as *const _,
                             )?
                         };
-                        offset += align_to_usize(*arg_size);
+                        offset = buffer_offset + *arg_size;
                     }
                 }
                 _ => return Err(CUresult::CUDA_ERROR_INVALID_VALUE),
@@ -160,9 +159,8 @@ pub fn launch_kernel(
     })?
 }
 
-fn align_to_usize(value: usize) -> usize {
-    let multiple = std::mem::size_of::<usize>();
-    ((value + multiple - 1) / multiple) * multiple
+fn round_up_to_multiple(x: usize, multiple: usize) -> usize {
+    ((x + multiple - 1) / multiple) * multiple
 }
 
 pub(crate) fn get_attribute(
```