about | summary | refs | log | tree | commit | diff | homepage
path: root/zluda
diff options
context:
space:
mode:
author Andrzej Janik <[email protected]> 2021-06-27 21:55:08 +0200
committer Andrzej Janik <[email protected]> 2021-06-27 21:55:08 +0200
commit 196242b4104eae77249cb342d749f95c37d6ffec (patch)
tree 0ef4f93152d6c434f47e2d03321c9240c190a7cb /zluda
parent 55fbe1abb523fa04cc3017696dfdd8b459dd9edf (diff)
download ZLUDA-196242b4104eae77249cb342d749f95c37d6ffec.tar.gz
ZLUDA-196242b4104eae77249cb342d749f95c37d6ffec.zip
Revert "Fix offset calculation in kernel launch"
This reverts commit d7d38256e0580c3cfb649a641d9ed62c6ff0fc20.
Diffstat (limited to 'zluda')
-rw-r--r-- zluda/src/impl/function.rs 18
1 files changed, 8 insertions, 10 deletions
diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs
index 638d08a..4f2006c 100644
--- a/zluda/src/impl/function.rs
+++ b/zluda/src/impl/function.rs
@@ -110,24 +110,23 @@ pub fn launch_kernel(
match (buffer_size, buffer_ptr) {
(Some(buffer_size), Some(buffer_ptr)) => {
let sum_of_kernel_argument_sizes =
- func.arg_size
- .iter()
- .fold(0, |sum_of_arg_sizes, size_of_arg| {
- sum_of_arg_sizes + align_to_usize(*size_of_arg)
- });
+ func.arg_size.iter().fold(0, |offset, size_of_arg| {
+ size_of_arg + round_up_to_multiple(offset, *size_of_arg)
+ });
if buffer_size != sum_of_kernel_argument_sizes {
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
}
let mut offset = 0;
for (i, arg_size) in func.arg_size.iter().enumerate() {
+ let buffer_offset = round_up_to_multiple(offset, *arg_size);
unsafe {
func.base.set_arg_raw(
i as u32,
*arg_size,
- buffer_ptr.add(offset) as *const _,
+ buffer_ptr.add(buffer_offset) as *const _,
)?
};
- offset += align_to_usize(*arg_size);
+ offset = buffer_offset + *arg_size;
}
}
_ => return Err(CUresult::CUDA_ERROR_INVALID_VALUE),
@@ -160,9 +159,8 @@ pub fn launch_kernel(
})?
}
-fn align_to_usize(value: usize) -> usize {
- let multiple = std::mem::size_of::<usize>();
- ((value + multiple - 1) / multiple) * multiple
+fn round_up_to_multiple(x: usize, multiple: usize) -> usize {
+ ((x + multiple - 1) / multiple) * multiple
}
pub(crate) fn get_attribute(