about | summary | refs | log | tree | commit | diff | homepage
path: root/zluda
diff options
context:
space:
mode:
author: Andrzej Janik <[email protected]> 2021-09-13 23:43:50 +0000
committer: Andrzej Janik <[email protected]> 2021-09-13 23:43:50 +0000
commit: 986fa49097ef31fcd5eedcc05a624eb57d582ba4 (patch)
tree: 415dd34dda937d9408c0abfaf2581e3cc92c3994 /zluda
parent: dbb6f09ffa7e9848d4934acc48001b1777f20473 (diff)
download: ZLUDA-986fa49097ef31fcd5eedcc05a624eb57d582ba4.tar.gz
ZLUDA-986fa49097ef31fcd5eedcc05a624eb57d582ba4.zip
Zero out buffer on creation
Diffstat (limited to 'zluda')
-rw-r--r-- zluda/src/cuda.rs 18
-rw-r--r-- zluda/src/impl/module.rs 1
2 files changed, 18 insertions, 1 deletions
diff --git a/zluda/src/cuda.rs b/zluda/src/cuda.rs
index 9be70b5..e66ee75 100644
--- a/zluda/src/cuda.rs
+++ b/zluda/src/cuda.rs
@@ -2588,7 +2588,23 @@ pub extern "system" fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> C
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult {
- unsafe { hipMalloc(dptr as _, bytesize).into() }
+ let mut dev_ptr = std::ptr::null_mut();
+ let err = unsafe { hipMalloc(&mut dev_ptr, bytesize) }.into();
+ if err != CUresult::CUDA_SUCCESS {
+ return err;
+ }
+ // HACK ALERT: GeekBench is buggy and sometimes assumes that buffers are zeroed-out on creation
+ let err = unsafe { hipMemsetD8(dev_ptr, 0, bytesize) }.into();
+ /*
+ let bytesize_rounded_down = bytesize & !3usize;
+ let bytes = usize::min(bytesize_rounded_down, 4096);
+ let err = unsafe { hipMemsetD32(dev_ptr, 0, bytes / 1024).into() };
+ */
+ if err != CUresult::CUDA_SUCCESS {
+ return err;
+ }
+ unsafe { *dptr = CUdeviceptr(dev_ptr as usize) };
+ CUresult::CUDA_SUCCESS
}
#[cfg_attr(not(test), no_mangle)]
diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs
index 6bd9a40..6575d96 100644
--- a/zluda/src/impl/module.rs
+++ b/zluda/src/impl/module.rs
@@ -185,6 +185,7 @@ fn compile_amd(
let mut compiler_cmd = Command::new(&clang_exe);
compiler_cmd
.arg(format!("-mcpu={}", device_name))
+ .arg("-ffp-contract=off")
.arg("-nogpulib")
.arg("-mno-wavefrontsize64")
.arg("-O3")