diff options
author | Andrzej Janik <[email protected]> | 2021-09-13 23:43:50 +0000 |
---|---|---|
committer | Andrzej Janik <[email protected]> | 2021-09-13 23:43:50 +0000 |
commit | 986fa49097ef31fcd5eedcc05a624eb57d582ba4 (patch) | |
tree | 415dd34dda937d9408c0abfaf2581e3cc92c3994 /zluda | |
parent | dbb6f09ffa7e9848d4934acc48001b1777f20473 (diff) | |
download | ZLUDA-986fa49097ef31fcd5eedcc05a624eb57d582ba4.tar.gz ZLUDA-986fa49097ef31fcd5eedcc05a624eb57d582ba4.zip |
Zero out buffer on creation
Diffstat (limited to 'zluda')
-rw-r--r-- | zluda/src/cuda.rs | 18 | ||||
-rw-r--r-- | zluda/src/impl/module.rs | 1 |
2 files changed, 18 insertions, 1 deletions
diff --git a/zluda/src/cuda.rs b/zluda/src/cuda.rs
index 9be70b5..e66ee75 100644
--- a/zluda/src/cuda.rs
+++ b/zluda/src/cuda.rs
@@ -2588,7 +2588,23 @@ pub extern "system" fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> C
 
 #[cfg_attr(not(test), no_mangle)]
 pub extern "system" fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult {
-    unsafe { hipMalloc(dptr as _, bytesize).into() }
+    let mut dev_ptr = std::ptr::null_mut();
+    let err = unsafe { hipMalloc(&mut dev_ptr, bytesize) }.into();
+    if err != CUresult::CUDA_SUCCESS {
+        return err;
+    }
+    // HACK ALERT: GeekBench is buggy and sometimes assumes that buffers are zeroed-out on creation
+    let err = unsafe { hipMemsetD8(dev_ptr, 0, bytesize) }.into();
+    /*
+    let bytesize_rounded_down = bytesize & !3usize;
+    let bytes = usize::min(bytesize_rounded_down, 4096);
+    let err = unsafe { hipMemsetD32(dev_ptr, 0, bytes / 1024).into() };
+    */
+    if err != CUresult::CUDA_SUCCESS {
+        return err;
+    }
+    unsafe { *dptr = CUdeviceptr(dev_ptr as usize) };
+    CUresult::CUDA_SUCCESS
 }
 
 #[cfg_attr(not(test), no_mangle)]
diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs
index 6bd9a40..6575d96 100644
--- a/zluda/src/impl/module.rs
+++ b/zluda/src/impl/module.rs
@@ -185,6 +185,7 @@ fn compile_amd(
     let mut compiler_cmd = Command::new(&clang_exe);
     compiler_cmd
         .arg(format!("-mcpu={}", device_name))
+        .arg("-ffp-contract=off")
         .arg("-nogpulib")
         .arg("-mno-wavefrontsize64")
         .arg("-O3")