diff options
author | Andrzej Janik <[email protected]> | 2021-06-28 01:40:35 +0200 |
---|---|---|
committer | Andrzej Janik <[email protected]> | 2021-06-28 01:40:35 +0200 |
commit | b2a455e12e91d3b1bdc4edaa7b25a3348047be9e (patch) | |
tree | 8ed0c81404bf0cab77fc264714bc257badc6ac4d | |
parent | 196242b4104eae77249cb342d749f95c37d6ffec (diff) | |
download | ZLUDA-b2a455e12e91d3b1bdc4edaa7b25a3348047be9e.tar.gz ZLUDA-b2a455e12e91d3b1bdc4edaa7b25a3348047be9e.zip |
Bunch of tiny fixes and improvements
Mode | File | Lines changed |
---|---|---|
-rw-r--r-- | ptx/src/translate.rs | 8 |
-rw-r--r-- | zluda/src/cuda.rs | 2 |
-rw-r--r-- | zluda/src/impl/function.rs | 2 |
-rw-r--r-- | zluda_dump/src/cuda.rs | 17 |
-rw-r--r-- | zluda_dump/src/lib.rs | 2 |
-rw-r--r-- | zluda_dump/src/replay.py | 10 |

6 files changed, 28 insertions, 13 deletions
diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index e0b82e8..7cefdd6 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -1020,12 +1020,8 @@ fn emit_function_header<'a>( kernel_info: &mut HashMap<String, KernelInfo>,
) -> Result<spirv::Word, TranslateError> {
if let ast::MethodName::Kernel(name) = func_decl.name {
- let input_args = if func_decl.shared_mem.is_none() {
- func_decl.input_arguments.as_slice()
- } else {
- &func_decl.input_arguments[0..func_decl.input_arguments.len() - 1]
- };
- let args_lens = input_args
+ let args_lens = func_decl
+ .input_arguments
.iter()
.map(|param| param.v_type.size_of())
.collect();
diff --git a/zluda/src/cuda.rs b/zluda/src/cuda.rs index 16193dd..9e7cbff 100644 --- a/zluda/src/cuda.rs +++ b/zluda/src/cuda.rs @@ -3449,7 +3449,7 @@ pub extern "system" fn cuStreamQuery(hStream: CUstream) -> CUresult { #[cfg_attr(not(test), no_mangle)] pub extern "system" fn cuStreamSynchronize(hStream: CUstream) -> CUresult { - r#impl::unimplemented() + CUresult::CUDA_SUCCESS } #[cfg_attr(not(test), no_mangle)] diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs index 4f2006c..2aaab22 100644 --- a/zluda/src/impl/function.rs +++ b/zluda/src/impl/function.rs @@ -113,7 +113,7 @@ pub fn launch_kernel( func.arg_size.iter().fold(0, |offset, size_of_arg| { size_of_arg + round_up_to_multiple(offset, *size_of_arg) }); - if buffer_size != sum_of_kernel_argument_sizes { + if buffer_size < sum_of_kernel_argument_sizes { return Err(CUresult::CUDA_ERROR_INVALID_VALUE); } let mut offset = 0; diff --git a/zluda_dump/src/cuda.rs b/zluda_dump/src/cuda.rs index d715689..50082d1 100644 --- a/zluda_dump/src/cuda.rs +++ b/zluda_dump/src/cuda.rs @@ -4399,3 +4399,20 @@ extern_redirect_with! { extern_redirect! { pub fn cuFuncGetModule(hmod: *mut CUmodule, hfunc: CUfunction) -> CUresult; } +#[repr(transparent)] +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct CUoutput_mode_enum(pub ::std::os::raw::c_uint); +pub use self::CUoutput_mode_enum as CUoutput_mode; +extern_redirect! { + pub fn cuProfilerInitialize( + configFile: *const ::std::os::raw::c_char, + outputFile: *const ::std::os::raw::c_char, + outputMode: CUoutput_mode, + ) -> CUresult; +} +extern_redirect! { + pub fn cuProfilerStart() -> CUresult; +} +extern_redirect! 
{ + pub fn cuProfilerStop() -> CUresult; +} diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs index cff6733..92238c6 100644 --- a/zluda_dump/src/lib.rs +++ b/zluda_dump/src/lib.rs @@ -576,7 +576,7 @@ fn dump_arguments( let sum_of_kernel_argument_sizes = args.iter().fold(0, |offset, size_of_arg| { size_of_arg + round_up_to_multiple(offset, *size_of_arg) }); - if buffer_size != sum_of_kernel_argument_sizes { + if buffer_size < sum_of_kernel_argument_sizes { return Err("Malformed `extra` parameter to kernel launch")?; } let mut offset = 0; diff --git a/zluda_dump/src/replay.py b/zluda_dump/src/replay.py index 52f250c..723d954 100644 --- a/zluda_dump/src/replay.py +++ b/zluda_dump/src/replay.py @@ -51,9 +51,10 @@ def parse_arguments(dump_path, prefix): return [load_arguments(path.join(dir, f)) for f in sorted(arg_files)]
-def append_debug_buffer(args):
+def append_debug_buffer(args, grid, block):
args = list(args)
- debug_buff = np.zeros(1024 * 1024, np.single)
+ items = block[0] * block[1] * block[2] * block[0] * block[1] * block[2]
+ debug_buff = np.zeros(items, dtype=np.uint32)
args.append((drv.InOut(debug_buff), debug_buff))
return args
@@ -71,7 +72,7 @@ def verify_single_dump(input_path, max_block_threads): return
module = drv.module_from_file(path.join(input_path, "module.ptx"))
kernel = module.get_function(kernel_name)
- pre_args = append_debug_buffer(parse_arguments(input_path, "pre"))
+ pre_args = append_debug_buffer(parse_arguments(input_path, "pre"), tuple(launch_lines[:3]), block)
kernel_pre_args, host_pre_args = zip(*pre_args)
kernel(*list(kernel_pre_args), grid=tuple(launch_lines[:3]), block=block, shared=launch_lines[6])
post_args = parse_arguments(input_path, "post")
@@ -94,7 +95,8 @@ def main(argv): verify_single_dump(input_path, max_threads)
else:
for input_subdir in sorted([path.join(input_path, dir_name) for dir_name in os.listdir(input_path)]):
- verify_single_dump(input_subdir, max_threads)
+ if os.path.isdir(input_subdir):
+ verify_single_dump(input_subdir, max_threads)
if __name__ == "__main__":
|