about | summary | refs | log | tree | commit | diff | homepage
path: root/zluda_dump
diff options
context:
space:
mode:
author: Andrzej Janik <[email protected]> 2021-06-28 01:40:35 +0200
committer: Andrzej Janik <[email protected]> 2021-06-28 01:40:35 +0200
commit: b2a455e12e91d3b1bdc4edaa7b25a3348047be9e (patch)
tree: 8ed0c81404bf0cab77fc264714bc257badc6ac4d /zluda_dump
parent: 196242b4104eae77249cb342d749f95c37d6ffec (diff)
download: ZLUDA-b2a455e12e91d3b1bdc4edaa7b25a3348047be9e.tar.gz
download: ZLUDA-b2a455e12e91d3b1bdc4edaa7b25a3348047be9e.zip
Bunch of tiny fixes and improvements
Diffstat (limited to 'zluda_dump')
-rw-r--r-- zluda_dump/src/cuda.rs 17
-rw-r--r-- zluda_dump/src/lib.rs 2
-rw-r--r-- zluda_dump/src/replay.py 10
3 files changed, 24 insertions, 5 deletions
diff --git a/zluda_dump/src/cuda.rs b/zluda_dump/src/cuda.rs
index d715689..50082d1 100644
--- a/zluda_dump/src/cuda.rs
+++ b/zluda_dump/src/cuda.rs
@@ -4399,3 +4399,20 @@ extern_redirect_with! {
extern_redirect! {
pub fn cuFuncGetModule(hmod: *mut CUmodule, hfunc: CUfunction) -> CUresult;
}
+#[repr(transparent)]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct CUoutput_mode_enum(pub ::std::os::raw::c_uint);
+pub use self::CUoutput_mode_enum as CUoutput_mode;
+extern_redirect! {
+ pub fn cuProfilerInitialize(
+ configFile: *const ::std::os::raw::c_char,
+ outputFile: *const ::std::os::raw::c_char,
+ outputMode: CUoutput_mode,
+ ) -> CUresult;
+}
+extern_redirect! {
+ pub fn cuProfilerStart() -> CUresult;
+}
+extern_redirect! {
+ pub fn cuProfilerStop() -> CUresult;
+}
diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs
index cff6733..92238c6 100644
--- a/zluda_dump/src/lib.rs
+++ b/zluda_dump/src/lib.rs
@@ -576,7 +576,7 @@ fn dump_arguments(
let sum_of_kernel_argument_sizes = args.iter().fold(0, |offset, size_of_arg| {
size_of_arg + round_up_to_multiple(offset, *size_of_arg)
});
- if buffer_size != sum_of_kernel_argument_sizes {
+ if buffer_size < sum_of_kernel_argument_sizes {
return Err("Malformed `extra` parameter to kernel launch")?;
}
let mut offset = 0;
diff --git a/zluda_dump/src/replay.py b/zluda_dump/src/replay.py
index 52f250c..723d954 100644
--- a/zluda_dump/src/replay.py
+++ b/zluda_dump/src/replay.py
@@ -51,9 +51,10 @@ def parse_arguments(dump_path, prefix):
return [load_arguments(path.join(dir, f)) for f in sorted(arg_files)]
-def append_debug_buffer(args):
+def append_debug_buffer(args, grid, block):
args = list(args)
- debug_buff = np.zeros(1024 * 1024, np.single)
+ items = block[0] * block[1] * block[2] * block[0] * block[1] * block[2]
+ debug_buff = np.zeros(items, dtype=np.uint32)
args.append((drv.InOut(debug_buff), debug_buff))
return args
@@ -71,7 +72,7 @@ def verify_single_dump(input_path, max_block_threads):
return
module = drv.module_from_file(path.join(input_path, "module.ptx"))
kernel = module.get_function(kernel_name)
- pre_args = append_debug_buffer(parse_arguments(input_path, "pre"))
+ pre_args = append_debug_buffer(parse_arguments(input_path, "pre"), tuple(launch_lines[:3]), block)
kernel_pre_args, host_pre_args = zip(*pre_args)
kernel(*list(kernel_pre_args), grid=tuple(launch_lines[:3]), block=block, shared=launch_lines[6])
post_args = parse_arguments(input_path, "post")
@@ -94,7 +95,8 @@ def main(argv):
verify_single_dump(input_path, max_threads)
else:
for input_subdir in sorted([path.join(input_path, dir_name) for dir_name in os.listdir(input_path)]):
- verify_single_dump(input_subdir, max_threads)
+ if os.path.isdir(input_subdir):
+ verify_single_dump(input_subdir, max_threads)
if __name__ == "__main__":