diff options
-rw-r--r-- | zluda/src/impl/export_table.rs | 36 | ||||
-rw-r--r-- | zluda_dump/src/lib.rs | 157 | ||||
-rw-r--r-- | zluda_dump/src/os_unix.rs | 16 | ||||
-rw-r--r-- | zluda_dump/src/os_win.rs | 31 |
4 files changed, 171 insertions, 69 deletions
diff --git a/zluda/src/impl/export_table.rs b/zluda/src/impl/export_table.rs index ddee92d..e5b17ca 100644 --- a/zluda/src/impl/export_table.rs +++ b/zluda/src/impl/export_table.rs @@ -75,7 +75,10 @@ unsafe extern "system" fn runtime_callback_hooks_fn1(ptr: *mut *mut usize, size: static mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE: [u8; 2] = [0; 2];
-unsafe extern "system" fn runtime_callback_hooks_fn5(ptr: *mut *mut u8, size: *mut usize) -> *mut u8 {
+unsafe extern "system" fn runtime_callback_hooks_fn5(
+ ptr: *mut *mut u8,
+ size: *mut usize,
+) -> *mut u8 {
*ptr = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
*size = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.len();
return TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
@@ -93,7 +96,9 @@ static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [ VTableEntry {
length: mem::size_of::<[VTableEntry; CUDART_INTERFACE_LENGTH]>(),
},
- VTableEntry { ptr: ptr::null() },
+ VTableEntry {
+ ptr: get_module_from_cubin as *const (),
+ },
VTableEntry {
ptr: cudart_interface_fn1 as *const (),
},
@@ -101,7 +106,7 @@ static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [ VTableEntry { ptr: ptr::null() },
VTableEntry { ptr: ptr::null() },
VTableEntry {
- ptr: get_module_from_cubin as *const (),
+ ptr: get_module_from_cubin_ext as *const (),
},
VTableEntry {
ptr: cudart_interface_fn6 as *const (),
@@ -198,14 +203,7 @@ struct FatbinFileHeader { unsafe extern "system" fn get_module_from_cubin(
result: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
- ptr1: *mut c_void,
- ptr2: *mut c_void,
) -> CUresult {
- // Not sure what those two parameters are actually used for,
- // they are somehow involved in __cudaRegisterHostVar
- if ptr1 != ptr::null_mut() || ptr2 != ptr::null_mut() {
- return CUresult::CUDA_ERROR_NOT_SUPPORTED;
- }
if result == ptr::null_mut()
|| (*fatbinc_wrapper).magic != FATBINC_MAGIC
|| (*fatbinc_wrapper).version != FATBINC_VERSION
@@ -248,6 +246,21 @@ unsafe extern "system" fn get_module_from_cubin( CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
}
+/// Four-argument entry point that forwards to `get_module_from_cubin`.
+///
+/// Returns `CUDA_ERROR_NOT_SUPPORTED` when either `ptr1` or `ptr2` is
+/// non-null; otherwise returns whatever `get_module_from_cubin` returns for
+/// `result` and `fatbinc_wrapper`.
+unsafe extern "system" fn get_module_from_cubin_ext(
+    result: *mut CUmodule,
+    fatbinc_wrapper: *const FatbincWrapper,
+    ptr1: *mut c_void,
+    ptr2: *mut c_void,
+) -> CUresult {
+    // The purpose of the two trailing pointers is not known; they appear to
+    // be involved in __cudaRegisterHostVar. Only the all-null case is handled.
+    let has_extra_args = !ptr1.is_null() || !ptr2.is_null();
+    if has_extra_args {
+        return CUresult::CUDA_ERROR_NOT_SUPPORTED;
+    }
+    get_module_from_cubin(result, fatbinc_wrapper)
+}
+
unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
let mut index = file;
let mut result = Vec::new();
@@ -284,6 +297,9 @@ unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option<Vec< }
real_decompressed_size => {
decompressed_vec.truncate(real_decompressed_size as usize);
+ if decompressed_vec.last().copied().unwrap_or(1) != 0 {
+ decompressed_vec.push(0);
+ }
return Some(decompressed_vec);
}
}
diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs index 9b9146d..c10057d 100644 --- a/zluda_dump/src/lib.rs +++ b/zluda_dump/src/lib.rs @@ -75,13 +75,13 @@ pub static mut OVERRIDE_COMPUTE_CAPABILITY_MAJOR: Option<i32> = None; pub struct ModuleDump { content: Rc<String>, - kernels_args: HashMap<String, Vec<usize>>, + kernels_args: Option<HashMap<String, Vec<usize>>>, } pub struct KernelDump { module_content: Rc<String>, name: String, - arguments: Vec<usize>, + arguments: Option<Vec<usize>>, } // We are doing dlopen here instead of just using LD_PRELOAD, @@ -95,7 +95,7 @@ pub unsafe fn init_libcuda_handle() { Ok(kernel_filter) => match Regex::new(&kernel_filter) { Ok(r) => KERNEL_PATTERN = Some(r), Err(err) => { - eprintln!("[ZLUDA_DUMP] Error parsing ZLUDA_DUMP_KERNEL: {}", err); + os_log!("Error parsing ZLUDA_DUMP_KERNEL: {}", err); } }, Err(_) => (), @@ -104,15 +104,15 @@ pub unsafe fn init_libcuda_handle() { Ok(cc_override) => match str::parse::<i32>(&cc_override) { Ok(ver) => OVERRIDE_COMPUTE_CAPABILITY_MAJOR = Some(ver), Err(err) => { - eprintln!( - "[ZLUDA_DUMP] Error parsing ZLUDA_OVERRIDE_COMPUTE_CAPABILITY_MAJOR: {}", + os_log!( + "Error parsing ZLUDA_OVERRIDE_COMPUTE_CAPABILITY_MAJOR: {}", err ); } }, Err(_) => (), } - eprintln!("[ZLUDA_DUMP] Initialized"); + os_log!("Initialized"); } } @@ -131,51 +131,45 @@ pub unsafe fn cuModuleLoadData( unsafe fn record_module_image_raw(module: CUmodule, raw_image: *const ::std::os::raw::c_void) { if *(raw_image as *const u32) == 0x464c457f { - eprintln!("[ZLUDA_DUMP] Unsupported ELF module: {:?}", raw_image); + os_log!("Unsupported ELF module: {:?}", raw_image); return; } let image = to_str(raw_image); match image { - None => eprintln!("[ZLUDA_DUMP] Malformed module image: {:?}", raw_image), + None => os_log!("Malformed module image: {:?}", raw_image), Some(image) => record_module_image(module, image), }; } unsafe fn record_module_image(module: CUmodule, image: &str) { if !image.contains(&".address_size") { - 
eprintln!("[ZLUDA_DUMP] Malformed module image: {:?}", module) + os_log!("Malformed module image: {:?}", module) } else { let mut errors = Vec::new(); let ast = ptx::ModuleParser::new().parse(&mut errors, image); - match (&*errors, ast) { + let kernels_args = match (&*errors, ast) { (&[], Ok(ast)) => { let kernels_args = ast .directives .iter() .filter_map(directive_to_kernel) .collect::<HashMap<_, _>>(); - let modules = MODULES.get_or_insert_with(|| HashMap::new()); - modules.insert( - module, - ModuleDump { - content: Rc::new(image.to_string()), - kernels_args, - }, - ); + Some(kernels_args) } - (errs, ast) => { - let err_string = errs - .iter() - .map(|e| format!("{:?}", e)) - .chain(ast.err().iter().map(|e| format!("{:?}", e))) - .collect::<Vec<_>>() - .join("\n"); - eprintln!( - "[ZLUDA_DUMP] Errors when parsing module:\n---ERRORS---\n{}\n---MODULE---\n{}", - err_string, image - ); + (_, _) => { + // Don't print errors - it's usually too verbose to be useful + os_log!("Errors when parsing module: {:?}", module); + None } - } + }; + let modules = MODULES.get_or_insert_with(|| HashMap::new()); + modules.insert( + module, + ModuleDump { + content: Rc::new(image.to_string()), + kernels_args, + }, + ); } } @@ -248,27 +242,32 @@ unsafe fn cuModuleGetFunction( if let Some(modules) = &MODULES { if let Some(module_dump) = modules.get(&hmod) { if let Some(kernel) = to_str(name) { - if let Some(args) = module_dump.kernels_args.get(kernel) { - let kernel_args = KERNELS.get_or_insert_with(|| HashMap::new()); - kernel_args.insert( - *hfunc, - KernelDump { - module_content: module_dump.content.clone(), - name: kernel.to_string(), - arguments: args.clone(), - }, - ); + let kernel_args = if let Some(kernels) = &module_dump.kernels_args { + if let Some(args) = kernels.get(kernel) { + Some(args.clone()) + } else { + None + } } else { - eprintln!("[ZLUDA_DUMP] Unknown kernel: {}", kernel); - } + None + }; + let kernel_args_map = KERNELS.get_or_insert_with(|| HashMap::new()); + 
kernel_args_map.insert( + *hfunc, + KernelDump { + module_content: module_dump.content.clone(), + name: kernel.to_string(), + arguments: kernel_args, + }, + ); } else { - eprintln!("[ZLUDA_DUMP] Unknown kernel name at: {:?}", hfunc); + os_log!("Malformed name at: {:?}", hfunc); } } else { - eprintln!("[ZLUDA_DUMP] Unknown module: {:?}", hmod); + os_log!("Unknown module: {:?}", hmod); } } else { - eprintln!("[ZLUDA_DUMP] Unknown module: {:?}", hmod); + os_log!("Unknown module: {:?}", hmod); } CUresult::CUDA_SUCCESS } @@ -317,7 +316,7 @@ pub unsafe fn cuLaunchKernel( let dump_env = match create_dump_dir(f, LAUNCH_COUNTER) { Ok(dump_env) => dump_env, Err(err) => { - eprintln!("[ZLUDA_DUMP] {:#?}", err); + os_log!("Error when creating the dump directory: {}", err); None } }; @@ -333,7 +332,7 @@ pub unsafe fn cuLaunchKernel( kernelParams, dump_env, ) - .unwrap_or_else(|err| eprintln!("[ZLUDA_DUMP] {:#?}", err)); + .unwrap_or_else(|err| os_log!("{}", err)); }; error = cont( f, @@ -357,9 +356,9 @@ pub unsafe fn cuLaunchKernel( "post", &kernel_dump.name, LAUNCH_COUNTER, - &kernel_dump.arguments, + kernel_dump.arguments.as_ref().map(|vec| &vec[..]), ) - .unwrap_or_else(|err| eprintln!("[ZLUDA_DUMP] {:#?}", err)); + .unwrap_or_else(|err| os_log!("{}", err)); } LAUNCH_COUNTER += 1; CUresult::CUDA_SUCCESS @@ -445,7 +444,7 @@ unsafe fn dump_pre_data( "pre", &kernel_dump.name, LAUNCH_COUNTER, - &kernel_dump.arguments, + kernel_dump.arguments.as_ref().map(|vec| &vec[..]), )?; Ok(()) } @@ -455,8 +454,12 @@ unsafe fn dump_arguments( prefix: &str, kernel_name: &str, counter: usize, - args: &[usize], + args: Option<&[usize]>, ) -> Result<(), Box<dyn Error>> { + let args = match args { + None => return Ok(()), + Some(a) => a, + }; let mut dump_dir = get_dump_dir()?; dump_dir.push(format!("{:04}_{}", counter, kernel_name)); dump_dir.push(prefix); @@ -508,12 +511,19 @@ const CUDART_INTERFACE_GUID: CUuuid = CUuuid { ], }; -const GET_MODULE_OFFSET: usize = 6; static mut 
CUDART_INTERFACE_VTABLE: Vec<*const c_void> = Vec::new(); +const GET_MODULE_FROM_CUBIN_OFFSET: usize = 1; +const GET_MODULE_FROM_CUBIN_EXT_OFFSET: usize = 6; static mut ORIGINAL_GET_MODULE_FROM_CUBIN: Option< unsafe extern "system" fn( result: *mut CUmodule, fatbinc_wrapper: *const FatbincWrapper, + ) -> CUresult, +> = None; +static mut ORIGINAL_GET_MODULE_FROM_CUBIN_EXT: Option< + unsafe extern "system" fn( + result: *mut CUmodule, + fatbinc_wrapper: *const FatbincWrapper, ptr1: *mut c_void, ptr2: *mut c_void, ) -> CUresult, @@ -539,16 +549,23 @@ pub unsafe fn cuGetExportTable( CUDART_INTERFACE_VTABLE.as_mut_ptr(), len, ); - if GET_MODULE_OFFSET >= len { + if GET_MODULE_FROM_CUBIN_EXT_OFFSET >= len { return CUresult::CUDA_ERROR_UNKNOWN; } ORIGINAL_GET_MODULE_FROM_CUBIN = - mem::transmute(CUDART_INTERFACE_VTABLE[GET_MODULE_OFFSET]); - CUDART_INTERFACE_VTABLE[GET_MODULE_OFFSET] = get_module_from_cubin as *const _; + mem::transmute(CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_OFFSET]); + CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_OFFSET] = + get_module_from_cubin as *const _; + ORIGINAL_GET_MODULE_FROM_CUBIN_EXT = + mem::transmute(CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_EXT_OFFSET]); + CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_EXT_OFFSET] = + get_module_from_cubin_ext as *const _; } *ppExportTable = CUDART_INTERFACE_VTABLE.as_ptr() as *const _; return CUresult::CUDA_SUCCESS; } else { + let guid = (*pExportTableId).bytes; + os_log!("Unsupported export table id: {{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15]); cont(ppExportTable, pExportTableId) } } @@ -598,11 +615,10 @@ struct FatbinFileHeader { uncompressed_payload: c_ulong, } -unsafe extern "system" fn get_module_from_cubin( +unsafe fn get_module_from_cubin_impl( module: *mut CUmodule, 
fatbinc_wrapper: *const FatbincWrapper, - ptr1: *mut c_void, - ptr2: *mut c_void, + get_module_base: impl FnOnce() -> CUresult, ) -> CUresult { if module == ptr::null_mut() || (*fatbinc_wrapper).magic != FATBINC_MAGIC @@ -628,7 +644,7 @@ unsafe extern "system" fn get_module_from_cubin( } }; } - let result = ORIGINAL_GET_MODULE_FROM_CUBIN.unwrap()(module, fatbinc_wrapper, ptr1, ptr2); + let result = get_module_base(); if result != CUresult::CUDA_SUCCESS { return result; } @@ -644,6 +660,26 @@ unsafe extern "system" fn get_module_from_cubin( result } +unsafe extern "system" fn get_module_from_cubin( + module: *mut CUmodule, + fatbinc_wrapper: *const FatbincWrapper, +) -> CUresult { + get_module_from_cubin_impl(module, fatbinc_wrapper, || { + ORIGINAL_GET_MODULE_FROM_CUBIN.unwrap()(module, fatbinc_wrapper) + }) +} + +unsafe extern "system" fn get_module_from_cubin_ext( + module: *mut CUmodule, + fatbinc_wrapper: *const FatbincWrapper, + ptr1: *mut c_void, + ptr2: *mut c_void, +) -> CUresult { + get_module_from_cubin_impl(module, fatbinc_wrapper, || { + ORIGINAL_GET_MODULE_FROM_CUBIN_EXT.unwrap()(module, fatbinc_wrapper, ptr1, ptr2) + }) +} + unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> { let mut index = file; let mut result = Vec::new(); @@ -680,6 +716,9 @@ unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option<Vec< } real_decompressed_size => { decompressed_vec.truncate(real_decompressed_size as usize); + if decompressed_vec.last().copied().unwrap_or(1) != 0 { + decompressed_vec.push(0); + } return Some(decompressed_vec); } } diff --git a/zluda_dump/src/os_unix.rs b/zluda_dump/src/os_unix.rs index b3d9343..91a004a 100644 --- a/zluda_dump/src/os_unix.rs +++ b/zluda_dump/src/os_unix.rs @@ -2,6 +2,8 @@ use std::ffi::{c_void, CStr}; const NVCUDA_DEFAULT_PATH: &'static [u8] = b"/usr/lib/x86_64-linux-gnu/libcuda.so.1\0";
+/// Does nothing: the body is empty in this Unix OS module.
+pub fn init() {}
+
pub unsafe fn load_cuda_library() -> *mut c_void {
libc::dlopen(
NVCUDA_DEFAULT_PATH.as_ptr() as *const _,
@@ -12,3 +14,17 @@ pub unsafe fn load_cuda_library() -> *mut c_void { pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
libc::dlsym(handle, func.as_ptr() as *const _)
}
+
+/// Writes one log line to stderr, tagged with `[ZLUDA_DUMP] `.
+///
+/// Accepts a format string optionally followed by comma-separated
+/// arguments, like `eprintln!`. Call sites pass the bare message, so the
+/// tag is added here to match the Windows `__log_impl` output.
+#[macro_export]
+macro_rules! os_log {
+    ($format:tt) => {
+        {
+            eprintln!("[ZLUDA_DUMP] {}", format_args!($format));
+        }
+    };
+    ($format:tt, $($obj: expr),+) => {
+        {
+            eprintln!("[ZLUDA_DUMP] {}", format_args!($format, $($obj,)+));
+        }
+    };
+}
diff --git a/zluda_dump/src/os_win.rs b/zluda_dump/src/os_win.rs index 7e411ac..70a2b42 100644 --- a/zluda_dump/src/os_win.rs +++ b/zluda_dump/src/os_win.rs @@ -5,9 +5,11 @@ use std::{ ptr,
};
+use std::os::windows::io::AsRawHandle;
use wchar::wch_c;
use winapi::{
shared::minwindef::HMODULE,
+ um::debugapi::OutputDebugStringA,
um::libloaderapi::{GetProcAddress, LoadLibraryW},
};
@@ -66,3 +68,32 @@ unsafe fn get_non_detoured_load_library( pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
GetProcAddress(handle as *mut _, func.as_ptr()) as *mut _
}
+
+/// Builds a `String` from a format string plus optional comma-separated
+/// arguments (same syntax as `format!`) and passes it to
+/// `crate::os::__log_impl`.
+#[macro_export]
+macro_rules! os_log {
+    ($format:tt $(, $obj:expr)*) => {
+        {
+            crate::os::__log_impl(format!($format $(, $obj)*));
+        }
+    };
+}
+
+/// Emits one log line tagged with `[ZLUDA_DUMP] `.
+///
+/// The message goes to stderr when the stderr raw handle is non-null;
+/// otherwise it is passed to `OutputDebugStringA`. Implementation detail of
+/// the `os_log!` macro.
+pub fn __log_impl(s: String) {
+    // NOTE(review): a null handle presumably means no stderr is attached
+    // (e.g. a GUI process); confirm against the `AsRawHandle` docs.
+    let log_to_stderr = std::io::stderr().as_raw_handle() != ptr::null_mut();
+    if log_to_stderr {
+        // `eprintln!` already appends a newline; an extra "\n" here would
+        // put a blank line after every message.
+        eprintln!("[ZLUDA_DUMP] {}", s);
+    } else {
+        let mut win_str = String::with_capacity("[ZLUDA_DUMP] ".len() + s.len() + 2);
+        win_str.push_str("[ZLUDA_DUMP] ");
+        win_str.push_str(&s);
+        win_str.push_str("\n\0");
+        // SAFETY: `win_str` ends with an explicit NUL byte and stays alive
+        // for the whole call, so the pointer refers to a valid C string.
+        unsafe { OutputDebugStringA(win_str.as_ptr() as *const _) };
+    }
+}
|