about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Andrzej Janik <[email protected]> 2021-04-10 23:01:01 +0200
committer Andrzej Janik <[email protected]> 2021-04-10 23:01:01 +0200
commit a39dda67d1fb3897c5ea778ae00c4079e8e2939a (patch)
tree d04bf8554b27be20673877ad0c35caee498baeba
parent 8393dbd6e963ee59161dbbe3447a876156bea1c3 (diff)
download ZLUDA-a39dda67d1fb3897c5ea778ae00c4079e8e2939a.tar.gz
ZLUDA-a39dda67d1fb3897c5ea778ae00c4079e8e2939a.zip
Make dumper compatible with older versions of CUDA
-rw-r--r-- zluda/src/impl/export_table.rs 36
-rw-r--r-- zluda_dump/src/lib.rs 157
-rw-r--r-- zluda_dump/src/os_unix.rs 16
-rw-r--r-- zluda_dump/src/os_win.rs 31
4 files changed, 171 insertions, 69 deletions
diff --git a/zluda/src/impl/export_table.rs b/zluda/src/impl/export_table.rs
index ddee92d..e5b17ca 100644
--- a/zluda/src/impl/export_table.rs
+++ b/zluda/src/impl/export_table.rs
@@ -75,7 +75,10 @@ unsafe extern "system" fn runtime_callback_hooks_fn1(ptr: *mut *mut usize, size:
static mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE: [u8; 2] = [0; 2];
-unsafe extern "system" fn runtime_callback_hooks_fn5(ptr: *mut *mut u8, size: *mut usize) -> *mut u8 {
+unsafe extern "system" fn runtime_callback_hooks_fn5(
+ ptr: *mut *mut u8,
+ size: *mut usize,
+) -> *mut u8 {
*ptr = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
*size = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.len();
return TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
@@ -93,7 +96,9 @@ static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [
VTableEntry {
length: mem::size_of::<[VTableEntry; CUDART_INTERFACE_LENGTH]>(),
},
- VTableEntry { ptr: ptr::null() },
+ VTableEntry {
+ ptr: get_module_from_cubin as *const (),
+ },
VTableEntry {
ptr: cudart_interface_fn1 as *const (),
},
@@ -101,7 +106,7 @@ static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [
VTableEntry { ptr: ptr::null() },
VTableEntry { ptr: ptr::null() },
VTableEntry {
- ptr: get_module_from_cubin as *const (),
+ ptr: get_module_from_cubin_ext as *const (),
},
VTableEntry {
ptr: cudart_interface_fn6 as *const (),
@@ -198,14 +203,7 @@ struct FatbinFileHeader {
unsafe extern "system" fn get_module_from_cubin(
result: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
- ptr1: *mut c_void,
- ptr2: *mut c_void,
) -> CUresult {
- // Not sure what those two parameters are actually used for,
- // they are somehow involved in __cudaRegisterHostVar
- if ptr1 != ptr::null_mut() || ptr2 != ptr::null_mut() {
- return CUresult::CUDA_ERROR_NOT_SUPPORTED;
- }
if result == ptr::null_mut()
|| (*fatbinc_wrapper).magic != FATBINC_MAGIC
|| (*fatbinc_wrapper).version != FATBINC_VERSION
@@ -248,6 +246,21 @@ unsafe extern "system" fn get_module_from_cubin(
CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
}
+unsafe extern "system" fn get_module_from_cubin_ext(
+ result: *mut CUmodule,
+ fatbinc_wrapper: *const FatbincWrapper,
+ ptr1: *mut c_void,
+ ptr2: *mut c_void,
+) -> CUresult {
+ // Not sure what those two parameters are actually used for,
+ // they are somehow involved in __cudaRegisterHostVar
+ if ptr1 != ptr::null_mut() || ptr2 != ptr::null_mut() {
+ CUresult::CUDA_ERROR_NOT_SUPPORTED
+ } else {
+ get_module_from_cubin(result, fatbinc_wrapper)
+ }
+}
+
unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
let mut index = file;
let mut result = Vec::new();
@@ -284,6 +297,9 @@ unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option<Vec<
}
real_decompressed_size => {
decompressed_vec.truncate(real_decompressed_size as usize);
+ if decompressed_vec.last().copied().unwrap_or(1) != 0 {
+ decompressed_vec.push(0);
+ }
return Some(decompressed_vec);
}
}
diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs
index 9b9146d..c10057d 100644
--- a/zluda_dump/src/lib.rs
+++ b/zluda_dump/src/lib.rs
@@ -75,13 +75,13 @@ pub static mut OVERRIDE_COMPUTE_CAPABILITY_MAJOR: Option<i32> = None;
pub struct ModuleDump {
content: Rc<String>,
- kernels_args: HashMap<String, Vec<usize>>,
+ kernels_args: Option<HashMap<String, Vec<usize>>>,
}
pub struct KernelDump {
module_content: Rc<String>,
name: String,
- arguments: Vec<usize>,
+ arguments: Option<Vec<usize>>,
}
// We are doing dlopen here instead of just using LD_PRELOAD,
@@ -95,7 +95,7 @@ pub unsafe fn init_libcuda_handle() {
Ok(kernel_filter) => match Regex::new(&kernel_filter) {
Ok(r) => KERNEL_PATTERN = Some(r),
Err(err) => {
- eprintln!("[ZLUDA_DUMP] Error parsing ZLUDA_DUMP_KERNEL: {}", err);
+ os_log!("Error parsing ZLUDA_DUMP_KERNEL: {}", err);
}
},
Err(_) => (),
@@ -104,15 +104,15 @@ pub unsafe fn init_libcuda_handle() {
Ok(cc_override) => match str::parse::<i32>(&cc_override) {
Ok(ver) => OVERRIDE_COMPUTE_CAPABILITY_MAJOR = Some(ver),
Err(err) => {
- eprintln!(
- "[ZLUDA_DUMP] Error parsing ZLUDA_OVERRIDE_COMPUTE_CAPABILITY_MAJOR: {}",
+ os_log!(
+ "Error parsing ZLUDA_OVERRIDE_COMPUTE_CAPABILITY_MAJOR: {}",
err
);
}
},
Err(_) => (),
}
- eprintln!("[ZLUDA_DUMP] Initialized");
+ os_log!("Initialized");
}
}
@@ -131,51 +131,45 @@ pub unsafe fn cuModuleLoadData(
unsafe fn record_module_image_raw(module: CUmodule, raw_image: *const ::std::os::raw::c_void) {
if *(raw_image as *const u32) == 0x464c457f {
- eprintln!("[ZLUDA_DUMP] Unsupported ELF module: {:?}", raw_image);
+ os_log!("Unsupported ELF module: {:?}", raw_image);
return;
}
let image = to_str(raw_image);
match image {
- None => eprintln!("[ZLUDA_DUMP] Malformed module image: {:?}", raw_image),
+ None => os_log!("Malformed module image: {:?}", raw_image),
Some(image) => record_module_image(module, image),
};
}
unsafe fn record_module_image(module: CUmodule, image: &str) {
if !image.contains(&".address_size") {
- eprintln!("[ZLUDA_DUMP] Malformed module image: {:?}", module)
+ os_log!("Malformed module image: {:?}", module)
} else {
let mut errors = Vec::new();
let ast = ptx::ModuleParser::new().parse(&mut errors, image);
- match (&*errors, ast) {
+ let kernels_args = match (&*errors, ast) {
(&[], Ok(ast)) => {
let kernels_args = ast
.directives
.iter()
.filter_map(directive_to_kernel)
.collect::<HashMap<_, _>>();
- let modules = MODULES.get_or_insert_with(|| HashMap::new());
- modules.insert(
- module,
- ModuleDump {
- content: Rc::new(image.to_string()),
- kernels_args,
- },
- );
+ Some(kernels_args)
}
- (errs, ast) => {
- let err_string = errs
- .iter()
- .map(|e| format!("{:?}", e))
- .chain(ast.err().iter().map(|e| format!("{:?}", e)))
- .collect::<Vec<_>>()
- .join("\n");
- eprintln!(
- "[ZLUDA_DUMP] Errors when parsing module:\n---ERRORS---\n{}\n---MODULE---\n{}",
- err_string, image
- );
+ (_, _) => {
+ // Don't print errors - it's usually too verbose to be useful
+ os_log!("Errors when parsing module: {:?}", module);
+ None
}
- }
+ };
+ let modules = MODULES.get_or_insert_with(|| HashMap::new());
+ modules.insert(
+ module,
+ ModuleDump {
+ content: Rc::new(image.to_string()),
+ kernels_args,
+ },
+ );
}
}
@@ -248,27 +242,32 @@ unsafe fn cuModuleGetFunction(
if let Some(modules) = &MODULES {
if let Some(module_dump) = modules.get(&hmod) {
if let Some(kernel) = to_str(name) {
- if let Some(args) = module_dump.kernels_args.get(kernel) {
- let kernel_args = KERNELS.get_or_insert_with(|| HashMap::new());
- kernel_args.insert(
- *hfunc,
- KernelDump {
- module_content: module_dump.content.clone(),
- name: kernel.to_string(),
- arguments: args.clone(),
- },
- );
+ let kernel_args = if let Some(kernels) = &module_dump.kernels_args {
+ if let Some(args) = kernels.get(kernel) {
+ Some(args.clone())
+ } else {
+ None
+ }
} else {
- eprintln!("[ZLUDA_DUMP] Unknown kernel: {}", kernel);
- }
+ None
+ };
+ let kernel_args_map = KERNELS.get_or_insert_with(|| HashMap::new());
+ kernel_args_map.insert(
+ *hfunc,
+ KernelDump {
+ module_content: module_dump.content.clone(),
+ name: kernel.to_string(),
+ arguments: kernel_args,
+ },
+ );
} else {
- eprintln!("[ZLUDA_DUMP] Unknown kernel name at: {:?}", hfunc);
+ os_log!("Malformed name at: {:?}", hfunc);
}
} else {
- eprintln!("[ZLUDA_DUMP] Unknown module: {:?}", hmod);
+ os_log!("Unknown module: {:?}", hmod);
}
} else {
- eprintln!("[ZLUDA_DUMP] Unknown module: {:?}", hmod);
+ os_log!("Unknown module: {:?}", hmod);
}
CUresult::CUDA_SUCCESS
}
@@ -317,7 +316,7 @@ pub unsafe fn cuLaunchKernel(
let dump_env = match create_dump_dir(f, LAUNCH_COUNTER) {
Ok(dump_env) => dump_env,
Err(err) => {
- eprintln!("[ZLUDA_DUMP] {:#?}", err);
+ os_log!("Error when creating the dump directory: {}", err);
None
}
};
@@ -333,7 +332,7 @@ pub unsafe fn cuLaunchKernel(
kernelParams,
dump_env,
)
- .unwrap_or_else(|err| eprintln!("[ZLUDA_DUMP] {:#?}", err));
+ .unwrap_or_else(|err| os_log!("{}", err));
};
error = cont(
f,
@@ -357,9 +356,9 @@ pub unsafe fn cuLaunchKernel(
"post",
&kernel_dump.name,
LAUNCH_COUNTER,
- &kernel_dump.arguments,
+ kernel_dump.arguments.as_ref().map(|vec| &vec[..]),
)
- .unwrap_or_else(|err| eprintln!("[ZLUDA_DUMP] {:#?}", err));
+ .unwrap_or_else(|err| os_log!("{}", err));
}
LAUNCH_COUNTER += 1;
CUresult::CUDA_SUCCESS
@@ -445,7 +444,7 @@ unsafe fn dump_pre_data(
"pre",
&kernel_dump.name,
LAUNCH_COUNTER,
- &kernel_dump.arguments,
+ kernel_dump.arguments.as_ref().map(|vec| &vec[..]),
)?;
Ok(())
}
@@ -455,8 +454,12 @@ unsafe fn dump_arguments(
prefix: &str,
kernel_name: &str,
counter: usize,
- args: &[usize],
+ args: Option<&[usize]>,
) -> Result<(), Box<dyn Error>> {
+ let args = match args {
+ None => return Ok(()),
+ Some(a) => a,
+ };
let mut dump_dir = get_dump_dir()?;
dump_dir.push(format!("{:04}_{}", counter, kernel_name));
dump_dir.push(prefix);
@@ -508,12 +511,19 @@ const CUDART_INTERFACE_GUID: CUuuid = CUuuid {
],
};
-const GET_MODULE_OFFSET: usize = 6;
static mut CUDART_INTERFACE_VTABLE: Vec<*const c_void> = Vec::new();
+const GET_MODULE_FROM_CUBIN_OFFSET: usize = 1;
+const GET_MODULE_FROM_CUBIN_EXT_OFFSET: usize = 6;
static mut ORIGINAL_GET_MODULE_FROM_CUBIN: Option<
unsafe extern "system" fn(
result: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
+ ) -> CUresult,
+> = None;
+static mut ORIGINAL_GET_MODULE_FROM_CUBIN_EXT: Option<
+ unsafe extern "system" fn(
+ result: *mut CUmodule,
+ fatbinc_wrapper: *const FatbincWrapper,
ptr1: *mut c_void,
ptr2: *mut c_void,
) -> CUresult,
@@ -539,16 +549,23 @@ pub unsafe fn cuGetExportTable(
CUDART_INTERFACE_VTABLE.as_mut_ptr(),
len,
);
- if GET_MODULE_OFFSET >= len {
+ if GET_MODULE_FROM_CUBIN_EXT_OFFSET >= len {
return CUresult::CUDA_ERROR_UNKNOWN;
}
ORIGINAL_GET_MODULE_FROM_CUBIN =
- mem::transmute(CUDART_INTERFACE_VTABLE[GET_MODULE_OFFSET]);
- CUDART_INTERFACE_VTABLE[GET_MODULE_OFFSET] = get_module_from_cubin as *const _;
+ mem::transmute(CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_OFFSET]);
+ CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_OFFSET] =
+ get_module_from_cubin as *const _;
+ ORIGINAL_GET_MODULE_FROM_CUBIN_EXT =
+ mem::transmute(CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_EXT_OFFSET]);
+ CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_EXT_OFFSET] =
+ get_module_from_cubin_ext as *const _;
}
*ppExportTable = CUDART_INTERFACE_VTABLE.as_ptr() as *const _;
return CUresult::CUDA_SUCCESS;
} else {
+ let guid = (*pExportTableId).bytes;
+ os_log!("Unsupported export table id: {{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15]);
cont(ppExportTable, pExportTableId)
}
}
@@ -598,11 +615,10 @@ struct FatbinFileHeader {
uncompressed_payload: c_ulong,
}
-unsafe extern "system" fn get_module_from_cubin(
+unsafe fn get_module_from_cubin_impl(
module: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
- ptr1: *mut c_void,
- ptr2: *mut c_void,
+ get_module_base: impl FnOnce() -> CUresult,
) -> CUresult {
if module == ptr::null_mut()
|| (*fatbinc_wrapper).magic != FATBINC_MAGIC
@@ -628,7 +644,7 @@ unsafe extern "system" fn get_module_from_cubin(
}
};
}
- let result = ORIGINAL_GET_MODULE_FROM_CUBIN.unwrap()(module, fatbinc_wrapper, ptr1, ptr2);
+ let result = get_module_base();
if result != CUresult::CUDA_SUCCESS {
return result;
}
@@ -644,6 +660,26 @@ unsafe extern "system" fn get_module_from_cubin(
result
}
+unsafe extern "system" fn get_module_from_cubin(
+ module: *mut CUmodule,
+ fatbinc_wrapper: *const FatbincWrapper,
+) -> CUresult {
+ get_module_from_cubin_impl(module, fatbinc_wrapper, || {
+ ORIGINAL_GET_MODULE_FROM_CUBIN.unwrap()(module, fatbinc_wrapper)
+ })
+}
+
+unsafe extern "system" fn get_module_from_cubin_ext(
+ module: *mut CUmodule,
+ fatbinc_wrapper: *const FatbincWrapper,
+ ptr1: *mut c_void,
+ ptr2: *mut c_void,
+) -> CUresult {
+ get_module_from_cubin_impl(module, fatbinc_wrapper, || {
+ ORIGINAL_GET_MODULE_FROM_CUBIN_EXT.unwrap()(module, fatbinc_wrapper, ptr1, ptr2)
+ })
+}
+
unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
let mut index = file;
let mut result = Vec::new();
@@ -680,6 +716,9 @@ unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option<Vec<
}
real_decompressed_size => {
decompressed_vec.truncate(real_decompressed_size as usize);
+ if decompressed_vec.last().copied().unwrap_or(1) != 0 {
+ decompressed_vec.push(0);
+ }
return Some(decompressed_vec);
}
}
diff --git a/zluda_dump/src/os_unix.rs b/zluda_dump/src/os_unix.rs
index b3d9343..91a004a 100644
--- a/zluda_dump/src/os_unix.rs
+++ b/zluda_dump/src/os_unix.rs
@@ -2,6 +2,8 @@ use std::ffi::{c_void, CStr};
const NVCUDA_DEFAULT_PATH: &'static [u8] = b"/usr/lib/x86_64-linux-gnu/libcuda.so.1\0";
+pub fn init() {}
+
pub unsafe fn load_cuda_library() -> *mut c_void {
libc::dlopen(
NVCUDA_DEFAULT_PATH.as_ptr() as *const _,
@@ -12,3 +14,17 @@ pub unsafe fn load_cuda_library() -> *mut c_void {
pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
libc::dlsym(handle, func.as_ptr() as *const _)
}
+
+#[macro_export]
+macro_rules! os_log {
+ ($format:tt) => {
+ {
+ eprintln!($format);
+ }
+ };
+ ($format:tt, $($obj: expr),+) => {
+ {
+ eprintln!($format, $($obj,)+);
+ }
+ };
+}
diff --git a/zluda_dump/src/os_win.rs b/zluda_dump/src/os_win.rs
index 7e411ac..70a2b42 100644
--- a/zluda_dump/src/os_win.rs
+++ b/zluda_dump/src/os_win.rs
@@ -5,9 +5,11 @@ use std::{
ptr,
};
+use std::os::windows::io::AsRawHandle;
use wchar::wch_c;
use winapi::{
shared::minwindef::HMODULE,
+ um::debugapi::OutputDebugStringA,
um::libloaderapi::{GetProcAddress, LoadLibraryW},
};
@@ -66,3 +68,32 @@ unsafe fn get_non_detoured_load_library(
pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
GetProcAddress(handle as *mut _, func.as_ptr()) as *mut _
}
+
+#[macro_export]
+macro_rules! os_log {
+ ($format:tt) => {
+ {
+ use crate::os::__log_impl;
+ __log_impl(format!($format));
+ }
+ };
+ ($format:tt, $($obj: expr),+) => {
+ {
+ use crate::os::__log_impl;
+ __log_impl(format!($format, $($obj,)+));
+ }
+ };
+}
+
+pub fn __log_impl(s: String) {
+ let log_to_stderr = std::io::stderr().as_raw_handle() != ptr::null_mut();
+ if log_to_stderr {
+ eprintln!("[ZLUDA_DUMP] {}\n", s);
+ } else {
+ let mut win_str = String::with_capacity("[ZLUDA_DUMP] ".len() + s.len() + 2);
+ win_str.push_str("[ZLUDA_DUMP] ");
+ win_str.push_str(&s);
+ win_str.push_str("\n\0");
+ unsafe { OutputDebugStringA(win_str.as_ptr() as *const _) };
+ }
+}