diff options
-rw-r--r-- | zluda_dump/src/cuda.rs | 7 | ||||
-rw-r--r-- | zluda_dump/src/lib.rs | 114 | ||||
-rw-r--r-- | zluda_dump/src/log.rs | 37 | ||||
-rw-r--r-- | zluda_dump/src/trace.rs | 166 |
4 files changed, 303 insertions, 21 deletions
diff --git a/zluda_dump/src/cuda.rs b/zluda_dump/src/cuda.rs index 35ec52c..dc86197 100644 --- a/zluda_dump/src/cuda.rs +++ b/zluda_dump/src/cuda.rs @@ -2491,15 +2491,16 @@ extern_redirect! { extern_redirect! { pub fn cuCtxDetach(ctx: CUcontext) -> CUresult; } -extern_redirect! { +extern_redirect_with_post! { pub fn cuModuleLoad(module: *mut CUmodule, fname: *const ::std::os::raw::c_char) -> CUresult; + super::cuModuleLoad_Post; } -extern_redirect_with! { +extern_redirect_with_post! { pub fn cuModuleLoadData( module: *mut CUmodule, image: *const ::std::os::raw::c_void, ) -> CUresult; - super::cuModuleLoadData; + super::cuModuleLoadData_Post; } extern_redirect_with! { pub fn cuModuleLoadDataEx( diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs index 7116eee..6a7545c 100644 --- a/zluda_dump/src/lib.rs +++ b/zluda_dump/src/lib.rs @@ -42,6 +42,26 @@ macro_rules! extern_redirect { }; } +macro_rules! extern_redirect_with_post { + ( + pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ; + $post_fn:path ; + ) => { + #[no_mangle] + pub extern "system" fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type { + let original_fn = |fn_ptr| { + let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) }; + typed_fn($( $arg_id ),*) + }; + crate::handle_cuda_function_call_with_probes( + stringify!($fn_name), + || (), original_fn, + move |logger, state, _, cuda_result| $post_fn ( $( $arg_id ),* , logger, state, cuda_result ) + ) + } + }; +} + macro_rules! extern_redirect_with { ( pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ; @@ -64,6 +84,7 @@ mod log; #[cfg_attr(windows, path = "os_win.rs")] #[cfg_attr(not(windows), path = "os_unix.rs")] mod os; +mod trace; pub static mut LIBCUDA_HANDLE: *mut c_void = ptr::null_mut(); pub static mut PENDING_LINKING: Option<HashMap<CUlinkState, Vec<ModuleDump>>> = None; @@ -121,7 +142,7 @@ impl<T> LateInit<T> { struct GlobalDelayedState { settings: Settings, libcuda_handle: NonNull<c_void>, - cuda_state: CUDAStateTracker, + cuda_state: trace::StateTracker, } impl GlobalDelayedState { @@ -140,10 +161,11 @@ impl GlobalDelayedState { return (LateInit::Error, fn_logger); } }; + let cuda_state = trace::StateTracker::new(&settings); let delayed_state = GlobalDelayedState { settings, libcuda_handle, - cuda_state: CUDAStateTracker::new(), + cuda_state, }; (LateInit::Success(delayed_state), fn_logger) } @@ -196,22 +218,6 @@ impl Settings { } } -// This struct contains all the information about current state of CUDA runtime -// that are relevant to us: modules, kernels, linking objects, etc. -struct CUDAStateTracker { - modules: HashMap<CUmodule, Option<ModuleDump>>, - module_counter: usize, -} - -impl CUDAStateTracker { - fn new() -> Self { - CUDAStateTracker { - modules: HashMap::new(), - module_counter: 0, - } - } -} - pub struct ModuleDump { content: Rc<String>, kernels_args: Option<HashMap<String, Vec<usize>>>, @@ -248,6 +254,50 @@ fn handle_cuda_function_call( cu_result } +fn handle_cuda_function_call_with_probes<T, PostFn>( + func: &'static str, + pre_probe: impl FnOnce() -> T, + original_cuda_fn: impl FnOnce(NonNull<c_void>) -> CUresult, + post_probe: PostFn, +) -> CUresult +where + for<'a> PostFn: FnOnce(&'a mut log::FunctionLogger, &'a mut trace::StateTracker, T, CUresult), +{ + let global_state_mutex = &*GLOBAL_STATE; + // We unwrap because there's really no sensible thing we could do, + // alternatively we could return a CUDA error, but I think it's fine to + // crash. This is a diagnostic utility, if the lock was poisoned we can't + // extract any useful trace or logging anyway + let mut global_state = &mut *global_state_mutex.lock().unwrap(); + let (mut logger, delayed_state) = match global_state.delayed_state { + LateInit::Success(ref mut delayed_state) => { + (global_state.log_factory.get_logger(func), delayed_state) + } + // There's no libcuda to load, so we might as well panic + LateInit::Error => panic!(), + LateInit::Unitialized => { + let (new_delayed_state, logger) = + GlobalDelayedState::new(func, &mut global_state.log_factory); + global_state.delayed_state = new_delayed_state; + (logger, global_state.delayed_state.as_mut().unwrap()) + } + }; + let name = std::ffi::CString::new(func).unwrap(); + let fn_ptr = + unsafe { os::get_proc_address(delayed_state.libcuda_handle.as_ptr(), name.as_c_str()) }; + let fn_ptr = NonNull::new(fn_ptr).unwrap(); + let pre_result = pre_probe(); + let cu_result = original_cuda_fn(fn_ptr); + logger.result = Some(cu_result); + post_probe( + &mut logger, + &mut delayed_state.cuda_state, + pre_result, + cu_result, + ); + cu_result +} + #[derive(Clone, Copy)] enum AllocLocation { Device, @@ -327,6 +377,34 @@ pub unsafe fn cuModuleLoadData( result } +#[allow(non_snake_case)] +pub(crate) fn cuModuleLoad_Post( + module: *mut CUmodule, + fname: *const ::std::os::raw::c_char, + fn_logger: &mut log::FunctionLogger, + state: &mut trace::StateTracker, + result: CUresult, +) { + if result != CUresult::CUDA_SUCCESS { + return; + } + state.record_new_module_file(unsafe { *module }, fname, fn_logger) +} + +#[allow(non_snake_case)] +pub(crate) fn cuModuleLoadData_Post( + module: *mut CUmodule, + raw_image: *const ::std::os::raw::c_void, + fn_logger: &mut log::FunctionLogger, + state: &mut trace::StateTracker, + result: CUresult, +) { + if result != CUresult::CUDA_SUCCESS { + return; + } + state.record_new_module(unsafe { *module }, raw_image, fn_logger) +} + unsafe fn record_module_image_raw(module: CUmodule, raw_image: *const ::std::os::raw::c_void) { if *(raw_image as *const u32) == 0x464c457f { os_log!("Unsupported ELF module image: {:?}", raw_image); diff --git a/zluda_dump/src/log.rs b/zluda_dump/src/log.rs index ca8a1ef..4ffc459 100644 --- a/zluda_dump/src/log.rs +++ b/zluda_dump/src/log.rs @@ -1,14 +1,17 @@ +use crate::cuda::CUmodule;
use crate::cuda::CUuuid;
use super::CUresult;
use super::Settings;
use std::borrow::Cow;
use std::error::Error;
+use std::ffi::c_void;
use std::fmt::Display;
use std::fs::File;
use std::io;
use std::io::Stderr;
use std::io::Write;
+use std::str::Utf8Error;
const LOG_PREFIX: &[u8] = b"[ZLUDA_DUMP] ";
@@ -237,6 +240,12 @@ impl<'a> FunctionLogger<'a> { self.log_queue.push(l);
}
+ pub(crate) fn log_io_error(&mut self, error: io::Result<()>) {
+ if let Err(e) = error {
+ self.log_queue.push(LogEntry::IoError(e));
+ }
+ }
+
fn flush_log_queue_to_write_buffer(&mut self) {
self.write_buffer.start_line();
self.write_buffer.write(&self.name);
@@ -284,6 +293,14 @@ impl<'a> Drop for FunctionLogger<'a> { pub(crate) enum LogEntry {
IoError(io::Error),
ErrorBox(Box<dyn Error>),
+ UnsupportedModule {
+ module: CUmodule,
+ raw_image: *const c_void,
+ kind: &'static str,
+ },
+ MalformedModulePath(Utf8Error),
+ MalformedModuleText(Utf8Error),
+ ModuleParsingError(usize),
}
impl Display for LogEntry {
@@ -291,6 +308,26 @@ impl Display for LogEntry { match self {
LogEntry::IoError(e) => e.fmt(f),
LogEntry::ErrorBox(e) => e.fmt(f),
+ LogEntry::UnsupportedModule {
+ module,
+ raw_image,
+ kind,
+ } => {
+ write!(
+ f,
+ "Unsupported {} module {:?} loaded from module image {:?}",
+ kind, module, raw_image
+ )
+ }
+ LogEntry::MalformedModulePath(e) => e.fmt(f),
+ LogEntry::MalformedModuleText(e) => e.fmt(f),
+ LogEntry::ModuleParsingError(index) => {
+ write!(
+ f,
+ "Error parsing module, log has been written to module_{:04}.log",
+ index
+ )
+ }
}
}
}
diff --git a/zluda_dump/src/trace.rs b/zluda_dump/src/trace.rs new file mode 100644 index 0000000..f060467 --- /dev/null +++ b/zluda_dump/src/trace.rs @@ -0,0 +1,166 @@ +use ptx::{ast::PtxError, Token};
+
+use crate::{cuda::CUmodule, log, Settings};
+use std::{
+ collections::HashMap,
+ ffi::{c_void, CStr},
+ fs::{self, File},
+ io::{self, Read, Write},
+ path::PathBuf,
+ rc::Rc,
+};
+
+// This struct is the heart of CUDA state tracking, it:
+// * receives calls from the probes about changes to CUDA state
+// * records updates to the state change
+// * writes out relevant state change and details to disk and log
+pub(crate) struct StateTracker {
+ writer: DumpWriter,
+ modules: HashMap<CUmodule, Option<ParsedModule>>,
+ module_counter: usize,
+}
+
+impl StateTracker {
+ pub(crate) fn new(settings: &Settings) -> Self {
+ StateTracker {
+ writer: DumpWriter::new(settings.dump_dir.clone()),
+ modules: HashMap::new(),
+ module_counter: 0,
+ }
+ }
+
+ pub(crate) fn record_new_module_file(
+ &mut self,
+ module: CUmodule,
+ file_name: *const i8,
+ fn_logger: &mut log::FunctionLogger,
+ ) {
+ let file_name = match unsafe { CStr::from_ptr(file_name) }.to_str() {
+ Ok(f) => f,
+ Err(err) => {
+ fn_logger.log(log::LogEntry::MalformedModulePath(err));
+ return;
+ }
+ };
+ let maybe_io_error = self.try_record_new_module_file(module, fn_logger, file_name);
+ fn_logger.log_io_error(maybe_io_error)
+ }
+
+ fn try_record_new_module_file(
+ &mut self,
+ module: CUmodule,
+ fn_logger: &mut log::FunctionLogger,
+ file_name: &str,
+ ) -> io::Result<()> {
+ let mut module_file = fs::File::open(file_name)?;
+ let mut read_buff = Vec::new();
+ module_file.read_to_end(&mut read_buff)?;
+ self.record_new_module(module, read_buff.as_ptr() as *const _, fn_logger);
+ Ok(())
+ }
+
+ pub(crate) fn record_new_module(
+ &mut self,
+ module: CUmodule,
+ raw_image: *const c_void,
+ fn_logger: &mut log::FunctionLogger,
+ ) {
+ self.module_counter += 1;
+ if unsafe { *(raw_image as *const [u8; 4]) } == *goblin::elf64::header::ELFMAG {
+ self.modules.insert(module, None);
+ // TODO: Parse ELF and write it to disk
+ fn_logger.log(log::LogEntry::UnsupportedModule {
+ module,
+ raw_image,
+ kind: "ELF",
+ })
+ } else if unsafe { *(raw_image as *const [u8; 8]) } == *goblin::archive::MAGIC {
+ self.modules.insert(module, None);
+ // TODO: Figure out how to get size of archive module and write it to disk
+ fn_logger.log(log::LogEntry::UnsupportedModule {
+ module,
+ raw_image,
+ kind: "archive",
+ })
+ } else {
+ self.record_module_ptx(module, raw_image, fn_logger)
+ }
+ }
+
+ fn record_module_ptx(
+ &mut self,
+ module: CUmodule,
+ raw_image: *const c_void,
+ fn_logger: &mut log::FunctionLogger,
+ ) {
+ let module_text = unsafe { CStr::from_ptr(raw_image as *const _) }.to_str();
+ let module_text = match module_text {
+ Ok(m) => m,
+ Err(utf8_err) => {
+ fn_logger.log(log::LogEntry::MalformedModuleText(utf8_err));
+ return;
+ }
+ };
+ fn_logger.log_io_error(self.writer.save_module(self.module_counter, module_text));
+ let mut errors = Vec::new();
+ let ast = ptx::ModuleParser::new().parse(&mut errors, module_text);
+ let ast = match (&*errors, ast) {
+ (&[], Ok(ast)) => ast,
+ (err_vec, res) => {
+ fn_logger.log(log::LogEntry::ModuleParsingError(self.module_counter));
+ fn_logger.log_io_error(self.writer.save_module_error_log(
+ self.module_counter,
+ err_vec,
+ res.err(),
+ ));
+ return;
+ }
+ };
+ // TODO: store kernel names and details
+ }
+}
+
+struct ParsedModule {
+ content: Rc<String>,
+ kernels_args: Option<HashMap<String, Vec<usize>>>,
+}
+
+// This structs writes out information about CUDA execution to the dump dir
+struct DumpWriter {
+ dump_dir: Option<PathBuf>,
+}
+
+impl DumpWriter {
+ fn new(dump_dir: Option<PathBuf>) -> Self {
+ Self { dump_dir }
+ }
+
+ fn save_module(&self, index: usize, text: &str) -> io::Result<()> {
+ let mut dump_file = match &self.dump_dir {
+ None => return Ok(()),
+ Some(d) => d.clone(),
+ };
+ dump_file.push(format!("module_{:04}.ptx", index));
+ let mut file = File::create(dump_file)?;
+ file.write_all(text.as_bytes())?;
+ Ok(())
+ }
+
+ fn save_module_error_log<'input>(
+ &self,
+ index: usize,
+ recoverable: &[ptx::ParseError<usize, Token<'input>, PtxError>],
+ unrecoverable: Option<ptx::ParseError<usize, Token<'input>, PtxError>>,
+ ) -> io::Result<()> {
+ let mut log_file = match &self.dump_dir {
+ None => return Ok(()),
+ Some(d) => d.clone(),
+ };
+ log_file.push(format!("module_{:04}.log", index));
+ let mut file = File::create(log_file)?;
+ for err in unrecoverable.iter().chain(recoverable.iter()) {
+ writeln!(file, "{}", err)?;
+ }
+ Ok(())
+ }
+}
|