From e459086c5bfb84ff3b382e65a95d0c6d162266fb Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Sun, 14 Nov 2021 02:25:51 +0100 Subject: Now dump function calls --- zluda_dump/Cargo.toml | 1 + zluda_dump/README.md | 1 + zluda_dump/src/lib.rs | 177 +++++++++++++++++++++++++++++++++++++++------- zluda_dump/src/log.rs | 26 +++++-- zluda_dump/src/os_unix.rs | 9 +-- zluda_dump/src/os_win.rs | 10 ++- 6 files changed, 186 insertions(+), 38 deletions(-) create mode 100644 zluda_dump/README.md diff --git a/zluda_dump/Cargo.toml b/zluda_dump/Cargo.toml index c88dca7..b3e8c74 100644 --- a/zluda_dump/Cargo.toml +++ b/zluda_dump/Cargo.toml @@ -14,6 +14,7 @@ lz4-sys = "1.9" regex = "1.4" dynasm = "1.1" dynasmrt = "1.1" +lazy_static = "1.4" [target.'cfg(windows)'.dependencies] winapi = { version = "0.3", features = ["libloaderapi", "debugapi", "std"] } diff --git a/zluda_dump/README.md b/zluda_dump/README.md new file mode 100644 index 0000000..52cecc8 --- /dev/null +++ b/zluda_dump/README.md @@ -0,0 +1 @@ +sed 's/(.*//g' log.txt | sort | uniq > uniq.txt \ No newline at end of file diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs index 2887409..7116eee 100644 --- a/zluda_dump/src/lib.rs +++ b/zluda_dump/src/lib.rs @@ -7,9 +7,11 @@ use std::{ io::{self, prelude::*}, mem, os::raw::{c_int, c_uint, c_ulong, c_ushort}, - path::{Path, PathBuf}, + path::PathBuf, + ptr::NonNull, rc::Rc, slice, + sync::Mutex, }; use std::{fs::File, ptr}; @@ -20,6 +22,9 @@ use cuda::{ use ptx::ast; use regex::Regex; +#[macro_use] +extern crate lazy_static; + const CU_LAUNCH_PARAM_END: *mut c_void = 0 as *mut _; const CU_LAUNCH_PARAM_BUFFER_POINTER: *mut c_void = 1 as *mut _; const CU_LAUNCH_PARAM_BUFFER_SIZE: *mut c_void = 2 as *mut _; @@ -28,14 +33,11 @@ macro_rules! extern_redirect { (pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? 
) -> $ret_type:ty ;) => { #[no_mangle] pub extern "system" fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type { - unsafe { $crate::init_libcuda_handle(stringify!($fn_name)) }; - let name = std::ffi::CString::new(stringify!($fn_name)).unwrap(); - let fn_ptr = unsafe { crate::os::get_proc_address($crate::LIBCUDA_HANDLE, &name) }; - if fn_ptr == std::ptr::null_mut() { - return CUresult::CUDA_ERROR_UNKNOWN; - } - let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) }; - typed_fn($( $arg_id ),*) + let original_fn = |fn_ptr| { + let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) }; + typed_fn($( $arg_id ),*) + }; + crate::handle_cuda_function_call(stringify!($fn_name), original_fn) } }; } @@ -47,17 +49,11 @@ macro_rules! extern_redirect_with { ) => { #[no_mangle] pub extern "system" fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type { - unsafe { $crate::init_libcuda_handle(stringify!($fn_name)) }; - let continuation = |$( $arg_id : $arg_type),* | { - let name = std::ffi::CString::new(stringify!($fn_name)).unwrap(); - let fn_ptr = unsafe { crate::os::get_proc_address($crate::LIBCUDA_HANDLE, &name) }; - if fn_ptr == std::ptr::null_mut() { - return CUresult::CUDA_ERROR_UNKNOWN; - } + let original_fn = |fn_ptr| { let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) }; typed_fn($( $arg_id ),*) }; - unsafe { $receiver($( $arg_id ),* , continuation) } + crate::handle_cuda_function_call(stringify!($fn_name), original_fn) } }; } @@ -81,10 +77,81 @@ pub static mut KERNEL_PATTERN: Option = None; pub static mut OVERRIDE_COMPUTE_CAPABILITY_MAJOR: Option = None; pub static mut KERNEL_INDEX_MINIMUM: usize = 0; pub static mut KERNEL_INDEX_MAXIMUM: usize = usize::MAX; -pub(crate) static mut LOG_FACTORY: Option = None; +static mut LOG_FACTORY: Option = None; + +lazy_static! 
{ + static ref GLOBAL_STATE: Mutex = Mutex::new(GlobalState::new()); +} + +struct GlobalState { + log_factory: log::Factory, + // We split off fields that require a mutable reference to log factory to be + // created; additionally, creation of some fields in this struct can fail + // initialization (e.g. we were passed a nonexistent path to libcuda) + delayed_state: LateInit, +} + +unsafe impl Send for GlobalState {} + +impl GlobalState { + fn new() -> Self { + GlobalState { + log_factory: log::Factory::new(), + delayed_state: LateInit::Unitialized, + } + } +} + +enum LateInit { + Success(T), + Unitialized, + Error, +} + +impl LateInit { + fn as_mut(&mut self) -> Option<&mut T> { + match self { + LateInit::Success(t) => Some(t), + LateInit::Unitialized => None, + LateInit::Error => None, + } + } +} + +struct GlobalDelayedState { + settings: Settings, + libcuda_handle: NonNull, + cuda_state: CUDAStateTracker, +} + +impl GlobalDelayedState { + fn new<'a>( + func: &'static str, + factory: &'a mut log::Factory, + ) -> (LateInit, log::FunctionLogger<'a>) { + let (mut fn_logger, settings) = factory.get_first_logger_and_init_settings(func); + let maybe_libcuda_handle = unsafe { os::load_cuda_library(&settings.libcuda_path) }; + let libcuda_handle = match NonNull::new(maybe_libcuda_handle) { + Some(h) => h, + None => { + fn_logger.log(log::LogEntry::ErrorBox( + format!("Invalid CUDA library at path {}", &settings.libcuda_path).into(), + )); + return (LateInit::Error, fn_logger); + } + }; + let delayed_state = GlobalDelayedState { + settings, + libcuda_handle, + cuda_state: CUDAStateTracker::new(), + }; + (LateInit::Success(delayed_state), fn_logger) + } +} -pub(crate) struct Settings { +struct Settings { dump_dir: Option, + libcuda_path: String, } impl Settings { @@ -97,7 +164,18 @@ impl Settings { None } }; - Settings { dump_dir } + let libcuda_path = match env::var("ZLUDA_DUMP_LIBCUDA_FILE") { + Err(env::VarError::NotPresent) => os::LIBCUDA_DEFAULT_PATH.to_owned(), + 
Err(e) => { + logger.log(log::LogEntry::ErrorBox(Box::new(e) as _)); + os::LIBCUDA_DEFAULT_PATH.to_owned() + } + Ok(env_string) => env_string, + }; + Settings { + dump_dir, + libcuda_path, + } } fn read_and_init_dump_dir() -> Result, Box> { @@ -118,11 +196,20 @@ impl Settings { } } -#[derive(Clone, Copy)] -enum AllocLocation { - Device, - DeviceV2, - Host, +// This struct contains all the information about current state of CUDA runtime +// that are relevant to us: modules, kernels, linking objects, etc. +struct CUDAStateTracker { + modules: HashMap>, + module_counter: usize, +} + +impl CUDAStateTracker { + fn new() -> Self { + CUDAStateTracker { + modules: HashMap::new(), + module_counter: 0, + } + } } pub struct ModuleDump { @@ -130,6 +217,44 @@ pub struct ModuleDump { kernels_args: Option>>, } +fn handle_cuda_function_call( + func: &'static str, + original_cuda_fn: impl FnOnce(NonNull) -> CUresult, +) -> CUresult { + let global_state_mutex = &*GLOBAL_STATE; + // We unwrap because there's really no sensible thing we could do, + // alternatively we could return a CUDA error, but I think it's fine to + // crash. 
This is a diagnostic utility, if the lock was poisoned we can't + // extract any useful trace or logging anyway + let mut global_state = &mut *global_state_mutex.lock().unwrap(); + let (mut logger, delayed_state) = match global_state.delayed_state { + LateInit::Success(ref mut delayed_state) => { + (global_state.log_factory.get_logger(func), delayed_state) + } + // There's no libcuda to load, so we might as well panic + LateInit::Error => panic!(), + LateInit::Unitialized => { + let (new_delayed_state, logger) = + GlobalDelayedState::new(func, &mut global_state.log_factory); + global_state.delayed_state = new_delayed_state; + (logger, global_state.delayed_state.as_mut().unwrap()) + } + }; + let name = std::ffi::CString::new(func).unwrap(); + let fn_ptr = + unsafe { os::get_proc_address(delayed_state.libcuda_handle.as_ptr(), name.as_c_str()) }; + let cu_result = NonNull::new(fn_ptr).map_or(CUresult::CUDA_ERROR_UNKNOWN, original_cuda_fn); + logger.result = Some(cu_result); + cu_result +} + +#[derive(Clone, Copy)] +enum AllocLocation { + Device, + DeviceV2, + Host, +} + pub struct KernelDump { module_content: Rc, name: String, @@ -145,7 +270,7 @@ pub unsafe fn init_libcuda_handle(func: &'static str) { MODULES = Some(HashMap::new()); KERNELS = Some(HashMap::new()); BUFFERS = Some(BTreeMap::new()); - let libcuda_handle = os::load_cuda_library(); + let libcuda_handle = ptr::null_mut(); assert_ne!(libcuda_handle, ptr::null_mut()); LIBCUDA_HANDLE = libcuda_handle; match env::var("ZLUDA_DUMP_KERNEL") { diff --git a/zluda_dump/src/log.rs b/zluda_dump/src/log.rs index 2b9ef91..ca8a1ef 100644 --- a/zluda_dump/src/log.rs +++ b/zluda_dump/src/log.rs @@ -1,5 +1,8 @@ +use crate::cuda::CUuuid; + use super::CUresult; use super::Settings; +use std::borrow::Cow; use std::error::Error; use std::fmt::Display; use std::fs::File; @@ -193,7 +196,20 @@ impl Factory { pub(crate) fn get_logger(&mut self, func: &'static str) -> FunctionLogger { FunctionLogger { result: None, - name: func, + name: Cow::Borrowed(func), + 
fallible_emitter: &mut self.fallible_emitter, + infallible_emitter: &mut self.infallible_emitter, + write_buffer: &mut self.write_buffer, + log_queue: &mut self.log_queue, + } + } + + pub(crate) fn get_logger_dark_api(&mut self, guid: CUuuid, idx: usize) -> FunctionLogger { + let guid = guid.bytes; + let fn_name = format!("{{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}::{}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15], idx); + FunctionLogger { + result: None, + name: Cow::Owned(fn_name), fallible_emitter: &mut self.fallible_emitter, infallible_emitter: &mut self.infallible_emitter, write_buffer: &mut self.write_buffer, @@ -209,7 +225,7 @@ impl Factory { // * We want to handle panics gracefully with Drop pub(crate) struct FunctionLogger<'a> { pub(crate) result: Option, - name: &'static str, + name: Cow<'static, str>, infallible_emitter: &'a mut Box, fallible_emitter: &'a mut Option>, write_buffer: &'a mut WriteBuffer, @@ -223,7 +239,7 @@ impl<'a> FunctionLogger<'a> { fn flush_log_queue_to_write_buffer(&mut self) { self.write_buffer.start_line(); - self.write_buffer.write(self.name); + self.write_buffer.write(&self.name); self.write_buffer.write("(...) 
-> "); if let Some(result) = self.result { write!(self.write_buffer, "{:#X}", result.0).unwrap_or_else(|_| unreachable!()); @@ -360,7 +376,7 @@ mod os { #[cfg(test)] mod tests { - use std::{cell::RefCell, io, rc::Rc, str}; + use std::{borrow::Cow, cell::RefCell, io, rc::Rc, str}; use super::{FunctionLogger, LogEntry, WriteTrailingZeroAware}; use crate::{log::WriteBuffer, CUresult}; @@ -422,7 +438,7 @@ mod tests { let mut log_queue = Vec::new(); let mut func_logger = FunctionLogger { result: Some(CUresult::CUDA_SUCCESS), - name: "cuInit", + name: Cow::Borrowed("cuInit"), infallible_emitter: &mut infallible_emitter, fallible_emitter: &mut fallible_emitter, write_buffer: &mut write_buffer, diff --git a/zluda_dump/src/os_unix.rs b/zluda_dump/src/os_unix.rs index 0d9db04..3b37e74 100644 --- a/zluda_dump/src/os_unix.rs +++ b/zluda_dump/src/os_unix.rs @@ -1,12 +1,13 @@ use crate::cuda::CUuuid; -use std::ffi::{c_void, CStr}; +use std::ffi::{c_void, CStr, CString}; use std::mem; -const NVCUDA_DEFAULT_PATH: &'static [u8] = b"/usr/lib/x86_64-linux-gnu/libcuda.so.1\0"; +pub(crate) const LIBCUDA_DEFAULT_PATH: &'static str = "/usr/lib/x86_64-linux-gnu/libcuda.so.1"; -pub unsafe fn load_cuda_library() -> *mut c_void { +pub unsafe fn load_cuda_library(libcuda_path: &str) -> *mut c_void { + let libcuda_path = CString::new(libcuda_path).unwrap(); libc::dlopen( - NVCUDA_DEFAULT_PATH.as_ptr() as *const _, + libcuda_path.as_ptr() as *const _, libc::RTLD_LOCAL | libc::RTLD_NOW, ) } diff --git a/zluda_dump/src/os_win.rs b/zluda_dump/src/os_win.rs index e99b653..ab4d1d3 100644 --- a/zluda_dump/src/os_win.rs +++ b/zluda_dump/src/os_win.rs @@ -15,12 +15,12 @@ use winapi::{ use crate::cuda::CUuuid; -const NVCUDA_DEFAULT_PATH: &[u16] = wch_c!(r"C:\Windows\System32\nvcuda.dll"); +pub(crate) const LIBCUDA_DEFAULT_PATH: &'static str = "C:\\Windows\\System32\\nvcuda.dll"; const LOAD_LIBRARY_NO_REDIRECT: &'static [u8] = b"ZludaLoadLibraryW_NoRedirect\0"; 
include!("../../zluda_redirect/src/payload_guid.rs"); -pub unsafe fn load_cuda_library() -> *mut c_void { +pub unsafe fn load_cuda_library(libcuda_path: &str) -> *mut c_void { let load_lib = if is_detoured() { match get_non_detoured_load_library() { Some(load_lib) => load_lib, @@ -29,7 +29,11 @@ pub unsafe fn load_cuda_library() -> *mut c_void { } else { LoadLibraryW }; - load_lib(NVCUDA_DEFAULT_PATH.as_ptr()) as *mut _ + let libcuda_path_uf16 = libcuda_path + .encode_utf16() + .chain(std::iter::once(0)) + .collect::>(); + load_lib(libcuda_path_uf16.as_ptr()) as *mut _ } unsafe fn is_detoured() -> bool { -- cgit v1.2.3