From a125b0746f69f5f661b6d6caeb0b3e5511eeabdc Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Sun, 21 Nov 2021 02:23:01 +0100 Subject: Do full dumping from dark api module creation --- zluda_dump/src/cuda.rs | 39 ++-- zluda_dump/src/dark_api.rs | 556 +++++++++++++++++++++++++++++++++++++++++++++ zluda_dump/src/lib.rs | 74 +++++- zluda_dump/src/log.rs | 80 ++++++- zluda_dump/src/trace.rs | 141 ++++++++++-- 5 files changed, 839 insertions(+), 51 deletions(-) create mode 100644 zluda_dump/src/dark_api.rs diff --git a/zluda_dump/src/cuda.rs b/zluda_dump/src/cuda.rs index dc86197..8cdfde9 100644 --- a/zluda_dump/src/cuda.rs +++ b/zluda_dump/src/cuda.rs @@ -2355,13 +2355,12 @@ extern_redirect! { dev: CUdevice, ) -> CUresult; } -extern_redirect_with! { +extern_redirect! { pub fn cuDeviceGetAttribute( pi: *mut ::std::os::raw::c_int, attrib: CUdevice_attribute, dev: CUdevice, ) -> CUresult; - super::cuDeviceGetAttribute; } extern_redirect! { pub fn cuDeviceGetNvSciSyncAttributes( @@ -2502,7 +2501,7 @@ extern_redirect_with_post! { ) -> CUresult; super::cuModuleLoadData_Post; } -extern_redirect_with! { +extern_redirect_with_post! { pub fn cuModuleLoadDataEx( module: *mut CUmodule, image: *const ::std::os::raw::c_void, @@ -2510,7 +2509,7 @@ extern_redirect_with! { options: *mut CUjit_option, optionValues: *mut *mut ::std::os::raw::c_void, ) -> CUresult; - super::cuModuleLoadDataEx; + super::cuModuleLoadDataEx_Post; } extern_redirect! { pub fn cuModuleLoadFatBinary( @@ -2521,13 +2520,13 @@ extern_redirect! { extern_redirect! { pub fn cuModuleUnload(hmod: CUmodule) -> CUresult; } -extern_redirect_with! { +extern_redirect_with_post! { pub fn cuModuleGetFunction( hfunc: *mut CUfunction, hmod: CUmodule, name: *const ::std::os::raw::c_char, ) -> CUresult; - super::cuModuleGetFunction; + super::cuModuleGetFunction_Post; } extern_redirect! { pub fn cuModuleGetGlobal( @@ -2575,7 +2574,7 @@ extern_redirect! { stateOut: *mut CUlinkState, ) -> CUresult; } -extern_redirect_with! { +extern_redirect! { pub fn cuLinkAddData( state: CUlinkState, type_: CUjitInputType, @@ -2586,9 +2585,8 @@ extern_redirect_with! { options: *mut CUjit_option, optionValues: *mut *mut ::std::os::raw::c_void, ) -> CUresult; - super::cuLinkAddData; } -extern_redirect_with! { +extern_redirect! { pub fn cuLinkAddData_v2( state: CUlinkState, type_: CUjitInputType, @@ -2599,9 +2597,8 @@ extern_redirect_with! { options: *mut CUjit_option, optionValues: *mut *mut ::std::os::raw::c_void, ) -> CUresult; - super::cuLinkAddData; } -extern_redirect_with! { +extern_redirect! { pub fn cuLinkAddFile( state: CUlinkState, type_: CUjitInputType, @@ -2610,9 +2607,8 @@ extern_redirect_with! { options: *mut CUjit_option, optionValues: *mut *mut ::std::os::raw::c_void, ) -> CUresult; - super::cuLinkAddFile; } -extern_redirect_with! { +extern_redirect! { pub fn cuLinkAddFile_v2( state: CUlinkState, type_: CUjitInputType, @@ -2621,7 +2617,6 @@ extern_redirect_with! { options: *mut CUjit_option, optionValues: *mut *mut ::std::os::raw::c_void, ) -> CUresult; - super::cuLinkAddFile; } extern_redirect! { pub fn cuLinkComplete( @@ -2639,13 +2634,11 @@ extern_redirect! { extern_redirect! { pub fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult; } -extern_redirect_with! { +extern_redirect! { pub fn cuMemAlloc(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult; - super::cuMemAlloc; } -extern_redirect_with! { +extern_redirect! { pub fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult; - super::cuMemAlloc_v2; } extern_redirect! { pub fn cuMemAllocPitch( @@ -2694,13 +2687,12 @@ extern_redirect! { extern_redirect! { pub fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult; } -extern_redirect_with! { +extern_redirect! { pub fn cuMemHostAlloc( pp: *mut *mut ::std::os::raw::c_void, bytesize: usize, Flags: ::std::os::raw::c_uint, ) -> CUresult; - super::cuMemHostAlloc; } extern_redirect! { pub fn cuMemHostGetDevicePointer( @@ -3658,7 +3650,7 @@ extern_redirect! { extern_redirect! { pub fn cuFuncSetSharedMemConfig(hfunc: CUfunction, config: CUsharedconfig) -> CUresult; } -extern_redirect_with! { +extern_redirect! { pub fn cuLaunchKernel( f: CUfunction, gridDimX: ::std::os::raw::c_uint, @@ -3672,7 +3664,6 @@ extern_redirect_with! { kernelParams: *mut *mut ::std::os::raw::c_void, extra: *mut *mut ::std::os::raw::c_void, ) -> CUresult; - super::cuLaunchKernel; } extern_redirect! { pub fn cuLaunchCooperativeKernel( @@ -4394,12 +4385,12 @@ extern_redirect! { hStream: CUstream, ) -> CUresult; } -extern_redirect_with! { +extern_redirect_with_post! { pub fn cuGetExportTable( ppExportTable: *mut *const ::std::os::raw::c_void, pExportTableId: *const CUuuid, ) -> CUresult; - super::cuGetExportTable; + super::cuGetExportTable_Post; } extern_redirect! { pub fn cuFuncGetModule(hmod: *mut CUmodule, hfunc: CUfunction) -> CUresult; diff --git a/zluda_dump/src/dark_api.rs b/zluda_dump/src/dark_api.rs new file mode 100644 index 0000000..b9fd9c2 --- /dev/null +++ b/zluda_dump/src/dark_api.rs @@ -0,0 +1,556 @@ +use crate::{log::UInt, GlobalDelayedState}; +use std::borrow::Cow; + +use crate::{ + cuda::{CUmodule, CUresult, CUuuid}, + log, os, + trace::StateTracker, +}; +use std::{ + collections::{hash_map, HashMap}, + ffi::c_void, + mem, + os::raw::{c_int, c_uint, c_ulong, c_ushort}, + ptr, slice, +}; + +pub(crate) struct DarkApiState { + // Key is Box, Vec<*const c_void>>, + original: OriginalExports, +} + +pub(crate) struct OriginalExports { + original_get_module_from_cubin: Option< + unsafe extern "system" fn( + result: *mut CUmodule, + fatbinc_wrapper: *const FatbincWrapper, + ) -> CUresult, + >, + original_get_module_from_cubin_ext1: Option< + unsafe extern "system" fn( + result: *mut CUmodule, + fatbinc_wrapper: *const FatbincWrapper, + ptr1: *mut c_void, + ptr2: *mut c_void, + _unknown: usize, + ) -> CUresult, + >, + original_get_module_from_cubin_ext2: Option< + unsafe extern "system" fn( + fatbinc_wrapper: *const FatbinHeader, + result: *mut CUmodule, + ptr1: *mut c_void, + ptr2: *mut c_void, + _unknown: usize, + ) -> CUresult, + >, +} + +impl DarkApiState { + pub(crate) fn new() -> Self { + let original = OriginalExports { + original_get_module_from_cubin: None, + original_get_module_from_cubin_ext1: None, + original_get_module_from_cubin_ext2: None, + }; + DarkApiState { + overrides: HashMap::new(), + original, + } + } +} + +pub(crate) fn override_export_table( + pp_export_table: *mut *const c_void, + p_export_table_id: *const CUuuid, + state: &mut crate::trace::StateTracker, +) { + let state = &mut state.dark_api; + let export_table_mut = unsafe { &mut *pp_export_table }; + let export_id = Box::new(unsafe { *p_export_table_id }); + *export_table_mut = match state.overrides.entry(export_id) { + hash_map::Entry::Occupied(entry) => entry.get().as_ptr() as *const _, + hash_map::Entry::Vacant(entry) => { + let guid_ptr = &**entry.key() as *const _; + entry + .insert(unsafe { + create_new_override(*pp_export_table as *const _, guid_ptr, &mut state.original) + }) + .as_ptr() as *const _ + } + }; +} + +unsafe fn create_new_override( + export_table: *const *const c_void, + export_id: *const CUuuid, + state: &mut OriginalExports, +) -> Vec<*const c_void> { + let mut byte_length: usize = *(export_table as *const usize); + // Some export tables don't start with a byte count, but directly with a + // pointer, and are instead terminated by 0 or MAX + let export_functions_start_idx; + let mut override_table = Vec::new(); + if byte_length > 0x10000 { + export_functions_start_idx = 0; + let mut i = 0; + loop { + let current_fn = export_table.add(i); + let current_fn_numeric = *current_fn as usize; + if current_fn_numeric == 0usize || current_fn_numeric == usize::MAX { + byte_length = (i + 1) * mem::size_of::(); + break; + } + i += 1; + } + } else { + override_table.push(byte_length as *const _); + export_functions_start_idx = 1; + } + for i in export_functions_start_idx..(byte_length / mem::size_of::()) { + let current_fn = export_table.add(i); + override_table.push(get_export_override_fn(state, *current_fn, export_id, i)); + } + override_table +} + +unsafe extern "system" fn report_unknown_export_table_call( + export_table: *const CUuuid, + idx: usize, +) { + if let Ok(mut global_state) = crate::GLOBAL_STATE.lock() { + let mut logger = global_state + .log_factory + .get_logger_dark_api(*export_table, idx); + logger.log(log::LogEntry::UnknownExportTableFn) + } +} + +const CUDART_INTERFACE_GUID: CUuuid = CUuuid { + bytes: [ + 0x6b, 0xd5, 0xfb, 0x6c, 0x5b, 0xf4, 0xe7, 0x4a, 0x89, 0x87, 0xd9, 0x39, 0x12, 0xfd, 0x9d, + 0xf9, + ], +}; + +const TOOLS_RUNTIME_CALLBACK_HOOKS_GUID: CUuuid = CUuuid { + bytes: [ + 0xa0, 0x94, 0x79, 0x8c, 0x2e, 0x74, 0x2e, 0x74, 0x93, 0xf2, 0x08, 0x00, 0x20, 0x0c, 0x0a, + 0x66, + ], +}; + +const CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID: CUuuid = CUuuid { + bytes: [ + 0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, 0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95, + 0x93, + ], +}; + +const CTX_CREATE_BYPASS_GUID: CUuuid = CUuuid { + bytes: [ + 0x0C, 0xA5, 0x0B, 0x8C, 0x10, 0x04, 0x92, 0x9A, 0x89, 0xA7, 0xD0, 0xDF, 0x10, 0xE7, 0x72, + 0x86, + ], +}; + +const HEAP_ACCESS_GUID: CUuuid = CUuuid { + bytes: [ + 0x19, 0x5B, 0xCB, 0xF4, 0xD6, 0x7D, 0x02, 0x4A, 0xAC, 0xC5, 0x1D, 0x29, 0xCE, 0xA6, 0x31, + 0xAE, + ], +}; + +const DEVICE_EXTENDED_RT_GUID: CUuuid = CUuuid { + bytes: [ + 0xB1u8, 0x05, 0x41, 0xE1, 0xF7, 0xC7, 0xC7, 0x4A, 0x9F, 0x64, 0xF2, 0x23, 0xBE, 0x99, 0xF1, + 0xE2, + ], +}; + +unsafe fn get_export_override_fn( + state: &mut OriginalExports, + original_fn: *const c_void, + guid: *const CUuuid, + idx: usize, +) -> *const c_void { + match (*guid, idx) { + (TOOLS_RUNTIME_CALLBACK_HOOKS_GUID, 2) + | (TOOLS_RUNTIME_CALLBACK_HOOKS_GUID, 6) + | (CUDART_INTERFACE_GUID, 2) + | (CUDART_INTERFACE_GUID, 7) + | (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 0) + | (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 1) + | (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 2) + | (CTX_CREATE_BYPASS_GUID, 1) + | (HEAP_ACCESS_GUID, 1) + | (HEAP_ACCESS_GUID, 2) + | (DEVICE_EXTENDED_RT_GUID, 5) + | (DEVICE_EXTENDED_RT_GUID, 13) => original_fn, + (CUDART_INTERFACE_GUID, 1) => { + state.original_get_module_from_cubin = mem::transmute(original_fn); + get_module_from_cubin as *const _ + } + (CUDART_INTERFACE_GUID, 6) => { + state.original_get_module_from_cubin_ext1 = mem::transmute(original_fn); + get_module_from_cubin_ext1 as *const _ + } + (CUDART_INTERFACE_GUID, 8) => { + state.original_get_module_from_cubin_ext2 = mem::transmute(original_fn); + get_module_from_cubin_ext2 as *const _ + } + _ => { + // terminator if it's an export table that is not size-prefixed + if original_fn == ptr::null() || (original_fn as usize) == usize::MAX { + ptr::null() + } else { + os::get_thunk(original_fn, report_unknown_export_table_call, guid, idx) + } + } + } +} + +const FATBINC_MAGIC: c_uint = 0x466243B1; +const FATBINC_VERSION_V1: c_uint = 0x1; +const FATBINC_VERSION_V2: c_uint = 0x2; + +#[repr(C)] +struct FatbincWrapper { + magic: c_uint, + version: c_uint, + data: *const FatbinHeader, + filename_or_fatbins: *const c_void, +} + +const FATBIN_MAGIC: c_uint = 0xBA55ED50; +const FATBIN_VERSION: c_ushort = 0x01; + +#[repr(C, align(8))] +struct FatbinHeader { + magic: c_uint, + version: c_ushort, + header_size: c_ushort, + files_size: c_ulong, // excluding frame header, size of all blocks framed by this frame +} + +const FATBIN_FILE_HEADER_KIND_PTX: c_ushort = 0x01; +const FATBIN_FILE_HEADER_KIND_ELF: c_ushort = 0x02; +const FATBIN_FILE_HEADER_VERSION_CURRENT: c_ushort = 0x101; + +// assembly file header is a bit different, but we don't care +#[repr(C)] +#[derive(Debug)] +struct FatbinFileHeader { + kind: c_ushort, + version: c_ushort, + header_size: c_uint, + padded_payload_size: c_uint, + unknown0: c_uint, // check if it's written into separately + payload_size: c_uint, + unknown1: c_uint, + unknown2: c_uint, + sm_version: c_uint, + bit_width: c_uint, + unknown3: c_uint, + unknown4: c_ulong, + unknown5: c_ulong, + uncompressed_payload: c_ulong, +} + +unsafe fn record_submodules_from_wrapped_fatbin( + module: *mut CUmodule, + fatbinc_wrapper: *const FatbincWrapper, + fn_logger: &mut log::FunctionLogger, + delayed_state: &mut GlobalDelayedState, + original_fn: impl FnOnce(&OriginalExports) -> CUresult, +) -> CUresult { + let result = original_fn(&delayed_state.cuda_state.dark_api.original); + fn_logger.result = Some(result); + let magic = (*fatbinc_wrapper).magic; + if magic != FATBINC_MAGIC { + fn_logger.log(log::LogEntry::UnexpectedBinaryField { + field_name: "FATBINC_MAGIC", + expected: vec![UInt::U32(FATBINC_MAGIC)], + observed: UInt::U32(magic), + }); + } + if (*fatbinc_wrapper).version != FATBINC_VERSION_V1 + && (*fatbinc_wrapper).version != FATBINC_VERSION_V2 + { + fn_logger.log(log::LogEntry::UnexpectedBinaryField { + field_name: "FATBINC_VERSION", + expected: vec![UInt::U32(FATBINC_VERSION_V1), UInt::U32(FATBINC_VERSION_V2)], + observed: UInt::U32(magic), + }); + } + let is_version_2 = (*fatbinc_wrapper).version == FATBINC_VERSION_V2; + record_submodules_from_fatbin( + *module, + (*fatbinc_wrapper).data, + if is_version_2 { Some(1) } else { None }, + fn_logger, + &mut delayed_state.cuda_state, + ); + if is_version_2 { + let mut current = (*fatbinc_wrapper).filename_or_fatbins as *const *const c_void; + while *current != ptr::null() { + record_submodules_from_fatbin( + *module, + *current as *const _, + Some(2), + fn_logger, + &mut delayed_state.cuda_state, + ); + current = current.add(1); + } + } + result +} + +unsafe fn record_submodules_from_fatbin( + module: CUmodule, + fatbin_header: *const FatbinHeader, + fatbin_version: Option, + logger: &mut log::FunctionLogger, + state: &mut StateTracker, +) { + let magic = (*fatbin_header).magic; + if magic != FATBIN_MAGIC { + logger.log(log::LogEntry::UnexpectedBinaryField { + field_name: "FATBIN_MAGIC", + expected: vec![UInt::U32(FATBIN_MAGIC)], + observed: UInt::U32(magic), + }); + return; + } + let version = (*fatbin_header).version; + if version != FATBIN_VERSION { + logger.log(log::LogEntry::UnexpectedBinaryField { + field_name: "FATBIN_VERSION", + expected: vec![UInt::U16(FATBIN_VERSION)], + observed: UInt::U16(version), + }); + return; + } + let file = (fatbin_header as *const u8).add((*fatbin_header).header_size as usize); + let end = file.add((*fatbin_header).files_size as usize); + record_submodules( + fatbin_version == Some(2), + module, + fatbin_version, + logger, + state, + file, + end, + ); +} + +unsafe extern "system" fn get_module_from_cubin( + module: *mut CUmodule, + fatbinc_wrapper: *const FatbincWrapper, +) -> CUresult { + let global_state = &mut *super::GLOBAL_STATE.lock().unwrap(); + let mut fn_logger = global_state + .log_factory + .get_logger_dark_api(CUDART_INTERFACE_GUID, 1); + let global_state = &mut *super::GLOBAL_STATE.lock().unwrap(); + let delayed_state = global_state.delayed_state.unwrap_mut(); + record_submodules_from_wrapped_fatbin( + module, + fatbinc_wrapper, + &mut fn_logger, + delayed_state, + |original_exports| { + original_exports.original_get_module_from_cubin.unwrap()(module, fatbinc_wrapper) + }, + ) +} + +unsafe extern "system" fn get_module_from_cubin_ext1( + module: *mut CUmodule, + fatbinc_wrapper: *const FatbincWrapper, + ptr1: *mut c_void, + ptr2: *mut c_void, + _unknown: usize, +) -> CUresult { + let global_state = &mut *super::GLOBAL_STATE.lock().unwrap(); + let mut fn_logger = global_state + .log_factory + .get_logger_dark_api(CUDART_INTERFACE_GUID, 6); + if ptr1 != ptr::null_mut() { + fn_logger.log(log::LogEntry::UnexpectedArgument { + arg_name: stringify!(ptr1), + expected: vec![UInt::USize(0)], + observed: UInt::USize(ptr1 as usize), + }); + } + if ptr2 != ptr::null_mut() { + fn_logger.log(log::LogEntry::UnexpectedArgument { + arg_name: stringify!(ptr2), + expected: vec![UInt::USize(0)], + observed: UInt::USize(ptr2 as usize), + }); + } + if _unknown != 0 { + fn_logger.log(log::LogEntry::UnexpectedArgument { + arg_name: stringify!(_unknown), + expected: vec![UInt::USize(0)], + observed: UInt::USize(_unknown), + }); + } + let delayed_state = global_state.delayed_state.unwrap_mut(); + record_submodules_from_wrapped_fatbin( + module, + fatbinc_wrapper, + &mut fn_logger, + delayed_state, + |original_exports| { + original_exports + .original_get_module_from_cubin_ext1 + .unwrap()(module, fatbinc_wrapper, ptr1, ptr2, _unknown) + }, + ) +} + +unsafe extern "system" fn get_module_from_cubin_ext2( + fatbin_header: *const FatbinHeader, + module: *mut CUmodule, + ptr1: *mut c_void, + ptr2: *mut c_void, + _unknown: usize, +) -> CUresult { + let global_state = &mut *super::GLOBAL_STATE.lock().unwrap(); + let mut fn_logger = global_state + .log_factory + .get_logger_dark_api(CUDART_INTERFACE_GUID, 8); + if ptr1 != ptr::null_mut() { + fn_logger.log(log::LogEntry::UnexpectedArgument { + arg_name: stringify!(ptr1), + expected: vec![UInt::USize(0)], + observed: UInt::USize(ptr1 as usize), + }); + } + if ptr2 != ptr::null_mut() { + fn_logger.log(log::LogEntry::UnexpectedArgument { + arg_name: stringify!(ptr2), + expected: vec![UInt::USize(0)], + observed: UInt::USize(ptr2 as usize), + }); + } + if _unknown != 0 { + fn_logger.log(log::LogEntry::UnexpectedArgument { + arg_name: stringify!(_unknown), + expected: vec![UInt::USize(0)], + observed: UInt::USize(_unknown), + }); + } + let delayed_state = global_state.delayed_state.unwrap_mut(); + let result = delayed_state + .cuda_state + .dark_api + .original + .original_get_module_from_cubin_ext2 + .unwrap()(fatbin_header, module, ptr1, ptr2, _unknown); + fn_logger.result = Some(result); + if result != CUresult::CUDA_SUCCESS { + return result; + } + record_submodules_from_fatbin( + *module, + fatbin_header, + None, + &mut fn_logger, + &mut delayed_state.cuda_state, + ); + result +} + +unsafe fn record_submodules( + should_decompress_elf: bool, + module: CUmodule, + version: Option, + fn_logger: &mut log::FunctionLogger, + state: &mut StateTracker, + start: *const u8, + end: *const u8, +) { + let mut index = start; + while index < end { + let fatbin_file = index as *const FatbinFileHeader; + let fatbin_file_version = (*fatbin_file).version; + if fatbin_file_version != FATBIN_FILE_HEADER_VERSION_CURRENT { + fn_logger.log(log::LogEntry::UnexpectedBinaryField { + field_name: stringify!(fatbin_file_version), + expected: vec![UInt::U16(FATBIN_FILE_HEADER_VERSION_CURRENT)], + observed: UInt::U16(fatbin_file_version), + }); + } + let fatbin_file_kind = (*fatbin_file).kind; + if fatbin_file_kind == FATBIN_FILE_HEADER_KIND_PTX { + let decompressed = decompress_kernel_module(fatbin_file); + match decompressed { + Some(decompressed) => { + state.record_new_submodule(module, version, &*decompressed, fn_logger, "ptx") + } + None => fn_logger.log(log::LogEntry::Lz4DecompressionFailure), + } + } else if fatbin_file_kind == FATBIN_FILE_HEADER_KIND_ELF { + let source_buffer = if should_decompress_elf { + let decompressed = decompress_kernel_module(fatbin_file); + match decompressed { + Some(decompressed) => Cow::Owned(decompressed), + None => { + fn_logger.log(log::LogEntry::Lz4DecompressionFailure); + continue; + } + } + } else { + Cow::Borrowed(slice::from_raw_parts( + (fatbin_file as *const u8).add((*fatbin_file).header_size as usize), + (*fatbin_file).padded_payload_size as usize, + )) + }; + state.record_new_submodule(module, version, &*source_buffer, fn_logger, "elf") + } else { + fn_logger.log(log::LogEntry::UnexpectedBinaryField { + field_name: stringify!(fatbin_file_kind), + expected: vec![ + UInt::U16(FATBIN_FILE_HEADER_KIND_PTX), + UInt::U16(FATBIN_FILE_HEADER_KIND_ELF), + ], + observed: UInt::U16(fatbin_file_kind), + }); + } + index = index + .add((*fatbin_file).header_size as usize + (*fatbin_file).padded_payload_size as usize); + } +} + +const MAX_MODULE_DECOMPRESSION_BOUND: usize = 64 * 1024 * 1024; + +unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option> { + let decompressed_size = usize::max(1024, (*file).uncompressed_payload as usize); + let mut decompressed_vec = vec![0u8; decompressed_size]; + loop { + match lz4_sys::LZ4_decompress_safe( + (file as *const u8).add((*file).header_size as usize) as *const _, + decompressed_vec.as_mut_ptr() as *mut _, + (*file).payload_size as c_int, + decompressed_vec.len() as c_int, + ) { + error if error < 0 => { + let new_size = decompressed_vec.len() * 2; + if new_size > MAX_MODULE_DECOMPRESSION_BOUND { + return None; + } + decompressed_vec.resize(decompressed_vec.len() * 2, 0); + } + real_decompressed_size => { + decompressed_vec.truncate(real_decompressed_size as usize); + return Some(decompressed_vec); + } + } + } +} diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs index 6a7545c..d96e8c7 100644 --- a/zluda_dump/src/lib.rs +++ b/zluda_dump/src/lib.rs @@ -80,6 +80,7 @@ macro_rules! extern_redirect_with { #[allow(warnings)] mod cuda; +mod dark_api; mod log; #[cfg_attr(windows, path = "os_win.rs")] #[cfg_attr(not(windows), path = "os_unix.rs")] @@ -132,9 +133,16 @@ enum LateInit { impl LateInit { fn as_mut(&mut self) -> Option<&mut T> { match self { - LateInit::Success(t) => Some(t), - LateInit::Unitialized => None, - LateInit::Error => None, + Self::Success(t) => Some(t), + Self::Unitialized => None, + Self::Error => None, + } + } + + pub(crate) fn unwrap_mut(&mut self) -> &mut T { + match self { + Self::Success(t) => t, + Self::Unitialized | Self::Error => panic!(), } } } @@ -180,7 +188,11 @@ impl Settings { fn read_and_init(logger: &mut log::FunctionLogger) -> Self { let maybe_dump_dir = Self::read_and_init_dump_dir(); let dump_dir = match maybe_dump_dir { - Ok(d) => d, + Ok(Some(dir)) => { + logger.log(log::LogEntry::CreatedDumpDirectory(dir.clone())); + Some(dir) + }, + Ok(None) => None, Err(err) => { logger.log(log::LogEntry::ErrorBox(err)); None @@ -212,8 +224,16 @@ impl Settings { fn create_dump_directory(dir: String) -> io::Result { let mut main_dir = PathBuf::from(dir); let current_exe = env::current_exe()?; - main_dir.push(current_exe.file_name().unwrap()); - fs::create_dir_all(&main_dir)?; + let file_name_base = current_exe.file_name().unwrap().to_string_lossy(); + main_dir.push(&*file_name_base); + let mut suffix = 1; + // This can get into infinite loop. Unfortunately try_exists is unstable: + // https://doc.rust-lang.org/std/path/struct.Path.html#method.try_exists + while main_dir.exists() { + main_dir.set_file_name(format!("{}_{}", file_name_base, suffix)); + suffix += 1; + } + fs::create_dir_all(&*main_dir)?; Ok(main_dir) } } @@ -405,6 +425,20 @@ pub(crate) fn cuModuleLoadData_Post( state.record_new_module(unsafe { *module }, raw_image, fn_logger) } +#[allow(non_snake_case)] +pub(crate) fn cuModuleLoadDataEx_Post( + module: *mut CUmodule, + raw_image: *const ::std::os::raw::c_void, + _numOptions: ::std::os::raw::c_uint, + _options: *mut CUjit_option, + _optionValues: *mut *mut ::std::os::raw::c_void, + fn_logger: &mut log::FunctionLogger, + state: &mut trace::StateTracker, + result: CUresult, +) { + cuModuleLoadData_Post(module, raw_image, fn_logger, state, result) +} + unsafe fn record_module_image_raw(module: CUmodule, raw_image: *const ::std::os::raw::c_void) { if *(raw_image as *const u32) == 0x464c457f { os_log!("Unsupported ELF module image: {:?}", raw_image); @@ -1410,3 +1444,31 @@ pub unsafe fn cuLinkAddFile( ) -> CUresult { cont(state, type_, path, numOptions, options, optionValues) } + +#[allow(non_snake_case)] +pub(crate) fn cuGetExportTable_Post( + ppExportTable: *mut *const ::std::os::raw::c_void, + pExportTableId: *const CUuuid, + _fn_logger: &mut log::FunctionLogger, + state: &mut trace::StateTracker, + result: CUresult, +) { + if result != CUresult::CUDA_SUCCESS { + return; + } + dark_api::override_export_table(ppExportTable, pExportTableId, state) +} + +#[allow(non_snake_case)] +pub(crate) fn cuModuleGetFunction_Post( + hfunc: *mut CUfunction, + hmod: CUmodule, + name: *const ::std::os::raw::c_char, + fn_logger: &mut log::FunctionLogger, + state: &mut trace::StateTracker, + result: CUresult, +) { + if !state.module_exists(hmod) { + fn_logger.log(log::LogEntry::UnknownModule(hmod)) + } +} diff --git a/zluda_dump/src/log.rs b/zluda_dump/src/log.rs index 4ffc459..474912f 100644 --- a/zluda_dump/src/log.rs +++ b/zluda_dump/src/log.rs @@ -6,11 +6,14 @@ use super::Settings; use std::borrow::Cow; use std::error::Error; use std::ffi::c_void; +use std::ffi::FromBytesWithNulError; +use std::ffi::NulError; use std::fmt::Display; use std::fs::File; use std::io; use std::io::Stderr; use std::io::Write; +use std::path::PathBuf; use std::str::Utf8Error; const LOG_PREFIX: &[u8] = b"[ZLUDA_DUMP] "; @@ -253,7 +256,7 @@ impl<'a> FunctionLogger<'a> { if let Some(result) = self.result { write!(self.write_buffer, "{:#X}", result.0).unwrap_or_else(|_| unreachable!()); } else { - self.write_buffer.write("(INTERNAL ERROR)"); + self.write_buffer.write("(UNKNOWN)"); }; self.write_buffer.end_line(); for entry in self.log_queue.iter() { @@ -292,6 +295,7 @@ impl<'a> Drop for FunctionLogger<'a> { // Structured log type. We don't want frontend to care about log formatting pub(crate) enum LogEntry { IoError(io::Error), + CreatedDumpDirectory(PathBuf), ErrorBox(Box), UnsupportedModule { module: CUmodule, @@ -299,14 +303,35 @@ pub(crate) enum LogEntry { kind: &'static str, }, MalformedModulePath(Utf8Error), - MalformedModuleText(Utf8Error), - ModuleParsingError(usize), + NonUtf8ModuleText(Utf8Error), + NulInsideModuleText(NulError), + ModuleParsingError(String), + Lz4DecompressionFailure, + UnknownExportTableFn, + UnknownModule(CUmodule), + UnexpectedArgument { + arg_name: &'static str, + expected: Vec, + observed: UInt, + }, + UnexpectedBinaryField { + field_name: &'static str, + expected: Vec, + observed: UInt, + }, } impl Display for LogEntry { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { LogEntry::IoError(e) => e.fmt(f), + LogEntry::CreatedDumpDirectory(dir) => { + write!( + f, + "Created dump directory {} ", + dir.as_os_str().to_string_lossy() + ) + } LogEntry::ErrorBox(e) => e.fmt(f), LogEntry::UnsupportedModule { module, @@ -320,14 +345,55 @@ impl Display for LogEntry { ) } LogEntry::MalformedModulePath(e) => e.fmt(f), - LogEntry::MalformedModuleText(e) => e.fmt(f), - LogEntry::ModuleParsingError(index) => { + LogEntry::NonUtf8ModuleText(e) => e.fmt(f), + LogEntry::ModuleParsingError(file_name) => { write!( f, - "Error parsing module, log has been written to module_{:04}.log", - index + "Error parsing module, log has been written to {}", + file_name ) } + LogEntry::NulInsideModuleText(e) => e.fmt(f), + LogEntry::Lz4DecompressionFailure => write!(f, "LZ4 decompression failure"), + LogEntry::UnknownExportTableFn => write!(f, "Unknown export table function"), + LogEntry::UnknownModule(hmod) => write!(f, "Unknown module {:?}", hmod), + LogEntry::UnexpectedBinaryField { + field_name, + expected, + observed, + } => write!( + f, + "Unexected field {}. Expected: [{}], observed: {}", + field_name, + expected + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(", "), + observed + ), + LogEntry::UnexpectedArgument { + arg_name, + expected, + observed, + } => write!(f, "Unexected argument"), + } + } +} + +#[derive(Clone, Copy)] +pub(crate) enum UInt { + U16(u16), + U32(u32), + USize(usize), +} + +impl Display for UInt { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + UInt::U16(x) => write!(f, "{:#x}", x), + UInt::U32(x) => write!(f, "{:#x}", x), + UInt::USize(x) => write!(f, "{:#x}", x), } } } diff --git a/zluda_dump/src/trace.rs b/zluda_dump/src/trace.rs index f060467..f93c360 100644 --- a/zluda_dump/src/trace.rs +++ b/zluda_dump/src/trace.rs @@ -1,9 +1,9 @@ use ptx::{ast::PtxError, Token}; -use crate::{cuda::CUmodule, log, Settings}; +use crate::{cuda::CUmodule, dark_api, log, Settings}; use std::{ collections::HashMap, - ffi::{c_void, CStr}, + ffi::{c_void, CStr, CString}, fs::{self, File}, io::{self, Read, Write}, path::PathBuf, @@ -18,6 +18,9 @@ pub(crate) struct StateTracker { writer: DumpWriter, modules: HashMap>, module_counter: usize, + submodule_counter: usize, + last_module_version: Option, + pub(crate) dark_api: dark_api::DarkApiState, } impl StateTracker { @@ -26,6 +29,9 @@ impl StateTracker { writer: DumpWriter::new(settings.dump_dir.clone()), modules: HashMap::new(), module_counter: 0, + submodule_counter: 0, + last_module_version: None, + dark_api: dark_api::DarkApiState::new(), } } @@ -59,6 +65,48 @@ impl StateTracker { Ok(()) } + pub(crate) fn record_new_submodule( + &mut self, + module: CUmodule, + version: Option, + submodule: &[u8], + fn_logger: &mut log::FunctionLogger, + type_: &'static str, + ) { + if !self.modules.contains_key(&module) { + self.module_counter += 1; + self.submodule_counter = 0; + self.modules.insert(module, None); + } + if version != self.last_module_version { + self.submodule_counter = 0; + } + self.submodule_counter += 1; + self.last_module_version = version; + fn_logger.log_io_error(self.writer.save_module( + self.module_counter, + version, + Some(self.submodule_counter), + submodule, + type_, + )); + if type_ == "ptx" { + match CString::new(submodule) { + Err(e) => fn_logger.log(log::LogEntry::NulInsideModuleText(e)), + Ok(submodule_cstring) => match submodule_cstring.to_str() { + Err(e) => fn_logger.log(log::LogEntry::NonUtf8ModuleText(e)), + Ok(submodule_text) => self.try_parse_and_record_kernels( + fn_logger, + self.module_counter, + version, + Some(self.submodule_counter), + submodule_text, + ), + }, + } + } + } + pub(crate) fn record_new_module( &mut self, module: CUmodule, @@ -93,30 +141,55 @@ impl StateTracker { raw_image: *const c_void, fn_logger: &mut log::FunctionLogger, ) { + self.modules.insert(module, None); let module_text = unsafe { CStr::from_ptr(raw_image as *const _) }.to_str(); let module_text = match module_text { Ok(m) => m, Err(utf8_err) => { - fn_logger.log(log::LogEntry::MalformedModuleText(utf8_err)); + fn_logger.log(log::LogEntry::NonUtf8ModuleText(utf8_err)); return; } }; - fn_logger.log_io_error(self.writer.save_module(self.module_counter, module_text)); + fn_logger.log_io_error(self.writer.save_module( + self.module_counter, + None, + None, + module_text.as_bytes(), + "ptx", + )); + self.try_parse_and_record_kernels(fn_logger, self.module_counter, None, None, module_text); + } + + fn try_parse_and_record_kernels( + &mut self, + fn_logger: &mut log::FunctionLogger, + module_index: usize, + version: Option, + submodule_index: Option, + module_text: &str, + ) { let mut errors = Vec::new(); let ast = ptx::ModuleParser::new().parse(&mut errors, module_text); let ast = match (&*errors, ast) { (&[], Ok(ast)) => ast, (err_vec, res) => { - fn_logger.log(log::LogEntry::ModuleParsingError(self.module_counter)); + fn_logger.log(log::LogEntry::ModuleParsingError( + DumpWriter::get_file_name(module_index, version, submodule_index, "log"), + )); fn_logger.log_io_error(self.writer.save_module_error_log( - self.module_counter, + module_index, + version, + submodule_index, err_vec, res.err(), )); return; } }; - // TODO: store kernel names and details + } + + pub(crate) fn module_exists(&self, hmod: CUmodule) -> bool { + self.modules.contains_key(&hmod) } } @@ -135,20 +208,34 @@ impl DumpWriter { Self { dump_dir } } - fn save_module(&self, index: usize, text: &str) -> io::Result<()> { + fn save_module( + &self, + module_index: usize, + version: Option, + submodule_index: Option, + buffer: &[u8], + kind: &'static str, + ) -> io::Result<()> { let mut dump_file = match &self.dump_dir { None => return Ok(()), Some(d) => d.clone(), }; - dump_file.push(format!("module_{:04}.ptx", index)); + dump_file.push(Self::get_file_name( + module_index, + version, + submodule_index, + kind, + )); let mut file = File::create(dump_file)?; - file.write_all(text.as_bytes())?; + file.write_all(buffer)?; Ok(()) } fn save_module_error_log<'input>( &self, - index: usize, + module_index: usize, + version: Option, + submodule_index: Option, recoverable: &[ptx::ParseError, PtxError>], unrecoverable: Option, PtxError>>, ) -> io::Result<()> { @@ -156,11 +243,37 @@ impl DumpWriter { None => return Ok(()), Some(d) => d.clone(), }; - log_file.push(format!("module_{:04}.log", index)); + log_file.push(Self::get_file_name( + module_index, + version, + submodule_index, + "log", + )); let mut file = File::create(log_file)?; - for err in unrecoverable.iter().chain(recoverable.iter()) { - writeln!(file, "{}", err)?; + for error in unrecoverable.iter().chain(recoverable.iter()) { + writeln!(file, "{}", error)?; } Ok(()) } + + fn get_file_name( + module_index: usize, + version: Option, + submodule_index: Option, + kind: &str, + ) -> String { + match (version, submodule_index) { + (Some(version), Some(submodule_index)) => format!( + "module_{:04}_v{}_{}.{}", + module_index, version, submodule_index, kind + ), + (Some(version), None) => { + format!("module_{:04}_v{}.{}", module_index, version, kind) + } + (None, Some(submodule_index)) => { + format!("module_{:04}_{}.{}", module_index, submodule_index, kind) + } + (None, None) => format!("module_{:04}.{}", module_index, kind), + } + } } -- cgit v1.2.3