aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Andrzej Janik <[email protected]> 2021-11-21 02:23:01 +0100
committer Andrzej Janik <[email protected]> 2021-11-21 02:23:01 +0100
commit a125b0746f69f5f661b6d6caeb0b3e5511eeabdc (patch)
tree 136b753715938047a186a40d7b018c9262323bd8
parent 2cb5960a186c756598b69c2e63b04933b657b1ff (diff)
download ZLUDA-a125b0746f69f5f661b6d6caeb0b3e5511eeabdc.tar.gz
ZLUDA-a125b0746f69f5f661b6d6caeb0b3e5511eeabdc.zip
Do full dumping from dark api module creation
-rw-r--r-- zluda_dump/src/cuda.rs 39
-rw-r--r-- zluda_dump/src/dark_api.rs 556
-rw-r--r-- zluda_dump/src/lib.rs 74
-rw-r--r-- zluda_dump/src/log.rs 80
-rw-r--r-- zluda_dump/src/trace.rs 141
5 files changed, 839 insertions, 51 deletions
diff --git a/zluda_dump/src/cuda.rs b/zluda_dump/src/cuda.rs
index dc86197..8cdfde9 100644
--- a/zluda_dump/src/cuda.rs
+++ b/zluda_dump/src/cuda.rs
@@ -2355,13 +2355,12 @@ extern_redirect! {
dev: CUdevice,
) -> CUresult;
}
-extern_redirect_with! {
+extern_redirect! {
pub fn cuDeviceGetAttribute(
pi: *mut ::std::os::raw::c_int,
attrib: CUdevice_attribute,
dev: CUdevice,
) -> CUresult;
- super::cuDeviceGetAttribute;
}
extern_redirect! {
pub fn cuDeviceGetNvSciSyncAttributes(
@@ -2502,7 +2501,7 @@ extern_redirect_with_post! {
) -> CUresult;
super::cuModuleLoadData_Post;
}
-extern_redirect_with! {
+extern_redirect_with_post! {
pub fn cuModuleLoadDataEx(
module: *mut CUmodule,
image: *const ::std::os::raw::c_void,
@@ -2510,7 +2509,7 @@ extern_redirect_with! {
options: *mut CUjit_option,
optionValues: *mut *mut ::std::os::raw::c_void,
) -> CUresult;
- super::cuModuleLoadDataEx;
+ super::cuModuleLoadDataEx_Post;
}
extern_redirect! {
pub fn cuModuleLoadFatBinary(
@@ -2521,13 +2520,13 @@ extern_redirect! {
extern_redirect! {
pub fn cuModuleUnload(hmod: CUmodule) -> CUresult;
}
-extern_redirect_with! {
+extern_redirect_with_post! {
pub fn cuModuleGetFunction(
hfunc: *mut CUfunction,
hmod: CUmodule,
name: *const ::std::os::raw::c_char,
) -> CUresult;
- super::cuModuleGetFunction;
+ super::cuModuleGetFunction_Post;
}
extern_redirect! {
pub fn cuModuleGetGlobal(
@@ -2575,7 +2574,7 @@ extern_redirect! {
stateOut: *mut CUlinkState,
) -> CUresult;
}
-extern_redirect_with! {
+extern_redirect! {
pub fn cuLinkAddData(
state: CUlinkState,
type_: CUjitInputType,
@@ -2586,9 +2585,8 @@ extern_redirect_with! {
options: *mut CUjit_option,
optionValues: *mut *mut ::std::os::raw::c_void,
) -> CUresult;
- super::cuLinkAddData;
}
-extern_redirect_with! {
+extern_redirect! {
pub fn cuLinkAddData_v2(
state: CUlinkState,
type_: CUjitInputType,
@@ -2599,9 +2597,8 @@ extern_redirect_with! {
options: *mut CUjit_option,
optionValues: *mut *mut ::std::os::raw::c_void,
) -> CUresult;
- super::cuLinkAddData;
}
-extern_redirect_with! {
+extern_redirect! {
pub fn cuLinkAddFile(
state: CUlinkState,
type_: CUjitInputType,
@@ -2610,9 +2607,8 @@ extern_redirect_with! {
options: *mut CUjit_option,
optionValues: *mut *mut ::std::os::raw::c_void,
) -> CUresult;
- super::cuLinkAddFile;
}
-extern_redirect_with! {
+extern_redirect! {
pub fn cuLinkAddFile_v2(
state: CUlinkState,
type_: CUjitInputType,
@@ -2621,7 +2617,6 @@ extern_redirect_with! {
options: *mut CUjit_option,
optionValues: *mut *mut ::std::os::raw::c_void,
) -> CUresult;
- super::cuLinkAddFile;
}
extern_redirect! {
pub fn cuLinkComplete(
@@ -2639,13 +2634,11 @@ extern_redirect! {
extern_redirect! {
pub fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult;
}
-extern_redirect_with! {
+extern_redirect! {
pub fn cuMemAlloc(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult;
- super::cuMemAlloc;
}
-extern_redirect_with! {
+extern_redirect! {
pub fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult;
- super::cuMemAlloc_v2;
}
extern_redirect! {
pub fn cuMemAllocPitch(
@@ -2694,13 +2687,12 @@ extern_redirect! {
extern_redirect! {
pub fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult;
}
-extern_redirect_with! {
+extern_redirect! {
pub fn cuMemHostAlloc(
pp: *mut *mut ::std::os::raw::c_void,
bytesize: usize,
Flags: ::std::os::raw::c_uint,
) -> CUresult;
- super::cuMemHostAlloc;
}
extern_redirect! {
pub fn cuMemHostGetDevicePointer(
@@ -3658,7 +3650,7 @@ extern_redirect! {
extern_redirect! {
pub fn cuFuncSetSharedMemConfig(hfunc: CUfunction, config: CUsharedconfig) -> CUresult;
}
-extern_redirect_with! {
+extern_redirect! {
pub fn cuLaunchKernel(
f: CUfunction,
gridDimX: ::std::os::raw::c_uint,
@@ -3672,7 +3664,6 @@ extern_redirect_with! {
kernelParams: *mut *mut ::std::os::raw::c_void,
extra: *mut *mut ::std::os::raw::c_void,
) -> CUresult;
- super::cuLaunchKernel;
}
extern_redirect! {
pub fn cuLaunchCooperativeKernel(
@@ -4394,12 +4385,12 @@ extern_redirect! {
hStream: CUstream,
) -> CUresult;
}
-extern_redirect_with! {
+extern_redirect_with_post! {
pub fn cuGetExportTable(
ppExportTable: *mut *const ::std::os::raw::c_void,
pExportTableId: *const CUuuid,
) -> CUresult;
- super::cuGetExportTable;
+ super::cuGetExportTable_Post;
}
extern_redirect! {
pub fn cuFuncGetModule(hmod: *mut CUmodule, hfunc: CUfunction) -> CUresult;
diff --git a/zluda_dump/src/dark_api.rs b/zluda_dump/src/dark_api.rs
new file mode 100644
index 0000000..b9fd9c2
--- /dev/null
+++ b/zluda_dump/src/dark_api.rs
@@ -0,0 +1,556 @@
+use crate::{log::UInt, GlobalDelayedState};
+use std::borrow::Cow;
+
+use crate::{
+ cuda::{CUmodule, CUresult, CUuuid},
+ log, os,
+ trace::StateTracker,
+};
+use std::{
+ collections::{hash_map, HashMap},
+ ffi::c_void,
+ mem,
+ os::raw::{c_int, c_uint, c_ulong, c_ushort},
+ ptr, slice,
+};
+
+/// State kept for intercepted export tables: the replacement tables whose
+/// addresses are written back to callers, plus the original entry points that
+/// the replacement functions forward to.
+pub(crate) struct DarkApiState {
+ // Key is Box<CUuuid>, because the thunk reporting an unknown export table
+ // call needs a stable memory location for the guid
+ overrides: HashMap<Box<CUuuid>, Vec<*const c_void>>,
+ original: OriginalExports,
+}
+
+/// Original function pointers taken from the CUDART interface export table
+/// before the corresponding slots are replaced by the hooks in this module.
+///
+/// Every field starts as `None` and is filled in by `get_export_override_fn`
+/// when the matching slot (1, 6 or 8) is overridden.
+pub(crate) struct OriginalExports {
+ // Original of slot 1; forwarded to by `get_module_from_cubin`
+ original_get_module_from_cubin: Option<
+ unsafe extern "system" fn(
+ result: *mut CUmodule,
+ fatbinc_wrapper: *const FatbincWrapper,
+ ) -> CUresult,
+ >,
+ // Original of slot 6; forwarded to by `get_module_from_cubin_ext1`
+ original_get_module_from_cubin_ext1: Option<
+ unsafe extern "system" fn(
+ result: *mut CUmodule,
+ fatbinc_wrapper: *const FatbincWrapper,
+ ptr1: *mut c_void,
+ ptr2: *mut c_void,
+ _unknown: usize,
+ ) -> CUresult,
+ >,
+ // Original of slot 8; forwarded to by `get_module_from_cubin_ext2`.
+ // Note that this one takes the fatbin header first and the module second.
+ original_get_module_from_cubin_ext2: Option<
+ unsafe extern "system" fn(
+ fatbinc_wrapper: *const FatbinHeader,
+ result: *mut CUmodule,
+ ptr1: *mut c_void,
+ ptr2: *mut c_void,
+ _unknown: usize,
+ ) -> CUresult,
+ >,
+}
+
+impl DarkApiState {
+    /// Creates an empty state: no export table has been overridden yet and no
+    /// original entry point has been captured.
+    pub(crate) fn new() -> Self {
+        Self {
+            overrides: HashMap::new(),
+            original: OriginalExports {
+                original_get_module_from_cubin: None,
+                original_get_module_from_cubin_ext1: None,
+                original_get_module_from_cubin_ext2: None,
+            },
+        }
+    }
+}
+
+/// Replaces the export table pointer stored in `*pp_export_table` with a
+/// pointer to an override table for the GUID read from `p_export_table_id`.
+///
+/// The override table is built by `create_new_override` the first time a GUID
+/// is seen and cached in `state.dark_api.overrides`; later calls with the same
+/// GUID hand out the cached table again. Both pointer arguments are
+/// dereferenced without a null check.
+pub(crate) fn override_export_table(
+ pp_export_table: *mut *const c_void,
+ p_export_table_id: *const CUuuid,
+ state: &mut crate::trace::StateTracker,
+) {
+ let state = &mut state.dark_api;
+ let export_table_mut = unsafe { &mut *pp_export_table };
+ // The GUID is copied into a Box so that the map key has a fixed address
+ let export_id = Box::new(unsafe { *p_export_table_id });
+ *export_table_mut = match state.overrides.entry(export_id) {
+ hash_map::Entry::Occupied(entry) => entry.get().as_ptr() as *const _,
+ hash_map::Entry::Vacant(entry) => {
+ // Address of the boxed key; passed down so it can be handed to thunks
+ let guid_ptr = &**entry.key() as *const _;
+ entry
+ .insert(unsafe {
+ create_new_override(*pp_export_table as *const _, guid_ptr, &mut state.original)
+ })
+ .as_ptr() as *const _
+ }
+ };
+}
+
+/// Builds the override table for the export table at `export_table`.
+///
+/// The first word of the table is read as a byte length. A value above
+/// 0x10000 is taken to be a function pointer instead, in which case the table
+/// is scanned for a 0 or `usize::MAX` entry and the length is derived from the
+/// position of that terminator (the terminator slot is included). Otherwise
+/// the length word is copied as the first entry of the result.
+///
+/// Every function slot is then mapped through `get_export_override_fn`, which
+/// receives `export_id` and the slot index.
+unsafe fn create_new_override(
+ export_table: *const *const c_void,
+ export_id: *const CUuuid,
+ state: &mut OriginalExports,
+) -> Vec<*const c_void> {
+ let mut byte_length: usize = *(export_table as *const usize);
+ // Some export tables don't start with a byte count, but directly with a
+ // pointer, and are instead terminated by 0 or MAX
+ let export_functions_start_idx;
+ let mut override_table = Vec::new();
+ if byte_length > 0x10000 {
+ export_functions_start_idx = 0;
+ let mut i = 0;
+ loop {
+ let current_fn = export_table.add(i);
+ let current_fn_numeric = *current_fn as usize;
+ if current_fn_numeric == 0usize || current_fn_numeric == usize::MAX {
+ // i + 1 so that the terminator slot itself is part of the table
+ byte_length = (i + 1) * mem::size_of::<usize>();
+ break;
+ }
+ i += 1;
+ }
+ } else {
+ // Size-prefixed table: keep the size word, functions start at slot 1
+ override_table.push(byte_length as *const _);
+ export_functions_start_idx = 1;
+ }
+ for i in export_functions_start_idx..(byte_length / mem::size_of::<usize>()) {
+ let current_fn = export_table.add(i);
+ override_table.push(get_export_override_fn(state, *current_fn, export_id, i));
+ }
+ override_table
+}
+
+/// Callback handed to `os::get_thunk`: logs `UnknownExportTableFn` for slot
+/// `idx` of the export table whose GUID is stored at `export_table`.
+///
+/// Does nothing when the global state lock cannot be acquired.
+unsafe extern "system" fn report_unknown_export_table_call(
+    export_table: *const CUuuid,
+    idx: usize,
+) {
+    let mut global_state = match crate::GLOBAL_STATE.lock() {
+        Ok(guard) => guard,
+        Err(_) => return,
+    };
+    let mut logger = global_state
+        .log_factory
+        .get_logger_dark_api(*export_table, idx);
+    logger.log(log::LogEntry::UnknownExportTableFn)
+}
+
+// GUIDs of the export tables that `get_export_override_fn` matches on. Slots
+// of any other export table are wrapped with `os::get_thunk` (terminator
+// entries excepted).
+const CUDART_INTERFACE_GUID: CUuuid = CUuuid {
+ bytes: [
+ 0x6b, 0xd5, 0xfb, 0x6c, 0x5b, 0xf4, 0xe7, 0x4a, 0x89, 0x87, 0xd9, 0x39, 0x12, 0xfd, 0x9d,
+ 0xf9,
+ ],
+};
+
+const TOOLS_RUNTIME_CALLBACK_HOOKS_GUID: CUuuid = CUuuid {
+ bytes: [
+ 0xa0, 0x94, 0x79, 0x8c, 0x2e, 0x74, 0x2e, 0x74, 0x93, 0xf2, 0x08, 0x00, 0x20, 0x0c, 0x0a,
+ 0x66,
+ ],
+};
+
+const CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID: CUuuid = CUuuid {
+ bytes: [
+ 0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, 0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95,
+ 0x93,
+ ],
+};
+
+const CTX_CREATE_BYPASS_GUID: CUuuid = CUuuid {
+ bytes: [
+ 0x0C, 0xA5, 0x0B, 0x8C, 0x10, 0x04, 0x92, 0x9A, 0x89, 0xA7, 0xD0, 0xDF, 0x10, 0xE7, 0x72,
+ 0x86,
+ ],
+};
+
+const HEAP_ACCESS_GUID: CUuuid = CUuuid {
+ bytes: [
+ 0x19, 0x5B, 0xCB, 0xF4, 0xD6, 0x7D, 0x02, 0x4A, 0xAC, 0xC5, 0x1D, 0x29, 0xCE, 0xA6, 0x31,
+ 0xAE,
+ ],
+};
+
+const DEVICE_EXTENDED_RT_GUID: CUuuid = CUuuid {
+ bytes: [
+ 0xB1u8, 0x05, 0x41, 0xE1, 0xF7, 0xC7, 0xC7, 0x4A, 0x9F, 0x64, 0xF2, 0x23, 0xBE, 0x99, 0xF1,
+ 0xE2,
+ ],
+};
+
+/// Chooses the pointer stored in the override table for slot `idx` of the
+/// export table identified by `guid`.
+///
+/// * The explicitly listed slots are passed through unchanged.
+/// * CUDART interface slots 1, 6 and 8 are remembered in `state` and replaced
+///   with the module-recording hooks defined in this file.
+/// * A null or `usize::MAX` entry (table terminator) becomes null.
+/// * Anything else is wrapped via `os::get_thunk`, which is given
+///   `report_unknown_export_table_call`, the GUID pointer and the slot index
+///   (presumably so the call is reported before being forwarded; confirm
+///   against the `os` module).
+unsafe fn get_export_override_fn(
+ state: &mut OriginalExports,
+ original_fn: *const c_void,
+ guid: *const CUuuid,
+ idx: usize,
+) -> *const c_void {
+ match (*guid, idx) {
+ (TOOLS_RUNTIME_CALLBACK_HOOKS_GUID, 2)
+ | (TOOLS_RUNTIME_CALLBACK_HOOKS_GUID, 6)
+ | (CUDART_INTERFACE_GUID, 2)
+ | (CUDART_INTERFACE_GUID, 7)
+ | (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 0)
+ | (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 1)
+ | (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 2)
+ | (CTX_CREATE_BYPASS_GUID, 1)
+ | (HEAP_ACCESS_GUID, 1)
+ | (HEAP_ACCESS_GUID, 2)
+ | (DEVICE_EXTENDED_RT_GUID, 5)
+ | (DEVICE_EXTENDED_RT_GUID, 13) => original_fn,
+ (CUDART_INTERFACE_GUID, 1) => {
+ // The transmute target is the Option<fn> field type; a null pointer
+ // would become `None`
+ state.original_get_module_from_cubin = mem::transmute(original_fn);
+ get_module_from_cubin as *const _
+ }
+ (CUDART_INTERFACE_GUID, 6) => {
+ state.original_get_module_from_cubin_ext1 = mem::transmute(original_fn);
+ get_module_from_cubin_ext1 as *const _
+ }
+ (CUDART_INTERFACE_GUID, 8) => {
+ state.original_get_module_from_cubin_ext2 = mem::transmute(original_fn);
+ get_module_from_cubin_ext2 as *const _
+ }
+ _ => {
+ // terminator if it's an export table that is not size-prefixed
+ if original_fn == ptr::null() || (original_fn as usize) == usize::MAX {
+ ptr::null()
+ } else {
+ os::get_thunk(original_fn, report_unknown_export_table_call, guid, idx)
+ }
+ }
+ }
+}
+
+// Expected `magic` and accepted `version` values of `FatbincWrapper`
+const FATBINC_MAGIC: c_uint = 0x466243B1;
+const FATBINC_VERSION_V1: c_uint = 0x1;
+const FATBINC_VERSION_V2: c_uint = 0x2;
+
+/// Wrapper structure passed to the module-loading exports.
+///
+/// `data` points at the fat binary itself. For a version 2 wrapper
+/// `filename_or_fatbins` is walked as a null-terminated array of pointers to
+/// further fat binaries.
+#[repr(C)]
+struct FatbincWrapper {
+ magic: c_uint,
+ version: c_uint,
+ data: *const FatbinHeader,
+ filename_or_fatbins: *const c_void,
+}
+
+// Expected `magic` and `version` values of `FatbinHeader`
+const FATBIN_MAGIC: c_uint = 0xBA55ED50;
+const FATBIN_VERSION: c_ushort = 0x01;
+
+/// Header at the start of a fat binary. The file entries start `header_size`
+/// bytes after the beginning of this header and span `files_size` bytes.
+// NOTE(review): `c_ulong` is 32 bits wide on Windows, so the width of
+// `files_size` differs between platforms - confirm this matches the format.
+#[repr(C, align(8))]
+struct FatbinHeader {
+ magic: c_uint,
+ version: c_ushort,
+ header_size: c_ushort,
+ files_size: c_ulong, // excluding frame header, size of all blocks framed by this frame
+}
+
+// Values of `FatbinFileHeader::kind` / `::version` that are understood here
+const FATBIN_FILE_HEADER_KIND_PTX: c_ushort = 0x01;
+const FATBIN_FILE_HEADER_KIND_ELF: c_ushort = 0x02;
+const FATBIN_FILE_HEADER_VERSION_CURRENT: c_ushort = 0x101;
+
+/// Header of a single file entry inside a fat binary.
+///
+/// The payload follows `header_size` bytes after the start of this header and
+/// the next entry starts `header_size + padded_payload_size` bytes after it.
+/// `payload_size` is the input length given to LZ4 and `uncompressed_payload`
+/// seeds the size of the decompression buffer.
+// NOTE(review): the trailing `c_ulong` fields are 32 bits wide on Windows,
+// which moves `uncompressed_payload` to a different offset - confirm.
+// assembly file header is a bit different, but we don't care
+#[repr(C)]
+#[derive(Debug)]
+struct FatbinFileHeader {
+ kind: c_ushort,
+ version: c_ushort,
+ header_size: c_uint,
+ padded_payload_size: c_uint,
+ unknown0: c_uint, // check if it's written into separately
+ payload_size: c_uint,
+ unknown1: c_uint,
+ unknown2: c_uint,
+ sm_version: c_uint,
+ bit_width: c_uint,
+ unknown3: c_uint,
+ unknown4: c_ulong,
+ unknown5: c_ulong,
+ uncompressed_payload: c_ulong,
+}
+
+/// Calls the original module-loading export and then records every submodule
+/// found in the wrapped fat binary that was passed to it.
+///
+/// `original_fn` receives the captured original exports and performs the real
+/// call; its result is stored in `fn_logger` and returned unchanged. Nothing
+/// is recorded when that call fails (same policy as
+/// `get_module_from_cubin_ext2`), so `*module` is only read after a
+/// successful load.
+///
+/// An unexpected wrapper magic or version is logged but does not stop
+/// processing. A version 2 wrapper additionally carries a null-terminated
+/// array of further fat binaries in `filename_or_fatbins`, which is walked
+/// as well.
+unsafe fn record_submodules_from_wrapped_fatbin(
+    module: *mut CUmodule,
+    fatbinc_wrapper: *const FatbincWrapper,
+    fn_logger: &mut log::FunctionLogger,
+    delayed_state: &mut GlobalDelayedState,
+    original_fn: impl FnOnce(&OriginalExports) -> CUresult,
+) -> CUresult {
+    let result = original_fn(&delayed_state.cuda_state.dark_api.original);
+    fn_logger.result = Some(result);
+    if result != CUresult::CUDA_SUCCESS {
+        return result;
+    }
+    let magic = (*fatbinc_wrapper).magic;
+    if magic != FATBINC_MAGIC {
+        fn_logger.log(log::LogEntry::UnexpectedBinaryField {
+            field_name: "FATBINC_MAGIC",
+            expected: vec![UInt::U32(FATBINC_MAGIC)],
+            observed: UInt::U32(magic),
+        });
+    }
+    let version = (*fatbinc_wrapper).version;
+    if version != FATBINC_VERSION_V1 && version != FATBINC_VERSION_V2 {
+        fn_logger.log(log::LogEntry::UnexpectedBinaryField {
+            field_name: "FATBINC_VERSION",
+            expected: vec![UInt::U32(FATBINC_VERSION_V1), UInt::U32(FATBINC_VERSION_V2)],
+            // Report the offending version, not the magic checked above
+            observed: UInt::U32(version),
+        });
+    }
+    let is_version_2 = version == FATBINC_VERSION_V2;
+    record_submodules_from_fatbin(
+        *module,
+        (*fatbinc_wrapper).data,
+        if is_version_2 { Some(1) } else { None },
+        fn_logger,
+        &mut delayed_state.cuda_state,
+    );
+    if is_version_2 {
+        // Null-terminated array of pointers to additional fat binaries
+        let mut current = (*fatbinc_wrapper).filename_or_fatbins as *const *const c_void;
+        while !(*current).is_null() {
+            record_submodules_from_fatbin(
+                *module,
+                *current as *const _,
+                Some(2),
+                fn_logger,
+                &mut delayed_state.cuda_state,
+            );
+            current = current.add(1);
+        }
+    }
+    result
+}
+
+/// Validates the fat binary header at `fatbin_header` and records all file
+/// entries that follow it as submodules of `module`.
+///
+/// A wrong magic or version is logged and nothing is recorded. ELF payloads
+/// are decompressed only when `fatbin_version` is `Some(2)`.
+unsafe fn record_submodules_from_fatbin(
+    module: CUmodule,
+    fatbin_header: *const FatbinHeader,
+    fatbin_version: Option<usize>,
+    logger: &mut log::FunctionLogger,
+    state: &mut StateTracker,
+) {
+    let observed_magic = (*fatbin_header).magic;
+    if observed_magic != FATBIN_MAGIC {
+        logger.log(log::LogEntry::UnexpectedBinaryField {
+            field_name: "FATBIN_MAGIC",
+            expected: vec![UInt::U32(FATBIN_MAGIC)],
+            observed: UInt::U32(observed_magic),
+        });
+        return;
+    }
+    let observed_version = (*fatbin_header).version;
+    if observed_version != FATBIN_VERSION {
+        logger.log(log::LogEntry::UnexpectedBinaryField {
+            field_name: "FATBIN_VERSION",
+            expected: vec![UInt::U16(FATBIN_VERSION)],
+            observed: UInt::U16(observed_version),
+        });
+        return;
+    }
+    // File entries live in [files_start, files_end), right after the header
+    let header_len = (*fatbin_header).header_size as usize;
+    let files_len = (*fatbin_header).files_size as usize;
+    let files_start = (fatbin_header as *const u8).add(header_len);
+    let files_end = files_start.add(files_len);
+    let should_decompress_elf = fatbin_version == Some(2);
+    record_submodules(
+        should_decompress_elf,
+        module,
+        fatbin_version,
+        logger,
+        state,
+        files_start,
+        files_end,
+    );
+}
+
+/// Replacement for slot 1 of the CUDART interface export table: forwards to
+/// the captured original and records the submodules of the loaded fat binary.
+///
+/// Panics if the global state lock is poisoned, if the delayed state has not
+/// been initialized, or if the original entry point was never captured.
+unsafe extern "system" fn get_module_from_cubin(
+    module: *mut CUmodule,
+    fatbinc_wrapper: *const FatbincWrapper,
+) -> CUresult {
+    // Lock exactly once. The guard created here lives until the end of the
+    // function, so taking the lock a second time on this thread would
+    // deadlock or panic.
+    let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
+    let mut fn_logger = global_state
+        .log_factory
+        .get_logger_dark_api(CUDART_INTERFACE_GUID, 1);
+    let delayed_state = global_state.delayed_state.unwrap_mut();
+    record_submodules_from_wrapped_fatbin(
+        module,
+        fatbinc_wrapper,
+        &mut fn_logger,
+        delayed_state,
+        |original_exports| {
+            original_exports.original_get_module_from_cubin.unwrap()(module, fatbinc_wrapper)
+        },
+    )
+}
+
+/// Replacement for slot 6 of the CUDART interface export table.
+///
+/// Logs an `UnexpectedArgument` entry for each of `ptr1`, `ptr2` and
+/// `_unknown` that is not zero, then forwards all arguments to the captured
+/// original through `record_submodules_from_wrapped_fatbin`.
+///
+/// Panics if the global state lock is poisoned, if the delayed state has not
+/// been initialized, or if the original entry point was never captured.
+unsafe extern "system" fn get_module_from_cubin_ext1(
+ module: *mut CUmodule,
+ fatbinc_wrapper: *const FatbincWrapper,
+ ptr1: *mut c_void,
+ ptr2: *mut c_void,
+ _unknown: usize,
+) -> CUresult {
+ // The lock guard stays alive until the function returns
+ let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
+ let mut fn_logger = global_state
+ .log_factory
+ .get_logger_dark_api(CUDART_INTERFACE_GUID, 6);
+ if ptr1 != ptr::null_mut() {
+ fn_logger.log(log::LogEntry::UnexpectedArgument {
+ arg_name: stringify!(ptr1),
+ expected: vec![UInt::USize(0)],
+ observed: UInt::USize(ptr1 as usize),
+ });
+ }
+ if ptr2 != ptr::null_mut() {
+ fn_logger.log(log::LogEntry::UnexpectedArgument {
+ arg_name: stringify!(ptr2),
+ expected: vec![UInt::USize(0)],
+ observed: UInt::USize(ptr2 as usize),
+ });
+ }
+ if _unknown != 0 {
+ fn_logger.log(log::LogEntry::UnexpectedArgument {
+ arg_name: stringify!(_unknown),
+ expected: vec![UInt::USize(0)],
+ observed: UInt::USize(_unknown),
+ });
+ }
+ let delayed_state = global_state.delayed_state.unwrap_mut();
+ record_submodules_from_wrapped_fatbin(
+ module,
+ fatbinc_wrapper,
+ &mut fn_logger,
+ delayed_state,
+ |original_exports| {
+ original_exports
+ .original_get_module_from_cubin_ext1
+ .unwrap()(module, fatbinc_wrapper, ptr1, ptr2, _unknown)
+ },
+ )
+}
+
+/// Replacement for slot 8 of the CUDART interface export table.
+///
+/// Unlike the other two hooks this one receives a bare `FatbinHeader` (as the
+/// first argument) instead of a `FatbincWrapper`. Logs an
+/// `UnexpectedArgument` entry for each of `ptr1`, `ptr2` and `_unknown` that
+/// is not zero, calls the captured original and, only if that succeeded,
+/// records the submodules of the fat binary with no version tag.
+///
+/// Panics if the global state lock is poisoned, if the delayed state has not
+/// been initialized, or if the original entry point was never captured.
+unsafe extern "system" fn get_module_from_cubin_ext2(
+ fatbin_header: *const FatbinHeader,
+ module: *mut CUmodule,
+ ptr1: *mut c_void,
+ ptr2: *mut c_void,
+ _unknown: usize,
+) -> CUresult {
+ // The lock guard stays alive until the function returns
+ let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
+ let mut fn_logger = global_state
+ .log_factory
+ .get_logger_dark_api(CUDART_INTERFACE_GUID, 8);
+ if ptr1 != ptr::null_mut() {
+ fn_logger.log(log::LogEntry::UnexpectedArgument {
+ arg_name: stringify!(ptr1),
+ expected: vec![UInt::USize(0)],
+ observed: UInt::USize(ptr1 as usize),
+ });
+ }
+ if ptr2 != ptr::null_mut() {
+ fn_logger.log(log::LogEntry::UnexpectedArgument {
+ arg_name: stringify!(ptr2),
+ expected: vec![UInt::USize(0)],
+ observed: UInt::USize(ptr2 as usize),
+ });
+ }
+ if _unknown != 0 {
+ fn_logger.log(log::LogEntry::UnexpectedArgument {
+ arg_name: stringify!(_unknown),
+ expected: vec![UInt::USize(0)],
+ observed: UInt::USize(_unknown),
+ });
+ }
+ let delayed_state = global_state.delayed_state.unwrap_mut();
+ let result = delayed_state
+ .cuda_state
+ .dark_api
+ .original
+ .original_get_module_from_cubin_ext2
+ .unwrap()(fatbin_header, module, ptr1, ptr2, _unknown);
+ fn_logger.result = Some(result);
+ // `*module` is only read after a successful call
+ if result != CUresult::CUDA_SUCCESS {
+ return result;
+ }
+ record_submodules_from_fatbin(
+ *module,
+ fatbin_header,
+ None,
+ &mut fn_logger,
+ &mut delayed_state.cuda_state,
+ );
+ result
+}
+
+/// Walks the file entries stored in `[start, end)` and records each PTX or
+/// ELF entry as a submodule of `module`.
+///
+/// * PTX payloads are always LZ4-decompressed before being recorded.
+/// * ELF payloads are decompressed only when `should_decompress_elf` is set;
+///   otherwise the padded payload bytes are recorded as they are.
+/// * An unexpected entry version or kind is logged; a failed decompression is
+///   logged and the entry is skipped.
+///
+/// `version` is passed through to `StateTracker::record_new_submodule`.
+unsafe fn record_submodules(
+    should_decompress_elf: bool,
+    module: CUmodule,
+    version: Option<usize>,
+    fn_logger: &mut log::FunctionLogger,
+    state: &mut StateTracker,
+    start: *const u8,
+    end: *const u8,
+) {
+    let mut index = start;
+    while index < end {
+        let fatbin_file = index as *const FatbinFileHeader;
+        // Distance to the next entry. Computed up front so that every path
+        // through the loop body, including the early `continue`, advances
+        // `index`; otherwise a failed decompression would spin forever.
+        let entry_size =
+            (*fatbin_file).header_size as usize + (*fatbin_file).padded_payload_size as usize;
+        let fatbin_file_version = (*fatbin_file).version;
+        if fatbin_file_version != FATBIN_FILE_HEADER_VERSION_CURRENT {
+            fn_logger.log(log::LogEntry::UnexpectedBinaryField {
+                field_name: stringify!(fatbin_file_version),
+                expected: vec![UInt::U16(FATBIN_FILE_HEADER_VERSION_CURRENT)],
+                observed: UInt::U16(fatbin_file_version),
+            });
+        }
+        let fatbin_file_kind = (*fatbin_file).kind;
+        if fatbin_file_kind == FATBIN_FILE_HEADER_KIND_PTX {
+            match decompress_kernel_module(fatbin_file) {
+                Some(decompressed) => {
+                    state.record_new_submodule(module, version, &*decompressed, fn_logger, "ptx")
+                }
+                None => fn_logger.log(log::LogEntry::Lz4DecompressionFailure),
+            }
+        } else if fatbin_file_kind == FATBIN_FILE_HEADER_KIND_ELF {
+            let source_buffer = if should_decompress_elf {
+                match decompress_kernel_module(fatbin_file) {
+                    Some(decompressed) => Cow::Owned(decompressed),
+                    None => {
+                        fn_logger.log(log::LogEntry::Lz4DecompressionFailure);
+                        // Skip this entry but still move on to the next one
+                        index = index.add(entry_size);
+                        continue;
+                    }
+                }
+            } else {
+                Cow::Borrowed(slice::from_raw_parts(
+                    (fatbin_file as *const u8).add((*fatbin_file).header_size as usize),
+                    (*fatbin_file).padded_payload_size as usize,
+                ))
+            };
+            state.record_new_submodule(module, version, &*source_buffer, fn_logger, "elf")
+        } else {
+            fn_logger.log(log::LogEntry::UnexpectedBinaryField {
+                field_name: stringify!(fatbin_file_kind),
+                expected: vec![
+                    UInt::U16(FATBIN_FILE_HEADER_KIND_PTX),
+                    UInt::U16(FATBIN_FILE_HEADER_KIND_ELF),
+                ],
+                observed: UInt::U16(fatbin_file_kind),
+            });
+        }
+        index = index.add(entry_size);
+    }
+}
+
+/// Upper limit for the size the decompression buffer may grow to.
+const MAX_MODULE_DECOMPRESSION_BOUND: usize = 64 * 1024 * 1024;
+
+/// LZ4-decompresses the payload that follows the file header at `file`.
+///
+/// The output buffer starts at `max(1024, uncompressed_payload)` bytes and is
+/// doubled after every failed attempt. Returns `None` once doubling would
+/// exceed `MAX_MODULE_DECOMPRESSION_BOUND`, otherwise the decompressed bytes
+/// truncated to the length reported by LZ4.
+unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option<Vec<u8>> {
+    let initial_len = usize::max(1024, (*file).uncompressed_payload as usize);
+    let mut output = vec![0u8; initial_len];
+    loop {
+        let status = lz4_sys::LZ4_decompress_safe(
+            (file as *const u8).add((*file).header_size as usize) as *const _,
+            output.as_mut_ptr() as *mut _,
+            (*file).payload_size as c_int,
+            output.len() as c_int,
+        );
+        if status >= 0 {
+            output.truncate(status as usize);
+            return Some(output);
+        }
+        // Negative status: retry with a buffer twice as large
+        let doubled_len = output.len() * 2;
+        if doubled_len > MAX_MODULE_DECOMPRESSION_BOUND {
+            return None;
+        }
+        output.resize(doubled_len, 0);
+    }
+}
diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs
index 6a7545c..d96e8c7 100644
--- a/zluda_dump/src/lib.rs
+++ b/zluda_dump/src/lib.rs
@@ -80,6 +80,7 @@ macro_rules! extern_redirect_with {
#[allow(warnings)]
mod cuda;
+mod dark_api;
mod log;
#[cfg_attr(windows, path = "os_win.rs")]
#[cfg_attr(not(windows), path = "os_unix.rs")]
@@ -132,9 +133,16 @@ enum LateInit<T> {
impl<T> LateInit<T> {
fn as_mut(&mut self) -> Option<&mut T> {
match self {
- LateInit::Success(t) => Some(t),
- LateInit::Unitialized => None,
- LateInit::Error => None,
+ Self::Success(t) => Some(t),
+ Self::Unitialized => None,
+ Self::Error => None,
+ }
+ }
+
+ /// Returns the initialized value, panicking when initialization has not
+ /// happened yet or has failed.
+ pub(crate) fn unwrap_mut(&mut self) -> &mut T {
+     if let Self::Success(t) = self {
+         return t;
+     }
+     panic!()
+ }
}
@@ -180,7 +188,11 @@ impl Settings {
fn read_and_init(logger: &mut log::FunctionLogger) -> Self {
let maybe_dump_dir = Self::read_and_init_dump_dir();
let dump_dir = match maybe_dump_dir {
- Ok(d) => d,
+ Ok(Some(dir)) => {
+ logger.log(log::LogEntry::CreatedDumpDirectory(dir.clone()));
+ Some(dir)
+ },
+ Ok(None) => None,
Err(err) => {
logger.log(log::LogEntry::ErrorBox(err));
None
@@ -212,8 +224,16 @@ impl Settings {
fn create_dump_directory(dir: String) -> io::Result<PathBuf> {
let mut main_dir = PathBuf::from(dir);
let current_exe = env::current_exe()?;
- main_dir.push(current_exe.file_name().unwrap());
- fs::create_dir_all(&main_dir)?;
+ let file_name_base = current_exe.file_name().unwrap().to_string_lossy();
+ main_dir.push(&*file_name_base);
+ let mut suffix = 1;
+ // This can get into infinite loop. Unfortunately try_exists is unstable:
+ // https://doc.rust-lang.org/std/path/struct.Path.html#method.try_exists
+ while main_dir.exists() {
+ main_dir.set_file_name(format!("{}_{}", file_name_base, suffix));
+ suffix += 1;
+ }
+ fs::create_dir_all(&*main_dir)?;
Ok(main_dir)
}
}
@@ -405,6 +425,20 @@ pub(crate) fn cuModuleLoadData_Post(
state.record_new_module(unsafe { *module }, raw_image, fn_logger)
}
+/// Post-call hook for `cuModuleLoadDataEx`: the JIT option arguments are
+/// ignored and the call is handled exactly like `cuModuleLoadData`.
+#[allow(non_snake_case)]
+pub(crate) fn cuModuleLoadDataEx_Post(
+ module: *mut CUmodule,
+ raw_image: *const ::std::os::raw::c_void,
+ _numOptions: ::std::os::raw::c_uint,
+ _options: *mut CUjit_option,
+ _optionValues: *mut *mut ::std::os::raw::c_void,
+ fn_logger: &mut log::FunctionLogger,
+ state: &mut trace::StateTracker,
+ result: CUresult,
+) {
+ cuModuleLoadData_Post(module, raw_image, fn_logger, state, result)
+}
+
unsafe fn record_module_image_raw(module: CUmodule, raw_image: *const ::std::os::raw::c_void) {
if *(raw_image as *const u32) == 0x464c457f {
os_log!("Unsupported ELF module image: {:?}", raw_image);
@@ -1410,3 +1444,31 @@ pub unsafe fn cuLinkAddFile(
) -> CUresult {
cont(state, type_, path, numOptions, options, optionValues)
}
+
+/// Post-call hook for `cuGetExportTable`: after a successful call the
+/// returned export table pointer is swapped for an override table.
+#[allow(non_snake_case)]
+pub(crate) fn cuGetExportTable_Post(
+    ppExportTable: *mut *const ::std::os::raw::c_void,
+    pExportTableId: *const CUuuid,
+    _fn_logger: &mut log::FunctionLogger,
+    state: &mut trace::StateTracker,
+    result: CUresult,
+) {
+    if result == CUresult::CUDA_SUCCESS {
+        dark_api::override_export_table(ppExportTable, pExportTableId, state)
+    }
+}
+
+/// Post-call hook for `cuModuleGetFunction`: logs `UnknownModule` when `hmod`
+/// is not a module known to the state tracker. `hfunc`, `name` and `result`
+/// are not inspected.
+#[allow(non_snake_case)]
+pub(crate) fn cuModuleGetFunction_Post(
+    hfunc: *mut CUfunction,
+    hmod: CUmodule,
+    name: *const ::std::os::raw::c_char,
+    fn_logger: &mut log::FunctionLogger,
+    state: &mut trace::StateTracker,
+    result: CUresult,
+) {
+    let is_known_module = state.module_exists(hmod);
+    if is_known_module {
+        return;
+    }
+    fn_logger.log(log::LogEntry::UnknownModule(hmod))
+}
diff --git a/zluda_dump/src/log.rs b/zluda_dump/src/log.rs
index 4ffc459..474912f 100644
--- a/zluda_dump/src/log.rs
+++ b/zluda_dump/src/log.rs
@@ -6,11 +6,14 @@ use super::Settings;
use std::borrow::Cow;
use std::error::Error;
use std::ffi::c_void;
+use std::ffi::FromBytesWithNulError;
+use std::ffi::NulError;
use std::fmt::Display;
use std::fs::File;
use std::io;
use std::io::Stderr;
use std::io::Write;
+use std::path::PathBuf;
use std::str::Utf8Error;
const LOG_PREFIX: &[u8] = b"[ZLUDA_DUMP] ";
@@ -253,7 +256,7 @@ impl<'a> FunctionLogger<'a> {
if let Some(result) = self.result {
write!(self.write_buffer, "{:#X}", result.0).unwrap_or_else(|_| unreachable!());
} else {
- self.write_buffer.write("(INTERNAL ERROR)");
+ self.write_buffer.write("(UNKNOWN)");
};
self.write_buffer.end_line();
for entry in self.log_queue.iter() {
@@ -292,6 +295,7 @@ impl<'a> Drop for FunctionLogger<'a> {
// Structured log type. We don't want frontend to care about log formatting
pub(crate) enum LogEntry {
IoError(io::Error),
+ CreatedDumpDirectory(PathBuf),
ErrorBox(Box<dyn Error>),
UnsupportedModule {
module: CUmodule,
@@ -299,14 +303,35 @@ pub(crate) enum LogEntry {
kind: &'static str,
},
MalformedModulePath(Utf8Error),
- MalformedModuleText(Utf8Error),
- ModuleParsingError(usize),
+ NonUtf8ModuleText(Utf8Error),
+ NulInsideModuleText(NulError),
+ ModuleParsingError(String),
+ Lz4DecompressionFailure,
+ UnknownExportTableFn,
+ UnknownModule(CUmodule),
+ UnexpectedArgument {
+ arg_name: &'static str,
+ expected: Vec<UInt>,
+ observed: UInt,
+ },
+ UnexpectedBinaryField {
+ field_name: &'static str,
+ expected: Vec<UInt>,
+ observed: UInt,
+ },
}
impl Display for LogEntry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
LogEntry::IoError(e) => e.fmt(f),
+ LogEntry::CreatedDumpDirectory(dir) => {
+ write!(
+ f,
+ "Created dump directory {} ",
+ dir.as_os_str().to_string_lossy()
+ )
+ }
LogEntry::ErrorBox(e) => e.fmt(f),
LogEntry::UnsupportedModule {
module,
@@ -320,14 +345,55 @@ impl Display for LogEntry {
)
}
LogEntry::MalformedModulePath(e) => e.fmt(f),
- LogEntry::MalformedModuleText(e) => e.fmt(f),
- LogEntry::ModuleParsingError(index) => {
+ LogEntry::NonUtf8ModuleText(e) => e.fmt(f),
+ LogEntry::ModuleParsingError(file_name) => {
write!(
f,
- "Error parsing module, log has been written to module_{:04}.log",
- index
+ "Error parsing module, log has been written to {}",
+ file_name
)
}
+ LogEntry::NulInsideModuleText(e) => e.fmt(f),
+ LogEntry::Lz4DecompressionFailure => write!(f, "LZ4 decompression failure"),
+ LogEntry::UnknownExportTableFn => write!(f, "Unknown export table function"),
+ LogEntry::UnknownModule(hmod) => write!(f, "Unknown module {:?}", hmod),
+ LogEntry::UnexpectedBinaryField {
+ field_name,
+ expected,
+ observed,
+ } => write!(
+ f,
+ "Unexected field {}. Expected: [{}], observed: {}",
+ field_name,
+ expected
+ .iter()
+ .map(|x| x.to_string())
+ .collect::<Vec<_>>()
+ .join(", "),
+ observed
+ ),
+ LogEntry::UnexpectedArgument {
+ arg_name,
+ expected,
+ observed,
+ } => write!(f, "Unexected argument"),
+ }
+ }
+}
+
+/// Unsigned integer of one of several widths; carries the expected and
+/// observed values of `UnexpectedArgument` / `UnexpectedBinaryField` entries.
+#[derive(Clone, Copy)]
+pub(crate) enum UInt {
+ U16(u16),
+ U32(u32),
+ USize(usize),
+}
+
+impl Display for UInt {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ UInt::U16(x) => write!(f, "{:#x}", x),
+ UInt::U32(x) => write!(f, "{:#x}", x),
+ UInt::USize(x) => write!(f, "{:#x}", x),
}
}
}
diff --git a/zluda_dump/src/trace.rs b/zluda_dump/src/trace.rs
index f060467..f93c360 100644
--- a/zluda_dump/src/trace.rs
+++ b/zluda_dump/src/trace.rs
@@ -1,9 +1,9 @@
use ptx::{ast::PtxError, Token};
-use crate::{cuda::CUmodule, log, Settings};
+use crate::{cuda::CUmodule, dark_api, log, Settings};
use std::{
collections::HashMap,
- ffi::{c_void, CStr},
+ ffi::{c_void, CStr, CString},
fs::{self, File},
io::{self, Read, Write},
path::PathBuf,
@@ -18,6 +18,9 @@ pub(crate) struct StateTracker {
writer: DumpWriter,
modules: HashMap<CUmodule, Option<ParsedModule>>,
module_counter: usize,
+ submodule_counter: usize,
+ last_module_version: Option<usize>,
+ pub(crate) dark_api: dark_api::DarkApiState,
}
impl StateTracker {
@@ -26,6 +29,9 @@ impl StateTracker {
writer: DumpWriter::new(settings.dump_dir.clone()),
modules: HashMap::new(),
module_counter: 0,
+ submodule_counter: 0,
+ last_module_version: None,
+ dark_api: dark_api::DarkApiState::new(),
}
}
@@ -59,6 +65,48 @@ impl StateTracker {
Ok(())
}
+ /// Saves one submodule (`submodule` bytes of kind `type_`) that belongs to
+ /// `module` and, for `"ptx"`, additionally tries to parse it.
+ ///
+ /// A module handle that has not been seen before bumps `module_counter`,
+ /// restarts submodule numbering and is registered without a parsed module.
+ /// Numbering also restarts whenever `version` differs from the one passed
+ /// in the previous call. I/O errors and text conversion errors are logged
+ /// through `fn_logger`.
+ pub(crate) fn record_new_submodule(
+ &mut self,
+ module: CUmodule,
+ version: Option<usize>,
+ submodule: &[u8],
+ fn_logger: &mut log::FunctionLogger,
+ type_: &'static str,
+ ) {
+ if !self.modules.contains_key(&module) {
+ self.module_counter += 1;
+ self.submodule_counter = 0;
+ self.modules.insert(module, None);
+ }
+ if version != self.last_module_version {
+ self.submodule_counter = 0;
+ }
+ // Counters are updated before use, so submodule numbers start at 1
+ self.submodule_counter += 1;
+ self.last_module_version = version;
+ fn_logger.log_io_error(self.writer.save_module(
+ self.module_counter,
+ version,
+ Some(self.submodule_counter),
+ submodule,
+ type_,
+ ));
+ if type_ == "ptx" {
+ // CString::new rejects any zero byte in the buffer
+ match CString::new(submodule) {
+ Err(e) => fn_logger.log(log::LogEntry::NulInsideModuleText(e)),
+ Ok(submodule_cstring) => match submodule_cstring.to_str() {
+ Err(e) => fn_logger.log(log::LogEntry::NonUtf8ModuleText(e)),
+ Ok(submodule_text) => self.try_parse_and_record_kernels(
+ fn_logger,
+ self.module_counter,
+ version,
+ Some(self.submodule_counter),
+ submodule_text,
+ ),
+ },
+ }
+ }
+ }
+
pub(crate) fn record_new_module(
&mut self,
module: CUmodule,
@@ -93,30 +141,55 @@ impl StateTracker {
raw_image: *const c_void,
fn_logger: &mut log::FunctionLogger,
) {
+ self.modules.insert(module, None);
let module_text = unsafe { CStr::from_ptr(raw_image as *const _) }.to_str();
let module_text = match module_text {
Ok(m) => m,
Err(utf8_err) => {
- fn_logger.log(log::LogEntry::MalformedModuleText(utf8_err));
+ fn_logger.log(log::LogEntry::NonUtf8ModuleText(utf8_err));
return;
}
};
- fn_logger.log_io_error(self.writer.save_module(self.module_counter, module_text));
+ fn_logger.log_io_error(self.writer.save_module(
+ self.module_counter,
+ None,
+ None,
+ module_text.as_bytes(),
+ "ptx",
+ ));
+ self.try_parse_and_record_kernels(fn_logger, self.module_counter, None, None, module_text);
+ }
+
+ fn try_parse_and_record_kernels(
+ &mut self,
+ fn_logger: &mut log::FunctionLogger,
+ module_index: usize,
+ version: Option<usize>,
+ submodule_index: Option<usize>,
+ module_text: &str,
+ ) {
let mut errors = Vec::new();
let ast = ptx::ModuleParser::new().parse(&mut errors, module_text);
let ast = match (&*errors, ast) {
(&[], Ok(ast)) => ast,
(err_vec, res) => {
- fn_logger.log(log::LogEntry::ModuleParsingError(self.module_counter));
+ fn_logger.log(log::LogEntry::ModuleParsingError(
+ DumpWriter::get_file_name(module_index, version, submodule_index, "log"),
+ ));
fn_logger.log_io_error(self.writer.save_module_error_log(
- self.module_counter,
+ module_index,
+ version,
+ submodule_index,
err_vec,
res.err(),
));
return;
}
};
- // TODO: store kernel names and details
+ }
+
+ pub(crate) fn module_exists(&self, hmod: CUmodule) -> bool {
+ self.modules.contains_key(&hmod)
}
}
@@ -135,20 +208,34 @@ impl DumpWriter {
Self { dump_dir }
}
- fn save_module(&self, index: usize, text: &str) -> io::Result<()> {
+ fn save_module(
+ &self,
+ module_index: usize,
+ version: Option<usize>,
+ submodule_index: Option<usize>,
+ buffer: &[u8],
+ kind: &'static str,
+ ) -> io::Result<()> {
let mut dump_file = match &self.dump_dir {
None => return Ok(()),
Some(d) => d.clone(),
};
- dump_file.push(format!("module_{:04}.ptx", index));
+ dump_file.push(Self::get_file_name(
+ module_index,
+ version,
+ submodule_index,
+ kind,
+ ));
let mut file = File::create(dump_file)?;
- file.write_all(text.as_bytes())?;
+ file.write_all(buffer)?;
Ok(())
}
fn save_module_error_log<'input>(
&self,
- index: usize,
+ module_index: usize,
+ version: Option<usize>,
+ submodule_index: Option<usize>,
recoverable: &[ptx::ParseError<usize, Token<'input>, PtxError>],
unrecoverable: Option<ptx::ParseError<usize, Token<'input>, PtxError>>,
) -> io::Result<()> {
@@ -156,11 +243,37 @@ impl DumpWriter {
None => return Ok(()),
Some(d) => d.clone(),
};
- log_file.push(format!("module_{:04}.log", index));
+ log_file.push(Self::get_file_name(
+ module_index,
+ version,
+ submodule_index,
+ "log",
+ ));
let mut file = File::create(log_file)?;
- for err in unrecoverable.iter().chain(recoverable.iter()) {
- writeln!(file, "{}", err)?;
+ for error in unrecoverable.iter().chain(recoverable.iter()) {
+ writeln!(file, "{}", error)?;
}
Ok(())
}
+
+ /// Builds the dump file name for a module:
+ /// `module_<index>[_v<version>][_<submodule>].<kind>`, with the module
+ /// index zero-padded to four digits.
+ fn get_file_name(
+     module_index: usize,
+     version: Option<usize>,
+     submodule_index: Option<usize>,
+     kind: &str,
+ ) -> String {
+     let mut file_name = format!("module_{:04}", module_index);
+     if let Some(version) = version {
+         file_name.push_str(&format!("_v{}", version));
+     }
+     if let Some(submodule_index) = submodule_index {
+         file_name.push_str(&format!("_{}", submodule_index));
+     }
+     file_name.push('.');
+     file_name.push_str(kind);
+     file_name
+ }
}