diff options
Diffstat (limited to 'zluda_dump/src')
-rw-r--r-- | zluda_dump/src/cuda.rs | 3 | ||||
-rw-r--r-- | zluda_dump/src/format.rs | 361 | ||||
-rw-r--r-- | zluda_dump/src/lib.rs | 45 | ||||
-rw-r--r-- | zluda_dump/src/log.rs | 65 |
4 files changed, 449 insertions, 25 deletions
diff --git a/zluda_dump/src/cuda.rs b/zluda_dump/src/cuda.rs index d9d57c4..3836137 100644 --- a/zluda_dump/src/cuda.rs +++ b/zluda_dump/src/cuda.rs @@ -2513,11 +2513,12 @@ extern_redirect_with_post! { ) -> CUresult; super::cuModuleLoadDataEx_Post; } -extern_redirect! { +extern_redirect_with_post! { pub fn cuModuleLoadFatBinary( module: *mut CUmodule, fatCubin: *const ::std::os::raw::c_void, ) -> CUresult; + super::cuModuleLoadFatBinary_Post; } extern_redirect! { pub fn cuModuleUnload(hmod: CUmodule) -> CUresult; diff --git a/zluda_dump/src/format.rs b/zluda_dump/src/format.rs new file mode 100644 index 0000000..df6d4f6 --- /dev/null +++ b/zluda_dump/src/format.rs @@ -0,0 +1,361 @@ +use std::{
+ ffi::{c_void, CStr},
+ fmt::Formatter,
+ io::Write,
+ ptr,
+};
+
+use crate::cuda::*;
+
+pub(crate) trait FormatCudaObject { // Implemented by every argument type that can be rendered into the textual call log.
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write); // Writes `self` to `f`; `result` is the CUresult supplied by the caller (the raw-pointer impls print NONE when it is not CUDA_SUCCESS).
+}
+
+/// Formats the value stored behind a raw pointer argument.
+///
+/// * A null pointer is written as `NULL`.
+/// * If `result` is not `CUDA_SUCCESS` the pointee is never read and `NONE`
+///   is written instead.
+/// * Otherwise a copy of the pointee is formatted with its own
+///   `FormatCudaObject` implementation.
+///
+/// Write errors are deliberately ignored: logging is best-effort.
+fn write_post_execution_ptr<T: FormatCudaObject + Copy>(
+    t: *const T,
+    result: CUresult,
+    f: &mut impl Write,
+) {
+    if t.is_null() {
+        write!(f, "NULL").ok();
+    } else if result != CUresult::CUDA_SUCCESS {
+        write!(f, "NONE").ok();
+    } else {
+        // SAFETY: `t` is non-null at this point. We additionally rely on the
+        // caller having passed a pointer that is valid for reads of `T`
+        // NOTE(review): that cannot be verified from inside this function.
+        let pointee = unsafe { ptr::read(t) };
+        pointee.write_post_execution(result, f);
+    }
+}
+
+/// Mutable raw pointers are formatted exactly like const ones: the work is
+/// handed to `write_post_execution_ptr`.
+impl<T: FormatCudaObject + Copy> FormatCudaObject for *mut T {
+    fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+        let as_const: *const T = self;
+        write_post_execution_ptr(as_const, result, f);
+    }
+}
+
+/// Const raw pointers to formattable values: delegates to
+/// `write_post_execution_ptr`, which decides between `NULL`, `NONE` and the
+/// pointee's own formatting.
+impl<T: FormatCudaObject + Copy> FormatCudaObject for *const T {
+    fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+        let pointer = self;
+        write_post_execution_ptr(pointer, result, f);
+    }
+}
+
+/// Module handles are logged using pointer (`{:p}`) formatting; the call
+/// result does not influence the output.
+impl FormatCudaObject for CUmodule {
+    fn write_post_execution(self, _result: CUresult, f: &mut impl Write) {
+        // Best-effort logging: a failed write is ignored.
+        let _ = f.write_fmt(format_args!("{:p}", self));
+    }
+}
+
+/// Function handles are logged using pointer (`{:p}`) formatting; the call
+/// result does not influence the output.
+impl FormatCudaObject for CUfunction {
+    fn write_post_execution(self, _result: CUresult, f: &mut impl Write) {
+        // Best-effort logging: a failed write is ignored.
+        let _ = f.write_fmt(format_args!("{:p}", self));
+    }
+}
+
+/// Untyped mutable pointers are never dereferenced; only the address is
+/// printed.
+impl FormatCudaObject for *mut c_void {
+    fn write_post_execution(self, _result: CUresult, f: &mut impl Write) {
+        let address = self;
+        let _ = write!(f, "{:p}", address);
+    }
+}
+
+/// Untyped const pointers are never dereferenced; only the address is
+/// printed.
+impl FormatCudaObject for *const c_void {
+    fn write_post_execution(self, _result: CUresult, f: &mut impl Write) {
+        let address = self;
+        let _ = write!(f, "{:p}", address);
+    }
+}
+
+/// C string arguments are printed surrounded by double quotes.
+///
+/// A null pointer is printed as `NULL` instead of being handed to
+/// `CStr::from_ptr`, and bytes that are not valid UTF-8 are replaced with
+/// U+FFFD instead of panicking inside the logging path.
+impl FormatCudaObject for *const i8 {
+    fn write_post_execution(self, _result: CUresult, f: &mut impl Write) {
+        if self.is_null() {
+            write!(f, "NULL").ok();
+            return;
+        }
+        // SAFETY: the pointer is non-null. We additionally assume it points
+        // to a NUL-terminated string supplied by the API caller.
+        // NOTE(review): that cannot be checked from here.
+        let text = unsafe { CStr::from_ptr(self.cast()) };
+        write!(f, "\"{}\"", text.to_string_lossy()).ok();
+    }
+}
+
+/// Unsigned 32-bit integers are printed in decimal.
+impl FormatCudaObject for u32 {
+    fn write_post_execution(self, _result: CUresult, f: &mut impl Write) {
+        // Best-effort logging: a failed write is ignored.
+        let _ = f.write_fmt(format_args!("{}", self));
+    }
+}
+
+/// Signed 32-bit integers are printed in decimal.
+impl FormatCudaObject for i32 {
+    fn write_post_execution(self, _result: CUresult, f: &mut impl Write) {
+        // Best-effort logging: a failed write is ignored.
+        let _ = f.write_fmt(format_args!("{}", self));
+    }
+}
+
+/// A device is logged as the value of its first (`.0`) field.
+impl FormatCudaObject for CUdevice {
+    fn write_post_execution(self, _result: CUresult, f: &mut impl Write) {
+        let ordinal = self.0;
+        let _ = write!(f, "{}", ordinal);
+    }
+}
+
+/// JIT options are printed by name when `stringify_cujit_option` knows the
+/// value, and as the raw number otherwise.
+impl FormatCudaObject for CUjit_option {
+    fn write_post_execution(self, _result: CUresult, f: &mut impl Write) {
+        match stringify_cujit_option(self) {
+            Some(text) => write!(f, "{}", text),
+            None => write!(f, "{}", self.0),
+        }
+        // Explicitly discard the io::Result, as every other impl in this
+        // file does; logging is best-effort.
+        .ok();
+    }
+}
+
+/// UUIDs are printed as sixteen upper-case hex bytes in braces, grouped
+/// 4-2-2-2-6, e.g. `{00112233-4455-6677-8899-AABBCCDDEEFF}`.
+impl FormatCudaObject for CUuuid {
+    fn write_post_execution(self, _result: CUresult, f: &mut impl Write) {
+        let b = self.bytes;
+        write!(
+            f,
+            "{{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}",
+            b[0], b[1], b[2], b[3],
+            b[4], b[5],
+            b[6], b[7],
+            b[8], b[9],
+            b[10], b[11], b[12], b[13], b[14], b[15]
+        )
+        .ok();
+    }
+}
+
+/// Device attributes are printed by name when
+/// `stringify_cudevice_attribute` knows the value, and as the raw number
+/// otherwise.
+impl FormatCudaObject for CUdevice_attribute {
+    fn write_post_execution(self, _result: CUresult, f: &mut impl Write) {
+        let outcome = if let Some(name) = stringify_cudevice_attribute(self) {
+            write!(f, "{}", name)
+        } else {
+            write!(f, "{}", self.0)
+        };
+        // Best-effort logging: a failed write is ignored.
+        outcome.ok();
+    }
+}
+
+macro_rules! stringify_enum { // Generates a lookup function from a constant of `$type_` to that constant's identifier text.
+ ($fn_name:ident, $type_:ident, [ $($variant:ident),+ ]) => { // Input: function name, type name, then a bracketed comma-separated list of one or more variant identifiers (no trailing comma accepted).
+ pub(crate) fn $fn_name(x: $type_) -> Option<&'static str> {
+ match x {
+ $(
+ $type_::$variant => Some(stringify!($variant)), // one arm per listed variant, yielding its name as written
+ )+
+ _ => None // any value not in the list has no known name
+ }
+ }
+ }
+}
+
+stringify_enum! { // Defines `stringify_cudevice_attribute`: returns the identifier text of a listed device attribute, or None for a value not listed below.
+ stringify_cudevice_attribute,
+ CUdevice_attribute_enum,
+ [
+ CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
+ CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,
+ CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y,
+ CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z,
+ CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X,
+ CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y,
+ CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z,
+ CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
+ CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK,
+ CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY,
+ CU_DEVICE_ATTRIBUTE_WARP_SIZE,
+ CU_DEVICE_ATTRIBUTE_MAX_PITCH,
+ CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,
+ CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK,
+ CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
+ CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT,
+ CU_DEVICE_ATTRIBUTE_GPU_OVERLAP,
+ CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,
+ CU_DEVICE_ATTRIBUTE_INTEGRATED,
+ CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES,
+ CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT,
+ CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS,
+ CU_DEVICE_ATTRIBUTE_ECC_ENABLED,
+ CU_DEVICE_ATTRIBUTE_PCI_BUS_ID,
+ CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID,
+ CU_DEVICE_ATTRIBUTE_TCC_DRIVER,
+ CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE,
+ CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH,
+ CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE,
+ CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR,
+ CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT,
+ CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS,
+ CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE,
+ CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID,
+ CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR,
+ CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR,
+ CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY,
+ CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD,
+ CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID,
+ CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO,
+ CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS,
+ CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM,
+ CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS,
+ CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS,
+ CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR,
+ CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH,
+ CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH,
+ CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN,
+ CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES,
+ CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES,
+ CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST,
+ CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR,
+ CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE,
+ CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE,
+ CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK,
+ CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED
+ ]
+}
+
+stringify_enum! { // Defines `stringify_cujit_option`: returns the identifier text of a listed JIT option, or None for a value not listed below.
+ stringify_cujit_option,
+ CUjit_option,
+ [
+ CU_JIT_MAX_REGISTERS,
+ CU_JIT_THREADS_PER_BLOCK,
+ CU_JIT_WALL_TIME,
+ CU_JIT_INFO_LOG_BUFFER,
+ CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
+ CU_JIT_ERROR_LOG_BUFFER,
+ CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
+ CU_JIT_OPTIMIZATION_LEVEL,
+ CU_JIT_TARGET_FROM_CUCONTEXT,
+ CU_JIT_TARGET,
+ CU_JIT_FALLBACK_STRATEGY,
+ CU_JIT_GENERATE_DEBUG_INFO,
+ CU_JIT_LOG_VERBOSE,
+ CU_JIT_GENERATE_LINE_INFO,
+ CU_JIT_CACHE_MODE,
+ CU_JIT_NEW_SM3X_OPT,
+ CU_JIT_FAST_COMPILE,
+ CU_JIT_GLOBAL_SYMBOL_NAMES,
+ CU_JIT_GLOBAL_SYMBOL_ADDRESSES,
+ CU_JIT_GLOBAL_SYMBOL_COUNT,
+ CU_JIT_NUM_OPTIONS
+ ]
+}
+
+stringify_enum! { // Defines `stringify_curesult`: returns the identifier text of a listed result code, or None for a value not listed below.
+ stringify_curesult,
+ CUresult,
+ [
+ CUDA_SUCCESS,
+ CUDA_ERROR_INVALID_VALUE,
+ CUDA_ERROR_OUT_OF_MEMORY,
+ CUDA_ERROR_NOT_INITIALIZED,
+ CUDA_ERROR_DEINITIALIZED,
+ CUDA_ERROR_PROFILER_DISABLED,
+ CUDA_ERROR_PROFILER_NOT_INITIALIZED,
+ CUDA_ERROR_PROFILER_ALREADY_STARTED,
+ CUDA_ERROR_PROFILER_ALREADY_STOPPED,
+ CUDA_ERROR_NO_DEVICE,
+ CUDA_ERROR_INVALID_DEVICE,
+ CUDA_ERROR_INVALID_IMAGE,
+ CUDA_ERROR_INVALID_CONTEXT,
+ CUDA_ERROR_CONTEXT_ALREADY_CURRENT,
+ CUDA_ERROR_MAP_FAILED,
+ CUDA_ERROR_UNMAP_FAILED,
+ CUDA_ERROR_ARRAY_IS_MAPPED,
+ CUDA_ERROR_ALREADY_MAPPED,
+ CUDA_ERROR_NO_BINARY_FOR_GPU,
+ CUDA_ERROR_ALREADY_ACQUIRED,
+ CUDA_ERROR_NOT_MAPPED,
+ CUDA_ERROR_NOT_MAPPED_AS_ARRAY,
+ CUDA_ERROR_NOT_MAPPED_AS_POINTER,
+ CUDA_ERROR_ECC_UNCORRECTABLE,
+ CUDA_ERROR_UNSUPPORTED_LIMIT,
+ CUDA_ERROR_CONTEXT_ALREADY_IN_USE,
+ CUDA_ERROR_PEER_ACCESS_UNSUPPORTED,
+ CUDA_ERROR_INVALID_PTX,
+ CUDA_ERROR_INVALID_GRAPHICS_CONTEXT,
+ CUDA_ERROR_NVLINK_UNCORRECTABLE,
+ CUDA_ERROR_JIT_COMPILER_NOT_FOUND,
+ CUDA_ERROR_INVALID_SOURCE,
+ CUDA_ERROR_FILE_NOT_FOUND,
+ CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
+ CUDA_ERROR_SHARED_OBJECT_INIT_FAILED,
+ CUDA_ERROR_OPERATING_SYSTEM,
+ CUDA_ERROR_INVALID_HANDLE,
+ CUDA_ERROR_ILLEGAL_STATE,
+ CUDA_ERROR_NOT_FOUND,
+ CUDA_ERROR_NOT_READY,
+ CUDA_ERROR_ILLEGAL_ADDRESS,
+ CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
+ CUDA_ERROR_LAUNCH_TIMEOUT,
+ CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
+ CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED,
+ CUDA_ERROR_PEER_ACCESS_NOT_ENABLED,
+ CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE,
+ CUDA_ERROR_CONTEXT_IS_DESTROYED,
+ CUDA_ERROR_ASSERT,
+ CUDA_ERROR_TOO_MANY_PEERS,
+ CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED,
+ CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED,
+ CUDA_ERROR_HARDWARE_STACK_ERROR,
+ CUDA_ERROR_ILLEGAL_INSTRUCTION,
+ CUDA_ERROR_MISALIGNED_ADDRESS,
+ CUDA_ERROR_INVALID_ADDRESS_SPACE,
+ CUDA_ERROR_INVALID_PC,
+ CUDA_ERROR_LAUNCH_FAILED,
+ CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE,
+ CUDA_ERROR_NOT_PERMITTED,
+ CUDA_ERROR_NOT_SUPPORTED,
+ CUDA_ERROR_SYSTEM_NOT_READY,
+ CUDA_ERROR_SYSTEM_DRIVER_MISMATCH,
+ CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE,
+ CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED,
+ CUDA_ERROR_STREAM_CAPTURE_INVALIDATED,
+ CUDA_ERROR_STREAM_CAPTURE_MERGE,
+ CUDA_ERROR_STREAM_CAPTURE_UNMATCHED,
+ CUDA_ERROR_STREAM_CAPTURE_UNJOINED,
+ CUDA_ERROR_STREAM_CAPTURE_ISOLATION,
+ CUDA_ERROR_STREAM_CAPTURE_IMPLICIT,
+ CUDA_ERROR_CAPTURED_EVENT,
+ CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD,
+ CUDA_ERROR_TIMEOUT,
+ CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE,
+ CUDA_ERROR_UNKNOWN
+ ]
+}
diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs index 780b9e8..1eb70e2 100644 --- a/zluda_dump/src/lib.rs +++ b/zluda_dump/src/lib.rs @@ -42,6 +42,11 @@ macro_rules! extern_redirect { }; } +macro_rules! count_tts { + () => {0usize}; + ($_head:tt $($tail:tt)*) => {1usize + count_tts!($($tail)*)}; +} + macro_rules! extern_redirect_with_post { ( pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ; @@ -53,9 +58,18 @@ macro_rules! extern_redirect_with_post { let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) }; typed_fn($( $arg_id ),*) }; + let get_formatted_args = |fn_logger: &mut crate::log::FunctionLogger, result: CUresult| { + let arg_count = (count_tts!($($arg_id),*) + 1) / 2; + fn_logger.begin_writing_arguments(arg_count); + $( + fn_logger.write_single_argument(result, $arg_id); + )* + fn_logger.end_writing_arguments(); + }; crate::handle_cuda_function_call_with_probes( stringify!($fn_name), || (), original_fn, + get_formatted_args, move |logger, state, _, cuda_result| $post_fn ( $( $arg_id ),* , logger, state, cuda_result ) ) } @@ -81,6 +95,7 @@ macro_rules! 
extern_redirect_with { #[allow(warnings)] mod cuda; mod dark_api; +mod format; mod log; #[cfg_attr(windows, path = "os_win.rs")] #[cfg_attr(not(windows), path = "os_unix.rs")] @@ -294,6 +309,7 @@ fn handle_cuda_function_call_with_probes<T, PostFn>( func: &'static str, pre_probe: impl FnOnce() -> T, original_cuda_fn: impl FnOnce(NonNull<c_void>) -> CUresult, + print_arguments_fn: impl FnOnce(&mut crate::log::FunctionLogger, CUresult), post_probe: PostFn, ) -> CUresult where @@ -325,6 +341,7 @@ where let pre_result = pre_probe(); let cu_result = original_cuda_fn(fn_ptr); logger.result = Some(cu_result); + print_arguments_fn(&mut logger, cu_result); post_probe( &mut logger, &mut delayed_state.cuda_state, @@ -1220,6 +1237,7 @@ struct FatbincWrapper { } const FATBIN_MAGIC: c_uint = 0xBA55ED50; +const LEGACY_FATBIN_MAGIC: c_uint = 0x1EE55A01; const FATBIN_VERSION: c_ushort = 0x01; #[repr(C, align(8))] @@ -1484,16 +1502,6 @@ pub(crate) fn cuModuleGetFunction_Post( state: &mut trace::StateTracker, result: CUresult, ) { - if !state.module_exists(hmod) { - fn_logger.log(log::LogEntry::UnknownModule(hmod)) - } - match unsafe { CStr::from_ptr(name) }.to_str() { - Ok(str) => fn_logger.log(log::LogEntry::FunctionParameter { - name: "name", - value: str.to_string(), - }), - Err(e) => fn_logger.log(log::LogEntry::MalformedFunctionName(e)), - } } #[allow(non_snake_case)] @@ -1505,10 +1513,6 @@ pub(crate) fn cuDeviceGetAttribute_Post( state: &mut trace::StateTracker, result: CUresult, ) { - fn_logger.log(log::LogEntry::FunctionParameter { - name: "attrib", - value: attrib.0.to_string(), - }); } #[allow(non_snake_case)] @@ -1524,3 +1528,16 @@ pub(crate) fn cuDeviceComputeCapability_Post( unsafe { *major = major_ver_override as i32 }; } } + +#[allow(non_snake_case)] +pub(crate) fn cuModuleLoadFatBinary_Post( + module: *mut CUmodule, + fatCubin: *const ::std::os::raw::c_void, + fn_logger: &mut log::FunctionLogger, + state: &mut trace::StateTracker, + result: CUresult, +) { + if result 
== CUresult::CUDA_SUCCESS { + panic!() + } +} diff --git a/zluda_dump/src/log.rs b/zluda_dump/src/log.rs index 57c804c..ef36acd 100644 --- a/zluda_dump/src/log.rs +++ b/zluda_dump/src/log.rs @@ -1,5 +1,7 @@ use crate::cuda::CUmodule;
use crate::cuda::CUuuid;
+use crate::format;
+use crate::format::FormatCudaObject;
use super::CUresult;
use super::Settings;
@@ -202,28 +204,35 @@ impl Factory { pub(crate) fn get_logger(&mut self, func: &'static str) -> FunctionLogger {
FunctionLogger {
result: None,
- name: Cow::Borrowed(func),
+ name: CudaFunctionName::Normal(func),
fallible_emitter: &mut self.fallible_emitter,
infallible_emitter: &mut self.infallible_emitter,
write_buffer: &mut self.write_buffer,
log_queue: &mut self.log_queue,
+ finished_writing_args: false,
+ args_to_write: 0,
}
}
- pub(crate) fn get_logger_dark_api(&mut self, guid: CUuuid, idx: usize) -> FunctionLogger {
- let guid = guid.bytes;
- let fn_name = format!("{{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}::{}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15], idx);
+ pub(crate) fn get_logger_dark_api(&mut self, guid: CUuuid, index: usize) -> FunctionLogger {
FunctionLogger {
result: None,
- name: Cow::Owned(fn_name),
+ name: CudaFunctionName::Dark { guid, index },
fallible_emitter: &mut self.fallible_emitter,
infallible_emitter: &mut self.infallible_emitter,
write_buffer: &mut self.write_buffer,
log_queue: &mut self.log_queue,
+ finished_writing_args: false,
+ args_to_write: 0,
}
}
}
+enum CudaFunctionName { // Identifies the function a FunctionLogger reports on; replaces the former pre-formatted name string.
+ Normal(&'static str), // A function identified by its name; the string is written to the log verbatim.
+ Dark { guid: CUuuid, index: usize }, // A function identified by a GUID and an index; begin_writing_arguments prints it as the formatted GUID followed by `::index`.
+}
+
// This encapsulates log output for a single function call.
// It's a separate struct and not just a plain function for two reasons:
// * While we want to always display return code before logging errors,
@@ -231,11 +240,13 @@ impl Factory { // * We want to handle panics gracefully with Drop
pub(crate) struct FunctionLogger<'a> {
pub(crate) result: Option<CUresult>,
- name: Cow<'static, str>,
+ name: CudaFunctionName,
infallible_emitter: &'a mut Box<dyn WriteTrailingZeroAware>,
fallible_emitter: &'a mut Option<Box<dyn WriteTrailingZeroAware>>,
write_buffer: &'a mut WriteBuffer,
log_queue: &'a mut Vec<LogEntry>,
+ args_to_write: usize,
+ finished_writing_args: bool,
}
impl<'a> FunctionLogger<'a> {
@@ -250,11 +261,16 @@ impl<'a> FunctionLogger<'a> { }
fn flush_log_queue_to_write_buffer(&mut self) {
- self.write_buffer.start_line();
- self.write_buffer.write(&self.name);
- self.write_buffer.write("(...) -> ");
+ // TODO: remove this once everything has been converted to detailed logging
+ if !self.finished_writing_args {
+ self.begin_writing_arguments(0);
+ self.write_buffer.write("...) -> ");
+ }
if let Some(result) = self.result {
- write!(self.write_buffer, "{:#X}", result.0).unwrap_or_else(|_| unreachable!());
+ match format::stringify_curesult(result) {
+ Some(text) => self.write_buffer.write(text),
+ None => write!(self.write_buffer, "{}", result.0).unwrap(),
+ }
} else {
self.write_buffer.write("(UNKNOWN)");
};
@@ -274,6 +290,35 @@ impl<'a> FunctionLogger<'a> { self.write_buffer.end_line();
self.write_buffer.finish();
}
+
+    /// Opens the textual record of a call.
+    ///
+    /// Stores `len` as the number of arguments still to be written, then
+    /// emits the function name (either the plain name, or the formatted GUID
+    /// followed by `::index`) and the opening parenthesis.
+    pub(crate) fn begin_writing_arguments(&mut self, len: usize) {
+        self.args_to_write = len;
+        match self.name {
+            CudaFunctionName::Dark {
+                guid: uuid,
+                index: slot,
+            } => {
+                // The result code is irrelevant for GUID formatting; success
+                // is passed as a placeholder.
+                uuid.write_post_execution(CUresult::CUDA_SUCCESS, &mut self.write_buffer);
+                write!(&mut self.write_buffer, "::{}", slot).ok();
+            }
+            CudaFunctionName::Normal(plain_name) => self.write_buffer.write(plain_name),
+        }
+        self.write_buffer.write("(");
+    }
+
+    /// Writes one formatted argument, followed by `", "` unless it was the
+    /// last argument announced to `begin_writing_arguments`.
+    ///
+    /// `result` is forwarded to the argument's formatter so that output
+    /// pointers can be suppressed when the call failed.
+    ///
+    /// Calling this more often than announced no longer underflows the
+    /// remaining-argument counter (a panic in debug builds, a wrap-around in
+    /// release builds); the counter simply stays at zero.
+    pub(crate) fn write_single_argument(
+        &mut self,
+        result: CUresult,
+        arg: impl FormatCudaObject,
+    ) {
+        self.args_to_write = self.args_to_write.saturating_sub(1);
+        arg.write_post_execution(result, self.write_buffer);
+        if self.args_to_write != 0 {
+            self.write_buffer.write(", ");
+        }
+    }
+
+ pub(crate) fn end_writing_arguments(&mut self) { // Closes the argument list that begin_writing_arguments opened.
+ self.write_buffer.write(") -> "); // the result text is appended after this arrow when the log queue is flushed
+ self.finished_writing_args = true; // tells flush_log_queue_to_write_buffer not to emit its placeholder `...) -> ` header
+ }
}
impl<'a> Drop for FunctionLogger<'a> {
|