about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Andrzej Janik <[email protected]> 2021-12-19 01:18:03 +0100
committer Andrzej Janik <[email protected]> 2021-12-19 01:18:03 +0100
commit bdcef897cca85c2213f1c8689f733d4d755cddbb (patch)
tree 2971bf76807f991af35034e84284ba1ddd1db75e
parent 971951bc9e696fbf895978b864a45b2ddb56ba5b (diff)
download ZLUDA-bdcef897cca85c2213f1c8689f733d4d755cddbb.tar.gz
ZLUDA-bdcef897cca85c2213f1c8689f733d4d755cddbb.zip
Start converting zluda_dump logging to provide more detailed
-rw-r--r-- zluda_dump/README.md 2
-rw-r--r-- zluda_dump/src/cuda.rs 3
-rw-r--r-- zluda_dump/src/format.rs 361
-rw-r--r-- zluda_dump/src/lib.rs 45
-rw-r--r-- zluda_dump/src/log.rs 65
5 files changed, 450 insertions, 26 deletions
diff --git a/zluda_dump/README.md b/zluda_dump/README.md
index 1e7c03b..ba82a1c 100644
--- a/zluda_dump/README.md
+++ b/zluda_dump/README.md
@@ -1,3 +1,3 @@
grep -E '^cu.*' log.txt | sed 's/(.*//g' | sort | uniq > uniq_host.txt
-cat *.log | grep "^Unrecognized s" | grep -Eo '`([^`]*)`' | sed -E 's/^`([^[:space:]]*).*`/\1/' | sort | uniq > uniq_statements.txt
+cat *.log | grep "^Unrecognized s" | grep -Eo '`([^`]*)`' | sed -E 's/^`((@\w+ )?[^[:space:]]*).*`/\1/' | sort | uniq > uniq_statements.txt
cat *.log | grep "^Unrecognized d" | grep -Eo '`([^`]*)`' | sed -E 's/^`([^`]*)`/\1/' | sort | uniq > uniq_directives.txt
\ No newline at end of file
diff --git a/zluda_dump/src/cuda.rs b/zluda_dump/src/cuda.rs
index d9d57c4..3836137 100644
--- a/zluda_dump/src/cuda.rs
+++ b/zluda_dump/src/cuda.rs
@@ -2513,11 +2513,12 @@ extern_redirect_with_post! {
) -> CUresult;
super::cuModuleLoadDataEx_Post;
}
-extern_redirect! {
+extern_redirect_with_post! {
pub fn cuModuleLoadFatBinary(
module: *mut CUmodule,
fatCubin: *const ::std::os::raw::c_void,
) -> CUresult;
+ super::cuModuleLoadFatBinary_Post;
}
extern_redirect! {
pub fn cuModuleUnload(hmod: CUmodule) -> CUresult;
diff --git a/zluda_dump/src/format.rs b/zluda_dump/src/format.rs
new file mode 100644
index 0000000..df6d4f6
--- /dev/null
+++ b/zluda_dump/src/format.rs
@@ -0,0 +1,361 @@
+use std::{
+ ffi::{c_void, CStr},
+ fmt::Formatter,
+ io::Write,
+ ptr,
+};
+
+use crate::cuda::*;
+
+pub(crate) trait FormatCudaObject {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write);
+}
+
+fn write_post_execution_ptr<T: FormatCudaObject + Copy>(
+ t: *const T,
+ result: CUresult,
+ f: &mut impl Write,
+) {
+ if t == ptr::null() {
+ write!(f, "NULL").ok();
+ } else if result != CUresult::CUDA_SUCCESS {
+ write!(f, "NONE").ok();
+ } else {
+ unsafe { *t }.write_post_execution(result, f)
+ }
+}
+
+impl<T: FormatCudaObject + Copy> FormatCudaObject for *mut T {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+ write_post_execution_ptr(self, result, f)
+ }
+}
+
+impl<T: FormatCudaObject + Copy> FormatCudaObject for *const T {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+ write_post_execution_ptr(self, result, f)
+ }
+}
+
+impl FormatCudaObject for CUmodule {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+ write!(f, "{:p}", self).ok();
+ }
+}
+
+impl FormatCudaObject for CUfunction {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+ write!(f, "{:p}", self).ok();
+ }
+}
+
+impl FormatCudaObject for *mut c_void {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+ write!(f, "{:p}", self).ok();
+ }
+}
+
+impl FormatCudaObject for *const c_void {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+ write!(f, "{:p}", self).ok();
+ }
+}
+
+impl FormatCudaObject for *const i8 {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+ write!(f, "\"{}\"", unsafe { CStr::from_ptr(self) }.to_str().unwrap()).ok();
+ }
+}
+
+impl FormatCudaObject for u32 {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+ write!(f, "{}", self).ok();
+ }
+}
+
+impl FormatCudaObject for i32 {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+ write!(f, "{}", self).ok();
+ }
+}
+
+impl FormatCudaObject for CUdevice {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+ write!(f, "{}", self.0).ok();
+ }
+}
+
+impl FormatCudaObject for CUjit_option {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+ match stringify_cujit_option(self) {
+ Some(text) => write!(f, "{}", text),
+ None => write!(f, "{}", self.0),
+ };
+ }
+}
+
+impl FormatCudaObject for CUuuid {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+ let guid = self.bytes;
+ write!(f, "{{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15]).ok();
+ }
+}
+
+impl FormatCudaObject for CUdevice_attribute {
+ fn write_post_execution(self, result: CUresult, f: &mut impl Write) {
+ match stringify_cudevice_attribute(self) {
+ Some(text) => write!(f, "{}", text),
+ None => write!(f, "{}", self.0),
+ }
+ .ok();
+ }
+}
+
+macro_rules! stringify_enum {
+ ($fn_name:ident, $type_:ident, [ $($variant:ident),+ ]) => {
+ pub(crate) fn $fn_name(x: $type_) -> Option<&'static str> {
+ match x {
+ $(
+ $type_::$variant => Some(stringify!($variant)),
+ )+
+ _ => None
+ }
+ }
+ }
+}
+
+stringify_enum! {
+ stringify_cudevice_attribute,
+ CUdevice_attribute_enum,
+ [
+ CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
+ CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,
+ CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y,
+ CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z,
+ CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X,
+ CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y,
+ CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z,
+ CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
+ CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK,
+ CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY,
+ CU_DEVICE_ATTRIBUTE_WARP_SIZE,
+ CU_DEVICE_ATTRIBUTE_MAX_PITCH,
+ CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,
+ CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK,
+ CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
+ CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT,
+ CU_DEVICE_ATTRIBUTE_GPU_OVERLAP,
+ CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,
+ CU_DEVICE_ATTRIBUTE_INTEGRATED,
+ CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES,
+ CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT,
+ CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS,
+ CU_DEVICE_ATTRIBUTE_ECC_ENABLED,
+ CU_DEVICE_ATTRIBUTE_PCI_BUS_ID,
+ CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID,
+ CU_DEVICE_ATTRIBUTE_TCC_DRIVER,
+ CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE,
+ CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH,
+ CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE,
+ CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR,
+ CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT,
+ CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS,
+ CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE,
+ CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID,
+ CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+ CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH,
+ CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR,
+ CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR,
+ CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY,
+ CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD,
+ CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID,
+ CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO,
+ CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS,
+ CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM,
+ CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS,
+ CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS,
+ CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR,
+ CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH,
+ CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH,
+ CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN,
+ CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES,
+ CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES,
+ CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST,
+ CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR,
+ CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE,
+ CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE,
+ CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK,
+ CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED,
+ CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED
+ ]
+}
+
+stringify_enum! {
+ stringify_cujit_option,
+ CUjit_option,
+ [
+ CU_JIT_MAX_REGISTERS,
+ CU_JIT_THREADS_PER_BLOCK,
+ CU_JIT_WALL_TIME,
+ CU_JIT_INFO_LOG_BUFFER,
+ CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
+ CU_JIT_ERROR_LOG_BUFFER,
+ CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
+ CU_JIT_OPTIMIZATION_LEVEL,
+ CU_JIT_TARGET_FROM_CUCONTEXT,
+ CU_JIT_TARGET,
+ CU_JIT_FALLBACK_STRATEGY,
+ CU_JIT_GENERATE_DEBUG_INFO,
+ CU_JIT_LOG_VERBOSE,
+ CU_JIT_GENERATE_LINE_INFO,
+ CU_JIT_CACHE_MODE,
+ CU_JIT_NEW_SM3X_OPT,
+ CU_JIT_FAST_COMPILE,
+ CU_JIT_GLOBAL_SYMBOL_NAMES,
+ CU_JIT_GLOBAL_SYMBOL_ADDRESSES,
+ CU_JIT_GLOBAL_SYMBOL_COUNT,
+ CU_JIT_NUM_OPTIONS
+ ]
+}
+
+stringify_enum! {
+ stringify_curesult,
+ CUresult,
+ [
+ CUDA_SUCCESS,
+ CUDA_ERROR_INVALID_VALUE,
+ CUDA_ERROR_OUT_OF_MEMORY,
+ CUDA_ERROR_NOT_INITIALIZED,
+ CUDA_ERROR_DEINITIALIZED,
+ CUDA_ERROR_PROFILER_DISABLED,
+ CUDA_ERROR_PROFILER_NOT_INITIALIZED,
+ CUDA_ERROR_PROFILER_ALREADY_STARTED,
+ CUDA_ERROR_PROFILER_ALREADY_STOPPED,
+ CUDA_ERROR_NO_DEVICE,
+ CUDA_ERROR_INVALID_DEVICE,
+ CUDA_ERROR_INVALID_IMAGE,
+ CUDA_ERROR_INVALID_CONTEXT,
+ CUDA_ERROR_CONTEXT_ALREADY_CURRENT,
+ CUDA_ERROR_MAP_FAILED,
+ CUDA_ERROR_UNMAP_FAILED,
+ CUDA_ERROR_ARRAY_IS_MAPPED,
+ CUDA_ERROR_ALREADY_MAPPED,
+ CUDA_ERROR_NO_BINARY_FOR_GPU,
+ CUDA_ERROR_ALREADY_ACQUIRED,
+ CUDA_ERROR_NOT_MAPPED,
+ CUDA_ERROR_NOT_MAPPED_AS_ARRAY,
+ CUDA_ERROR_NOT_MAPPED_AS_POINTER,
+ CUDA_ERROR_ECC_UNCORRECTABLE,
+ CUDA_ERROR_UNSUPPORTED_LIMIT,
+ CUDA_ERROR_CONTEXT_ALREADY_IN_USE,
+ CUDA_ERROR_PEER_ACCESS_UNSUPPORTED,
+ CUDA_ERROR_INVALID_PTX,
+ CUDA_ERROR_INVALID_GRAPHICS_CONTEXT,
+ CUDA_ERROR_NVLINK_UNCORRECTABLE,
+ CUDA_ERROR_JIT_COMPILER_NOT_FOUND,
+ CUDA_ERROR_INVALID_SOURCE,
+ CUDA_ERROR_FILE_NOT_FOUND,
+ CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
+ CUDA_ERROR_SHARED_OBJECT_INIT_FAILED,
+ CUDA_ERROR_OPERATING_SYSTEM,
+ CUDA_ERROR_INVALID_HANDLE,
+ CUDA_ERROR_ILLEGAL_STATE,
+ CUDA_ERROR_NOT_FOUND,
+ CUDA_ERROR_NOT_READY,
+ CUDA_ERROR_ILLEGAL_ADDRESS,
+ CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES,
+ CUDA_ERROR_LAUNCH_TIMEOUT,
+ CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
+ CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED,
+ CUDA_ERROR_PEER_ACCESS_NOT_ENABLED,
+ CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE,
+ CUDA_ERROR_CONTEXT_IS_DESTROYED,
+ CUDA_ERROR_ASSERT,
+ CUDA_ERROR_TOO_MANY_PEERS,
+ CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED,
+ CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED,
+ CUDA_ERROR_HARDWARE_STACK_ERROR,
+ CUDA_ERROR_ILLEGAL_INSTRUCTION,
+ CUDA_ERROR_MISALIGNED_ADDRESS,
+ CUDA_ERROR_INVALID_ADDRESS_SPACE,
+ CUDA_ERROR_INVALID_PC,
+ CUDA_ERROR_LAUNCH_FAILED,
+ CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE,
+ CUDA_ERROR_NOT_PERMITTED,
+ CUDA_ERROR_NOT_SUPPORTED,
+ CUDA_ERROR_SYSTEM_NOT_READY,
+ CUDA_ERROR_SYSTEM_DRIVER_MISMATCH,
+ CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE,
+ CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED,
+ CUDA_ERROR_STREAM_CAPTURE_INVALIDATED,
+ CUDA_ERROR_STREAM_CAPTURE_MERGE,
+ CUDA_ERROR_STREAM_CAPTURE_UNMATCHED,
+ CUDA_ERROR_STREAM_CAPTURE_UNJOINED,
+ CUDA_ERROR_STREAM_CAPTURE_ISOLATION,
+ CUDA_ERROR_STREAM_CAPTURE_IMPLICIT,
+ CUDA_ERROR_CAPTURED_EVENT,
+ CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD,
+ CUDA_ERROR_TIMEOUT,
+ CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE,
+ CUDA_ERROR_UNKNOWN
+ ]
+}
diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs
index 780b9e8..1eb70e2 100644
--- a/zluda_dump/src/lib.rs
+++ b/zluda_dump/src/lib.rs
@@ -42,6 +42,11 @@ macro_rules! extern_redirect {
};
}
+macro_rules! count_tts {
+ () => {0usize};
+ ($_head:tt $($tail:tt)*) => {1usize + count_tts!($($tail)*)};
+}
+
macro_rules! extern_redirect_with_post {
(
pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;
@@ -53,9 +58,18 @@ macro_rules! extern_redirect_with_post {
let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) };
typed_fn($( $arg_id ),*)
};
+ let get_formatted_args = |fn_logger: &mut crate::log::FunctionLogger, result: CUresult| {
+ let arg_count = (count_tts!($($arg_id),*) + 1) / 2;
+ fn_logger.begin_writing_arguments(arg_count);
+ $(
+ fn_logger.write_single_argument(result, $arg_id);
+ )*
+ fn_logger.end_writing_arguments();
+ };
crate::handle_cuda_function_call_with_probes(
stringify!($fn_name),
|| (), original_fn,
+ get_formatted_args,
move |logger, state, _, cuda_result| $post_fn ( $( $arg_id ),* , logger, state, cuda_result )
)
}
@@ -81,6 +95,7 @@ macro_rules! extern_redirect_with {
#[allow(warnings)]
mod cuda;
mod dark_api;
+mod format;
mod log;
#[cfg_attr(windows, path = "os_win.rs")]
#[cfg_attr(not(windows), path = "os_unix.rs")]
@@ -294,6 +309,7 @@ fn handle_cuda_function_call_with_probes<T, PostFn>(
func: &'static str,
pre_probe: impl FnOnce() -> T,
original_cuda_fn: impl FnOnce(NonNull<c_void>) -> CUresult,
+ print_arguments_fn: impl FnOnce(&mut crate::log::FunctionLogger, CUresult),
post_probe: PostFn,
) -> CUresult
where
@@ -325,6 +341,7 @@ where
let pre_result = pre_probe();
let cu_result = original_cuda_fn(fn_ptr);
logger.result = Some(cu_result);
+ print_arguments_fn(&mut logger, cu_result);
post_probe(
&mut logger,
&mut delayed_state.cuda_state,
@@ -1220,6 +1237,7 @@ struct FatbincWrapper {
}
const FATBIN_MAGIC: c_uint = 0xBA55ED50;
+const LEGACY_FATBIN_MAGIC: c_uint = 0x1EE55A01;
const FATBIN_VERSION: c_ushort = 0x01;
#[repr(C, align(8))]
@@ -1484,16 +1502,6 @@ pub(crate) fn cuModuleGetFunction_Post(
state: &mut trace::StateTracker,
result: CUresult,
) {
- if !state.module_exists(hmod) {
- fn_logger.log(log::LogEntry::UnknownModule(hmod))
- }
- match unsafe { CStr::from_ptr(name) }.to_str() {
- Ok(str) => fn_logger.log(log::LogEntry::FunctionParameter {
- name: "name",
- value: str.to_string(),
- }),
- Err(e) => fn_logger.log(log::LogEntry::MalformedFunctionName(e)),
- }
}
#[allow(non_snake_case)]
@@ -1505,10 +1513,6 @@ pub(crate) fn cuDeviceGetAttribute_Post(
state: &mut trace::StateTracker,
result: CUresult,
) {
- fn_logger.log(log::LogEntry::FunctionParameter {
- name: "attrib",
- value: attrib.0.to_string(),
- });
}
#[allow(non_snake_case)]
@@ -1524,3 +1528,16 @@ pub(crate) fn cuDeviceComputeCapability_Post(
unsafe { *major = major_ver_override as i32 };
}
}
+
+#[allow(non_snake_case)]
+pub(crate) fn cuModuleLoadFatBinary_Post(
+ module: *mut CUmodule,
+ fatCubin: *const ::std::os::raw::c_void,
+ fn_logger: &mut log::FunctionLogger,
+ state: &mut trace::StateTracker,
+ result: CUresult,
+) {
+ if result == CUresult::CUDA_SUCCESS {
+ panic!()
+ }
+}
diff --git a/zluda_dump/src/log.rs b/zluda_dump/src/log.rs
index 57c804c..ef36acd 100644
--- a/zluda_dump/src/log.rs
+++ b/zluda_dump/src/log.rs
@@ -1,5 +1,7 @@
use crate::cuda::CUmodule;
use crate::cuda::CUuuid;
+use crate::format;
+use crate::format::FormatCudaObject;
use super::CUresult;
use super::Settings;
@@ -202,28 +204,35 @@ impl Factory {
pub(crate) fn get_logger(&mut self, func: &'static str) -> FunctionLogger {
FunctionLogger {
result: None,
- name: Cow::Borrowed(func),
+ name: CudaFunctionName::Normal(func),
fallible_emitter: &mut self.fallible_emitter,
infallible_emitter: &mut self.infallible_emitter,
write_buffer: &mut self.write_buffer,
log_queue: &mut self.log_queue,
+ finished_writing_args: false,
+ args_to_write: 0,
}
}
- pub(crate) fn get_logger_dark_api(&mut self, guid: CUuuid, idx: usize) -> FunctionLogger {
- let guid = guid.bytes;
- let fn_name = format!("{{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}::{}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15], idx);
+ pub(crate) fn get_logger_dark_api(&mut self, guid: CUuuid, index: usize) -> FunctionLogger {
FunctionLogger {
result: None,
- name: Cow::Owned(fn_name),
+ name: CudaFunctionName::Dark { guid, index },
fallible_emitter: &mut self.fallible_emitter,
infallible_emitter: &mut self.infallible_emitter,
write_buffer: &mut self.write_buffer,
log_queue: &mut self.log_queue,
+ finished_writing_args: false,
+ args_to_write: 0,
}
}
}
+enum CudaFunctionName {
+ Normal(&'static str),
+ Dark { guid: CUuuid, index: usize },
+}
+
// This encapsulates log output for a single function call.
// It's a separate struct and not just a plain function for two reasons:
// * While we want to always display return code before logging errors,
@@ -231,11 +240,13 @@ impl Factory {
// * We want to handle panics gracefully with Drop
pub(crate) struct FunctionLogger<'a> {
pub(crate) result: Option<CUresult>,
- name: Cow<'static, str>,
+ name: CudaFunctionName,
infallible_emitter: &'a mut Box<dyn WriteTrailingZeroAware>,
fallible_emitter: &'a mut Option<Box<dyn WriteTrailingZeroAware>>,
write_buffer: &'a mut WriteBuffer,
log_queue: &'a mut Vec<LogEntry>,
+ args_to_write: usize,
+ finished_writing_args: bool,
}
impl<'a> FunctionLogger<'a> {
@@ -250,11 +261,16 @@ impl<'a> FunctionLogger<'a> {
}
fn flush_log_queue_to_write_buffer(&mut self) {
- self.write_buffer.start_line();
- self.write_buffer.write(&self.name);
- self.write_buffer.write("(...) -> ");
+ // TODO: remove this once everything has been converted to detailed logging
+ if !self.finished_writing_args {
+ self.begin_writing_arguments(0);
+ self.write_buffer.write("...) -> ");
+ }
if let Some(result) = self.result {
- write!(self.write_buffer, "{:#X}", result.0).unwrap_or_else(|_| unreachable!());
+ match format::stringify_curesult(result) {
+ Some(text) => self.write_buffer.write(text),
+ None => write!(self.write_buffer, "{}", result.0).unwrap(),
+ }
} else {
self.write_buffer.write("(UNKNOWN)");
};
@@ -274,6 +290,35 @@ impl<'a> FunctionLogger<'a> {
self.write_buffer.end_line();
self.write_buffer.finish();
}
+
+ pub(crate) fn begin_writing_arguments(&mut self, len: usize) {
+ self.args_to_write = len;
+ match self.name {
+ CudaFunctionName::Normal(fn_name) => self.write_buffer.write(fn_name),
+ CudaFunctionName::Dark { guid, index } => {
+ guid.write_post_execution(CUresult::CUDA_SUCCESS, &mut self.write_buffer);
+ write!(&mut self.write_buffer, "::{}", index).ok();
+ }
+ }
+ self.write_buffer.write("(")
+ }
+
+ pub(crate) fn write_single_argument<'x>(
+ &mut self,
+ result: CUresult,
+ arg: impl FormatCudaObject,
+ ) {
+ self.args_to_write -= 1;
+ arg.write_post_execution(result, self.write_buffer);
+ if self.args_to_write != 0 {
+ self.write_buffer.write(", ")
+ }
+ }
+
+ pub(crate) fn end_writing_arguments(&mut self) {
+ self.write_buffer.write(") -> ");
+ self.finished_writing_args = true;
+ }
}
impl<'a> Drop for FunctionLogger<'a> {