diff options
author | Andrzej Janik <[email protected]> | 2021-04-12 00:18:27 +0200 |
---|---|---|
committer | Andrzej Janik <[email protected]> | 2021-04-12 00:18:27 +0200 |
commit | 96f95d59ce23497f00340c5b7ba6cac41d6de69f (patch) | |
tree | 5217467b6e3b4b09b582021e6d07560e3ada4eef /zluda_dump/src | |
parent | a39dda67d1fb3897c5ea778ae00c4079e8e2939a (diff) | |
download | ZLUDA-96f95d59ce23497f00340c5b7ba6cac41d6de69f.tar.gz ZLUDA-96f95d59ce23497f00340c5b7ba6cac41d6de69f.zip |
Make zluda_dump more robust
Diffstat (limited to 'zluda_dump/src')
-rw-r--r-- | zluda_dump/src/cuda.rs | 330 | ||||
-rw-r--r-- | zluda_dump/src/lib.rs | 199 |
2 files changed, 500 insertions, 29 deletions
diff --git a/zluda_dump/src/cuda.rs b/zluda_dump/src/cuda.rs index 05e1813..3f78b14 100644 --- a/zluda_dump/src/cuda.rs +++ b/zluda_dump/src/cuda.rs @@ -2342,6 +2342,9 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuDeviceTotalMem(bytes: *mut usize, dev: CUdevice) -> CUresult; +} +extern_redirect! { pub fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: CUdevice) -> CUresult; } extern_redirect! { @@ -2406,6 +2409,13 @@ extern_redirect! { pub fn cuDevicePrimaryCtxReset_v2(dev: CUdevice) -> CUresult; } extern_redirect! { + pub fn cuCtxCreate( + pctx: *mut CUcontext, + flags: ::std::os::raw::c_uint, + dev: CUdevice, + ) -> CUresult; +} +extern_redirect! { pub fn cuCtxCreate_v2( pctx: *mut CUcontext, flags: ::std::os::raw::c_uint, @@ -2413,12 +2423,21 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuCtxDestroy(ctx: CUcontext) -> CUresult; +} +extern_redirect! { pub fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult; } extern_redirect! { + pub fn cuCtxPushCurrent(ctx: CUcontext) -> CUresult; +} +extern_redirect! { pub fn cuCtxPushCurrent_v2(ctx: CUcontext) -> CUresult; } extern_redirect! { + pub fn cuCtxPopCurrent(pctx: *mut CUcontext) -> CUresult; +} +extern_redirect! { pub fn cuCtxPopCurrent_v2(pctx: *mut CUcontext) -> CUresult; } extern_redirect! { @@ -2510,6 +2529,14 @@ extern_redirect_with! { super::cuModuleGetFunction; } extern_redirect! { + pub fn cuModuleGetGlobal( + dptr: *mut CUdeviceptr, + bytes: *mut usize, + hmod: CUmodule, + name: *const ::std::os::raw::c_char, + ) -> CUresult; +} +extern_redirect! { pub fn cuModuleGetGlobal_v2( dptr: *mut CUdeviceptr, bytes: *mut usize, @@ -2532,6 +2559,14 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuLinkCreate( + numOptions: ::std::os::raw::c_uint, + options: *mut CUjit_option, + optionValues: *mut *mut ::std::os::raw::c_void, + stateOut: *mut CUlinkState, + ) -> CUresult; +} +extern_redirect! { pub fn cuLinkCreate_v2( numOptions: ::std::os::raw::c_uint, options: *mut CUjit_option, @@ -2540,6 +2575,18 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuLinkAddData( + state: CUlinkState, + type_: CUjitInputType, + data: *mut ::std::os::raw::c_void, + size: usize, + name: *const ::std::os::raw::c_char, + numOptions: ::std::os::raw::c_uint, + options: *mut CUjit_option, + optionValues: *mut *mut ::std::os::raw::c_void, + ) -> CUresult; +} +extern_redirect! { pub fn cuLinkAddData_v2( state: CUlinkState, type_: CUjitInputType, @@ -2552,6 +2599,16 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuLinkAddFile( + state: CUlinkState, + type_: CUjitInputType, + path: *const ::std::os::raw::c_char, + numOptions: ::std::os::raw::c_uint, + options: *mut CUjit_option, + optionValues: *mut *mut ::std::os::raw::c_void, + ) -> CUresult; +} +extern_redirect! { pub fn cuLinkAddFile_v2( state: CUlinkState, type_: CUjitInputType, @@ -2572,13 +2629,29 @@ extern_redirect! { pub fn cuLinkDestroy(state: CUlinkState) -> CUresult; } extern_redirect! { + pub fn cuMemGetInfo(free: *mut usize, total: *mut usize) -> CUresult; +} +extern_redirect! { pub fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult; } extern_redirect_with! { + pub fn cuMemAlloc(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult; + super::cuMemAlloc; +} +extern_redirect_with! { pub fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult; super::cuMemAlloc_v2; } extern_redirect! { + pub fn cuMemAllocPitch( + dptr: *mut CUdeviceptr, + pPitch: *mut usize, + WidthInBytes: usize, + Height: usize, + ElementSizeBytes: ::std::os::raw::c_uint, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemAllocPitch_v2( dptr: *mut CUdeviceptr, pPitch: *mut usize, @@ -2588,9 +2661,19 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemFree(dptr: CUdeviceptr) -> CUresult; +} +extern_redirect! { pub fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult; } extern_redirect! { + pub fn cuMemGetAddressRange( + pbase: *mut CUdeviceptr, + psize: *mut usize, + dptr: CUdeviceptr, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemGetAddressRange_v2( pbase: *mut CUdeviceptr, psize: *mut usize, @@ -2598,17 +2681,28 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemAllocHost(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult; +} +extern_redirect! { pub fn cuMemAllocHost_v2(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult; } extern_redirect! { pub fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult; } -extern_redirect! { +extern_redirect_with! { pub fn cuMemHostAlloc( pp: *mut *mut ::std::os::raw::c_void, bytesize: usize, Flags: ::std::os::raw::c_uint, ) -> CUresult; + super::cuMemHostAlloc; +} +extern_redirect! { + pub fn cuMemHostGetDevicePointer( + pdptr: *mut CUdeviceptr, + p: *mut ::std::os::raw::c_void, + Flags: ::std::os::raw::c_uint, + ) -> CUresult; } extern_redirect! { pub fn cuMemHostGetDevicePointer_v2( @@ -2670,6 +2764,13 @@ extern_redirect! { pub fn cuIpcCloseMemHandle(dptr: CUdeviceptr) -> CUresult; } extern_redirect! { + pub fn cuMemHostRegister( + p: *mut ::std::os::raw::c_void, + bytesize: usize, + Flags: ::std::os::raw::c_uint, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemHostRegister_v2( p: *mut ::std::os::raw::c_void, bytesize: usize, @@ -2692,6 +2793,13 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpyHtoD( + dstDevice: CUdeviceptr, + srcHost: *const ::std::os::raw::c_void, + ByteCount: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemcpyHtoD_v2( dstDevice: CUdeviceptr, srcHost: *const ::std::os::raw::c_void, @@ -2699,6 +2807,13 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpyDtoH( + dstHost: *mut ::std::os::raw::c_void, + srcDevice: CUdeviceptr, + ByteCount: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemcpyDtoH_v2( dstHost: *mut ::std::os::raw::c_void, srcDevice: CUdeviceptr, @@ -2706,6 +2821,13 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpyDtoD( + dstDevice: CUdeviceptr, + srcDevice: CUdeviceptr, + ByteCount: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemcpyDtoD_v2( dstDevice: CUdeviceptr, srcDevice: CUdeviceptr, @@ -2713,6 +2835,14 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpyDtoA( + dstArray: CUarray, + dstOffset: usize, + srcDevice: CUdeviceptr, + ByteCount: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemcpyDtoA_v2( dstArray: CUarray, dstOffset: usize, @@ -2721,6 +2851,14 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpyAtoD( + dstDevice: CUdeviceptr, + srcArray: CUarray, + srcOffset: usize, + ByteCount: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemcpyAtoD_v2( dstDevice: CUdeviceptr, srcArray: CUarray, @@ -2729,6 +2867,14 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpyHtoA( + dstArray: CUarray, + dstOffset: usize, + srcHost: *const ::std::os::raw::c_void, + ByteCount: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemcpyHtoA_v2( dstArray: CUarray, dstOffset: usize, @@ -2737,6 +2883,14 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpyAtoH( + dstHost: *mut ::std::os::raw::c_void, + srcArray: CUarray, + srcOffset: usize, + ByteCount: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemcpyAtoH_v2( dstHost: *mut ::std::os::raw::c_void, srcArray: CUarray, @@ -2745,6 +2899,15 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpyAtoA( + dstArray: CUarray, + dstOffset: usize, + srcArray: CUarray, + srcOffset: usize, + ByteCount: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemcpyAtoA_v2( dstArray: CUarray, dstOffset: usize, @@ -2754,12 +2917,21 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpy2D(pCopy: *const CUDA_MEMCPY2D) -> CUresult; +} +extern_redirect! { pub fn cuMemcpy2D_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult; } extern_redirect! { + pub fn cuMemcpy2DUnaligned(pCopy: *const CUDA_MEMCPY2D) -> CUresult; +} +extern_redirect! { pub fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult; } extern_redirect! { + pub fn cuMemcpy3D(pCopy: *const CUDA_MEMCPY3D) -> CUresult; +} +extern_redirect! { pub fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult; } extern_redirect! { @@ -2784,6 +2956,14 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpyHtoDAsync( + dstDevice: CUdeviceptr, + srcHost: *const ::std::os::raw::c_void, + ByteCount: usize, + hStream: CUstream, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemcpyHtoDAsync_v2( dstDevice: CUdeviceptr, srcHost: *const ::std::os::raw::c_void, @@ -2792,6 +2972,14 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpyDtoHAsync( + dstHost: *mut ::std::os::raw::c_void, + srcDevice: CUdeviceptr, + ByteCount: usize, + hStream: CUstream, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemcpyDtoHAsync_v2( dstHost: *mut ::std::os::raw::c_void, srcDevice: CUdeviceptr, @@ -2800,6 +2988,14 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpyDtoDAsync( + dstDevice: CUdeviceptr, + srcDevice: CUdeviceptr, + ByteCount: usize, + hStream: CUstream, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemcpyDtoDAsync_v2( dstDevice: CUdeviceptr, srcDevice: CUdeviceptr, @@ -2808,6 +3004,15 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpyHtoAAsync( + dstArray: CUarray, + dstOffset: usize, + srcHost: *const ::std::os::raw::c_void, + ByteCount: usize, + hStream: CUstream, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemcpyHtoAAsync_v2( dstArray: CUarray, dstOffset: usize, @@ -2817,6 +3022,15 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpyAtoHAsync( + dstHost: *mut ::std::os::raw::c_void, + srcArray: CUarray, + srcOffset: usize, + ByteCount: usize, + hStream: CUstream, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemcpyAtoHAsync_v2( dstHost: *mut ::std::os::raw::c_void, srcArray: CUarray, @@ -2826,19 +3040,36 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemcpy2DAsync(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult; +} +extern_redirect! { pub fn cuMemcpy2DAsync_v2(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult; } extern_redirect! { + pub fn cuMemcpy3DAsync(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult; +} +extern_redirect! { pub fn cuMemcpy3DAsync_v2(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult; } extern_redirect! { pub fn cuMemcpy3DPeerAsync(pCopy: *const CUDA_MEMCPY3D_PEER, hStream: CUstream) -> CUresult; } extern_redirect! { + pub fn cuMemsetD8(dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, N: usize) + -> CUresult; +} +extern_redirect! { pub fn cuMemsetD8_v2(dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, N: usize) -> CUresult; } extern_redirect! { + pub fn cuMemsetD16( + dstDevice: CUdeviceptr, + us: ::std::os::raw::c_ushort, + N: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemsetD16_v2( dstDevice: CUdeviceptr, us: ::std::os::raw::c_ushort, @@ -2846,10 +3077,23 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemsetD32(dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, N: usize) + -> CUresult; +} +extern_redirect! { pub fn cuMemsetD32_v2(dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, N: usize) -> CUresult; } extern_redirect! { + pub fn cuMemsetD2D8( + dstDevice: CUdeviceptr, + dstPitch: usize, + uc: ::std::os::raw::c_uchar, + Width: usize, + Height: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemsetD2D8_v2( dstDevice: CUdeviceptr, dstPitch: usize, @@ -2859,6 +3103,15 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemsetD2D16( + dstDevice: CUdeviceptr, + dstPitch: usize, + us: ::std::os::raw::c_ushort, + Width: usize, + Height: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemsetD2D16_v2( dstDevice: CUdeviceptr, dstPitch: usize, @@ -2868,6 +3121,15 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuMemsetD2D32( + dstDevice: CUdeviceptr, + dstPitch: usize, + ui: ::std::os::raw::c_uint, + Width: usize, + Height: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuMemsetD2D32_v2( dstDevice: CUdeviceptr, dstPitch: usize, @@ -2931,12 +3193,24 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuArrayCreate( + pHandle: *mut CUarray, + pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR, + ) -> CUresult; +} +extern_redirect! { pub fn cuArrayCreate_v2( pHandle: *mut CUarray, pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR, ) -> CUresult; } extern_redirect! { + pub fn cuArrayGetDescriptor( + pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR, + hArray: CUarray, + ) -> CUresult; +} +extern_redirect! { pub fn cuArrayGetDescriptor_v2( pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR, hArray: CUarray, @@ -2958,12 +3232,24 @@ extern_redirect! { pub fn cuArrayDestroy(hArray: CUarray) -> CUresult; } extern_redirect! { + pub fn cuArray3DCreate( + pHandle: *mut CUarray, + pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR, + ) -> CUresult; +} +extern_redirect! { pub fn cuArray3DCreate_v2( pHandle: *mut CUarray, pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR, ) -> CUresult; } extern_redirect! { + pub fn cuArray3DGetDescriptor( + pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR, + hArray: CUarray, + ) -> CUresult; +} +extern_redirect! { pub fn cuArray3DGetDescriptor_v2( pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR, hArray: CUarray, @@ -3170,6 +3456,9 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuStreamBeginCapture(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult; +} +extern_redirect! { pub fn cuStreamBeginCapture_v2(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult; } extern_redirect! { @@ -3206,6 +3495,9 @@ extern_redirect! { pub fn cuStreamSynchronize(hStream: CUstream) -> CUresult; } extern_redirect! { + pub fn cuStreamDestroy(hStream: CUstream) -> CUresult; +} +extern_redirect! { pub fn cuStreamDestroy_v2(hStream: CUstream) -> CUresult; } extern_redirect! { @@ -3245,6 +3537,9 @@ extern_redirect! { pub fn cuEventSynchronize(hEvent: CUevent) -> CUresult; } extern_redirect! { + pub fn cuEventDestroy(hEvent: CUevent) -> CUresult; +} +extern_redirect! { pub fn cuEventDestroy_v2(hEvent: CUevent) -> CUresult; } extern_redirect! { @@ -3667,6 +3962,15 @@ extern_redirect! { pub fn cuGraphDestroyNode(hNode: CUgraphNode) -> CUresult; } extern_redirect! { + pub fn cuGraphInstantiate( + phGraphExec: *mut CUgraphExec, + hGraph: CUgraph, + phErrorNode: *mut CUgraphNode, + logBuffer: *mut ::std::os::raw::c_char, + bufferSize: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuGraphInstantiate_v2( phGraphExec: *mut CUgraphExec, hGraph: CUgraph, @@ -3824,6 +4128,14 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuTexRefSetAddress( + ByteOffset: *mut usize, + hTexRef: CUtexref, + dptr: CUdeviceptr, + bytes: usize, + ) -> CUresult; +} +extern_redirect! { pub fn cuTexRefSetAddress_v2( ByteOffset: *mut usize, hTexRef: CUtexref, @@ -3882,6 +4194,9 @@ extern_redirect! { pub fn cuTexRefSetFlags(hTexRef: CUtexref, Flags: ::std::os::raw::c_uint) -> CUresult; } extern_redirect! { + pub fn cuTexRefGetAddress(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult; +} +extern_redirect! { pub fn cuTexRefGetAddress_v2(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult; } extern_redirect! { @@ -4035,6 +4350,13 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuGraphicsResourceGetMappedPointer( + pDevPtr: *mut CUdeviceptr, + pSize: *mut usize, + resource: CUgraphicsResource, + ) -> CUresult; +} +extern_redirect! { pub fn cuGraphicsResourceGetMappedPointer_v2( pDevPtr: *mut CUdeviceptr, pSize: *mut usize, @@ -4042,6 +4364,12 @@ extern_redirect! { ) -> CUresult; } extern_redirect! { + pub fn cuGraphicsResourceSetMapFlags( + resource: CUgraphicsResource, + flags: ::std::os::raw::c_uint, + ) -> CUresult; +} +extern_redirect! { pub fn cuGraphicsResourceSetMapFlags_v2( resource: CUgraphicsResource, flags: ::std::os::raw::c_uint, diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs index c10057d..7387dcc 100644 --- a/zluda_dump/src/lib.rs +++ b/zluda_dump/src/lib.rs @@ -1,5 +1,5 @@ use std::{ - collections::HashMap, + collections::{BTreeMap, HashMap}, env, error::Error, ffi::{c_void, CStr}, @@ -24,6 +24,10 @@ use regex::Regex; #[cfg_attr(not(windows), path = "os_unix.rs")] mod os; +const CU_LAUNCH_PARAM_END: *mut c_void = 0 as *mut _; +const CU_LAUNCH_PARAM_BUFFER_POINTER: *mut c_void = 1 as *mut _; +const CU_LAUNCH_PARAM_BUFFER_SIZE: *mut c_void = 2 as *mut _; + macro_rules! extern_redirect { (pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;) => { #[no_mangle] @@ -68,11 +72,18 @@ mod cuda; pub static mut LIBCUDA_HANDLE: *mut c_void = ptr::null_mut(); pub static mut MODULES: Option<HashMap<CUmodule, ModuleDump>> = None; pub static mut KERNELS: Option<HashMap<CUfunction, KernelDump>> = None; -pub static mut BUFFERS: Vec<(usize, usize)> = Vec::new(); +static mut BUFFERS: Option<BTreeMap<usize, (usize, AllocLocation)>> = None; pub static mut LAUNCH_COUNTER: usize = 0; pub static mut KERNEL_PATTERN: Option<Regex> = None; pub static mut OVERRIDE_COMPUTE_CAPABILITY_MAJOR: Option<i32> = None; +#[derive(Clone, Copy)] +enum AllocLocation { + Device, + DeviceV2, + Host, +} + pub struct ModuleDump { content: Rc<String>, kernels_args: Option<HashMap<String, Vec<usize>>>, @@ -88,6 +99,9 @@ pub struct KernelDump { // it's because CUDA Runtime API does dlopen to open libcuda.so, which ignores LD_PRELOAD pub unsafe fn init_libcuda_handle() { if LIBCUDA_HANDLE == ptr::null_mut() { + MODULES = Some(HashMap::new()); + KERNELS = Some(HashMap::new()); + BUFFERS = Some(BTreeMap::new()); let libcuda_handle = os::load_cuda_library(); assert_ne!(libcuda_handle, ptr::null_mut()); LIBCUDA_HANDLE = libcuda_handle; @@ -162,8 +176,7 @@ unsafe fn record_module_image(module: CUmodule, image: &str) { None } }; - let modules = MODULES.get_or_insert_with(|| HashMap::new()); - modules.insert( + MODULES.as_mut().unwrap().insert( module, ModuleDump { content: Rc::new(image.to_string()), @@ -251,8 +264,7 @@ unsafe fn cuModuleGetFunction( } else { None }; - let kernel_args_map = KERNELS.get_or_insert_with(|| HashMap::new()); - kernel_args_map.insert( + KERNELS.as_mut().unwrap().insert( *hfunc, KernelDump { module_content: module_dump.content.clone(), @@ -273,15 +285,59 @@ unsafe fn cuModuleGetFunction( } #[allow(non_snake_case)] +pub unsafe fn cuMemAlloc( + dptr: *mut CUdeviceptr, + bytesize: usize, + cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult, +) -> CUresult { + cuMemAlloc_impl(false, dptr, bytesize, cont) +} + +#[allow(non_snake_case)] pub unsafe fn cuMemAlloc_v2( dptr: *mut CUdeviceptr, bytesize: usize, cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult, ) -> CUresult { + cuMemAlloc_impl(true, dptr, bytesize, cont) +} + +#[allow(non_snake_case)] +pub unsafe fn cuMemAlloc_impl( + is_v2: bool, + dptr: *mut CUdeviceptr, + bytesize: usize, + cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult, +) -> CUresult { let result = cont(dptr, bytesize); assert_eq!(result, CUresult::CUDA_SUCCESS); let start = (*dptr).0 as usize; - BUFFERS.push((start, bytesize)); + let location = if is_v2 { + AllocLocation::DeviceV2 + } else { + AllocLocation::Device + }; + BUFFERS + .as_mut() + .unwrap() + .insert(start, (bytesize, location)); + CUresult::CUDA_SUCCESS +} + +#[allow(non_snake_case)] +pub unsafe fn cuMemHostAlloc( + pp: *mut *mut c_void, + bytesize: usize, + flags: c_uint, + cont: impl FnOnce(*mut *mut c_void, usize, c_uint) -> CUresult, +) -> CUresult { + let result = cont(pp, bytesize, flags); + assert_eq!(result, CUresult::CUDA_SUCCESS); + let start = (*pp) as usize; + BUFFERS + .as_mut() + .unwrap() + .insert(start, (bytesize, AllocLocation::Host)); CUresult::CUDA_SUCCESS } @@ -330,6 +386,7 @@ pub unsafe fn cuLaunchKernel( blockDimZ, sharedMemBytes, kernelParams, + extra, dump_env, ) .unwrap_or_else(|err| os_log!("{}", err)); @@ -353,6 +410,7 @@ pub unsafe fn cuLaunchKernel( if let Some((_, kernel_dump)) = &dump_env { dump_arguments( kernelParams, + extra, "post", &kernel_dump.name, LAUNCH_COUNTER, @@ -423,6 +481,7 @@ unsafe fn dump_pre_data( blockDimZ: ::std::os::raw::c_uint, sharedMemBytes: ::std::os::raw::c_uint, kernelParams: *mut *mut ::std::os::raw::c_void, + extra: *mut *mut ::std::os::raw::c_void, (dump_dir, kernel_dump): &(PathBuf, &'static KernelDump), ) -> Result<(), Box<dyn Error>> { dump_launch_arguments( @@ -441,6 +500,7 @@ unsafe fn dump_pre_data( module_file.write_all(kernel_dump.module_content.as_bytes())?; dump_arguments( kernelParams, + extra, "pre", &kernel_dump.name, LAUNCH_COUNTER, @@ -449,8 +509,9 @@ unsafe fn dump_pre_data( Ok(()) } -unsafe fn dump_arguments( +fn dump_arguments( kernel_params: *mut *mut ::std::os::raw::c_void, + extra: *mut *mut ::std::os::raw::c_void, prefix: &str, kernel_name: &str, counter: usize, @@ -467,33 +528,115 @@ unsafe fn dump_arguments( fs::remove_dir_all(&dump_dir)?; } fs::create_dir_all(&dump_dir)?; - for (i, arg_len) in args.iter().enumerate() { - let dev_ptr = *(*kernel_params.add(i) as *mut usize); - match BUFFERS.iter().find(|(start, _)| *start == dev_ptr as usize) { - Some((start, len)) => { - let mut output = vec![0u8; *len]; - let error = - cuda::cuMemcpyDtoH_v2(output.as_mut_ptr() as *mut _, CUdeviceptr(*start), *len); - assert_eq!(error, CUresult::CUDA_SUCCESS); - let mut path = dump_dir.clone(); - path.push(format!("arg_{:03}.buffer", i)); - let mut file = File::create(path)?; - file.write_all(&mut output)?; + if kernel_params != ptr::null_mut() { + for (i, arg_len) in args.iter().enumerate() { + unsafe { dump_argument_to_file(&dump_dir, i, *arg_len, *kernel_params.add(i))? }; + } + } else { + let mut offset = 0; + let mut buffer_ptr = None; + let mut buffer_size = None; + loop { + match unsafe { *extra.add(offset) } { + CU_LAUNCH_PARAM_END => break, + CU_LAUNCH_PARAM_BUFFER_POINTER => { + buffer_ptr = Some(unsafe { *extra.add(offset + 1) as *mut u8 }); + } + CU_LAUNCH_PARAM_BUFFER_SIZE => { + buffer_size = Some(unsafe { *(*extra.add(offset + 1) as *mut usize) }); + } + _ => return Err("Malformed `extra` parameter to kernel launch")?, } - None => { - let mut path = dump_dir.clone(); - path.push(format!("arg_{:03}", i)); - let mut file = File::create(path)?; - file.write_all(slice::from_raw_parts( - *kernel_params.add(i) as *mut u8, - *arg_len, - ))?; + offset += 2; + } + match (buffer_size, buffer_ptr) { + (Some(buffer_size), Some(buffer_ptr)) => { + let sum_of_kernel_argument_sizes = args.iter().fold(0, |offset, size_of_arg| { + size_of_arg + round_up_to_multiple(offset, *size_of_arg) + }); + if buffer_size != sum_of_kernel_argument_sizes { + return Err("Malformed `extra` parameter to kernel launch")?; + } + let mut offset = 0; + for (i, arg_size) in args.iter().enumerate() { + let buffer_offset = round_up_to_multiple(offset, *arg_size); + unsafe { + dump_argument_to_file( + &dump_dir, + i, + *arg_size, + buffer_ptr.add(buffer_offset) as *const _, + )? + }; + offset = buffer_offset + *arg_size; + } } + _ => return Err("Malformed `extra` parameter to kernel launch")?, + } + } + Ok(()) +} + +fn round_up_to_multiple(x: usize, multiple: usize) -> usize { + ((x + multiple - 1) / multiple) * multiple +} + +unsafe fn dump_argument_to_file( + dump_dir: &PathBuf, + i: usize, + arg_len: usize, + ptr: *const c_void, +) -> Result<(), Box<dyn Error>> { + // Don't check if arg_len == sizeof(void*), there are libraries + // which for some reason pass 32 pointers (4 bytes) in 8 byte arguments + match get_buffer_length(*(ptr as *mut usize)) { + Some((start, len, location)) => { + let mut output = vec![0u8; len]; + let memcpy_fn = match location { + AllocLocation::Device => |src, dst: usize, len| { + let error = cuda::cuMemcpyDtoH(dst as *mut _, CUdeviceptr(src), len); + assert_eq!(error, CUresult::CUDA_SUCCESS); + }, + AllocLocation::DeviceV2 => |src, dst: usize, len| { + let error = cuda::cuMemcpyDtoH_v2(dst as *mut _, CUdeviceptr(src), len); + assert_eq!(error, CUresult::CUDA_SUCCESS); + }, + AllocLocation::Host => |src, dst: usize, len| { + ptr::copy_nonoverlapping(src as *mut u8, dst as *mut u8, len); + }, + }; + memcpy_fn(start, output.as_mut_ptr() as usize, len); + let mut path = dump_dir.clone(); + path.push(format!("arg_{:03}.buffer", i)); + let mut file = File::create(path)?; + file.write_all(&mut output)?; + } + None => { + let mut path = dump_dir.clone(); + path.push(format!("arg_{:03}", i)); + let mut file = File::create(path)?; + file.write_all(slice::from_raw_parts(ptr as *mut u8, arg_len))?; } } Ok(()) } +unsafe fn get_buffer_length(ptr: usize) -> Option<(usize, usize, AllocLocation)> { + BUFFERS + .as_mut() + .unwrap() + .range(..=ptr) + .next_back() + .and_then(|(start, (len, loc))| { + let end = *start + *len; + if ptr < end { + Some((ptr, end - ptr, *loc)) + } else { + None + } + }) +} + fn get_dump_dir() -> Result<PathBuf, Box<dyn Error>> { let dir = env::var("ZLUDA_DUMP_DIR")?; let mut main_dir = PathBuf::from(dir); |