aboutsummaryrefslogtreecommitdiffhomepage
path: root/zluda_dump
diff options
context:
space:
mode:
author Andrzej Janik <[email protected]> 2021-04-12 00:18:27 +0200
committer Andrzej Janik <[email protected]> 2021-04-12 00:18:27 +0200
commit 96f95d59ce23497f00340c5b7ba6cac41d6de69f (patch)
tree 5217467b6e3b4b09b582021e6d07560e3ada4eef /zluda_dump
parent a39dda67d1fb3897c5ea778ae00c4079e8e2939a (diff)
download ZLUDA-96f95d59ce23497f00340c5b7ba6cac41d6de69f.tar.gz
ZLUDA-96f95d59ce23497f00340c5b7ba6cac41d6de69f.zip
Make zluda_dump more robust
Diffstat (limited to 'zluda_dump')
-rw-r--r-- zluda_dump/src/cuda.rs 330
-rw-r--r-- zluda_dump/src/lib.rs 199
2 files changed, 500 insertions, 29 deletions
diff --git a/zluda_dump/src/cuda.rs b/zluda_dump/src/cuda.rs
index 05e1813..3f78b14 100644
--- a/zluda_dump/src/cuda.rs
+++ b/zluda_dump/src/cuda.rs
@@ -2342,6 +2342,9 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuDeviceTotalMem(bytes: *mut usize, dev: CUdevice) -> CUresult;
+}
+extern_redirect! {
pub fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: CUdevice) -> CUresult;
}
extern_redirect! {
@@ -2406,6 +2409,13 @@ extern_redirect! {
pub fn cuDevicePrimaryCtxReset_v2(dev: CUdevice) -> CUresult;
}
extern_redirect! {
+ pub fn cuCtxCreate(
+ pctx: *mut CUcontext,
+ flags: ::std::os::raw::c_uint,
+ dev: CUdevice,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuCtxCreate_v2(
pctx: *mut CUcontext,
flags: ::std::os::raw::c_uint,
@@ -2413,12 +2423,21 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuCtxDestroy(ctx: CUcontext) -> CUresult;
+}
+extern_redirect! {
pub fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult;
}
extern_redirect! {
+ pub fn cuCtxPushCurrent(ctx: CUcontext) -> CUresult;
+}
+extern_redirect! {
pub fn cuCtxPushCurrent_v2(ctx: CUcontext) -> CUresult;
}
extern_redirect! {
+ pub fn cuCtxPopCurrent(pctx: *mut CUcontext) -> CUresult;
+}
+extern_redirect! {
pub fn cuCtxPopCurrent_v2(pctx: *mut CUcontext) -> CUresult;
}
extern_redirect! {
@@ -2510,6 +2529,14 @@ extern_redirect_with! {
super::cuModuleGetFunction;
}
extern_redirect! {
+ pub fn cuModuleGetGlobal(
+ dptr: *mut CUdeviceptr,
+ bytes: *mut usize,
+ hmod: CUmodule,
+ name: *const ::std::os::raw::c_char,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuModuleGetGlobal_v2(
dptr: *mut CUdeviceptr,
bytes: *mut usize,
@@ -2532,6 +2559,14 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuLinkCreate(
+ numOptions: ::std::os::raw::c_uint,
+ options: *mut CUjit_option,
+ optionValues: *mut *mut ::std::os::raw::c_void,
+ stateOut: *mut CUlinkState,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuLinkCreate_v2(
numOptions: ::std::os::raw::c_uint,
options: *mut CUjit_option,
@@ -2540,6 +2575,18 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuLinkAddData(
+ state: CUlinkState,
+ type_: CUjitInputType,
+ data: *mut ::std::os::raw::c_void,
+ size: usize,
+ name: *const ::std::os::raw::c_char,
+ numOptions: ::std::os::raw::c_uint,
+ options: *mut CUjit_option,
+ optionValues: *mut *mut ::std::os::raw::c_void,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuLinkAddData_v2(
state: CUlinkState,
type_: CUjitInputType,
@@ -2552,6 +2599,16 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuLinkAddFile(
+ state: CUlinkState,
+ type_: CUjitInputType,
+ path: *const ::std::os::raw::c_char,
+ numOptions: ::std::os::raw::c_uint,
+ options: *mut CUjit_option,
+ optionValues: *mut *mut ::std::os::raw::c_void,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuLinkAddFile_v2(
state: CUlinkState,
type_: CUjitInputType,
@@ -2572,13 +2629,29 @@ extern_redirect! {
pub fn cuLinkDestroy(state: CUlinkState) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemGetInfo(free: *mut usize, total: *mut usize) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult;
}
extern_redirect_with! {
+ pub fn cuMemAlloc(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult;
+ super::cuMemAlloc;
+}
+extern_redirect_with! {
pub fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult;
super::cuMemAlloc_v2;
}
extern_redirect! {
+ pub fn cuMemAllocPitch(
+ dptr: *mut CUdeviceptr,
+ pPitch: *mut usize,
+ WidthInBytes: usize,
+ Height: usize,
+ ElementSizeBytes: ::std::os::raw::c_uint,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemAllocPitch_v2(
dptr: *mut CUdeviceptr,
pPitch: *mut usize,
@@ -2588,9 +2661,19 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemFree(dptr: CUdeviceptr) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemGetAddressRange(
+ pbase: *mut CUdeviceptr,
+ psize: *mut usize,
+ dptr: CUdeviceptr,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemGetAddressRange_v2(
pbase: *mut CUdeviceptr,
psize: *mut usize,
@@ -2598,17 +2681,28 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemAllocHost(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemAllocHost_v2(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult;
}
extern_redirect! {
pub fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult;
}
-extern_redirect! {
+extern_redirect_with! {
pub fn cuMemHostAlloc(
pp: *mut *mut ::std::os::raw::c_void,
bytesize: usize,
Flags: ::std::os::raw::c_uint,
) -> CUresult;
+ super::cuMemHostAlloc;
+}
+extern_redirect! {
+ pub fn cuMemHostGetDevicePointer(
+ pdptr: *mut CUdeviceptr,
+ p: *mut ::std::os::raw::c_void,
+ Flags: ::std::os::raw::c_uint,
+ ) -> CUresult;
}
extern_redirect! {
pub fn cuMemHostGetDevicePointer_v2(
@@ -2670,6 +2764,13 @@ extern_redirect! {
pub fn cuIpcCloseMemHandle(dptr: CUdeviceptr) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemHostRegister(
+ p: *mut ::std::os::raw::c_void,
+ bytesize: usize,
+ Flags: ::std::os::raw::c_uint,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemHostRegister_v2(
p: *mut ::std::os::raw::c_void,
bytesize: usize,
@@ -2692,6 +2793,13 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpyHtoD(
+ dstDevice: CUdeviceptr,
+ srcHost: *const ::std::os::raw::c_void,
+ ByteCount: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpyHtoD_v2(
dstDevice: CUdeviceptr,
srcHost: *const ::std::os::raw::c_void,
@@ -2699,6 +2807,13 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpyDtoH(
+ dstHost: *mut ::std::os::raw::c_void,
+ srcDevice: CUdeviceptr,
+ ByteCount: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpyDtoH_v2(
dstHost: *mut ::std::os::raw::c_void,
srcDevice: CUdeviceptr,
@@ -2706,6 +2821,13 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpyDtoD(
+ dstDevice: CUdeviceptr,
+ srcDevice: CUdeviceptr,
+ ByteCount: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpyDtoD_v2(
dstDevice: CUdeviceptr,
srcDevice: CUdeviceptr,
@@ -2713,6 +2835,14 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpyDtoA(
+ dstArray: CUarray,
+ dstOffset: usize,
+ srcDevice: CUdeviceptr,
+ ByteCount: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpyDtoA_v2(
dstArray: CUarray,
dstOffset: usize,
@@ -2721,6 +2851,14 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpyAtoD(
+ dstDevice: CUdeviceptr,
+ srcArray: CUarray,
+ srcOffset: usize,
+ ByteCount: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpyAtoD_v2(
dstDevice: CUdeviceptr,
srcArray: CUarray,
@@ -2729,6 +2867,14 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpyHtoA(
+ dstArray: CUarray,
+ dstOffset: usize,
+ srcHost: *const ::std::os::raw::c_void,
+ ByteCount: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpyHtoA_v2(
dstArray: CUarray,
dstOffset: usize,
@@ -2737,6 +2883,14 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpyAtoH(
+ dstHost: *mut ::std::os::raw::c_void,
+ srcArray: CUarray,
+ srcOffset: usize,
+ ByteCount: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpyAtoH_v2(
dstHost: *mut ::std::os::raw::c_void,
srcArray: CUarray,
@@ -2745,6 +2899,15 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpyAtoA(
+ dstArray: CUarray,
+ dstOffset: usize,
+ srcArray: CUarray,
+ srcOffset: usize,
+ ByteCount: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpyAtoA_v2(
dstArray: CUarray,
dstOffset: usize,
@@ -2754,12 +2917,21 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpy2D(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpy2D_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpy2DUnaligned(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpy3D(pCopy: *const CUDA_MEMCPY3D) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult;
}
extern_redirect! {
@@ -2784,6 +2956,14 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpyHtoDAsync(
+ dstDevice: CUdeviceptr,
+ srcHost: *const ::std::os::raw::c_void,
+ ByteCount: usize,
+ hStream: CUstream,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpyHtoDAsync_v2(
dstDevice: CUdeviceptr,
srcHost: *const ::std::os::raw::c_void,
@@ -2792,6 +2972,14 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpyDtoHAsync(
+ dstHost: *mut ::std::os::raw::c_void,
+ srcDevice: CUdeviceptr,
+ ByteCount: usize,
+ hStream: CUstream,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpyDtoHAsync_v2(
dstHost: *mut ::std::os::raw::c_void,
srcDevice: CUdeviceptr,
@@ -2800,6 +2988,14 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpyDtoDAsync(
+ dstDevice: CUdeviceptr,
+ srcDevice: CUdeviceptr,
+ ByteCount: usize,
+ hStream: CUstream,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpyDtoDAsync_v2(
dstDevice: CUdeviceptr,
srcDevice: CUdeviceptr,
@@ -2808,6 +3004,15 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpyHtoAAsync(
+ dstArray: CUarray,
+ dstOffset: usize,
+ srcHost: *const ::std::os::raw::c_void,
+ ByteCount: usize,
+ hStream: CUstream,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpyHtoAAsync_v2(
dstArray: CUarray,
dstOffset: usize,
@@ -2817,6 +3022,15 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpyAtoHAsync(
+ dstHost: *mut ::std::os::raw::c_void,
+ srcArray: CUarray,
+ srcOffset: usize,
+ ByteCount: usize,
+ hStream: CUstream,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpyAtoHAsync_v2(
dstHost: *mut ::std::os::raw::c_void,
srcArray: CUarray,
@@ -2826,19 +3040,36 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpy2DAsync(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpy2DAsync_v2(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemcpy3DAsync(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemcpy3DAsync_v2(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpy3DPeerAsync(pCopy: *const CUDA_MEMCPY3D_PEER, hStream: CUstream) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemsetD8(dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, N: usize)
+ -> CUresult;
+}
+extern_redirect! {
pub fn cuMemsetD8_v2(dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, N: usize)
-> CUresult;
}
extern_redirect! {
+ pub fn cuMemsetD16(
+ dstDevice: CUdeviceptr,
+ us: ::std::os::raw::c_ushort,
+ N: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemsetD16_v2(
dstDevice: CUdeviceptr,
us: ::std::os::raw::c_ushort,
@@ -2846,10 +3077,23 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemsetD32(dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, N: usize)
+ -> CUresult;
+}
+extern_redirect! {
pub fn cuMemsetD32_v2(dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, N: usize)
-> CUresult;
}
extern_redirect! {
+ pub fn cuMemsetD2D8(
+ dstDevice: CUdeviceptr,
+ dstPitch: usize,
+ uc: ::std::os::raw::c_uchar,
+ Width: usize,
+ Height: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemsetD2D8_v2(
dstDevice: CUdeviceptr,
dstPitch: usize,
@@ -2859,6 +3103,15 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemsetD2D16(
+ dstDevice: CUdeviceptr,
+ dstPitch: usize,
+ us: ::std::os::raw::c_ushort,
+ Width: usize,
+ Height: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemsetD2D16_v2(
dstDevice: CUdeviceptr,
dstPitch: usize,
@@ -2868,6 +3121,15 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuMemsetD2D32(
+ dstDevice: CUdeviceptr,
+ dstPitch: usize,
+ ui: ::std::os::raw::c_uint,
+ Width: usize,
+ Height: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuMemsetD2D32_v2(
dstDevice: CUdeviceptr,
dstPitch: usize,
@@ -2931,12 +3193,24 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuArrayCreate(
+ pHandle: *mut CUarray,
+ pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuArrayCreate_v2(
pHandle: *mut CUarray,
pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR,
) -> CUresult;
}
extern_redirect! {
+ pub fn cuArrayGetDescriptor(
+ pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR,
+ hArray: CUarray,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuArrayGetDescriptor_v2(
pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR,
hArray: CUarray,
@@ -2958,12 +3232,24 @@ extern_redirect! {
pub fn cuArrayDestroy(hArray: CUarray) -> CUresult;
}
extern_redirect! {
+ pub fn cuArray3DCreate(
+ pHandle: *mut CUarray,
+ pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuArray3DCreate_v2(
pHandle: *mut CUarray,
pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR,
) -> CUresult;
}
extern_redirect! {
+ pub fn cuArray3DGetDescriptor(
+ pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR,
+ hArray: CUarray,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuArray3DGetDescriptor_v2(
pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR,
hArray: CUarray,
@@ -3170,6 +3456,9 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuStreamBeginCapture(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult;
+}
+extern_redirect! {
pub fn cuStreamBeginCapture_v2(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult;
}
extern_redirect! {
@@ -3206,6 +3495,9 @@ extern_redirect! {
pub fn cuStreamSynchronize(hStream: CUstream) -> CUresult;
}
extern_redirect! {
+ pub fn cuStreamDestroy(hStream: CUstream) -> CUresult;
+}
+extern_redirect! {
pub fn cuStreamDestroy_v2(hStream: CUstream) -> CUresult;
}
extern_redirect! {
@@ -3245,6 +3537,9 @@ extern_redirect! {
pub fn cuEventSynchronize(hEvent: CUevent) -> CUresult;
}
extern_redirect! {
+ pub fn cuEventDestroy(hEvent: CUevent) -> CUresult;
+}
+extern_redirect! {
pub fn cuEventDestroy_v2(hEvent: CUevent) -> CUresult;
}
extern_redirect! {
@@ -3667,6 +3962,15 @@ extern_redirect! {
pub fn cuGraphDestroyNode(hNode: CUgraphNode) -> CUresult;
}
extern_redirect! {
+ pub fn cuGraphInstantiate(
+ phGraphExec: *mut CUgraphExec,
+ hGraph: CUgraph,
+ phErrorNode: *mut CUgraphNode,
+ logBuffer: *mut ::std::os::raw::c_char,
+ bufferSize: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuGraphInstantiate_v2(
phGraphExec: *mut CUgraphExec,
hGraph: CUgraph,
@@ -3824,6 +4128,14 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuTexRefSetAddress(
+ ByteOffset: *mut usize,
+ hTexRef: CUtexref,
+ dptr: CUdeviceptr,
+ bytes: usize,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuTexRefSetAddress_v2(
ByteOffset: *mut usize,
hTexRef: CUtexref,
@@ -3882,6 +4194,9 @@ extern_redirect! {
pub fn cuTexRefSetFlags(hTexRef: CUtexref, Flags: ::std::os::raw::c_uint) -> CUresult;
}
extern_redirect! {
+ pub fn cuTexRefGetAddress(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult;
+}
+extern_redirect! {
pub fn cuTexRefGetAddress_v2(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult;
}
extern_redirect! {
@@ -4035,6 +4350,13 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuGraphicsResourceGetMappedPointer(
+ pDevPtr: *mut CUdeviceptr,
+ pSize: *mut usize,
+ resource: CUgraphicsResource,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuGraphicsResourceGetMappedPointer_v2(
pDevPtr: *mut CUdeviceptr,
pSize: *mut usize,
@@ -4042,6 +4364,12 @@ extern_redirect! {
) -> CUresult;
}
extern_redirect! {
+ pub fn cuGraphicsResourceSetMapFlags(
+ resource: CUgraphicsResource,
+ flags: ::std::os::raw::c_uint,
+ ) -> CUresult;
+}
+extern_redirect! {
pub fn cuGraphicsResourceSetMapFlags_v2(
resource: CUgraphicsResource,
flags: ::std::os::raw::c_uint,
diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs
index c10057d..7387dcc 100644
--- a/zluda_dump/src/lib.rs
+++ b/zluda_dump/src/lib.rs
@@ -1,5 +1,5 @@
use std::{
- collections::HashMap,
+ collections::{BTreeMap, HashMap},
env,
error::Error,
ffi::{c_void, CStr},
@@ -24,6 +24,10 @@ use regex::Regex;
#[cfg_attr(not(windows), path = "os_unix.rs")]
mod os;
+// Marker values recognised in the `extra` array of a kernel launch.
+// `dump_arguments` walks that array two slots at a time: a marker followed
+// by the value that belongs to it.
+//
+// Terminates the `extra` array.
+const CU_LAUNCH_PARAM_END: *mut c_void = 0 as *mut _;
+// The following slot is the pointer to the packed kernel argument buffer.
+const CU_LAUNCH_PARAM_BUFFER_POINTER: *mut c_void = 1 as *mut _;
+// The following slot points to a `usize` holding the size of that buffer.
+const CU_LAUNCH_PARAM_BUFFER_SIZE: *mut c_void = 2 as *mut _;
+
macro_rules! extern_redirect {
(pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;) => {
#[no_mangle]
@@ -68,11 +72,18 @@ mod cuda;
pub static mut LIBCUDA_HANDLE: *mut c_void = ptr::null_mut();
pub static mut MODULES: Option<HashMap<CUmodule, ModuleDump>> = None;
pub static mut KERNELS: Option<HashMap<CUfunction, KernelDump>> = None;
-pub static mut BUFFERS: Vec<(usize, usize)> = Vec::new();
+static mut BUFFERS: Option<BTreeMap<usize, (usize, AllocLocation)>> = None;
pub static mut LAUNCH_COUNTER: usize = 0;
pub static mut KERNEL_PATTERN: Option<Regex> = None;
pub static mut OVERRIDE_COMPUTE_CAPABILITY_MAJOR: Option<i32> = None;
+/// Which allocation hook recorded a tracked buffer in `BUFFERS`.
+///
+/// `dump_argument_to_file` uses this to pick the routine that copies the
+/// buffer contents into host memory before writing them to disk.
+#[derive(Clone, Copy)]
+enum AllocLocation {
+ /// Recorded by the `cuMemAlloc` hook; read back with `cuMemcpyDtoH`.
+ Device,
+ /// Recorded by the `cuMemAlloc_v2` hook; read back with `cuMemcpyDtoH_v2`.
+ DeviceV2,
+ /// Recorded by the `cuMemHostAlloc` hook; read back with a plain memory copy.
+ Host,
+}
+
pub struct ModuleDump {
content: Rc<String>,
kernels_args: Option<HashMap<String, Vec<usize>>>,
@@ -88,6 +99,9 @@ pub struct KernelDump {
// it's because CUDA Runtime API does dlopen to open libcuda.so, which ignores LD_PRELOAD
pub unsafe fn init_libcuda_handle() {
if LIBCUDA_HANDLE == ptr::null_mut() {
+ MODULES = Some(HashMap::new());
+ KERNELS = Some(HashMap::new());
+ BUFFERS = Some(BTreeMap::new());
let libcuda_handle = os::load_cuda_library();
assert_ne!(libcuda_handle, ptr::null_mut());
LIBCUDA_HANDLE = libcuda_handle;
@@ -162,8 +176,7 @@ unsafe fn record_module_image(module: CUmodule, image: &str) {
None
}
};
- let modules = MODULES.get_or_insert_with(|| HashMap::new());
- modules.insert(
+ MODULES.as_mut().unwrap().insert(
module,
ModuleDump {
content: Rc::new(image.to_string()),
@@ -251,8 +264,7 @@ unsafe fn cuModuleGetFunction(
} else {
None
};
- let kernel_args_map = KERNELS.get_or_insert_with(|| HashMap::new());
- kernel_args_map.insert(
+ KERNELS.as_mut().unwrap().insert(
*hfunc,
KernelDump {
module_content: module_dump.content.clone(),
@@ -273,15 +285,59 @@ unsafe fn cuModuleGetFunction(
}
#[allow(non_snake_case)]
+/// Hook for the un-suffixed `cuMemAlloc` entry point.
+///
+/// Forwards to `cuMemAlloc_impl` with `is_v2 = false`, so the resulting
+/// buffer is tracked as `AllocLocation::Device`.
+///
+/// `cont` is the continuation that performs the allocation; presumably it
+/// calls the real driver function (TODO confirm against `extern_redirect_with!`).
+pub unsafe fn cuMemAlloc(
+ dptr: *mut CUdeviceptr,
+ bytesize: usize,
+ cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
+) -> CUresult {
+ cuMemAlloc_impl(false, dptr, bytesize, cont)
+}
+
+#[allow(non_snake_case)]
+/// Hook for the `cuMemAlloc_v2` entry point.
+///
+/// Forwards to `cuMemAlloc_impl` with `is_v2 = true`, so the resulting
+/// buffer is tracked as `AllocLocation::DeviceV2`.
+///
+/// `cont` is the continuation that performs the allocation; presumably it
+/// calls the real driver function (TODO confirm against `extern_redirect_with!`).
 pub unsafe fn cuMemAlloc_v2(
 dptr: *mut CUdeviceptr,
 bytesize: usize,
 cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
 ) -> CUresult {
+ cuMemAlloc_impl(true, dptr, bytesize, cont)
+}
+
+#[allow(non_snake_case)]
+/// Shared implementation behind the `cuMemAlloc` and `cuMemAlloc_v2` hooks.
+///
+/// Runs `cont` to perform the allocation and, when it succeeds, records the
+/// new range in `BUFFERS` (start address -> size and `AllocLocation`) so
+/// that kernel arguments pointing into it can later be dumped.
+///
+/// * `is_v2` - selects `AllocLocation::DeviceV2` instead of
+///   `AllocLocation::Device` for the recorded entry.
+/// * `dptr`, `bytesize` - passed to `cont` unchanged.
+///
+/// Returns whatever `cont` returned. A failed allocation is handed back to
+/// the caller untouched and nothing is recorded.
+pub unsafe fn cuMemAlloc_impl(
+    is_v2: bool,
+    dptr: *mut CUdeviceptr,
+    bytesize: usize,
+    cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
+) -> CUresult {
     let result = cont(dptr, bytesize);
-    assert_eq!(result, CUresult::CUDA_SUCCESS);
+    // An allocation failure (e.g. out of memory) is the application's to
+    // handle: the dumper must stay transparent instead of panicking inside
+    // the host process, and `*dptr` is not meaningful after a failure.
+    if result != CUresult::CUDA_SUCCESS {
+        return result;
+    }
     let start = (*dptr).0 as usize;
-    BUFFERS.push((start, bytesize));
+    let location = if is_v2 {
+        AllocLocation::DeviceV2
+    } else {
+        AllocLocation::Device
+    };
+    BUFFERS
+        .as_mut()
+        .unwrap()
+        .insert(start, (bytesize, location));
+    CUresult::CUDA_SUCCESS
+}
+
+#[allow(non_snake_case)]
+/// Hook for `cuMemHostAlloc`.
+///
+/// Runs `cont` to perform the allocation and, when it succeeds, records the
+/// new range in `BUFFERS` as `AllocLocation::Host` so that kernel arguments
+/// pointing into it can later be dumped.
+///
+/// `pp`, `bytesize` and `flags` are passed to `cont` unchanged. Returns
+/// whatever `cont` returned; a failed allocation is handed back to the
+/// caller untouched and nothing is recorded.
+pub unsafe fn cuMemHostAlloc(
+    pp: *mut *mut c_void,
+    bytesize: usize,
+    flags: c_uint,
+    cont: impl FnOnce(*mut *mut c_void, usize, c_uint) -> CUresult,
+) -> CUresult {
+    let result = cont(pp, bytesize, flags);
+    // Stay transparent on failure: report the driver's error to the
+    // application instead of panicking, and do not read `*pp`.
+    if result != CUresult::CUDA_SUCCESS {
+        return result;
+    }
+    let start = (*pp) as usize;
+    BUFFERS
+        .as_mut()
+        .unwrap()
+        .insert(start, (bytesize, AllocLocation::Host));
     CUresult::CUDA_SUCCESS
 }
@@ -330,6 +386,7 @@ pub unsafe fn cuLaunchKernel(
blockDimZ,
sharedMemBytes,
kernelParams,
+ extra,
dump_env,
)
.unwrap_or_else(|err| os_log!("{}", err));
@@ -353,6 +410,7 @@ pub unsafe fn cuLaunchKernel(
if let Some((_, kernel_dump)) = &dump_env {
dump_arguments(
kernelParams,
+ extra,
"post",
&kernel_dump.name,
LAUNCH_COUNTER,
@@ -423,6 +481,7 @@ unsafe fn dump_pre_data(
blockDimZ: ::std::os::raw::c_uint,
sharedMemBytes: ::std::os::raw::c_uint,
kernelParams: *mut *mut ::std::os::raw::c_void,
+ extra: *mut *mut ::std::os::raw::c_void,
(dump_dir, kernel_dump): &(PathBuf, &'static KernelDump),
) -> Result<(), Box<dyn Error>> {
dump_launch_arguments(
@@ -441,6 +500,7 @@ unsafe fn dump_pre_data(
module_file.write_all(kernel_dump.module_content.as_bytes())?;
dump_arguments(
kernelParams,
+ extra,
"pre",
&kernel_dump.name,
LAUNCH_COUNTER,
@@ -449,8 +509,9 @@ unsafe fn dump_pre_data(
Ok(())
}
-unsafe fn dump_arguments(
+fn dump_arguments(
kernel_params: *mut *mut ::std::os::raw::c_void,
+ extra: *mut *mut ::std::os::raw::c_void,
prefix: &str,
kernel_name: &str,
counter: usize,
@@ -467,33 +528,115 @@ unsafe fn dump_arguments(
fs::remove_dir_all(&dump_dir)?;
}
fs::create_dir_all(&dump_dir)?;
- for (i, arg_len) in args.iter().enumerate() {
- let dev_ptr = *(*kernel_params.add(i) as *mut usize);
- match BUFFERS.iter().find(|(start, _)| *start == dev_ptr as usize) {
- Some((start, len)) => {
- let mut output = vec![0u8; *len];
- let error =
- cuda::cuMemcpyDtoH_v2(output.as_mut_ptr() as *mut _, CUdeviceptr(*start), *len);
- assert_eq!(error, CUresult::CUDA_SUCCESS);
- let mut path = dump_dir.clone();
- path.push(format!("arg_{:03}.buffer", i));
- let mut file = File::create(path)?;
- file.write_all(&mut output)?;
+ if kernel_params != ptr::null_mut() {
+ for (i, arg_len) in args.iter().enumerate() {
+ unsafe { dump_argument_to_file(&dump_dir, i, *arg_len, *kernel_params.add(i))? };
+ }
+ } else {
+ let mut offset = 0;
+ let mut buffer_ptr = None;
+ let mut buffer_size = None;
+ loop {
+ match unsafe { *extra.add(offset) } {
+ CU_LAUNCH_PARAM_END => break,
+ CU_LAUNCH_PARAM_BUFFER_POINTER => {
+ buffer_ptr = Some(unsafe { *extra.add(offset + 1) as *mut u8 });
+ }
+ CU_LAUNCH_PARAM_BUFFER_SIZE => {
+ buffer_size = Some(unsafe { *(*extra.add(offset + 1) as *mut usize) });
+ }
+ _ => return Err("Malformed `extra` parameter to kernel launch")?,
}
- None => {
- let mut path = dump_dir.clone();
- path.push(format!("arg_{:03}", i));
- let mut file = File::create(path)?;
- file.write_all(slice::from_raw_parts(
- *kernel_params.add(i) as *mut u8,
- *arg_len,
- ))?;
+ offset += 2;
+ }
+ match (buffer_size, buffer_ptr) {
+ (Some(buffer_size), Some(buffer_ptr)) => {
+ let sum_of_kernel_argument_sizes = args.iter().fold(0, |offset, size_of_arg| {
+ size_of_arg + round_up_to_multiple(offset, *size_of_arg)
+ });
+ if buffer_size != sum_of_kernel_argument_sizes {
+ return Err("Malformed `extra` parameter to kernel launch")?;
+ }
+ let mut offset = 0;
+ for (i, arg_size) in args.iter().enumerate() {
+ let buffer_offset = round_up_to_multiple(offset, *arg_size);
+ unsafe {
+ dump_argument_to_file(
+ &dump_dir,
+ i,
+ *arg_size,
+ buffer_ptr.add(buffer_offset) as *const _,
+ )?
+ };
+ offset = buffer_offset + *arg_size;
+ }
}
+ _ => return Err("Malformed `extra` parameter to kernel launch")?,
+ }
+ }
+ Ok(())
+}
+
+/// Rounds `x` up to the nearest multiple of `multiple`.
+///
+/// Used to compute the aligned offset of each argument inside a packed
+/// kernel argument buffer. A `multiple` of zero (a zero-sized argument)
+/// imposes no alignment, so `x` is returned unchanged instead of dividing
+/// by zero.
+fn round_up_to_multiple(x: usize, multiple: usize) -> usize {
+    if multiple == 0 {
+        return x;
+    }
+    // Work from the remainder so no intermediate `x + multiple - 1` sum is
+    // formed that could overflow for large `x`.
+    match x % multiple {
+        0 => x,
+        remainder => x + (multiple - remainder),
+    }
+}
+
+/// Writes one kernel argument into `dump_dir`.
+///
+/// The leading bytes of the argument are interpreted as an address. If that
+/// address falls inside a buffer tracked in `BUFFERS`, the buffer contents
+/// from that address to the end of the buffer are copied to host memory and
+/// written to `arg_NNN.buffer`; otherwise the raw `arg_len` argument bytes
+/// are written to `arg_NNN`.
+///
+/// * `i` - argument index, used for the file name.
+/// * `arg_len` - size in bytes of the argument stored at `ptr`.
+/// * `ptr` - location of the argument value; no alignment is required.
+///
+/// # Errors
+/// Returns an error if a file cannot be created or written, or if copying
+/// a device buffer to the host fails.
+///
+/// # Safety
+/// `ptr` must be valid for reads of `arg_len` bytes.
+unsafe fn dump_argument_to_file(
+    dump_dir: &PathBuf,
+    i: usize,
+    arg_len: usize,
+    ptr: *const c_void,
+) -> Result<(), Box<dyn Error>> {
+    // Don't check if arg_len == sizeof(void*), there are libraries
+    // which for some reason pass 32 pointers (4 bytes) in 8 byte arguments
+    //
+    // Copy byte-wise instead of dereferencing a `*mut usize`: arguments in a
+    // packed `extra` buffer are only aligned to their own size, and reading
+    // more than `arg_len` bytes would run past the argument. Short arguments
+    // are zero-extended (assumes a little-endian host - TODO confirm if
+    // other hosts ever matter).
+    let mut raw = [0u8; std::mem::size_of::<usize>()];
+    ptr::copy_nonoverlapping(ptr as *const u8, raw.as_mut_ptr(), arg_len.min(raw.len()));
+    match get_buffer_length(usize::from_ne_bytes(raw)) {
+        Some((start, len, location)) => {
+            let mut output = vec![0u8; len];
+            // Read the buffer back through the same API generation that
+            // allocated it; host allocations are directly addressable.
+            let copy_result = match location {
+                AllocLocation::Device => {
+                    cuda::cuMemcpyDtoH(output.as_mut_ptr() as *mut _, CUdeviceptr(start), len)
+                }
+                AllocLocation::DeviceV2 => {
+                    cuda::cuMemcpyDtoH_v2(output.as_mut_ptr() as *mut _, CUdeviceptr(start), len)
+                }
+                AllocLocation::Host => {
+                    ptr::copy_nonoverlapping(start as *const u8, output.as_mut_ptr(), len);
+                    CUresult::CUDA_SUCCESS
+                }
+            };
+            // Report a failed copy through the existing error channel
+            // rather than panicking inside the traced application.
+            if copy_result != CUresult::CUDA_SUCCESS {
+                return Err(format!(
+                    "Failed to copy {} bytes of kernel argument {} to host: {:?}",
+                    len, i, copy_result
+                )
+                .into());
+            }
+            let mut file = File::create(dump_dir.join(format!("arg_{:03}.buffer", i)))?;
+            file.write_all(&output)?;
+        }
+        None => {
+            let mut file = File::create(dump_dir.join(format!("arg_{:03}", i)))?;
+            file.write_all(slice::from_raw_parts(ptr as *const u8, arg_len))?;
         }
     }
     Ok(())
 }
+/// Looks up the tracked buffer that contains the address `ptr`.
+///
+/// Returns `Some((ptr, remaining, location))` where `remaining` is the
+/// number of bytes from `ptr` to the end of the containing buffer and
+/// `location` says how the buffer was allocated. Returns `None` when `ptr`
+/// is not inside any tracked buffer, or when the tracker has not been
+/// initialised yet.
+unsafe fn get_buffer_length(ptr: usize) -> Option<(usize, usize, AllocLocation)> {
+    // The only candidate is the tracked buffer with the greatest start
+    // address that is not above `ptr`.
+    let (start, (len, location)) = BUFFERS.as_ref()?.range(..=ptr).next_back()?;
+    // `start <= ptr` is guaranteed by the range bound, so this cannot
+    // underflow; comparing offsets avoids overflowing `start + len`.
+    let offset = ptr - *start;
+    if offset < *len {
+        Some((ptr, *len - offset, *location))
+    } else {
+        None
+    }
+}
+
fn get_dump_dir() -> Result<PathBuf, Box<dyn Error>> {
let dir = env::var("ZLUDA_DUMP_DIR")?;
let mut main_dir = PathBuf::from(dir);