diff options
author | Andrzej Janik <[email protected]> | 2020-09-27 13:14:19 +0200 |
---|---|---|
committer | Andrzej Janik <[email protected]> | 2020-09-27 13:14:19 +0200 |
commit | e0190fcbe19e9554ccc2fb0d72685569823224ef (patch) | |
tree | c396a59b3080c0bdfbf308742e4f53caf48b5030 | |
parent | 42bcd999eb2caec0046aa76d12ec7e73919495fc (diff) | |
download | ZLUDA-e0190fcbe19e9554ccc2fb0d72685569823224ef.tar.gz ZLUDA-e0190fcbe19e9554ccc2fb0d72685569823224ef.zip |
Add missing support for Milestone 1
-rw-r--r-- | level_zero/src/ze.rs | 5 | ||||
-rw-r--r-- | notcuda/src/cuda.rs | 881 | ||||
-rw-r--r-- | notcuda/src/impl/function.rs | 52 | ||||
-rw-r--r-- | notcuda/src/impl/memory.rs | 4 | ||||
-rw-r--r-- | notcuda/src/impl/mod.rs | 12 | ||||
-rw-r--r-- | notcuda/src/impl/module.rs | 51 | ||||
-rw-r--r-- | notcuda/src/impl/stream.rs | 69 | ||||
-rw-r--r-- | notcuda/src/impl/test.rs | 14 | ||||
-rw-r--r-- | ptx/src/ast.rs | 18 | ||||
-rw-r--r-- | ptx/src/ptx.lalrpop | 18 | ||||
-rw-r--r-- | ptx/src/test/mod.rs | 7 | ||||
-rw-r--r-- | ptx/src/test/spirv_run/mad_s32.ptx | 28 | ||||
-rw-r--r-- | ptx/src/test/spirv_run/mad_s32.spvtxt | 77 | ||||
-rw-r--r-- | ptx/src/test/spirv_run/mod.rs | 10 | ||||
-rw-r--r-- | ptx/src/test/spirv_run/mul_wide.ptx | 24 | ||||
-rw-r--r-- | ptx/src/test/spirv_run/mul_wide.spvtxt | 64 | ||||
-rw-r--r-- | ptx/src/test/vectorAdd_11.ptx | 55 | ||||
-rw-r--r-- | ptx/src/translate.rs | 210 |
18 files changed, 982 insertions, 617 deletions
diff --git a/level_zero/src/ze.rs b/level_zero/src/ze.rs index 16b9130..559805e 100644 --- a/level_zero/src/ze.rs +++ b/level_zero/src/ze.rs @@ -726,6 +726,11 @@ impl<'a> Kernel<'a> { Ok(())
}
+ pub unsafe fn set_arg_raw(&self, index: u32, size: usize, value: *const c_void) -> Result<()> {
+ check!(sys::zeKernelSetArgumentValue(self.0, index, size, value));
+ Ok(())
+ }
+
pub fn set_group_size(&self, x: u32, y: u32, z: u32) -> Result<()> {
check!(sys::zeKernelSetGroupSize(self.0, x, y, z));
Ok(())
diff --git a/notcuda/src/cuda.rs b/notcuda/src/cuda.rs index 122f0da..feeada0 100644 --- a/notcuda/src/cuda.rs +++ b/notcuda/src/cuda.rs @@ -2181,8 +2181,7 @@ impl CUgraphExecUpdateResult_enum { pub struct CUgraphExecUpdateResult_enum(pub ::std::os::raw::c_uint); pub use self::CUgraphExecUpdateResult_enum as CUgraphExecUpdateResult; -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGetErrorString( error: CUresult, pStr: *mut *const ::std::os::raw::c_char, @@ -2190,8 +2189,7 @@ pub extern "C" fn cuGetErrorString( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGetErrorName( error: CUresult, pStr: *mut *const ::std::os::raw::c_char, @@ -2199,33 +2197,28 @@ pub extern "C" fn cuGetErrorName( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuInit(Flags: ::std::os::raw::c_uint) -> CUresult { r#impl::init().encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDriverGetVersion(driverVersion: *mut ::std::os::raw::c_int) -> CUresult { unsafe { *driverVersion = r#impl::driver_get_version() }; CUresult::CUDA_SUCCESS } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDeviceGet(device: *mut CUdevice, ordinal: ::std::os::raw::c_int) -> CUresult { r#impl::device::get(device.decuda(), ordinal) } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDeviceGetCount(count: *mut ::std::os::raw::c_int) -> CUresult { r#impl::device::get_count(count) } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDeviceGetName( name: *mut ::std::os::raw::c_char, len: ::std::os::raw::c_int, @@ -2234,20 +2227,17 @@ pub extern "C" fn cuDeviceGetName( r#impl::device::get_name(name, len, dev.decuda()).encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub 
extern "C" fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: CUdevice) -> CUresult { r#impl::device::get_uuid(uuid, dev.decuda()).encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: CUdevice) -> CUresult { r#impl::device::total_mem_v2(bytes, dev.decuda()).encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDeviceGetAttribute( pi: *mut ::std::os::raw::c_int, attrib: CUdevice_attribute, @@ -2256,8 +2246,7 @@ pub extern "C" fn cuDeviceGetAttribute( r#impl::device::get_attribute(pi, attrib, dev.decuda()).encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDeviceGetNvSciSyncAttributes( nvSciSyncAttrList: *mut ::std::os::raw::c_void, dev: CUdevice, @@ -2266,14 +2255,12 @@ pub extern "C" fn cuDeviceGetNvSciSyncAttributes( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDeviceGetProperties(prop: *mut CUdevprop, dev: CUdevice) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDeviceComputeCapability( major: *mut ::std::os::raw::c_int, minor: *mut ::std::os::raw::c_int, @@ -2282,20 +2269,17 @@ pub extern "C" fn cuDeviceComputeCapability( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDevicePrimaryCtxRetain(pctx: *mut CUcontext, dev: CUdevice) -> CUresult { r#impl::device::primary_ctx_retain(pctx.decuda(), dev.decuda()).encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDevicePrimaryCtxRelease_v2(dev: CUdevice) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDevicePrimaryCtxSetFlags_v2( dev: CUdevice, flags: ::std::os::raw::c_uint, @@ -2303,8 +2287,7 @@ pub extern "C" fn 
cuDevicePrimaryCtxSetFlags_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDevicePrimaryCtxGetState( dev: CUdevice, flags: *mut ::std::os::raw::c_uint, @@ -2313,14 +2296,12 @@ pub extern "C" fn cuDevicePrimaryCtxGetState( r#impl::device::primary_ctx_get_state(dev.decuda(), flags, active).encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDevicePrimaryCtxReset_v2(dev: CUdevice) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxCreate_v2( pctx: *mut CUcontext, flags: ::std::os::raw::c_uint, @@ -2329,92 +2310,77 @@ pub extern "C" fn cuCtxCreate_v2( r#impl::context::create_v2(pctx.decuda(), flags, dev.decuda()).encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult { r#impl::context::destroy_v2(ctx.decuda()) } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxPushCurrent_v2(ctx: CUcontext) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxPopCurrent_v2(pctx: *mut CUcontext) -> CUresult { r#impl::context::pop_current_v2(pctx.decuda()) } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxSetCurrent(ctx: CUcontext) -> CUresult { r#impl::context::set_current(ctx.decuda()) } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxGetCurrent(pctx: *mut CUcontext) -> CUresult { r#impl::context::get_current(pctx.decuda()).encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxGetDevice(device: *mut CUdevice) -> CUresult { r#impl::context::get_device(device.decuda()) } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn 
cuCtxGetFlags(flags: *mut ::std::os::raw::c_uint) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxSynchronize() -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxSetLimit(limit: CUlimit, value: usize) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxGetLimit(pvalue: *mut usize, limit: CUlimit) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxGetCacheConfig(pconfig: *mut CUfunc_cache) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxSetCacheConfig(config: CUfunc_cache) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxGetSharedMemConfig(pConfig: *mut CUsharedconfig) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxSetSharedMemConfig(config: CUsharedconfig) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxGetApiVersion( ctx: CUcontext, version: *mut ::std::os::raw::c_uint, @@ -2422,8 +2388,7 @@ pub extern "C" fn cuCtxGetApiVersion( r#impl::context::get_api_version(ctx.decuda(), version) } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxGetStreamPriorityRange( leastPriority: *mut ::std::os::raw::c_int, greatestPriority: *mut ::std::os::raw::c_int, @@ -2431,26 +2396,22 @@ pub extern "C" fn cuCtxGetStreamPriorityRange( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxResetPersistingL2Cache() -> CUresult { 
r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxAttach(pctx: *mut CUcontext, flags: ::std::os::raw::c_uint) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxDetach(ctx: CUcontext) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuModuleLoad( module: *mut CUmodule, fname: *const ::std::os::raw::c_char, @@ -2458,8 +2419,7 @@ pub extern "C" fn cuModuleLoad( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuModuleLoadData( module: *mut CUmodule, image: *const ::std::os::raw::c_void, @@ -2467,8 +2427,7 @@ pub extern "C" fn cuModuleLoadData( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuModuleLoadDataEx( module: *mut CUmodule, image: *const ::std::os::raw::c_void, @@ -2479,8 +2438,7 @@ pub extern "C" fn cuModuleLoadDataEx( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuModuleLoadFatBinary( module: *mut CUmodule, fatCubin: *const ::std::os::raw::c_void, @@ -2488,14 +2446,12 @@ pub extern "C" fn cuModuleLoadFatBinary( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuModuleUnload(hmod: CUmodule) -> CUresult { - r#impl::unimplemented() + r#impl::module::unload(hmod.decuda()).encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuModuleGetFunction( hfunc: *mut CUfunction, hmod: CUmodule, @@ -2504,8 +2460,7 @@ pub extern "C" fn cuModuleGetFunction( r#impl::module::get_function(hfunc.decuda(), hmod.decuda(), name).encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuModuleGetGlobal_v2( dptr: *mut 
CUdeviceptr, bytes: *mut usize, @@ -2515,8 +2470,7 @@ pub extern "C" fn cuModuleGetGlobal_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuModuleGetTexRef( pTexRef: *mut CUtexref, hmod: CUmodule, @@ -2525,8 +2479,7 @@ pub extern "C" fn cuModuleGetTexRef( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuModuleGetSurfRef( pSurfRef: *mut CUsurfref, hmod: CUmodule, @@ -2535,8 +2488,7 @@ pub extern "C" fn cuModuleGetSurfRef( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuLinkCreate_v2( numOptions: ::std::os::raw::c_uint, options: *mut CUjit_option, @@ -2546,8 +2498,7 @@ pub extern "C" fn cuLinkCreate_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuLinkAddData_v2( state: CUlinkState, type_: CUjitInputType, @@ -2561,8 +2512,7 @@ pub extern "C" fn cuLinkAddData_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuLinkAddFile_v2( state: CUlinkState, type_: CUjitInputType, @@ -2574,8 +2524,7 @@ pub extern "C" fn cuLinkAddFile_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuLinkComplete( state: CUlinkState, cubinOut: *mut *mut ::std::os::raw::c_void, @@ -2584,26 +2533,22 @@ pub extern "C" fn cuLinkComplete( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuLinkDestroy(state: CUlinkState) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemAlloc_v2(dptr: *mut 
CUdeviceptr, bytesize: usize) -> CUresult { r#impl::memory::alloc_v2(dptr.decuda(), bytesize) } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemAllocPitch_v2( dptr: *mut CUdeviceptr, pPitch: *mut usize, @@ -2614,14 +2559,12 @@ pub extern "C" fn cuMemAllocPitch_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult { - r#impl::unimplemented() + r#impl::memory::free_v2(dptr.decuda()).encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemGetAddressRange_v2( pbase: *mut CUdeviceptr, psize: *mut usize, @@ -2630,8 +2573,7 @@ pub extern "C" fn cuMemGetAddressRange_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemAllocHost_v2( pp: *mut *mut ::std::os::raw::c_void, bytesize: usize, @@ -2639,14 +2581,12 @@ pub extern "C" fn cuMemAllocHost_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemHostAlloc( pp: *mut *mut ::std::os::raw::c_void, bytesize: usize, @@ -2655,8 +2595,7 @@ pub extern "C" fn cuMemHostAlloc( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemHostGetDevicePointer_v2( pdptr: *mut CUdeviceptr, p: *mut ::std::os::raw::c_void, @@ -2665,8 +2604,7 @@ pub extern "C" fn cuMemHostGetDevicePointer_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemHostGetFlags( pFlags: *mut ::std::os::raw::c_uint, p: *mut ::std::os::raw::c_void, @@ -2674,8 +2612,7 @@ pub extern "C" fn cuMemHostGetFlags( r#impl::unimplemented() } -#[no_mangle] 
-#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemAllocManaged( dptr: *mut CUdeviceptr, bytesize: usize, @@ -2684,8 +2621,7 @@ pub extern "C" fn cuMemAllocManaged( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDeviceGetByPCIBusId( dev: *mut CUdevice, pciBusId: *const ::std::os::raw::c_char, @@ -2693,8 +2629,7 @@ pub extern "C" fn cuDeviceGetByPCIBusId( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDeviceGetPCIBusId( pciBusId: *mut ::std::os::raw::c_char, len: ::std::os::raw::c_int, @@ -2703,14 +2638,12 @@ pub extern "C" fn cuDeviceGetPCIBusId( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuIpcGetEventHandle(pHandle: *mut CUipcEventHandle, event: CUevent) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuIpcOpenEventHandle( phEvent: *mut CUevent, handle: CUipcEventHandle, @@ -2718,14 +2651,12 @@ pub extern "C" fn cuIpcOpenEventHandle( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuIpcGetMemHandle(pHandle: *mut CUipcMemHandle, dptr: CUdeviceptr) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuIpcOpenMemHandle( pdptr: *mut CUdeviceptr, handle: CUipcMemHandle, @@ -2734,14 +2665,12 @@ pub extern "C" fn cuIpcOpenMemHandle( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuIpcCloseMemHandle(dptr: CUdeviceptr) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemHostRegister_v2( p: *mut ::std::os::raw::c_void, bytesize: usize, @@ -2750,20 +2679,17 @@ pub extern "C" fn 
cuMemHostRegister_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemHostUnregister(p: *mut ::std::os::raw::c_void) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpy(dst: CUdeviceptr, src: CUdeviceptr, ByteCount: usize) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyPeer( dstDevice: CUdeviceptr, dstContext: CUcontext, @@ -2774,8 +2700,7 @@ pub extern "C" fn cuMemcpyPeer( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyHtoD_v2( dstDevice: CUdeviceptr, srcHost: *const ::std::os::raw::c_void, @@ -2784,8 +2709,7 @@ pub extern "C" fn cuMemcpyHtoD_v2( r#impl::memory::copy_v2(dstDevice.decuda(), srcHost, ByteCount).encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyDtoH_v2( dstHost: *mut ::std::os::raw::c_void, srcDevice: CUdeviceptr, @@ -2794,8 +2718,7 @@ pub extern "C" fn cuMemcpyDtoH_v2( r#impl::memory::copy_v2(dstHost, srcDevice.decuda(), ByteCount).encuda() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyDtoD_v2( dstDevice: CUdeviceptr, srcDevice: CUdeviceptr, @@ -2804,8 +2727,7 @@ pub extern "C" fn cuMemcpyDtoD_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyDtoA_v2( dstArray: CUarray, dstOffset: usize, @@ -2815,8 +2737,7 @@ pub extern "C" fn cuMemcpyDtoA_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyAtoD_v2( dstDevice: CUdeviceptr, srcArray: CUarray, @@ -2826,8 +2747,7 @@ pub extern "C" fn cuMemcpyAtoD_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub 
extern "C" fn cuMemcpyHtoA_v2( dstArray: CUarray, dstOffset: usize, @@ -2837,8 +2757,7 @@ pub extern "C" fn cuMemcpyHtoA_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyAtoH_v2( dstHost: *mut ::std::os::raw::c_void, srcArray: CUarray, @@ -2848,8 +2767,7 @@ pub extern "C" fn cuMemcpyAtoH_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyAtoA_v2( dstArray: CUarray, dstOffset: usize, @@ -2860,32 +2778,27 @@ pub extern "C" fn cuMemcpyAtoA_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpy2D_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpy3DPeer(pCopy: *const CUDA_MEMCPY3D_PEER) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyAsync( dst: CUdeviceptr, src: CUdeviceptr, @@ -2895,8 +2808,7 @@ pub extern "C" fn cuMemcpyAsync( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyPeerAsync( dstDevice: CUdeviceptr, dstContext: CUcontext, @@ -2908,8 +2820,7 @@ pub extern "C" fn cuMemcpyPeerAsync( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyHtoDAsync_v2( dstDevice: CUdeviceptr, srcHost: *const ::std::os::raw::c_void, @@ -2919,8 +2830,7 @@ pub extern "C" fn cuMemcpyHtoDAsync_v2( 
r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyDtoHAsync_v2( dstHost: *mut ::std::os::raw::c_void, srcDevice: CUdeviceptr, @@ -2930,8 +2840,7 @@ pub extern "C" fn cuMemcpyDtoHAsync_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyDtoDAsync_v2( dstDevice: CUdeviceptr, srcDevice: CUdeviceptr, @@ -2941,8 +2850,7 @@ pub extern "C" fn cuMemcpyDtoDAsync_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyHtoAAsync_v2( dstArray: CUarray, dstOffset: usize, @@ -2953,8 +2861,7 @@ pub extern "C" fn cuMemcpyHtoAAsync_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyAtoHAsync_v2( dstHost: *mut ::std::os::raw::c_void, srcArray: CUarray, @@ -2965,20 +2872,17 @@ pub extern "C" fn cuMemcpyAtoHAsync_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpy2DAsync_v2(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpy3DAsync_v2(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpy3DPeerAsync( pCopy: *const CUDA_MEMCPY3D_PEER, hStream: CUstream, @@ -2986,8 +2890,7 @@ pub extern "C" fn cuMemcpy3DPeerAsync( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD8_v2( dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, @@ -2996,8 +2899,7 @@ pub extern "C" fn cuMemsetD8_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD16_v2( dstDevice: 
CUdeviceptr, us: ::std::os::raw::c_ushort, @@ -3006,8 +2908,7 @@ pub extern "C" fn cuMemsetD16_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD32_v2( dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, @@ -3016,8 +2917,7 @@ pub extern "C" fn cuMemsetD32_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD2D8_v2( dstDevice: CUdeviceptr, dstPitch: usize, @@ -3028,8 +2928,7 @@ pub extern "C" fn cuMemsetD2D8_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD2D16_v2( dstDevice: CUdeviceptr, dstPitch: usize, @@ -3040,8 +2939,7 @@ pub extern "C" fn cuMemsetD2D16_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD2D32_v2( dstDevice: CUdeviceptr, dstPitch: usize, @@ -3052,8 +2950,7 @@ pub extern "C" fn cuMemsetD2D32_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD8Async( dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, @@ -3063,8 +2960,7 @@ pub extern "C" fn cuMemsetD8Async( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD16Async( dstDevice: CUdeviceptr, us: ::std::os::raw::c_ushort, @@ -3074,8 +2970,7 @@ pub extern "C" fn cuMemsetD16Async( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD32Async( dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, @@ -3085,8 +2980,7 @@ pub extern "C" fn cuMemsetD32Async( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD2D8Async( dstDevice: CUdeviceptr, dstPitch: usize, @@ -3098,8 +2992,7 @@ pub extern "C" fn cuMemsetD2D8Async( r#impl::unimplemented() } 
-#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD2D16Async( dstDevice: CUdeviceptr, dstPitch: usize, @@ -3111,8 +3004,7 @@ pub extern "C" fn cuMemsetD2D16Async( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD2D32Async( dstDevice: CUdeviceptr, dstPitch: usize, @@ -3124,8 +3016,7 @@ pub extern "C" fn cuMemsetD2D32Async( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuArrayCreate_v2( pHandle: *mut CUarray, pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR, @@ -3133,8 +3024,7 @@ pub extern "C" fn cuArrayCreate_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuArrayGetDescriptor_v2( pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR, hArray: CUarray, @@ -3142,14 +3032,12 @@ pub extern "C" fn cuArrayGetDescriptor_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuArrayDestroy(hArray: CUarray) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuArray3DCreate_v2( pHandle: *mut CUarray, pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR, @@ -3157,8 +3045,7 @@ pub extern "C" fn cuArray3DCreate_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuArray3DGetDescriptor_v2( pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR, hArray: CUarray, @@ -3166,8 +3053,7 @@ pub extern "C" fn cuArray3DGetDescriptor_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMipmappedArrayCreate( pHandle: *mut CUmipmappedArray, pMipmappedArrayDesc: *const CUDA_ARRAY3D_DESCRIPTOR, @@ -3176,8 +3062,7 @@ pub extern "C" fn cuMipmappedArrayCreate( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] 
+#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMipmappedArrayGetLevel( pLevelArray: *mut CUarray, hMipmappedArray: CUmipmappedArray, @@ -3186,14 +3071,12 @@ pub extern "C" fn cuMipmappedArrayGetLevel( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMipmappedArrayDestroy(hMipmappedArray: CUmipmappedArray) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemAddressReserve( ptr: *mut CUdeviceptr, size: usize, @@ -3204,14 +3087,12 @@ pub extern "C" fn cuMemAddressReserve( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemAddressFree(ptr: CUdeviceptr, size: usize) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemCreate( handle: *mut CUmemGenericAllocationHandle, size: usize, @@ -3221,14 +3102,12 @@ pub extern "C" fn cuMemCreate( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemRelease(handle: CUmemGenericAllocationHandle) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemMap( ptr: CUdeviceptr, size: usize, @@ -3239,14 +3118,12 @@ pub extern "C" fn cuMemMap( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemUnmap(ptr: CUdeviceptr, size: usize) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemSetAccess( ptr: CUdeviceptr, size: usize, @@ -3256,8 +3133,7 @@ pub extern "C" fn cuMemSetAccess( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemGetAccess( flags: *mut ::std::os::raw::c_ulonglong, location: *const 
CUmemLocation, @@ -3266,8 +3142,7 @@ pub extern "C" fn cuMemGetAccess( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemExportToShareableHandle( shareableHandle: *mut ::std::os::raw::c_void, handle: CUmemGenericAllocationHandle, @@ -3277,8 +3152,7 @@ pub extern "C" fn cuMemExportToShareableHandle( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemImportFromShareableHandle( handle: *mut CUmemGenericAllocationHandle, osHandle: *mut ::std::os::raw::c_void, @@ -3287,8 +3161,7 @@ pub extern "C" fn cuMemImportFromShareableHandle( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemGetAllocationGranularity( granularity: *mut usize, prop: *const CUmemAllocationProp, @@ -3297,8 +3170,7 @@ pub extern "C" fn cuMemGetAllocationGranularity( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemGetAllocationPropertiesFromHandle( prop: *mut CUmemAllocationProp, handle: CUmemGenericAllocationHandle, @@ -3306,8 +3178,7 @@ pub extern "C" fn cuMemGetAllocationPropertiesFromHandle( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemRetainAllocationHandle( handle: *mut CUmemGenericAllocationHandle, addr: *mut ::std::os::raw::c_void, @@ -3315,8 +3186,7 @@ pub extern "C" fn cuMemRetainAllocationHandle( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuPointerGetAttribute( data: *mut ::std::os::raw::c_void, attribute: CUpointer_attribute, @@ -3325,8 +3195,7 @@ pub extern "C" fn cuPointerGetAttribute( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemPrefetchAsync( devPtr: CUdeviceptr, count: usize, @@ -3336,8 +3205,7 @@ pub extern "C" fn 
cuMemPrefetchAsync( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemAdvise( devPtr: CUdeviceptr, count: usize, @@ -3347,8 +3215,7 @@ pub extern "C" fn cuMemAdvise( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemRangeGetAttribute( data: *mut ::std::os::raw::c_void, dataSize: usize, @@ -3359,8 +3226,7 @@ pub extern "C" fn cuMemRangeGetAttribute( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemRangeGetAttributes( data: *mut *mut ::std::os::raw::c_void, dataSizes: *mut usize, @@ -3372,8 +3238,7 @@ pub extern "C" fn cuMemRangeGetAttributes( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuPointerSetAttribute( value: *const ::std::os::raw::c_void, attribute: CUpointer_attribute, @@ -3382,8 +3247,7 @@ pub extern "C" fn cuPointerSetAttribute( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuPointerGetAttributes( numAttributes: ::std::os::raw::c_uint, attributes: *mut CUpointer_attribute, @@ -3393,8 +3257,7 @@ pub extern "C" fn cuPointerGetAttributes( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamCreate( phStream: *mut CUstream, Flags: ::std::os::raw::c_uint, @@ -3402,8 +3265,7 @@ pub extern "C" fn cuStreamCreate( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamCreateWithPriority( phStream: *mut CUstream, flags: ::std::os::raw::c_uint, @@ -3412,8 +3274,7 @@ pub extern "C" fn cuStreamCreateWithPriority( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamGetPriority( hStream: CUstream, priority: *mut ::std::os::raw::c_int, @@ -3421,8 
+3282,7 @@ pub extern "C" fn cuStreamGetPriority( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamGetFlags( hStream: CUstream, flags: *mut ::std::os::raw::c_uint, @@ -3430,14 +3290,12 @@ pub extern "C" fn cuStreamGetFlags( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamGetCtx(hStream: CUstream, pctx: *mut CUcontext) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamWaitEvent( hStream: CUstream, hEvent: CUevent, @@ -3446,8 +3304,7 @@ pub extern "C" fn cuStreamWaitEvent( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamAddCallback( hStream: CUstream, callback: CUstreamCallback, @@ -3457,8 +3314,7 @@ pub extern "C" fn cuStreamAddCallback( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamBeginCapture_v2( hStream: CUstream, mode: CUstreamCaptureMode, @@ -3466,20 +3322,17 @@ pub extern "C" fn cuStreamBeginCapture_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuThreadExchangeStreamCaptureMode(mode: *mut CUstreamCaptureMode) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamEndCapture(hStream: CUstream, phGraph: *mut CUgraph) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamIsCapturing( hStream: CUstream, captureStatus: *mut CUstreamCaptureStatus, @@ -3487,8 +3340,7 @@ pub extern "C" fn cuStreamIsCapturing( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamGetCaptureInfo( hStream: CUstream, captureStatus: 
*mut CUstreamCaptureStatus, @@ -3497,8 +3349,7 @@ pub extern "C" fn cuStreamGetCaptureInfo( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamAttachMemAsync( hStream: CUstream, dptr: CUdeviceptr, @@ -3508,32 +3359,27 @@ pub extern "C" fn cuStreamAttachMemAsync( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamQuery(hStream: CUstream) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamSynchronize(hStream: CUstream) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamDestroy_v2(hStream: CUstream) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamCopyAttributes(dst: CUstream, src: CUstream) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamGetAttribute( hStream: CUstream, attr: CUstreamAttrID, @@ -3542,8 +3388,7 @@ pub extern "C" fn cuStreamGetAttribute( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamSetAttribute( hStream: CUstream, attr: CUstreamAttrID, @@ -3552,38 +3397,32 @@ pub extern "C" fn cuStreamSetAttribute( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuEventCreate(phEvent: *mut CUevent, Flags: ::std::os::raw::c_uint) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuEventRecord(hEvent: CUevent, hStream: CUstream) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuEventQuery(hEvent: CUevent) -> CUresult 
{ r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuEventSynchronize(hEvent: CUevent) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuEventDestroy_v2(hEvent: CUevent) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuEventElapsedTime( pMilliseconds: *mut f32, hStart: CUevent, @@ -3592,8 +3431,7 @@ pub extern "C" fn cuEventElapsedTime( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuImportExternalMemory( extMem_out: *mut CUexternalMemory, memHandleDesc: *const CUDA_EXTERNAL_MEMORY_HANDLE_DESC, @@ -3601,8 +3439,7 @@ pub extern "C" fn cuImportExternalMemory( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuExternalMemoryGetMappedBuffer( devPtr: *mut CUdeviceptr, extMem: CUexternalMemory, @@ -3611,8 +3448,7 @@ pub extern "C" fn cuExternalMemoryGetMappedBuffer( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuExternalMemoryGetMappedMipmappedArray( mipmap: *mut CUmipmappedArray, extMem: CUexternalMemory, @@ -3621,14 +3457,12 @@ pub extern "C" fn cuExternalMemoryGetMappedMipmappedArray( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDestroyExternalMemory(extMem: CUexternalMemory) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuImportExternalSemaphore( extSem_out: *mut CUexternalSemaphore, semHandleDesc: *const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC, @@ -3636,8 +3470,7 @@ pub extern "C" fn cuImportExternalSemaphore( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern 
"C" fn cuSignalExternalSemaphoresAsync( extSemArray: *const CUexternalSemaphore, paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS, @@ -3647,8 +3480,7 @@ pub extern "C" fn cuSignalExternalSemaphoresAsync( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuWaitExternalSemaphoresAsync( extSemArray: *const CUexternalSemaphore, paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS, @@ -3658,14 +3490,12 @@ pub extern "C" fn cuWaitExternalSemaphoresAsync( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDestroyExternalSemaphore(extSem: CUexternalSemaphore) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamWaitValue32( stream: CUstream, addr: CUdeviceptr, @@ -3675,8 +3505,7 @@ pub extern "C" fn cuStreamWaitValue32( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamWaitValue64( stream: CUstream, addr: CUdeviceptr, @@ -3686,8 +3515,7 @@ pub extern "C" fn cuStreamWaitValue64( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamWriteValue32( stream: CUstream, addr: CUdeviceptr, @@ -3697,8 +3525,7 @@ pub extern "C" fn cuStreamWriteValue32( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamWriteValue64( stream: CUstream, addr: CUdeviceptr, @@ -3708,8 +3535,7 @@ pub extern "C" fn cuStreamWriteValue64( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamBatchMemOp( stream: CUstream, count: ::std::os::raw::c_uint, @@ -3719,8 +3545,7 @@ pub extern "C" fn cuStreamBatchMemOp( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn 
cuFuncGetAttribute( pi: *mut ::std::os::raw::c_int, attrib: CUfunction_attribute, @@ -3729,8 +3554,7 @@ pub extern "C" fn cuFuncGetAttribute( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuFuncSetAttribute( hfunc: CUfunction, attrib: CUfunction_attribute, @@ -3739,20 +3563,17 @@ pub extern "C" fn cuFuncSetAttribute( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuFuncSetCacheConfig(hfunc: CUfunction, config: CUfunc_cache) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuFuncSetSharedMemConfig(hfunc: CUfunction, config: CUsharedconfig) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuLaunchKernel( f: CUfunction, gridDimX: ::std::os::raw::c_uint, @@ -3766,11 +3587,23 @@ pub extern "C" fn cuLaunchKernel( kernelParams: *mut *mut ::std::os::raw::c_void, extra: *mut *mut ::std::os::raw::c_void, ) -> CUresult { - r#impl::unimplemented() -} - -#[no_mangle] -#[cfg(not(test))] + r#impl::function::launch_kernel( + f.decuda(), + gridDimX, + gridDimY, + gridDimZ, + blockDimX, + blockDimY, + blockDimZ, + sharedMemBytes, + hStream.decuda(), + kernelParams, + extra, + ) + .encuda() +} + +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuLaunchCooperativeKernel( f: CUfunction, gridDimX: ::std::os::raw::c_uint, @@ -3786,8 +3619,7 @@ pub extern "C" fn cuLaunchCooperativeKernel( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuLaunchCooperativeKernelMultiDevice( launchParamsList: *mut CUDA_LAUNCH_PARAMS, numDevices: ::std::os::raw::c_uint, @@ -3796,8 +3628,7 @@ pub extern "C" fn cuLaunchCooperativeKernelMultiDevice( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn 
cuLaunchHostFunc( hStream: CUstream, fn_: CUhostFn, @@ -3806,8 +3637,7 @@ pub extern "C" fn cuLaunchHostFunc( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuFuncSetBlockShape( hfunc: CUfunction, x: ::std::os::raw::c_int, @@ -3817,8 +3647,7 @@ pub extern "C" fn cuFuncSetBlockShape( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuFuncSetSharedSize( hfunc: CUfunction, bytes: ::std::os::raw::c_uint, @@ -3826,14 +3655,12 @@ pub extern "C" fn cuFuncSetSharedSize( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuParamSetSize(hfunc: CUfunction, numbytes: ::std::os::raw::c_uint) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuParamSeti( hfunc: CUfunction, offset: ::std::os::raw::c_int, @@ -3842,8 +3669,7 @@ pub extern "C" fn cuParamSeti( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuParamSetf( hfunc: CUfunction, offset: ::std::os::raw::c_int, @@ -3852,8 +3678,7 @@ pub extern "C" fn cuParamSetf( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuParamSetv( hfunc: CUfunction, offset: ::std::os::raw::c_int, @@ -3863,14 +3688,12 @@ pub extern "C" fn cuParamSetv( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuLaunch(f: CUfunction) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuLaunchGrid( f: CUfunction, grid_width: ::std::os::raw::c_int, @@ -3879,8 +3702,7 @@ pub extern "C" fn cuLaunchGrid( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuLaunchGridAsync( f: 
CUfunction, grid_width: ::std::os::raw::c_int, @@ -3890,8 +3712,7 @@ pub extern "C" fn cuLaunchGridAsync( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuParamSetTexRef( hfunc: CUfunction, texunit: ::std::os::raw::c_int, @@ -3900,14 +3721,12 @@ pub extern "C" fn cuParamSetTexRef( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphCreate(phGraph: *mut CUgraph, flags: ::std::os::raw::c_uint) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphAddKernelNode( phGraphNode: *mut CUgraphNode, hGraph: CUgraph, @@ -3918,8 +3737,7 @@ pub extern "C" fn cuGraphAddKernelNode( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphKernelNodeGetParams( hNode: CUgraphNode, nodeParams: *mut CUDA_KERNEL_NODE_PARAMS, @@ -3927,8 +3745,7 @@ pub extern "C" fn cuGraphKernelNodeGetParams( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphKernelNodeSetParams( hNode: CUgraphNode, nodeParams: *const CUDA_KERNEL_NODE_PARAMS, @@ -3936,8 +3753,7 @@ pub extern "C" fn cuGraphKernelNodeSetParams( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphAddMemcpyNode( phGraphNode: *mut CUgraphNode, hGraph: CUgraph, @@ -3949,8 +3765,7 @@ pub extern "C" fn cuGraphAddMemcpyNode( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphMemcpyNodeGetParams( hNode: CUgraphNode, nodeParams: *mut CUDA_MEMCPY3D, @@ -3958,8 +3773,7 @@ pub extern "C" fn cuGraphMemcpyNodeGetParams( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphMemcpyNodeSetParams( hNode: CUgraphNode, 
nodeParams: *const CUDA_MEMCPY3D, @@ -3967,8 +3781,7 @@ pub extern "C" fn cuGraphMemcpyNodeSetParams( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphAddMemsetNode( phGraphNode: *mut CUgraphNode, hGraph: CUgraph, @@ -3980,8 +3793,7 @@ pub extern "C" fn cuGraphAddMemsetNode( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphMemsetNodeGetParams( hNode: CUgraphNode, nodeParams: *mut CUDA_MEMSET_NODE_PARAMS, @@ -3989,8 +3801,7 @@ pub extern "C" fn cuGraphMemsetNodeGetParams( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphMemsetNodeSetParams( hNode: CUgraphNode, nodeParams: *const CUDA_MEMSET_NODE_PARAMS, @@ -3998,8 +3809,7 @@ pub extern "C" fn cuGraphMemsetNodeSetParams( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphAddHostNode( phGraphNode: *mut CUgraphNode, hGraph: CUgraph, @@ -4010,8 +3820,7 @@ pub extern "C" fn cuGraphAddHostNode( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphHostNodeGetParams( hNode: CUgraphNode, nodeParams: *mut CUDA_HOST_NODE_PARAMS, @@ -4019,8 +3828,7 @@ pub extern "C" fn cuGraphHostNodeGetParams( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphHostNodeSetParams( hNode: CUgraphNode, nodeParams: *const CUDA_HOST_NODE_PARAMS, @@ -4028,8 +3836,7 @@ pub extern "C" fn cuGraphHostNodeSetParams( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphAddChildGraphNode( phGraphNode: *mut CUgraphNode, hGraph: CUgraph, @@ -4040,8 +3847,7 @@ pub extern "C" fn cuGraphAddChildGraphNode( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), 
no_mangle)] pub extern "C" fn cuGraphChildGraphNodeGetGraph( hNode: CUgraphNode, phGraph: *mut CUgraph, @@ -4049,8 +3855,7 @@ pub extern "C" fn cuGraphChildGraphNodeGetGraph( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphAddEmptyNode( phGraphNode: *mut CUgraphNode, hGraph: CUgraph, @@ -4060,14 +3865,12 @@ pub extern "C" fn cuGraphAddEmptyNode( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphClone(phGraphClone: *mut CUgraph, originalGraph: CUgraph) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphNodeFindInClone( phNode: *mut CUgraphNode, hOriginalNode: CUgraphNode, @@ -4076,14 +3879,12 @@ pub extern "C" fn cuGraphNodeFindInClone( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphNodeGetType(hNode: CUgraphNode, type_: *mut CUgraphNodeType) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphGetNodes( hGraph: CUgraph, nodes: *mut CUgraphNode, @@ -4092,8 +3893,7 @@ pub extern "C" fn cuGraphGetNodes( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphGetRootNodes( hGraph: CUgraph, rootNodes: *mut CUgraphNode, @@ -4102,8 +3902,7 @@ pub extern "C" fn cuGraphGetRootNodes( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphGetEdges( hGraph: CUgraph, from: *mut CUgraphNode, @@ -4113,8 +3912,7 @@ pub extern "C" fn cuGraphGetEdges( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphNodeGetDependencies( hNode: CUgraphNode, dependencies: *mut CUgraphNode, @@ -4123,8 +3921,7 @@ pub extern "C" fn 
cuGraphNodeGetDependencies( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphNodeGetDependentNodes( hNode: CUgraphNode, dependentNodes: *mut CUgraphNode, @@ -4133,8 +3930,7 @@ pub extern "C" fn cuGraphNodeGetDependentNodes( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphAddDependencies( hGraph: CUgraph, from: *const CUgraphNode, @@ -4144,8 +3940,7 @@ pub extern "C" fn cuGraphAddDependencies( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphRemoveDependencies( hGraph: CUgraph, from: *const CUgraphNode, @@ -4155,14 +3950,12 @@ pub extern "C" fn cuGraphRemoveDependencies( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphDestroyNode(hNode: CUgraphNode) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphInstantiate_v2( phGraphExec: *mut CUgraphExec, hGraph: CUgraph, @@ -4173,8 +3966,7 @@ pub extern "C" fn cuGraphInstantiate_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphExecKernelNodeSetParams( hGraphExec: CUgraphExec, hNode: CUgraphNode, @@ -4183,8 +3975,7 @@ pub extern "C" fn cuGraphExecKernelNodeSetParams( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphExecMemcpyNodeSetParams( hGraphExec: CUgraphExec, hNode: CUgraphNode, @@ -4194,8 +3985,7 @@ pub extern "C" fn cuGraphExecMemcpyNodeSetParams( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphExecMemsetNodeSetParams( hGraphExec: CUgraphExec, hNode: CUgraphNode, @@ -4205,8 +3995,7 @@ pub extern "C" fn cuGraphExecMemsetNodeSetParams( 
r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphExecHostNodeSetParams( hGraphExec: CUgraphExec, hNode: CUgraphNode, @@ -4215,26 +4004,22 @@ pub extern "C" fn cuGraphExecHostNodeSetParams( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphLaunch(hGraphExec: CUgraphExec, hStream: CUstream) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphExecDestroy(hGraphExec: CUgraphExec) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphDestroy(hGraph: CUgraph) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphExecUpdate( hGraphExec: CUgraphExec, hGraph: CUgraph, @@ -4244,14 +4029,12 @@ pub extern "C" fn cuGraphExecUpdate( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphKernelNodeCopyAttributes(dst: CUgraphNode, src: CUgraphNode) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphKernelNodeGetAttribute( hNode: CUgraphNode, attr: CUkernelNodeAttrID, @@ -4260,8 +4043,7 @@ pub extern "C" fn cuGraphKernelNodeGetAttribute( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphKernelNodeSetAttribute( hNode: CUgraphNode, attr: CUkernelNodeAttrID, @@ -4270,8 +4052,7 @@ pub extern "C" fn cuGraphKernelNodeSetAttribute( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuOccupancyMaxActiveBlocksPerMultiprocessor( numBlocks: *mut ::std::os::raw::c_int, func: CUfunction, @@ -4281,8 +4062,7 @@ pub extern "C" fn 
cuOccupancyMaxActiveBlocksPerMultiprocessor( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( numBlocks: *mut ::std::os::raw::c_int, func: CUfunction, @@ -4293,8 +4073,7 @@ pub extern "C" fn cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuOccupancyMaxPotentialBlockSize( minGridSize: *mut ::std::os::raw::c_int, blockSize: *mut ::std::os::raw::c_int, @@ -4306,8 +4085,7 @@ pub extern "C" fn cuOccupancyMaxPotentialBlockSize( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuOccupancyMaxPotentialBlockSizeWithFlags( minGridSize: *mut ::std::os::raw::c_int, blockSize: *mut ::std::os::raw::c_int, @@ -4320,8 +4098,7 @@ pub extern "C" fn cuOccupancyMaxPotentialBlockSizeWithFlags( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuOccupancyAvailableDynamicSMemPerBlock( dynamicSmemSize: *mut usize, func: CUfunction, @@ -4331,8 +4108,7 @@ pub extern "C" fn cuOccupancyAvailableDynamicSMemPerBlock( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefSetArray( hTexRef: CUtexref, hArray: CUarray, @@ -4341,8 +4117,7 @@ pub extern "C" fn cuTexRefSetArray( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefSetMipmappedArray( hTexRef: CUtexref, hMipmappedArray: CUmipmappedArray, @@ -4351,8 +4126,7 @@ pub extern "C" fn cuTexRefSetMipmappedArray( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefSetAddress_v2( ByteOffset: *mut usize, hTexRef: CUtexref, @@ -4362,8 +4136,7 @@ pub extern "C" fn cuTexRefSetAddress_v2( 
r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefSetAddress2D_v3( hTexRef: CUtexref, desc: *const CUDA_ARRAY_DESCRIPTOR, @@ -4373,8 +4146,7 @@ pub extern "C" fn cuTexRefSetAddress2D_v3( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefSetFormat( hTexRef: CUtexref, fmt: CUarray_format, @@ -4383,8 +4155,7 @@ pub extern "C" fn cuTexRefSetFormat( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefSetAddressMode( hTexRef: CUtexref, dim: ::std::os::raw::c_int, @@ -4393,26 +4164,22 @@ pub extern "C" fn cuTexRefSetAddressMode( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefSetFilterMode(hTexRef: CUtexref, fm: CUfilter_mode) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefSetMipmapFilterMode(hTexRef: CUtexref, fm: CUfilter_mode) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefSetMipmapLevelBias(hTexRef: CUtexref, bias: f32) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefSetMipmapLevelClamp( hTexRef: CUtexref, minMipmapLevelClamp: f32, @@ -4421,8 +4188,7 @@ pub extern "C" fn cuTexRefSetMipmapLevelClamp( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefSetMaxAnisotropy( hTexRef: CUtexref, maxAniso: ::std::os::raw::c_uint, @@ -4430,32 +4196,27 @@ pub extern "C" fn cuTexRefSetMaxAnisotropy( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefSetBorderColor(hTexRef: CUtexref, pBorderColor: *mut f32) -> 
CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefSetFlags(hTexRef: CUtexref, Flags: ::std::os::raw::c_uint) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefGetAddress_v2(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefGetArray(phArray: *mut CUarray, hTexRef: CUtexref) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefGetMipmappedArray( phMipmappedArray: *mut CUmipmappedArray, hTexRef: CUtexref, @@ -4463,8 +4224,7 @@ pub extern "C" fn cuTexRefGetMipmappedArray( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefGetAddressMode( pam: *mut CUaddress_mode, hTexRef: CUtexref, @@ -4473,14 +4233,12 @@ pub extern "C" fn cuTexRefGetAddressMode( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefGetFilterMode(pfm: *mut CUfilter_mode, hTexRef: CUtexref) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefGetFormat( pFormat: *mut CUarray_format, pNumChannels: *mut ::std::os::raw::c_int, @@ -4489,8 +4247,7 @@ pub extern "C" fn cuTexRefGetFormat( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefGetMipmapFilterMode( pfm: *mut CUfilter_mode, hTexRef: CUtexref, @@ -4498,14 +4255,12 @@ pub extern "C" fn cuTexRefGetMipmapFilterMode( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefGetMipmapLevelBias(pbias: *mut f32, hTexRef: CUtexref) -> CUresult { 
r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefGetMipmapLevelClamp( pminMipmapLevelClamp: *mut f32, pmaxMipmapLevelClamp: *mut f32, @@ -4514,8 +4269,7 @@ pub extern "C" fn cuTexRefGetMipmapLevelClamp( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefGetMaxAnisotropy( pmaxAniso: *mut ::std::os::raw::c_int, hTexRef: CUtexref, @@ -4523,14 +4277,12 @@ pub extern "C" fn cuTexRefGetMaxAnisotropy( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefGetBorderColor(pBorderColor: *mut f32, hTexRef: CUtexref) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefGetFlags( pFlags: *mut ::std::os::raw::c_uint, hTexRef: CUtexref, @@ -4538,20 +4290,17 @@ pub extern "C" fn cuTexRefGetFlags( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefCreate(pTexRef: *mut CUtexref) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexRefDestroy(hTexRef: CUtexref) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuSurfRefSetArray( hSurfRef: CUsurfref, hArray: CUarray, @@ -4560,14 +4309,12 @@ pub extern "C" fn cuSurfRefSetArray( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuSurfRefGetArray(phArray: *mut CUarray, hSurfRef: CUsurfref) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexObjectCreate( pTexObject: *mut CUtexObject, pResDesc: *const CUDA_RESOURCE_DESC, @@ -4577,14 +4324,12 @@ pub extern "C" fn cuTexObjectCreate( 
r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexObjectDestroy(texObject: CUtexObject) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexObjectGetResourceDesc( pResDesc: *mut CUDA_RESOURCE_DESC, texObject: CUtexObject, @@ -4592,8 +4337,7 @@ pub extern "C" fn cuTexObjectGetResourceDesc( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexObjectGetTextureDesc( pTexDesc: *mut CUDA_TEXTURE_DESC, texObject: CUtexObject, @@ -4601,8 +4345,7 @@ pub extern "C" fn cuTexObjectGetTextureDesc( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuTexObjectGetResourceViewDesc( pResViewDesc: *mut CUDA_RESOURCE_VIEW_DESC, texObject: CUtexObject, @@ -4610,8 +4353,7 @@ pub extern "C" fn cuTexObjectGetResourceViewDesc( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuSurfObjectCreate( pSurfObject: *mut CUsurfObject, pResDesc: *const CUDA_RESOURCE_DESC, @@ -4619,14 +4361,12 @@ pub extern "C" fn cuSurfObjectCreate( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuSurfObjectDestroy(surfObject: CUsurfObject) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuSurfObjectGetResourceDesc( pResDesc: *mut CUDA_RESOURCE_DESC, surfObject: CUsurfObject, @@ -4634,8 +4374,7 @@ pub extern "C" fn cuSurfObjectGetResourceDesc( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDeviceCanAccessPeer( canAccessPeer: *mut ::std::os::raw::c_int, dev: CUdevice, @@ -4644,8 +4383,7 @@ pub extern "C" fn cuDeviceCanAccessPeer( r#impl::unimplemented() } -#[no_mangle] 
-#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxEnablePeerAccess( peerContext: CUcontext, Flags: ::std::os::raw::c_uint, @@ -4653,14 +4391,12 @@ pub extern "C" fn cuCtxEnablePeerAccess( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuCtxDisablePeerAccess(peerContext: CUcontext) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuDeviceGetP2PAttribute( value: *mut ::std::os::raw::c_int, attrib: CUdevice_P2PAttribute, @@ -4670,14 +4406,12 @@ pub extern "C" fn cuDeviceGetP2PAttribute( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphicsUnregisterResource(resource: CUgraphicsResource) -> CUresult { r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphicsSubResourceGetMappedArray( pArray: *mut CUarray, resource: CUgraphicsResource, @@ -4687,8 +4421,7 @@ pub extern "C" fn cuGraphicsSubResourceGetMappedArray( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphicsResourceGetMappedMipmappedArray( pMipmappedArray: *mut CUmipmappedArray, resource: CUgraphicsResource, @@ -4696,8 +4429,7 @@ pub extern "C" fn cuGraphicsResourceGetMappedMipmappedArray( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphicsResourceGetMappedPointer_v2( pDevPtr: *mut CUdeviceptr, pSize: *mut usize, @@ -4706,8 +4438,7 @@ pub extern "C" fn cuGraphicsResourceGetMappedPointer_v2( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphicsResourceSetMapFlags_v2( resource: CUgraphicsResource, flags: ::std::os::raw::c_uint, @@ -4715,8 +4446,7 @@ pub extern "C" fn cuGraphicsResourceSetMapFlags_v2( 
r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphicsMapResources( count: ::std::os::raw::c_uint, resources: *mut CUgraphicsResource, @@ -4725,8 +4455,7 @@ pub extern "C" fn cuGraphicsMapResources( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGraphicsUnmapResources( count: ::std::os::raw::c_uint, resources: *mut CUgraphicsResource, @@ -4735,8 +4464,7 @@ pub extern "C" fn cuGraphicsUnmapResources( r#impl::unimplemented() } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuGetExportTable( ppExportTable: *mut *const ::std::os::raw::c_void, pExportTableId: *const CUuuid, @@ -4744,8 +4472,7 @@ pub extern "C" fn cuGetExportTable( r#impl::export_table::get(ppExportTable, pExportTableId) } -#[no_mangle] -#[cfg(not(test))] +#[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuFuncGetModule(hmod: *mut CUmodule, hfunc: CUfunction) -> CUresult { r#impl::unimplemented() } diff --git a/notcuda/src/impl/function.rs b/notcuda/src/impl/function.rs new file mode 100644 index 0000000..6f8773e --- /dev/null +++ b/notcuda/src/impl/function.rs @@ -0,0 +1,52 @@ +use ::std::os::raw::{c_uint, c_void}; +use std::ptr; + +use super::{context, device, stream::Stream, CUresult}; + +pub struct Function { + pub base: l0::Kernel<'static>, + pub arg_size: Vec<usize>, +} + +pub fn launch_kernel( + f: *mut Function, + grid_dim_x: c_uint, + grid_dim_y: c_uint, + grid_dim_z: c_uint, + block_dim_x: c_uint, + block_dim_y: c_uint, + block_dim_z: c_uint, + shared_mem_bytes: c_uint, + strean: *mut Stream, + kernel_params: *mut *mut c_void, + extra: *mut *mut c_void, +) -> Result<(), CUresult> { + if f == ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + if shared_mem_bytes != 0 || strean != ptr::null_mut() || extra != ptr::null_mut() { + return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED); + } + let func = unsafe { 
&*f }; + for (i, arg_size) in func.arg_size.iter().copied().enumerate() { + unsafe { + func.base + .set_arg_raw(i as u32, arg_size, *kernel_params.add(i))? + }; + } + unsafe { &*f } + .base + .set_group_size(block_dim_x, block_dim_y, block_dim_z)?; + device::with_current_exclusive(|dev| { + let mut cmd_list = l0::CommandList::new(&mut dev.l0_context, &dev.base)?; + cmd_list.append_launch_kernel( + &unsafe { &*f }.base, + &[grid_dim_x, grid_dim_y, grid_dim_z], + None, + &mut [], + )?; + dev.default_queue.execute(cmd_list)?; + l0::Result::Ok(()) + })??; + Ok(()) +} diff --git a/notcuda/src/impl/memory.rs b/notcuda/src/impl/memory.rs index 52e269d..3f92b5e 100644 --- a/notcuda/src/impl/memory.rs +++ b/notcuda/src/impl/memory.rs @@ -46,6 +46,10 @@ unsafe fn memcpy_impl( Ok(())
}
+pub(crate) fn free_v2(mem: *mut c_void)-> l0::Result<()> {
+ Ok(())
+}
+
#[cfg(test)]
mod tests {
use super::super::test::CudaDriverFns;
diff --git a/notcuda/src/impl/mod.rs b/notcuda/src/impl/mod.rs index c37b85d..3d31da2 100644 --- a/notcuda/src/impl/mod.rs +++ b/notcuda/src/impl/mod.rs @@ -1,4 +1,4 @@ -use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunction, CUmod_st, CUmodule, CUresult}; +use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUfunction, CUmod_st, CUmodule, CUresult, CUstream, CUstream_st}; use std::{ffi::c_void, mem::{self, ManuallyDrop}, os::raw::c_int, sync::Mutex}; #[cfg(test)] @@ -9,6 +9,8 @@ pub mod device; pub mod export_table; pub mod memory; pub mod module; +pub mod function; +pub mod stream; #[cfg(debug_assertions)] pub fn unimplemented() -> CUresult { @@ -242,6 +244,10 @@ impl<'a> CudaRepr for CUmod_st { type Impl = module::Module; } -impl<'a> CudaRepr for CUfunction { - type Impl = *mut module::Function; +impl<'a> CudaRepr for CUfunc_st { + type Impl = function::Function; +} + +impl<'a> CudaRepr for CUstream_st { + type Impl = stream::Stream; } diff --git a/notcuda/src/impl/module.rs b/notcuda/src/impl/module.rs index 06d050d..fc55f33 100644 --- a/notcuda/src/impl/module.rs +++ b/notcuda/src/impl/module.rs @@ -1,6 +1,9 @@ -use std::{ffi::c_void, ffi::CStr, mem, os::raw::c_char, ptr, slice, sync::Mutex}; +use std::{ + collections::HashMap, ffi::c_void, ffi::CStr, ffi::CString, mem, os::raw::c_char, ptr, slice, + sync::Mutex, +}; -use super::{transmute_lifetime, CUresult}; +use super::{function::Function, transmute_lifetime, CUresult}; use ptx; use super::context; @@ -9,6 +12,7 @@ pub type Module = Mutex<ModuleData>; pub struct ModuleData { base: l0::Module, + arg_lens: HashMap<CString, Vec<usize>>, } pub enum ModuleCompileError<'a> { @@ -52,7 +56,7 @@ impl ModuleData { Ok(_) if errors.len() > 0 => return Err(ModuleCompileError::Parse(errors, None)), Ok(ast) => ast, }; - let spirv = ptx::to_spirv(ast)?; + let (spirv, all_arg_lens) = ptx::to_spirv(ast)?; let byte_il = unsafe { slice::from_raw_parts::<u8>( spirv.as_ptr() as *const _, @@ -63,17 +67,19 @@ impl 
ModuleData { l0::Module::new_spirv(&mut dev.l0_context, &dev.base, byte_il, None) }); match module { - Ok(Ok(module)) => Ok(Mutex::new(Self { base: module })), + Ok(Ok(module)) => Ok(Mutex::new(Self { + base: module, + arg_lens: all_arg_lens + .into_iter() + .map(|(k, v)| (CString::new(k).unwrap(), v)) + .collect(), + })), Ok(Err(err)) => Err(ModuleCompileError::from(err)), Err(err) => Err(ModuleCompileError::from(err)), } } } -pub struct Function { - base: l0::Kernel<'static>, -} - pub fn get_function( hfunc: *mut *mut Function, hmod: *mut Module, @@ -83,10 +89,33 @@ pub fn get_function( return Err(CUresult::CUDA_ERROR_INVALID_VALUE); } let name = unsafe { CStr::from_ptr(name) }; - let kernel = unsafe { &*hmod } + let (mut kernel, args_len) = unsafe { &*hmod } .try_lock() - .map(|module| l0::Kernel::new_resident(unsafe { transmute_lifetime(&module.base) }, name)) + .map(|module| { + Result::<_, CUresult>::Ok(( + l0::Kernel::new_resident(unsafe { transmute_lifetime(&module.base) }, name)?, + module + .arg_lens + .get(name) + .ok_or(CUresult::CUDA_ERROR_NOT_FOUND)? 
+ .clone(), + )) + }) .map_err(|_| CUresult::CUDA_ERROR_ILLEGAL_STATE)??; - unsafe { *hfunc = Box::into_raw(Box::new(Function { base: kernel })) }; + kernel.set_indirect_access( + l0::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE + | l0::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST + | l0::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED, + )?; + unsafe { + *hfunc = Box::into_raw(Box::new(Function { + base: kernel, + arg_size: args_len, + })) + }; + Ok(()) +} + +pub(crate) fn unload(decuda: *mut Module) -> Result<(), CUresult> { Ok(()) } diff --git a/notcuda/src/impl/stream.rs b/notcuda/src/impl/stream.rs new file mode 100644 index 0000000..7410100 --- /dev/null +++ b/notcuda/src/impl/stream.rs @@ -0,0 +1,69 @@ +use std::cell::RefCell; + +use device::Device; + +use super::device; + +pub struct Stream { + dev: *mut Device, +} + +pub struct DefaultStream { + streams: Vec<Option<Stream>>, +} + +impl DefaultStream { + fn new() -> Self { + DefaultStream { + streams: Vec::new(), + } + } +} + +thread_local! 
{ + pub static DEFAULT_STREAM: RefCell<DefaultStream> = RefCell::new(DefaultStream::new()); +} + +#[cfg(test)] +mod tests { + use crate::cuda::CUstream; + + use super::super::test::CudaDriverFns; + use super::super::CUresult; + use std::{ffi::c_void, ptr}; + + const CU_STREAM_LEGACY: CUstream = 1 as *mut _; + const CU_STREAM_PER_THREAD: CUstream = 2 as *mut _; + + cuda_driver_test!(default_stream_uses_current_ctx_legacy); + cuda_driver_test!(default_stream_uses_current_ctx_ptsd); + + fn default_stream_uses_current_ctx_legacy<T: CudaDriverFns>() { + default_stream_uses_current_ctx_impl::<T>(CU_STREAM_LEGACY); + } + + fn default_stream_uses_current_ctx_ptsd<T: CudaDriverFns>() { + default_stream_uses_current_ctx_impl::<T>(CU_STREAM_PER_THREAD); + } + + fn default_stream_uses_current_ctx_impl<T: CudaDriverFns>(stream: CUstream) { + assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS); + let mut ctx1 = ptr::null_mut(); + assert_eq!(T::cuCtxCreate_v2(&mut ctx1, 0, 0), CUresult::CUDA_SUCCESS); + let mut stream_ctx1 = ptr::null_mut(); + assert_eq!( + T::cuStreamGetCtx(stream, &mut stream_ctx1), + CUresult::CUDA_SUCCESS + ); + assert_eq!(ctx1, stream_ctx1); + let mut ctx2 = ptr::null_mut(); + assert_eq!(T::cuCtxCreate_v2(&mut ctx2, 0, 0), CUresult::CUDA_SUCCESS); + assert_ne!(ctx1, ctx2); + let mut stream_ctx2 = ptr::null_mut(); + assert_eq!( + T::cuStreamGetCtx(stream, &mut stream_ctx2), + CUresult::CUDA_SUCCESS + ); + assert_eq!(ctx2, stream_ctx2); + } +} diff --git a/notcuda/src/impl/test.rs b/notcuda/src/impl/test.rs index 0ad625b..d4366b7 100644 --- a/notcuda/src/impl/test.rs +++ b/notcuda/src/impl/test.rs @@ -1,6 +1,6 @@ #![allow(non_snake_case)] -use crate::r#impl as notcuda; +use crate::{cuda::CUcontext, cuda::CUstream, r#impl as notcuda}; use crate::r#impl::CUresult; use crate::{cuda::CUuuid, r#impl::Encuda}; use ::std::{ @@ -36,14 +36,14 @@ pub trait CudaDriverFns { fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult; fn cuDeviceGetUuid(uuid: *mut 
CUuuid, dev: c_int) -> CUresult; fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult; + fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult; } pub struct NotCuda(); impl CudaDriverFns for NotCuda { fn cuInit(_flags: c_uint) -> CUresult { - assert!(notcuda::context::is_context_stack_empty()); - notcuda::init().encuda() + crate::cuda::cuInit(_flags as _) } fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult { @@ -76,6 +76,10 @@ impl CudaDriverFns for NotCuda { fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult { notcuda::device::primary_ctx_get_state(notcuda::device::Index(dev), flags, active).encuda() } + + fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult { + crate::cuda::cuStreamGetCtx(hStream, pctx as _) + } } pub struct Cuda(); @@ -115,4 +119,8 @@ impl CudaDriverFns for Cuda { fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult { unsafe { CUresult(cuda::cuDevicePrimaryCtxGetState(dev, flags, active) as c_uint) } } + + fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult { + unsafe { CUresult(cuda::cuStreamGetCtx(hStream as _, pctx as _) as c_uint) } + } } diff --git a/ptx/src/ast.rs b/ptx/src/ast.rs index 77afee6..acefdc1 100644 --- a/ptx/src/ast.rs +++ b/ptx/src/ast.rs @@ -320,7 +320,7 @@ pub enum Instruction<P: ArgParams> { MovVector(MovVectorDetails, Arg2Vec<P>), Mul(MulDetails, Arg3<P>), Add(AddDetails, Arg3<P>), - Setp(SetpData, Arg4<P>), + Setp(SetpData, Arg4Setp<P>), SetpBool(SetpBoolData, Arg5<P>), Not(NotType, Arg2<P>), Bra(BraData, Arg1<P>), @@ -331,9 +331,13 @@ pub enum Instruction<P: ArgParams> { Ret(RetData), Call(CallInst<P>), Abs(AbsDetails, Arg2<P>), + Mad(MulDetails, Arg4<P>), } #[derive(Copy, Clone)] +pub struct MadFloatDesc {} + +#[derive(Copy, Clone)] pub struct MovVectorDetails { pub typ: MovVectorType, pub length: u8, @@ 
-398,6 +402,13 @@ pub struct Arg3<P: ArgParams> { } pub struct Arg4<P: ArgParams> { + pub dst: P::ID, + pub src1: P::Operand, + pub src2: P::Operand, + pub src3: P::Operand, +} + +pub struct Arg4Setp<P: ArgParams> { pub dst1: P::ID, pub dst2: Option<P::ID>, pub src1: P::Operand, @@ -503,7 +514,7 @@ sub_scalar_type!(MovVectorType { pub struct MovDetails { pub typ: MovType, - pub src_is_address: bool + pub src_is_address: bool, } sub_type! { @@ -518,17 +529,20 @@ pub enum MulDetails { Float(MulFloatDesc), } +#[derive(Copy, Clone)] pub struct MulIntDesc { pub typ: IntType, pub control: MulIntControl, } +#[derive(Copy, Clone)] pub enum MulIntControl { Low, High, Wide, } +#[derive(Copy, Clone)] pub struct MulFloatDesc { pub typ: FloatType, pub rounding: Option<RoundingMode>, diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop index 208e076..50a6aeb 100644 --- a/ptx/src/ptx.lalrpop +++ b/ptx/src/ptx.lalrpop @@ -122,6 +122,7 @@ match { "cvta", "debug", "ld", + "mad", "map_f64_to_f32", "mov", "mul", @@ -149,6 +150,7 @@ ExtendedID : &'input str = { "cvta", "debug", "ld", + "mad", "map_f64_to_f32", "mov", "mul", @@ -442,6 +444,7 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = { InstCvta, InstCall, InstAbs, + InstMad }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld @@ -649,7 +652,7 @@ InstAddMode: ast::AddDetails = { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-comparison-instructions-setp // TODO: support f16 setp InstSetp: ast::Instruction<ast::ParsedArgParams<'input>> = { - "setp" <d:SetpMode> <a:Arg4> => ast::Instruction::Setp(d, a), + "setp" <d:SetpMode> <a:Arg4Setp> => ast::Instruction::Setp(d, a), "setp" <d:SetpBoolMode> <a:Arg5> => ast::Instruction::SetpBool(d, a), }; @@ -995,6 +998,13 @@ InstAbs: ast::Instruction<ast::ParsedArgParams<'input>> = { }, }; +// 
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mad +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mad +InstMad: ast::Instruction<ast::ParsedArgParams<'input>> = { + "mad" <d:InstMulMode> <a:Arg4> => ast::Instruction::Mad(d, a), + "mad" ".hi" ".sat" ".s32" => todo!() +}; + SignedIntType: ast::ScalarType = { ".s16" => ast::ScalarType::S16, ".s32" => ast::ScalarType::S32, @@ -1056,7 +1066,11 @@ Arg3: ast::Arg3<ast::ParsedArgParams<'input>> = { }; Arg4: ast::Arg4<ast::ParsedArgParams<'input>> = { - <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> => ast::Arg4{<>} + <dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> "," <src3:Operand> => ast::Arg4{<>} +}; + +Arg4Setp: ast::Arg4Setp<ast::ParsedArgParams<'input>> = { + <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> => ast::Arg4Setp{<>} }; // TODO: pass src3 negation somewhere diff --git a/ptx/src/test/mod.rs b/ptx/src/test/mod.rs index d251884..0339141 100644 --- a/ptx/src/test/mod.rs +++ b/ptx/src/test/mod.rs @@ -40,3 +40,10 @@ fn _Z9vectorAddPKfS0_Pfi_ptx() -> Result<(), TranslateError> { let vector_add = include_str!("_Z9vectorAddPKfS0_Pfi.ptx"); compile_and_assert(vector_add) } + +#[test] +#[allow(non_snake_case)] +fn vectorAdd_11_ptx() -> Result<(), TranslateError> { + let vector_add = include_str!("vectorAdd_11.ptx"); + compile_and_assert(vector_add) +} diff --git a/ptx/src/test/spirv_run/mad_s32.ptx b/ptx/src/test/spirv_run/mad_s32.ptx new file mode 100644 index 0000000..a864266 --- /dev/null +++ b/ptx/src/test/spirv_run/mad_s32.ptx @@ -0,0 +1,28 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry mad_s32( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .s32 dst; + .reg .s32 src1; + .reg .s32 src2; + .reg .s32 src3; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + 
ld.s32 src1, [in_addr]; + ld.s32 src2, [in_addr+4]; + ld.s32 src3, [in_addr+8]; + mad.lo.s32 dst, src1, src2, src3; + st.s32 [out_addr], dst; + st.s32 [out_addr+4], dst; + st.s32 [out_addr+8], dst; + ret; +} diff --git a/ptx/src/test/spirv_run/mad_s32.spvtxt b/ptx/src/test/spirv_run/mad_s32.spvtxt new file mode 100644 index 0000000..3a7153d --- /dev/null +++ b/ptx/src/test/spirv_run/mad_s32.spvtxt @@ -0,0 +1,77 @@ + OpCapability GenericPointer + OpCapability Linkage + OpCapability Addresses + OpCapability Kernel + OpCapability Int64 + OpCapability Int8 + OpCapability Float64 + %48 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "mad_s32" + %void = OpTypeVoid + %ulong = OpTypeInt 64 0 + %51 = OpTypeFunction %void %ulong %ulong +%_ptr_Function_ulong = OpTypePointer Function %ulong + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Generic_uint = OpTypePointer Generic %uint + %ulong_4 = OpConstant %ulong 4 + %ulong_8 = OpConstant %ulong 8 + %ulong_4_0 = OpConstant %ulong 4 + %ulong_8_0 = OpConstant %ulong 8 + %1 = OpFunction %void None %51 + %10 = OpFunctionParameter %ulong + %11 = OpFunctionParameter %ulong + %46 = OpLabel + %2 = OpVariable %_ptr_Function_ulong Function + %3 = OpVariable %_ptr_Function_ulong Function + %4 = OpVariable %_ptr_Function_ulong Function + %5 = OpVariable %_ptr_Function_ulong Function + %6 = OpVariable %_ptr_Function_uint Function + %7 = OpVariable %_ptr_Function_uint Function + %8 = OpVariable %_ptr_Function_uint Function + %9 = OpVariable %_ptr_Function_uint Function + OpStore %2 %10 + OpStore %3 %11 + %13 = OpLoad %ulong %2 + %12 = OpCopyObject %ulong %13 + OpStore %4 %12 + %15 = OpLoad %ulong %3 + %14 = OpCopyObject %ulong %15 + OpStore %5 %14 + %17 = OpLoad %ulong %4 + %40 = OpConvertUToPtr %_ptr_Generic_uint %17 + %16 = OpLoad %uint %40 + OpStore %7 %16 + %19 = OpLoad %ulong %4 + %33 = OpIAdd %ulong %19 %ulong_4 + %41 = OpConvertUToPtr %_ptr_Generic_uint %33 + 
%18 = OpLoad %uint %41 + OpStore %8 %18 + %21 = OpLoad %ulong %4 + %35 = OpIAdd %ulong %21 %ulong_8 + %42 = OpConvertUToPtr %_ptr_Generic_uint %35 + %20 = OpLoad %uint %42 + OpStore %9 %20 + %23 = OpLoad %uint %7 + %24 = OpLoad %uint %8 + %25 = OpLoad %uint %9 + %56 = OpIMul %uint %23 %24 + %22 = OpIAdd %uint %25 %56 + OpStore %6 %22 + %26 = OpLoad %ulong %5 + %27 = OpLoad %uint %6 + %43 = OpConvertUToPtr %_ptr_Generic_uint %26 + OpStore %43 %27 + %28 = OpLoad %ulong %5 + %29 = OpLoad %uint %6 + %37 = OpIAdd %ulong %28 %ulong_4_0 + %44 = OpConvertUToPtr %_ptr_Generic_uint %37 + OpStore %44 %29 + %30 = OpLoad %ulong %5 + %31 = OpLoad %uint %6 + %39 = OpIAdd %ulong %30 %ulong_8_0 + %45 = OpConvertUToPtr %_ptr_Generic_uint %39 + OpStore %45 %31 + OpReturn + OpFunctionEnd diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index 78c3375..27dc063 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -8,7 +8,6 @@ use spirv_headers::Word; use spirv_tools_sys::{
spv_binary, spv_endianness_t, spv_parsed_instruction_t, spv_result_t, spv_target_env,
};
-use std::{collections::hash_map::Entry, cmp};
use std::error;
use std::ffi::{c_void, CStr, CString};
use std::fmt;
@@ -17,6 +16,7 @@ use std::hash::Hash; use std::mem;
use std::slice;
use std::{borrow::Cow, collections::HashMap, env, fs, path::PathBuf, ptr, str};
+use std::{cmp, collections::hash_map::Entry};
macro_rules! test_ptx {
($fn_name:ident, $input:expr, $output:expr) => {
@@ -65,6 +65,8 @@ test_ptx!(mov_address, [0xDEADu64], [0u64]); test_ptx!(b64tof64, [111u64], [111u64]);
test_ptx!(implicit_param, [34u32], [34u32]);
test_ptx!(pred_not, [10u64, 11u64], [2u64, 0u64]);
+test_ptx!(mad_s32, [2i32, 3i32, 4i32], [10i32, 10i32, 10i32]);
+test_ptx!(mul_wide, [0x01_00_00_00__01_00_00_00i64], [0x1_00_00_00_00_00_00i64]);
struct DisplayError<T: Debug> {
err: T,
@@ -93,7 +95,7 @@ fn test_ptx_assert<'a, T: From<u8> + ze::SafeRepr + Debug + Copy + PartialEq>( let mut errors = Vec::new();
let ast = ptx::ModuleParser::new().parse(&mut errors, ptx_text)?;
assert!(errors.len() == 0);
- let spirv = translate::to_spirv(ast)?;
+ let (spirv, _) = translate::to_spirv(ast)?;
let name = CString::new(name)?;
let result =
run_spirv(name.as_c_str(), &spirv, input, output).map_err(|err| DisplayError { err })?;
@@ -127,7 +129,7 @@ fn run_spirv<T: From<u8> + ze::SafeRepr + Copy + Debug>( kernel.set_indirect_access(
ze::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE,
)?;
- let mut inp_b = ze::DeviceBuffer::<T>::new(&mut ctx, &dev, cmp::max(input.len(),1))?;
+ let mut inp_b = ze::DeviceBuffer::<T>::new(&mut ctx, &dev, cmp::max(input.len(), 1))?;
let mut out_b = ze::DeviceBuffer::<T>::new(&mut ctx, &dev, cmp::max(output.len(), 1))?;
let inp_b_ptr_mut: ze::BufferPtrMut<T> = (&mut inp_b).into();
let event_pool = ze::EventPool::new(&mut ctx, 3, Some(&[&dev]))?;
@@ -157,7 +159,7 @@ fn test_spvtxt_assert<'a>( let mut errors = Vec::new();
let ast = ptx::ModuleParser::new().parse(&mut errors, ptx_txt)?;
assert!(errors.len() == 0);
- let ptx_mod = translate::to_spirv_module(ast)?;
+ let (ptx_mod, _) = translate::to_spirv_module(ast)?;
let spv_context =
unsafe { spirv_tools::spvContextCreate(spv_target_env::SPV_ENV_UNIVERSAL_1_3) };
assert!(spv_context != ptr::null_mut());
diff --git a/ptx/src/test/spirv_run/mul_wide.ptx b/ptx/src/test/spirv_run/mul_wide.ptx new file mode 100644 index 0000000..2d6f8a5 --- /dev/null +++ b/ptx/src/test/spirv_run/mul_wide.ptx @@ -0,0 +1,24 @@ +.version 6.5
+.target sm_30
+.address_size 64
+
+.visible .entry mul_wide(
+ .param .u64 input,
+ .param .u64 output
+)
+{
+ .reg .u64 in_addr;
+ .reg .u64 out_addr;
+ .reg .s32 inp1;
+ .reg .s32 inp2;
+ .reg .s64 result;
+
+ ld.param.u64 in_addr, [input];
+ ld.param.u64 out_addr, [output];
+
+ ld.global.s32 inp1, [in_addr];
+ ld.global.s32 inp2, [in_addr+4];
+ mul.wide.s32 result, inp1, inp2;
+ st.u64 [out_addr], result;
+ ret;
+}
diff --git a/ptx/src/test/spirv_run/mul_wide.spvtxt b/ptx/src/test/spirv_run/mul_wide.spvtxt new file mode 100644 index 0000000..274612c --- /dev/null +++ b/ptx/src/test/spirv_run/mul_wide.spvtxt @@ -0,0 +1,64 @@ + OpCapability GenericPointer + OpCapability Linkage + OpCapability Addresses + OpCapability Kernel + OpCapability Int64 + OpCapability Int8 + OpCapability Float64 + %32 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "mul_wide" + %void = OpTypeVoid + %ulong = OpTypeInt 64 0 + %35 = OpTypeFunction %void %ulong %ulong +%_ptr_Function_ulong = OpTypePointer Function %ulong + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint + %ulong_4 = OpConstant %ulong 4 + %_struct_40 = OpTypeStruct %uint %uint + %v2uint = OpTypeVector %uint 2 +%_ptr_Generic_ulong = OpTypePointer Generic %ulong + %1 = OpFunction %void None %35 + %9 = OpFunctionParameter %ulong + %10 = OpFunctionParameter %ulong + %30 = OpLabel + %2 = OpVariable %_ptr_Function_ulong Function + %3 = OpVariable %_ptr_Function_ulong Function + %4 = OpVariable %_ptr_Function_ulong Function + %5 = OpVariable %_ptr_Function_ulong Function + %6 = OpVariable %_ptr_Function_uint Function + %7 = OpVariable %_ptr_Function_uint Function + %8 = OpVariable %_ptr_Function_ulong Function + OpStore %2 %9 + OpStore %3 %10 + %12 = OpLoad %ulong %2 + %11 = OpCopyObject %ulong %12 + OpStore %4 %11 + %14 = OpLoad %ulong %3 + %13 = OpCopyObject %ulong %14 + OpStore %5 %13 + %16 = OpLoad %ulong %4 + %26 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %16 + %15 = OpLoad %uint %26 + OpStore %6 %15 + %18 = OpLoad %ulong %4 + %25 = OpIAdd %ulong %18 %ulong_4 + %27 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %25 + %17 = OpLoad %uint %27 + OpStore %7 %17 + %20 = OpLoad %uint %6 + %21 = OpLoad %uint %7 + %41 = OpSMulExtended %_struct_40 %20 %21 + %42 = OpCompositeExtract %uint %41 0 + %43 = OpCompositeExtract 
%uint %41 1 + %45 = OpCompositeConstruct %v2uint %42 %43 + %19 = OpBitcast %ulong %45 + OpStore %8 %19 + %22 = OpLoad %ulong %5 + %23 = OpLoad %ulong %8 + %28 = OpCopyObject %ulong %23 + %29 = OpConvertUToPtr %_ptr_Generic_ulong %22 + OpStore %29 %28 + OpReturn + OpFunctionEnd diff --git a/ptx/src/test/vectorAdd_11.ptx b/ptx/src/test/vectorAdd_11.ptx new file mode 100644 index 0000000..ba0381e --- /dev/null +++ b/ptx/src/test/vectorAdd_11.ptx @@ -0,0 +1,55 @@ + + + + + + + + +.version 7.0 +.target sm_80 +.address_size 64 + + + +.visible .entry _Z9vectorAddPKfS0_Pfi( +.param .u64 _Z9vectorAddPKfS0_Pfi_param_0, +.param .u64 _Z9vectorAddPKfS0_Pfi_param_1, +.param .u64 _Z9vectorAddPKfS0_Pfi_param_2, +.param .u32 _Z9vectorAddPKfS0_Pfi_param_3 +) +{ +.reg .pred %p<2>; +.reg .f32 %f<4>; +.reg .b32 %r<6>; +.reg .b64 %rd<11>; + + +ld.param.u64 %rd1, [_Z9vectorAddPKfS0_Pfi_param_0]; +ld.param.u64 %rd2, [_Z9vectorAddPKfS0_Pfi_param_1]; +ld.param.u64 %rd3, [_Z9vectorAddPKfS0_Pfi_param_2]; +ld.param.u32 %r2, [_Z9vectorAddPKfS0_Pfi_param_3]; +mov.u32 %r3, %ntid.x; +mov.u32 %r4, %ctaid.x; +mov.u32 %r5, %tid.x; +mad.lo.s32 %r1, %r4, %r3, %r5; +setp.ge.s32 %p1, %r1, %r2; +@%p1 bra BB0_2; + +cvta.to.global.u64 %rd4, %rd1; +mul.wide.s32 %rd5, %r1, 4; +add.s64 %rd6, %rd4, %rd5; +cvta.to.global.u64 %rd7, %rd2; +add.s64 %rd8, %rd7, %rd5; +ld.global.f32 %f1, [%rd8]; +ld.global.f32 %f2, [%rd6]; +add.f32 %f3, %f2, %f1; +cvta.to.global.u64 %rd9, %rd3; +add.s64 %rd10, %rd9, %rd5; +st.global.f32 [%rd10], %f3; + +BB0_2: +ret; +} + + diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 5b03f0b..a1d4b6a 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -28,6 +28,7 @@ enum SpirvType { Array(SpirvScalarKey, u32),
Pointer(Box<SpirvType>, spirv::StorageClass),
Func(Option<Box<SpirvType>>, Vec<SpirvType>),
+ Struct(Vec<SpirvScalarKey>),
}
impl SpirvType {
@@ -174,6 +175,16 @@ impl TypeWordMap { .entry(t)
.or_insert_with(|| b.type_function(out_t, in_t))
}
+ SpirvType::Struct(ref underlying) => {
+ let underlying_ids = underlying
+ .iter()
+ .map(|t| self.get_or_add_spirv_scalar(b, *t))
+ .collect::<Vec<_>>();
+ *self
+ .complex
+ .entry(t)
+ .or_insert_with(|| b.type_struct(underlying_ids))
+ }
}
}
@@ -201,7 +212,9 @@ impl TypeWordMap { }
}
-pub fn to_spirv_module<'a>(ast: ast::Module<'a>) -> Result<dr::Module, TranslateError> {
+pub fn to_spirv_module<'a>(
+ ast: ast::Module<'a>,
+) -> Result<(dr::Module, HashMap<String, Vec<usize>>), TranslateError> {
let mut id_defs = GlobalStringIdResolver::new(1);
let ssa_functions = ast
.functions
@@ -218,17 +231,24 @@ pub fn to_spirv_module<'a>(ast: ast::Module<'a>) -> Result<dr::Module, Translate emit_memory_model(&mut builder);
let mut map = TypeWordMap::new(&mut builder);
emit_builtins(&mut builder, &mut map, &id_defs);
+ let mut args_len = HashMap::new();
for f in ssa_functions {
let f_body = match f.body {
Some(f) => f,
None => continue,
};
emit_function_body_ops(&mut builder, &mut map, opencl_id, &f.globals)?;
- emit_function_header(&mut builder, &mut map, &id_defs, f.func_directive)?;
+ emit_function_header(
+ &mut builder,
+ &mut map,
+ &id_defs,
+ f.func_directive,
+ &mut args_len,
+ )?;
emit_function_body_ops(&mut builder, &mut map, opencl_id, &f_body)?;
builder.end_function()?;
}
- Ok(builder.module())
+ Ok((builder.module(), args_len))
}
fn emit_builtins(
@@ -263,7 +283,12 @@ fn emit_function_header<'a>( map: &mut TypeWordMap,
global: &GlobalStringIdResolver<'a>,
func_directive: ast::MethodDecl<ExpandedArgParams>,
+ all_args_lens: &mut HashMap<String, Vec<usize>>,
) -> Result<(), TranslateError> {
+ if let ast::MethodDecl::Kernel(name, args) = &func_directive {
+ let args_lens = args.iter().map(|param| param.v_type.width()).collect();
+ all_args_lens.insert(name.to_string(), args_lens);
+ }
let (ret_type, func_type) = get_function_type(builder, map, &func_directive);
let fn_id = match func_directive {
ast::MethodDecl::Kernel(name, _) => {
@@ -297,9 +322,11 @@ fn emit_function_header<'a>( Ok(())
}
-pub fn to_spirv<'a>(ast: ast::Module<'a>) -> Result<Vec<u32>, TranslateError> {
- let module = to_spirv_module(ast)?;
- Ok(module.assemble())
+pub fn to_spirv<'a>(
+ ast: ast::Module<'a>,
+) -> Result<(Vec<u32>, HashMap<String, Vec<usize>>), TranslateError> {
+ let (module, all_args_lens) = to_spirv_module(ast)?;
+ Ok((module.assemble(), all_args_lens))
}
fn emit_capabilities(builder: &mut dr::Builder) {
@@ -905,7 +932,7 @@ impl<'a, 'b> ArgumentMapVisitor<NormalizedArgParams, ExpandedArgParams> ArgumentSemantics::PhysicalPointer => {
let scalar_t = ast::ScalarType::U64;
let id_constant_stmt = self.id_def.new_id(ast::Type::Scalar(scalar_t));
- let result_id = self.id_def.new_id(typ);
+ let result_id = self.id_def.new_id(ast::Type::Scalar(scalar_t));
self.func.push(Statement::Constant(ConstantDefinition {
dst: id_constant_stmt,
typ: scalar_t,
@@ -1314,8 +1341,8 @@ fn emit_function_body_ops( let type_pred = map.get_or_add_scalar(builder, ast::ScalarType::Pred);
let const_true = builder.constant_true(type_pred);
let const_false = builder.constant_false(type_pred);
- builder.select(result_type, result_id, operand, const_false, const_true)
- },
+ builder.select(result_type, result_id, operand, const_false, const_true)
+ }
_ => builder.not(result_type, result_id, operand),
}?;
}
@@ -1359,6 +1386,12 @@ fn emit_function_body_ops( builder.copy_object(result_type, Some(*dst), *src)?;
}
},
+ ast::Instruction::Mad(mad, arg) => match mad {
+ ast::MulDetails::Int(ref desc) => {
+ emit_mad_int(builder, map, opencl, desc, arg)?
+ }
+ ast::MulDetails::Float(desc) => emit_mad_float(builder, map, desc, arg)?,
+ },
},
Statement::LoadVar(arg, typ) => {
let type_id = map.get_or_add(builder, SpirvType::from(*typ));
@@ -1385,6 +1418,47 @@ fn emit_function_body_ops( Ok(())
}
+fn emit_mad_int(
+ builder: &mut dr::Builder,
+ map: &mut TypeWordMap,
+ opencl: spirv::Word,
+ desc: &ast::MulIntDesc,
+ arg: &ast::Arg4<ExpandedArgParams>,
+) -> Result<(), dr::Error> {
+ let inst_type = map.get_or_add(builder, SpirvType::from(ast::ScalarType::from(desc.typ)));
+ match desc.control {
+ ast::MulIntControl::Low => {
+ let mul_result = builder.i_mul(inst_type, None, arg.src1, arg.src2)?;
+ builder.i_add(inst_type, Some(arg.dst), arg.src3, mul_result)?;
+ }
+ ast::MulIntControl::High => {
+ let cl_op = if desc.typ.is_signed() {
+ spirv::CLOp::s_mad_hi
+ } else {
+ spirv::CLOp::u_mad_hi
+ };
+ builder.ext_inst(
+ inst_type,
+ Some(arg.dst),
+ opencl,
+ cl_op as spirv::Word,
+ [arg.src1, arg.src2, arg.src3],
+ )?;
+ }
+ ast::MulIntControl::Wide => todo!(),
+ };
+ Ok(())
+}
+
+fn emit_mad_float(
+ builder: &mut dr::Builder,
+ map: &mut TypeWordMap,
+ desc: &ast::MulFloatDesc,
+ arg: &ast::Arg4<ExpandedArgParams>,
+) -> Result<(), dr::Error> {
+ todo!()
+}
+
fn emit_add_float(
builder: &mut dr::Builder,
map: &mut TypeWordMap,
@@ -1529,7 +1603,7 @@ fn emit_setp( builder: &mut dr::Builder,
map: &mut TypeWordMap,
setp: &ast::SetpData,
- arg: &ast::Arg4<ExpandedArgParams>,
+ arg: &ast::Arg4Setp<ExpandedArgParams>,
) -> Result<(), dr::Error> {
if setp.flush_to_zero {
todo!()
@@ -1607,6 +1681,7 @@ fn emit_mul_int( desc: &ast::MulIntDesc,
arg: &ast::Arg3<ExpandedArgParams>,
) -> Result<(), dr::Error> {
+ let instruction_type = ast::ScalarType::from(desc.typ);
let inst_type = map.get_or_add(builder, SpirvType::from(ast::ScalarType::from(desc.typ)));
match desc.control {
ast::MulIntControl::Low => {
@@ -1626,11 +1701,53 @@ fn emit_mul_int( [arg.src1, arg.src2],
)?;
}
- ast::MulIntControl::Wide => todo!(),
+ ast::MulIntControl::Wide => {
+ let mul_ext_type = SpirvType::Struct(vec![
+ SpirvScalarKey::from(instruction_type),
+ SpirvScalarKey::from(instruction_type),
+ ]);
+ let mul_ext_type_id = map.get_or_add(builder, mul_ext_type);
+ let mul = if desc.typ.is_signed() {
+ builder.s_mul_extended(mul_ext_type_id, None, arg.src1, arg.src2)?
+ } else {
+ builder.u_mul_extended(mul_ext_type_id, None, arg.src1, arg.src2)?
+ };
+ let instr_width = instruction_type.width();
+ let instr_kind = instruction_type.kind();
+ let dst_type = ast::ScalarType::from_parts(instr_width * 2, instr_kind);
+ let dst_type_id = map.get_or_add_scalar(builder, dst_type);
+ struct2_bitcast_to_wide(
+ builder,
+ map,
+ SpirvScalarKey::from(instruction_type),
+ inst_type,
+ arg.dst,
+ dst_type_id,
+ mul,
+ )?;
+ }
}
Ok(())
}
+// Surprisingly, structs can't be bitcast, so we route everything through a vector
+fn struct2_bitcast_to_wide(
+ builder: &mut dr::Builder,
+ map: &mut TypeWordMap,
+ base_type_key: SpirvScalarKey,
+ instruction_type: spirv::Word,
+ dst: spirv::Word,
+ dst_type_id: spirv::Word,
+ src: spirv::Word,
+) -> Result<(), dr::Error> {
+ let low_bits = builder.composite_extract(instruction_type, None, src, [0])?;
+ let high_bits = builder.composite_extract(instruction_type, None, src, [1])?;
+ let vector_type = map.get_or_add(builder, SpirvType::Vector(base_type_key, 2));
+ let vector = builder.composite_construct(vector_type, None, [low_bits, high_bits])?;
+ builder.bitcast(dst_type_id, Some(dst), vector)?;
+ Ok(())
+}
+
fn emit_abs(
builder: &mut dr::Builder,
map: &mut TypeWordMap,
@@ -1844,8 +1961,8 @@ impl PtxSpecialRegister { fn get_builtin(self) -> spirv::BuiltIn {
match self {
- PtxSpecialRegister::Tid => spirv::BuiltIn::GlobalInvocationId,
- PtxSpecialRegister::Ntid => spirv::BuiltIn::GlobalSize,
+ PtxSpecialRegister::Tid => spirv::BuiltIn::LocalInvocationId,
+ PtxSpecialRegister::Ntid => spirv::BuiltIn::WorkgroupSize,
PtxSpecialRegister::Ctaid => spirv::BuiltIn::WorkgroupId,
PtxSpecialRegister::Nctaid => spirv::BuiltIn::NumWorkgroups,
}
@@ -2492,6 +2609,10 @@ impl<T: ArgParamsEx> ast::Instruction<T> { let inst_type = ast::Type::Scalar(ast::ScalarType::B64);
ast::Instruction::Cvta(d, a.map(visitor, false, inst_type)?)
}
+ ast::Instruction::Mad(d, a) => {
+ let inst_type = d.get_type();
+ ast::Instruction::Mad(d, a.map(visitor, inst_type)?)
+ }
})
}
}
@@ -2641,7 +2762,8 @@ impl ast::Instruction<ExpandedArgParams> { | ast::Instruction::St(_, _)
| ast::Instruction::Ret(_)
| ast::Instruction::Abs(_, _)
- | ast::Instruction::Call(_) => None,
+ | ast::Instruction::Call(_)
+ | ast::Instruction::Mad(_, _) => None,
}
}
}
@@ -2741,6 +2863,17 @@ impl<'a> ast::Instruction<ast::ParsedArgParams<'a>> { }
}
+impl ast::VariableParamType {
+ fn width(self) -> usize { // total width of the parameter: the scalar's width, or element width times `len` for an array
+ match self {
+ ast::VariableParamType::Scalar(t) => ast::ScalarType::from(t).width() as usize,
+ ast::VariableParamType::Array(t, len) => {
+ (ast::ScalarType::from(t).width() as usize) * (len as usize) // NOTE(review): the unit of `ScalarType::width()` (bytes vs bits) is not visible in this diff — confirm
+ }
+ }
+ }
+}
+
impl<T: ArgParamsEx> ast::Arg1<T> {
fn map<U: ArgParamsEx, V: ArgumentMapVisitor<T, U>>(
self,
@@ -3042,6 +3175,53 @@ impl<T: ArgParamsEx> ast::Arg4<T> { visitor: &mut V,
t: ast::Type,
) -> Result<ast::Arg4<U>, TranslateError> {
+ let dst = visitor.variable(
+ ArgumentDescriptor {
+ op: self.dst,
+ is_dst: true,
+ sema: ArgumentSemantics::Default,
+ },
+ Some(t),
+ )?;
+ let src1 = visitor.operand(
+ ArgumentDescriptor {
+ op: self.src1,
+ is_dst: false,
+ sema: ArgumentSemantics::Default,
+ },
+ t,
+ )?;
+ let src2 = visitor.operand(
+ ArgumentDescriptor {
+ op: self.src2,
+ is_dst: false,
+ sema: ArgumentSemantics::Default,
+ },
+ t,
+ )?;
+ let src3 = visitor.operand(
+ ArgumentDescriptor {
+ op: self.src3,
+ is_dst: false,
+ sema: ArgumentSemantics::Default,
+ },
+ t,
+ )?;
+ Ok(ast::Arg4 {
+ dst,
+ src1,
+ src2,
+ src3,
+ })
+ }
+}
+
+impl<T: ArgParamsEx> ast::Arg4Setp<T> {
+ fn map<U: ArgParamsEx, V: ArgumentMapVisitor<T, U>>(
+ self,
+ visitor: &mut V,
+ t: ast::Type,
+ ) -> Result<ast::Arg4Setp<U>, TranslateError> {
let dst1 = visitor.variable(
ArgumentDescriptor {
op: self.dst1,
@@ -3079,7 +3259,7 @@ impl<T: ArgParamsEx> ast::Arg4<T> { },
t,
)?;
- Ok(ast::Arg4 {
+ Ok(ast::Arg4Setp {
dst1,
dst2,
src1,
|