summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Andrzej Janik <[email protected]> 2020-09-27 13:14:19 +0200
committer Andrzej Janik <[email protected]> 2020-09-27 13:14:19 +0200
commit e0190fcbe19e9554ccc2fb0d72685569823224ef (patch)
tree c396a59b3080c0bdfbf308742e4f53caf48b5030
parent 42bcd999eb2caec0046aa76d12ec7e73919495fc (diff)
download ZLUDA-e0190fcbe19e9554ccc2fb0d72685569823224ef.tar.gz
ZLUDA-e0190fcbe19e9554ccc2fb0d72685569823224ef.zip
Add missing support for Milestone 1
-rw-r--r-- level_zero/src/ze.rs 5
-rw-r--r-- notcuda/src/cuda.rs 881
-rw-r--r-- notcuda/src/impl/function.rs 52
-rw-r--r-- notcuda/src/impl/memory.rs 4
-rw-r--r-- notcuda/src/impl/mod.rs 12
-rw-r--r-- notcuda/src/impl/module.rs 51
-rw-r--r-- notcuda/src/impl/stream.rs 69
-rw-r--r-- notcuda/src/impl/test.rs 14
-rw-r--r-- ptx/src/ast.rs 18
-rw-r--r-- ptx/src/ptx.lalrpop 18
-rw-r--r-- ptx/src/test/mod.rs 7
-rw-r--r-- ptx/src/test/spirv_run/mad_s32.ptx 28
-rw-r--r-- ptx/src/test/spirv_run/mad_s32.spvtxt 77
-rw-r--r-- ptx/src/test/spirv_run/mod.rs 10
-rw-r--r-- ptx/src/test/spirv_run/mul_wide.ptx 24
-rw-r--r-- ptx/src/test/spirv_run/mul_wide.spvtxt 64
-rw-r--r-- ptx/src/test/vectorAdd_11.ptx 55
-rw-r--r-- ptx/src/translate.rs 210
18 files changed, 982 insertions, 617 deletions
diff --git a/level_zero/src/ze.rs b/level_zero/src/ze.rs
index 16b9130..559805e 100644
--- a/level_zero/src/ze.rs
+++ b/level_zero/src/ze.rs
@@ -726,6 +726,11 @@ impl<'a> Kernel<'a> {
Ok(())
}
+ pub unsafe fn set_arg_raw(&self, index: u32, size: usize, value: *const c_void) -> Result<()> {
+ check!(sys::zeKernelSetArgumentValue(self.0, index, size, value));
+ Ok(())
+ }
+
pub fn set_group_size(&self, x: u32, y: u32, z: u32) -> Result<()> {
check!(sys::zeKernelSetGroupSize(self.0, x, y, z));
Ok(())
diff --git a/notcuda/src/cuda.rs b/notcuda/src/cuda.rs
index 122f0da..feeada0 100644
--- a/notcuda/src/cuda.rs
+++ b/notcuda/src/cuda.rs
@@ -2181,8 +2181,7 @@ impl CUgraphExecUpdateResult_enum {
pub struct CUgraphExecUpdateResult_enum(pub ::std::os::raw::c_uint);
pub use self::CUgraphExecUpdateResult_enum as CUgraphExecUpdateResult;
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGetErrorString(
error: CUresult,
pStr: *mut *const ::std::os::raw::c_char,
@@ -2190,8 +2189,7 @@ pub extern "C" fn cuGetErrorString(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGetErrorName(
error: CUresult,
pStr: *mut *const ::std::os::raw::c_char,
@@ -2199,33 +2197,28 @@ pub extern "C" fn cuGetErrorName(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuInit(Flags: ::std::os::raw::c_uint) -> CUresult {
r#impl::init().encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDriverGetVersion(driverVersion: *mut ::std::os::raw::c_int) -> CUresult {
unsafe { *driverVersion = r#impl::driver_get_version() };
CUresult::CUDA_SUCCESS
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceGet(device: *mut CUdevice, ordinal: ::std::os::raw::c_int) -> CUresult {
r#impl::device::get(device.decuda(), ordinal)
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceGetCount(count: *mut ::std::os::raw::c_int) -> CUresult {
r#impl::device::get_count(count)
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceGetName(
name: *mut ::std::os::raw::c_char,
len: ::std::os::raw::c_int,
@@ -2234,20 +2227,17 @@ pub extern "C" fn cuDeviceGetName(
r#impl::device::get_name(name, len, dev.decuda()).encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: CUdevice) -> CUresult {
r#impl::device::get_uuid(uuid, dev.decuda()).encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: CUdevice) -> CUresult {
r#impl::device::total_mem_v2(bytes, dev.decuda()).encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceGetAttribute(
pi: *mut ::std::os::raw::c_int,
attrib: CUdevice_attribute,
@@ -2256,8 +2246,7 @@ pub extern "C" fn cuDeviceGetAttribute(
r#impl::device::get_attribute(pi, attrib, dev.decuda()).encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceGetNvSciSyncAttributes(
nvSciSyncAttrList: *mut ::std::os::raw::c_void,
dev: CUdevice,
@@ -2266,14 +2255,12 @@ pub extern "C" fn cuDeviceGetNvSciSyncAttributes(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceGetProperties(prop: *mut CUdevprop, dev: CUdevice) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceComputeCapability(
major: *mut ::std::os::raw::c_int,
minor: *mut ::std::os::raw::c_int,
@@ -2282,20 +2269,17 @@ pub extern "C" fn cuDeviceComputeCapability(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDevicePrimaryCtxRetain(pctx: *mut CUcontext, dev: CUdevice) -> CUresult {
r#impl::device::primary_ctx_retain(pctx.decuda(), dev.decuda()).encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDevicePrimaryCtxRelease_v2(dev: CUdevice) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDevicePrimaryCtxSetFlags_v2(
dev: CUdevice,
flags: ::std::os::raw::c_uint,
@@ -2303,8 +2287,7 @@ pub extern "C" fn cuDevicePrimaryCtxSetFlags_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDevicePrimaryCtxGetState(
dev: CUdevice,
flags: *mut ::std::os::raw::c_uint,
@@ -2313,14 +2296,12 @@ pub extern "C" fn cuDevicePrimaryCtxGetState(
r#impl::device::primary_ctx_get_state(dev.decuda(), flags, active).encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDevicePrimaryCtxReset_v2(dev: CUdevice) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxCreate_v2(
pctx: *mut CUcontext,
flags: ::std::os::raw::c_uint,
@@ -2329,92 +2310,77 @@ pub extern "C" fn cuCtxCreate_v2(
r#impl::context::create_v2(pctx.decuda(), flags, dev.decuda()).encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult {
r#impl::context::destroy_v2(ctx.decuda())
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxPushCurrent_v2(ctx: CUcontext) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxPopCurrent_v2(pctx: *mut CUcontext) -> CUresult {
r#impl::context::pop_current_v2(pctx.decuda())
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxSetCurrent(ctx: CUcontext) -> CUresult {
r#impl::context::set_current(ctx.decuda())
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxGetCurrent(pctx: *mut CUcontext) -> CUresult {
r#impl::context::get_current(pctx.decuda()).encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxGetDevice(device: *mut CUdevice) -> CUresult {
r#impl::context::get_device(device.decuda())
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxGetFlags(flags: *mut ::std::os::raw::c_uint) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxSynchronize() -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxSetLimit(limit: CUlimit, value: usize) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxGetLimit(pvalue: *mut usize, limit: CUlimit) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxGetCacheConfig(pconfig: *mut CUfunc_cache) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxSetCacheConfig(config: CUfunc_cache) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxGetSharedMemConfig(pConfig: *mut CUsharedconfig) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxSetSharedMemConfig(config: CUsharedconfig) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxGetApiVersion(
ctx: CUcontext,
version: *mut ::std::os::raw::c_uint,
@@ -2422,8 +2388,7 @@ pub extern "C" fn cuCtxGetApiVersion(
r#impl::context::get_api_version(ctx.decuda(), version)
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxGetStreamPriorityRange(
leastPriority: *mut ::std::os::raw::c_int,
greatestPriority: *mut ::std::os::raw::c_int,
@@ -2431,26 +2396,22 @@ pub extern "C" fn cuCtxGetStreamPriorityRange(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxResetPersistingL2Cache() -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxAttach(pctx: *mut CUcontext, flags: ::std::os::raw::c_uint) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxDetach(ctx: CUcontext) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuModuleLoad(
module: *mut CUmodule,
fname: *const ::std::os::raw::c_char,
@@ -2458,8 +2419,7 @@ pub extern "C" fn cuModuleLoad(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuModuleLoadData(
module: *mut CUmodule,
image: *const ::std::os::raw::c_void,
@@ -2467,8 +2427,7 @@ pub extern "C" fn cuModuleLoadData(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuModuleLoadDataEx(
module: *mut CUmodule,
image: *const ::std::os::raw::c_void,
@@ -2479,8 +2438,7 @@ pub extern "C" fn cuModuleLoadDataEx(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuModuleLoadFatBinary(
module: *mut CUmodule,
fatCubin: *const ::std::os::raw::c_void,
@@ -2488,14 +2446,12 @@ pub extern "C" fn cuModuleLoadFatBinary(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuModuleUnload(hmod: CUmodule) -> CUresult {
- r#impl::unimplemented()
+ r#impl::module::unload(hmod.decuda()).encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuModuleGetFunction(
hfunc: *mut CUfunction,
hmod: CUmodule,
@@ -2504,8 +2460,7 @@ pub extern "C" fn cuModuleGetFunction(
r#impl::module::get_function(hfunc.decuda(), hmod.decuda(), name).encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuModuleGetGlobal_v2(
dptr: *mut CUdeviceptr,
bytes: *mut usize,
@@ -2515,8 +2470,7 @@ pub extern "C" fn cuModuleGetGlobal_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuModuleGetTexRef(
pTexRef: *mut CUtexref,
hmod: CUmodule,
@@ -2525,8 +2479,7 @@ pub extern "C" fn cuModuleGetTexRef(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuModuleGetSurfRef(
pSurfRef: *mut CUsurfref,
hmod: CUmodule,
@@ -2535,8 +2488,7 @@ pub extern "C" fn cuModuleGetSurfRef(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuLinkCreate_v2(
numOptions: ::std::os::raw::c_uint,
options: *mut CUjit_option,
@@ -2546,8 +2498,7 @@ pub extern "C" fn cuLinkCreate_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuLinkAddData_v2(
state: CUlinkState,
type_: CUjitInputType,
@@ -2561,8 +2512,7 @@ pub extern "C" fn cuLinkAddData_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuLinkAddFile_v2(
state: CUlinkState,
type_: CUjitInputType,
@@ -2574,8 +2524,7 @@ pub extern "C" fn cuLinkAddFile_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuLinkComplete(
state: CUlinkState,
cubinOut: *mut *mut ::std::os::raw::c_void,
@@ -2584,26 +2533,22 @@ pub extern "C" fn cuLinkComplete(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuLinkDestroy(state: CUlinkState) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult {
r#impl::memory::alloc_v2(dptr.decuda(), bytesize)
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemAllocPitch_v2(
dptr: *mut CUdeviceptr,
pPitch: *mut usize,
@@ -2614,14 +2559,12 @@ pub extern "C" fn cuMemAllocPitch_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult {
- r#impl::unimplemented()
+ r#impl::memory::free_v2(dptr.decuda()).encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemGetAddressRange_v2(
pbase: *mut CUdeviceptr,
psize: *mut usize,
@@ -2630,8 +2573,7 @@ pub extern "C" fn cuMemGetAddressRange_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemAllocHost_v2(
pp: *mut *mut ::std::os::raw::c_void,
bytesize: usize,
@@ -2639,14 +2581,12 @@ pub extern "C" fn cuMemAllocHost_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemHostAlloc(
pp: *mut *mut ::std::os::raw::c_void,
bytesize: usize,
@@ -2655,8 +2595,7 @@ pub extern "C" fn cuMemHostAlloc(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemHostGetDevicePointer_v2(
pdptr: *mut CUdeviceptr,
p: *mut ::std::os::raw::c_void,
@@ -2665,8 +2604,7 @@ pub extern "C" fn cuMemHostGetDevicePointer_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemHostGetFlags(
pFlags: *mut ::std::os::raw::c_uint,
p: *mut ::std::os::raw::c_void,
@@ -2674,8 +2612,7 @@ pub extern "C" fn cuMemHostGetFlags(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemAllocManaged(
dptr: *mut CUdeviceptr,
bytesize: usize,
@@ -2684,8 +2621,7 @@ pub extern "C" fn cuMemAllocManaged(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceGetByPCIBusId(
dev: *mut CUdevice,
pciBusId: *const ::std::os::raw::c_char,
@@ -2693,8 +2629,7 @@ pub extern "C" fn cuDeviceGetByPCIBusId(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceGetPCIBusId(
pciBusId: *mut ::std::os::raw::c_char,
len: ::std::os::raw::c_int,
@@ -2703,14 +2638,12 @@ pub extern "C" fn cuDeviceGetPCIBusId(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuIpcGetEventHandle(pHandle: *mut CUipcEventHandle, event: CUevent) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuIpcOpenEventHandle(
phEvent: *mut CUevent,
handle: CUipcEventHandle,
@@ -2718,14 +2651,12 @@ pub extern "C" fn cuIpcOpenEventHandle(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuIpcGetMemHandle(pHandle: *mut CUipcMemHandle, dptr: CUdeviceptr) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuIpcOpenMemHandle(
pdptr: *mut CUdeviceptr,
handle: CUipcMemHandle,
@@ -2734,14 +2665,12 @@ pub extern "C" fn cuIpcOpenMemHandle(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuIpcCloseMemHandle(dptr: CUdeviceptr) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemHostRegister_v2(
p: *mut ::std::os::raw::c_void,
bytesize: usize,
@@ -2750,20 +2679,17 @@ pub extern "C" fn cuMemHostRegister_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemHostUnregister(p: *mut ::std::os::raw::c_void) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpy(dst: CUdeviceptr, src: CUdeviceptr, ByteCount: usize) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyPeer(
dstDevice: CUdeviceptr,
dstContext: CUcontext,
@@ -2774,8 +2700,7 @@ pub extern "C" fn cuMemcpyPeer(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyHtoD_v2(
dstDevice: CUdeviceptr,
srcHost: *const ::std::os::raw::c_void,
@@ -2784,8 +2709,7 @@ pub extern "C" fn cuMemcpyHtoD_v2(
r#impl::memory::copy_v2(dstDevice.decuda(), srcHost, ByteCount).encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyDtoH_v2(
dstHost: *mut ::std::os::raw::c_void,
srcDevice: CUdeviceptr,
@@ -2794,8 +2718,7 @@ pub extern "C" fn cuMemcpyDtoH_v2(
r#impl::memory::copy_v2(dstHost, srcDevice.decuda(), ByteCount).encuda()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyDtoD_v2(
dstDevice: CUdeviceptr,
srcDevice: CUdeviceptr,
@@ -2804,8 +2727,7 @@ pub extern "C" fn cuMemcpyDtoD_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyDtoA_v2(
dstArray: CUarray,
dstOffset: usize,
@@ -2815,8 +2737,7 @@ pub extern "C" fn cuMemcpyDtoA_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyAtoD_v2(
dstDevice: CUdeviceptr,
srcArray: CUarray,
@@ -2826,8 +2747,7 @@ pub extern "C" fn cuMemcpyAtoD_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyHtoA_v2(
dstArray: CUarray,
dstOffset: usize,
@@ -2837,8 +2757,7 @@ pub extern "C" fn cuMemcpyHtoA_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyAtoH_v2(
dstHost: *mut ::std::os::raw::c_void,
srcArray: CUarray,
@@ -2848,8 +2767,7 @@ pub extern "C" fn cuMemcpyAtoH_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyAtoA_v2(
dstArray: CUarray,
dstOffset: usize,
@@ -2860,32 +2778,27 @@ pub extern "C" fn cuMemcpyAtoA_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpy2D_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpy3DPeer(pCopy: *const CUDA_MEMCPY3D_PEER) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyAsync(
dst: CUdeviceptr,
src: CUdeviceptr,
@@ -2895,8 +2808,7 @@ pub extern "C" fn cuMemcpyAsync(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyPeerAsync(
dstDevice: CUdeviceptr,
dstContext: CUcontext,
@@ -2908,8 +2820,7 @@ pub extern "C" fn cuMemcpyPeerAsync(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyHtoDAsync_v2(
dstDevice: CUdeviceptr,
srcHost: *const ::std::os::raw::c_void,
@@ -2919,8 +2830,7 @@ pub extern "C" fn cuMemcpyHtoDAsync_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyDtoHAsync_v2(
dstHost: *mut ::std::os::raw::c_void,
srcDevice: CUdeviceptr,
@@ -2930,8 +2840,7 @@ pub extern "C" fn cuMemcpyDtoHAsync_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyDtoDAsync_v2(
dstDevice: CUdeviceptr,
srcDevice: CUdeviceptr,
@@ -2941,8 +2850,7 @@ pub extern "C" fn cuMemcpyDtoDAsync_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyHtoAAsync_v2(
dstArray: CUarray,
dstOffset: usize,
@@ -2953,8 +2861,7 @@ pub extern "C" fn cuMemcpyHtoAAsync_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpyAtoHAsync_v2(
dstHost: *mut ::std::os::raw::c_void,
srcArray: CUarray,
@@ -2965,20 +2872,17 @@ pub extern "C" fn cuMemcpyAtoHAsync_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpy2DAsync_v2(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpy3DAsync_v2(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemcpy3DPeerAsync(
pCopy: *const CUDA_MEMCPY3D_PEER,
hStream: CUstream,
@@ -2986,8 +2890,7 @@ pub extern "C" fn cuMemcpy3DPeerAsync(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemsetD8_v2(
dstDevice: CUdeviceptr,
uc: ::std::os::raw::c_uchar,
@@ -2996,8 +2899,7 @@ pub extern "C" fn cuMemsetD8_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemsetD16_v2(
dstDevice: CUdeviceptr,
us: ::std::os::raw::c_ushort,
@@ -3006,8 +2908,7 @@ pub extern "C" fn cuMemsetD16_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemsetD32_v2(
dstDevice: CUdeviceptr,
ui: ::std::os::raw::c_uint,
@@ -3016,8 +2917,7 @@ pub extern "C" fn cuMemsetD32_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemsetD2D8_v2(
dstDevice: CUdeviceptr,
dstPitch: usize,
@@ -3028,8 +2928,7 @@ pub extern "C" fn cuMemsetD2D8_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemsetD2D16_v2(
dstDevice: CUdeviceptr,
dstPitch: usize,
@@ -3040,8 +2939,7 @@ pub extern "C" fn cuMemsetD2D16_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemsetD2D32_v2(
dstDevice: CUdeviceptr,
dstPitch: usize,
@@ -3052,8 +2950,7 @@ pub extern "C" fn cuMemsetD2D32_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemsetD8Async(
dstDevice: CUdeviceptr,
uc: ::std::os::raw::c_uchar,
@@ -3063,8 +2960,7 @@ pub extern "C" fn cuMemsetD8Async(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemsetD16Async(
dstDevice: CUdeviceptr,
us: ::std::os::raw::c_ushort,
@@ -3074,8 +2970,7 @@ pub extern "C" fn cuMemsetD16Async(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemsetD32Async(
dstDevice: CUdeviceptr,
ui: ::std::os::raw::c_uint,
@@ -3085,8 +2980,7 @@ pub extern "C" fn cuMemsetD32Async(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemsetD2D8Async(
dstDevice: CUdeviceptr,
dstPitch: usize,
@@ -3098,8 +2992,7 @@ pub extern "C" fn cuMemsetD2D8Async(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemsetD2D16Async(
dstDevice: CUdeviceptr,
dstPitch: usize,
@@ -3111,8 +3004,7 @@ pub extern "C" fn cuMemsetD2D16Async(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemsetD2D32Async(
dstDevice: CUdeviceptr,
dstPitch: usize,
@@ -3124,8 +3016,7 @@ pub extern "C" fn cuMemsetD2D32Async(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuArrayCreate_v2(
pHandle: *mut CUarray,
pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR,
@@ -3133,8 +3024,7 @@ pub extern "C" fn cuArrayCreate_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuArrayGetDescriptor_v2(
pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR,
hArray: CUarray,
@@ -3142,14 +3032,12 @@ pub extern "C" fn cuArrayGetDescriptor_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuArrayDestroy(hArray: CUarray) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuArray3DCreate_v2(
pHandle: *mut CUarray,
pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR,
@@ -3157,8 +3045,7 @@ pub extern "C" fn cuArray3DCreate_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuArray3DGetDescriptor_v2(
pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR,
hArray: CUarray,
@@ -3166,8 +3053,7 @@ pub extern "C" fn cuArray3DGetDescriptor_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMipmappedArrayCreate(
pHandle: *mut CUmipmappedArray,
pMipmappedArrayDesc: *const CUDA_ARRAY3D_DESCRIPTOR,
@@ -3176,8 +3062,7 @@ pub extern "C" fn cuMipmappedArrayCreate(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMipmappedArrayGetLevel(
pLevelArray: *mut CUarray,
hMipmappedArray: CUmipmappedArray,
@@ -3186,14 +3071,12 @@ pub extern "C" fn cuMipmappedArrayGetLevel(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMipmappedArrayDestroy(hMipmappedArray: CUmipmappedArray) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemAddressReserve(
ptr: *mut CUdeviceptr,
size: usize,
@@ -3204,14 +3087,12 @@ pub extern "C" fn cuMemAddressReserve(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemAddressFree(ptr: CUdeviceptr, size: usize) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemCreate(
handle: *mut CUmemGenericAllocationHandle,
size: usize,
@@ -3221,14 +3102,12 @@ pub extern "C" fn cuMemCreate(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemRelease(handle: CUmemGenericAllocationHandle) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemMap(
ptr: CUdeviceptr,
size: usize,
@@ -3239,14 +3118,12 @@ pub extern "C" fn cuMemMap(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemUnmap(ptr: CUdeviceptr, size: usize) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemSetAccess(
ptr: CUdeviceptr,
size: usize,
@@ -3256,8 +3133,7 @@ pub extern "C" fn cuMemSetAccess(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemGetAccess(
flags: *mut ::std::os::raw::c_ulonglong,
location: *const CUmemLocation,
@@ -3266,8 +3142,7 @@ pub extern "C" fn cuMemGetAccess(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemExportToShareableHandle(
shareableHandle: *mut ::std::os::raw::c_void,
handle: CUmemGenericAllocationHandle,
@@ -3277,8 +3152,7 @@ pub extern "C" fn cuMemExportToShareableHandle(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemImportFromShareableHandle(
handle: *mut CUmemGenericAllocationHandle,
osHandle: *mut ::std::os::raw::c_void,
@@ -3287,8 +3161,7 @@ pub extern "C" fn cuMemImportFromShareableHandle(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemGetAllocationGranularity(
granularity: *mut usize,
prop: *const CUmemAllocationProp,
@@ -3297,8 +3170,7 @@ pub extern "C" fn cuMemGetAllocationGranularity(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemGetAllocationPropertiesFromHandle(
prop: *mut CUmemAllocationProp,
handle: CUmemGenericAllocationHandle,
@@ -3306,8 +3178,7 @@ pub extern "C" fn cuMemGetAllocationPropertiesFromHandle(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemRetainAllocationHandle(
handle: *mut CUmemGenericAllocationHandle,
addr: *mut ::std::os::raw::c_void,
@@ -3315,8 +3186,7 @@ pub extern "C" fn cuMemRetainAllocationHandle(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuPointerGetAttribute(
data: *mut ::std::os::raw::c_void,
attribute: CUpointer_attribute,
@@ -3325,8 +3195,7 @@ pub extern "C" fn cuPointerGetAttribute(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemPrefetchAsync(
devPtr: CUdeviceptr,
count: usize,
@@ -3336,8 +3205,7 @@ pub extern "C" fn cuMemPrefetchAsync(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemAdvise(
devPtr: CUdeviceptr,
count: usize,
@@ -3347,8 +3215,7 @@ pub extern "C" fn cuMemAdvise(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemRangeGetAttribute(
data: *mut ::std::os::raw::c_void,
dataSize: usize,
@@ -3359,8 +3226,7 @@ pub extern "C" fn cuMemRangeGetAttribute(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuMemRangeGetAttributes(
data: *mut *mut ::std::os::raw::c_void,
dataSizes: *mut usize,
@@ -3372,8 +3238,7 @@ pub extern "C" fn cuMemRangeGetAttributes(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuPointerSetAttribute(
value: *const ::std::os::raw::c_void,
attribute: CUpointer_attribute,
@@ -3382,8 +3247,7 @@ pub extern "C" fn cuPointerSetAttribute(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuPointerGetAttributes(
numAttributes: ::std::os::raw::c_uint,
attributes: *mut CUpointer_attribute,
@@ -3393,8 +3257,7 @@ pub extern "C" fn cuPointerGetAttributes(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamCreate(
phStream: *mut CUstream,
Flags: ::std::os::raw::c_uint,
@@ -3402,8 +3265,7 @@ pub extern "C" fn cuStreamCreate(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamCreateWithPriority(
phStream: *mut CUstream,
flags: ::std::os::raw::c_uint,
@@ -3412,8 +3274,7 @@ pub extern "C" fn cuStreamCreateWithPriority(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamGetPriority(
hStream: CUstream,
priority: *mut ::std::os::raw::c_int,
@@ -3421,8 +3282,7 @@ pub extern "C" fn cuStreamGetPriority(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamGetFlags(
hStream: CUstream,
flags: *mut ::std::os::raw::c_uint,
@@ -3430,14 +3290,12 @@ pub extern "C" fn cuStreamGetFlags(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamGetCtx(hStream: CUstream, pctx: *mut CUcontext) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamWaitEvent(
hStream: CUstream,
hEvent: CUevent,
@@ -3446,8 +3304,7 @@ pub extern "C" fn cuStreamWaitEvent(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamAddCallback(
hStream: CUstream,
callback: CUstreamCallback,
@@ -3457,8 +3314,7 @@ pub extern "C" fn cuStreamAddCallback(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamBeginCapture_v2(
hStream: CUstream,
mode: CUstreamCaptureMode,
@@ -3466,20 +3322,17 @@ pub extern "C" fn cuStreamBeginCapture_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuThreadExchangeStreamCaptureMode(mode: *mut CUstreamCaptureMode) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamEndCapture(hStream: CUstream, phGraph: *mut CUgraph) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamIsCapturing(
hStream: CUstream,
captureStatus: *mut CUstreamCaptureStatus,
@@ -3487,8 +3340,7 @@ pub extern "C" fn cuStreamIsCapturing(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamGetCaptureInfo(
hStream: CUstream,
captureStatus: *mut CUstreamCaptureStatus,
@@ -3497,8 +3349,7 @@ pub extern "C" fn cuStreamGetCaptureInfo(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamAttachMemAsync(
hStream: CUstream,
dptr: CUdeviceptr,
@@ -3508,32 +3359,27 @@ pub extern "C" fn cuStreamAttachMemAsync(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamQuery(hStream: CUstream) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamSynchronize(hStream: CUstream) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamDestroy_v2(hStream: CUstream) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamCopyAttributes(dst: CUstream, src: CUstream) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamGetAttribute(
hStream: CUstream,
attr: CUstreamAttrID,
@@ -3542,8 +3388,7 @@ pub extern "C" fn cuStreamGetAttribute(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamSetAttribute(
hStream: CUstream,
attr: CUstreamAttrID,
@@ -3552,38 +3397,32 @@ pub extern "C" fn cuStreamSetAttribute(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuEventCreate(phEvent: *mut CUevent, Flags: ::std::os::raw::c_uint) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuEventRecord(hEvent: CUevent, hStream: CUstream) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuEventQuery(hEvent: CUevent) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuEventSynchronize(hEvent: CUevent) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuEventDestroy_v2(hEvent: CUevent) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuEventElapsedTime(
pMilliseconds: *mut f32,
hStart: CUevent,
@@ -3592,8 +3431,7 @@ pub extern "C" fn cuEventElapsedTime(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuImportExternalMemory(
extMem_out: *mut CUexternalMemory,
memHandleDesc: *const CUDA_EXTERNAL_MEMORY_HANDLE_DESC,
@@ -3601,8 +3439,7 @@ pub extern "C" fn cuImportExternalMemory(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuExternalMemoryGetMappedBuffer(
devPtr: *mut CUdeviceptr,
extMem: CUexternalMemory,
@@ -3611,8 +3448,7 @@ pub extern "C" fn cuExternalMemoryGetMappedBuffer(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuExternalMemoryGetMappedMipmappedArray(
mipmap: *mut CUmipmappedArray,
extMem: CUexternalMemory,
@@ -3621,14 +3457,12 @@ pub extern "C" fn cuExternalMemoryGetMappedMipmappedArray(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDestroyExternalMemory(extMem: CUexternalMemory) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuImportExternalSemaphore(
extSem_out: *mut CUexternalSemaphore,
semHandleDesc: *const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC,
@@ -3636,8 +3470,7 @@ pub extern "C" fn cuImportExternalSemaphore(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuSignalExternalSemaphoresAsync(
extSemArray: *const CUexternalSemaphore,
paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,
@@ -3647,8 +3480,7 @@ pub extern "C" fn cuSignalExternalSemaphoresAsync(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuWaitExternalSemaphoresAsync(
extSemArray: *const CUexternalSemaphore,
paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,
@@ -3658,14 +3490,12 @@ pub extern "C" fn cuWaitExternalSemaphoresAsync(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDestroyExternalSemaphore(extSem: CUexternalSemaphore) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamWaitValue32(
stream: CUstream,
addr: CUdeviceptr,
@@ -3675,8 +3505,7 @@ pub extern "C" fn cuStreamWaitValue32(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamWaitValue64(
stream: CUstream,
addr: CUdeviceptr,
@@ -3686,8 +3515,7 @@ pub extern "C" fn cuStreamWaitValue64(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamWriteValue32(
stream: CUstream,
addr: CUdeviceptr,
@@ -3697,8 +3525,7 @@ pub extern "C" fn cuStreamWriteValue32(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamWriteValue64(
stream: CUstream,
addr: CUdeviceptr,
@@ -3708,8 +3535,7 @@ pub extern "C" fn cuStreamWriteValue64(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuStreamBatchMemOp(
stream: CUstream,
count: ::std::os::raw::c_uint,
@@ -3719,8 +3545,7 @@ pub extern "C" fn cuStreamBatchMemOp(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuFuncGetAttribute(
pi: *mut ::std::os::raw::c_int,
attrib: CUfunction_attribute,
@@ -3729,8 +3554,7 @@ pub extern "C" fn cuFuncGetAttribute(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuFuncSetAttribute(
hfunc: CUfunction,
attrib: CUfunction_attribute,
@@ -3739,20 +3563,17 @@ pub extern "C" fn cuFuncSetAttribute(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuFuncSetCacheConfig(hfunc: CUfunction, config: CUfunc_cache) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuFuncSetSharedMemConfig(hfunc: CUfunction, config: CUsharedconfig) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuLaunchKernel(
f: CUfunction,
gridDimX: ::std::os::raw::c_uint,
@@ -3766,11 +3587,23 @@ pub extern "C" fn cuLaunchKernel(
kernelParams: *mut *mut ::std::os::raw::c_void,
extra: *mut *mut ::std::os::raw::c_void,
) -> CUresult {
- r#impl::unimplemented()
-}
-
-#[no_mangle]
-#[cfg(not(test))]
+ r#impl::function::launch_kernel(
+ f.decuda(),
+ gridDimX,
+ gridDimY,
+ gridDimZ,
+ blockDimX,
+ blockDimY,
+ blockDimZ,
+ sharedMemBytes,
+ hStream.decuda(),
+ kernelParams,
+ extra,
+ )
+ .encuda()
+}
+
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuLaunchCooperativeKernel(
f: CUfunction,
gridDimX: ::std::os::raw::c_uint,
@@ -3786,8 +3619,7 @@ pub extern "C" fn cuLaunchCooperativeKernel(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuLaunchCooperativeKernelMultiDevice(
launchParamsList: *mut CUDA_LAUNCH_PARAMS,
numDevices: ::std::os::raw::c_uint,
@@ -3796,8 +3628,7 @@ pub extern "C" fn cuLaunchCooperativeKernelMultiDevice(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuLaunchHostFunc(
hStream: CUstream,
fn_: CUhostFn,
@@ -3806,8 +3637,7 @@ pub extern "C" fn cuLaunchHostFunc(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuFuncSetBlockShape(
hfunc: CUfunction,
x: ::std::os::raw::c_int,
@@ -3817,8 +3647,7 @@ pub extern "C" fn cuFuncSetBlockShape(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuFuncSetSharedSize(
hfunc: CUfunction,
bytes: ::std::os::raw::c_uint,
@@ -3826,14 +3655,12 @@ pub extern "C" fn cuFuncSetSharedSize(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuParamSetSize(hfunc: CUfunction, numbytes: ::std::os::raw::c_uint) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuParamSeti(
hfunc: CUfunction,
offset: ::std::os::raw::c_int,
@@ -3842,8 +3669,7 @@ pub extern "C" fn cuParamSeti(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuParamSetf(
hfunc: CUfunction,
offset: ::std::os::raw::c_int,
@@ -3852,8 +3678,7 @@ pub extern "C" fn cuParamSetf(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuParamSetv(
hfunc: CUfunction,
offset: ::std::os::raw::c_int,
@@ -3863,14 +3688,12 @@ pub extern "C" fn cuParamSetv(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuLaunch(f: CUfunction) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuLaunchGrid(
f: CUfunction,
grid_width: ::std::os::raw::c_int,
@@ -3879,8 +3702,7 @@ pub extern "C" fn cuLaunchGrid(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuLaunchGridAsync(
f: CUfunction,
grid_width: ::std::os::raw::c_int,
@@ -3890,8 +3712,7 @@ pub extern "C" fn cuLaunchGridAsync(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuParamSetTexRef(
hfunc: CUfunction,
texunit: ::std::os::raw::c_int,
@@ -3900,14 +3721,12 @@ pub extern "C" fn cuParamSetTexRef(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphCreate(phGraph: *mut CUgraph, flags: ::std::os::raw::c_uint) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphAddKernelNode(
phGraphNode: *mut CUgraphNode,
hGraph: CUgraph,
@@ -3918,8 +3737,7 @@ pub extern "C" fn cuGraphAddKernelNode(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphKernelNodeGetParams(
hNode: CUgraphNode,
nodeParams: *mut CUDA_KERNEL_NODE_PARAMS,
@@ -3927,8 +3745,7 @@ pub extern "C" fn cuGraphKernelNodeGetParams(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphKernelNodeSetParams(
hNode: CUgraphNode,
nodeParams: *const CUDA_KERNEL_NODE_PARAMS,
@@ -3936,8 +3753,7 @@ pub extern "C" fn cuGraphKernelNodeSetParams(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphAddMemcpyNode(
phGraphNode: *mut CUgraphNode,
hGraph: CUgraph,
@@ -3949,8 +3765,7 @@ pub extern "C" fn cuGraphAddMemcpyNode(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphMemcpyNodeGetParams(
hNode: CUgraphNode,
nodeParams: *mut CUDA_MEMCPY3D,
@@ -3958,8 +3773,7 @@ pub extern "C" fn cuGraphMemcpyNodeGetParams(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphMemcpyNodeSetParams(
hNode: CUgraphNode,
nodeParams: *const CUDA_MEMCPY3D,
@@ -3967,8 +3781,7 @@ pub extern "C" fn cuGraphMemcpyNodeSetParams(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphAddMemsetNode(
phGraphNode: *mut CUgraphNode,
hGraph: CUgraph,
@@ -3980,8 +3793,7 @@ pub extern "C" fn cuGraphAddMemsetNode(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphMemsetNodeGetParams(
hNode: CUgraphNode,
nodeParams: *mut CUDA_MEMSET_NODE_PARAMS,
@@ -3989,8 +3801,7 @@ pub extern "C" fn cuGraphMemsetNodeGetParams(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphMemsetNodeSetParams(
hNode: CUgraphNode,
nodeParams: *const CUDA_MEMSET_NODE_PARAMS,
@@ -3998,8 +3809,7 @@ pub extern "C" fn cuGraphMemsetNodeSetParams(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphAddHostNode(
phGraphNode: *mut CUgraphNode,
hGraph: CUgraph,
@@ -4010,8 +3820,7 @@ pub extern "C" fn cuGraphAddHostNode(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphHostNodeGetParams(
hNode: CUgraphNode,
nodeParams: *mut CUDA_HOST_NODE_PARAMS,
@@ -4019,8 +3828,7 @@ pub extern "C" fn cuGraphHostNodeGetParams(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphHostNodeSetParams(
hNode: CUgraphNode,
nodeParams: *const CUDA_HOST_NODE_PARAMS,
@@ -4028,8 +3836,7 @@ pub extern "C" fn cuGraphHostNodeSetParams(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphAddChildGraphNode(
phGraphNode: *mut CUgraphNode,
hGraph: CUgraph,
@@ -4040,8 +3847,7 @@ pub extern "C" fn cuGraphAddChildGraphNode(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphChildGraphNodeGetGraph(
hNode: CUgraphNode,
phGraph: *mut CUgraph,
@@ -4049,8 +3855,7 @@ pub extern "C" fn cuGraphChildGraphNodeGetGraph(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphAddEmptyNode(
phGraphNode: *mut CUgraphNode,
hGraph: CUgraph,
@@ -4060,14 +3865,12 @@ pub extern "C" fn cuGraphAddEmptyNode(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphClone(phGraphClone: *mut CUgraph, originalGraph: CUgraph) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphNodeFindInClone(
phNode: *mut CUgraphNode,
hOriginalNode: CUgraphNode,
@@ -4076,14 +3879,12 @@ pub extern "C" fn cuGraphNodeFindInClone(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphNodeGetType(hNode: CUgraphNode, type_: *mut CUgraphNodeType) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphGetNodes(
hGraph: CUgraph,
nodes: *mut CUgraphNode,
@@ -4092,8 +3893,7 @@ pub extern "C" fn cuGraphGetNodes(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphGetRootNodes(
hGraph: CUgraph,
rootNodes: *mut CUgraphNode,
@@ -4102,8 +3902,7 @@ pub extern "C" fn cuGraphGetRootNodes(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphGetEdges(
hGraph: CUgraph,
from: *mut CUgraphNode,
@@ -4113,8 +3912,7 @@ pub extern "C" fn cuGraphGetEdges(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphNodeGetDependencies(
hNode: CUgraphNode,
dependencies: *mut CUgraphNode,
@@ -4123,8 +3921,7 @@ pub extern "C" fn cuGraphNodeGetDependencies(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphNodeGetDependentNodes(
hNode: CUgraphNode,
dependentNodes: *mut CUgraphNode,
@@ -4133,8 +3930,7 @@ pub extern "C" fn cuGraphNodeGetDependentNodes(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphAddDependencies(
hGraph: CUgraph,
from: *const CUgraphNode,
@@ -4144,8 +3940,7 @@ pub extern "C" fn cuGraphAddDependencies(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphRemoveDependencies(
hGraph: CUgraph,
from: *const CUgraphNode,
@@ -4155,14 +3950,12 @@ pub extern "C" fn cuGraphRemoveDependencies(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphDestroyNode(hNode: CUgraphNode) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphInstantiate_v2(
phGraphExec: *mut CUgraphExec,
hGraph: CUgraph,
@@ -4173,8 +3966,7 @@ pub extern "C" fn cuGraphInstantiate_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphExecKernelNodeSetParams(
hGraphExec: CUgraphExec,
hNode: CUgraphNode,
@@ -4183,8 +3975,7 @@ pub extern "C" fn cuGraphExecKernelNodeSetParams(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphExecMemcpyNodeSetParams(
hGraphExec: CUgraphExec,
hNode: CUgraphNode,
@@ -4194,8 +3985,7 @@ pub extern "C" fn cuGraphExecMemcpyNodeSetParams(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphExecMemsetNodeSetParams(
hGraphExec: CUgraphExec,
hNode: CUgraphNode,
@@ -4205,8 +3995,7 @@ pub extern "C" fn cuGraphExecMemsetNodeSetParams(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphExecHostNodeSetParams(
hGraphExec: CUgraphExec,
hNode: CUgraphNode,
@@ -4215,26 +4004,22 @@ pub extern "C" fn cuGraphExecHostNodeSetParams(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphLaunch(hGraphExec: CUgraphExec, hStream: CUstream) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphExecDestroy(hGraphExec: CUgraphExec) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphDestroy(hGraph: CUgraph) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphExecUpdate(
hGraphExec: CUgraphExec,
hGraph: CUgraph,
@@ -4244,14 +4029,12 @@ pub extern "C" fn cuGraphExecUpdate(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphKernelNodeCopyAttributes(dst: CUgraphNode, src: CUgraphNode) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphKernelNodeGetAttribute(
hNode: CUgraphNode,
attr: CUkernelNodeAttrID,
@@ -4260,8 +4043,7 @@ pub extern "C" fn cuGraphKernelNodeGetAttribute(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphKernelNodeSetAttribute(
hNode: CUgraphNode,
attr: CUkernelNodeAttrID,
@@ -4270,8 +4052,7 @@ pub extern "C" fn cuGraphKernelNodeSetAttribute(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuOccupancyMaxActiveBlocksPerMultiprocessor(
numBlocks: *mut ::std::os::raw::c_int,
func: CUfunction,
@@ -4281,8 +4062,7 @@ pub extern "C" fn cuOccupancyMaxActiveBlocksPerMultiprocessor(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
numBlocks: *mut ::std::os::raw::c_int,
func: CUfunction,
@@ -4293,8 +4073,7 @@ pub extern "C" fn cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuOccupancyMaxPotentialBlockSize(
minGridSize: *mut ::std::os::raw::c_int,
blockSize: *mut ::std::os::raw::c_int,
@@ -4306,8 +4085,7 @@ pub extern "C" fn cuOccupancyMaxPotentialBlockSize(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuOccupancyMaxPotentialBlockSizeWithFlags(
minGridSize: *mut ::std::os::raw::c_int,
blockSize: *mut ::std::os::raw::c_int,
@@ -4320,8 +4098,7 @@ pub extern "C" fn cuOccupancyMaxPotentialBlockSizeWithFlags(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuOccupancyAvailableDynamicSMemPerBlock(
dynamicSmemSize: *mut usize,
func: CUfunction,
@@ -4331,8 +4108,7 @@ pub extern "C" fn cuOccupancyAvailableDynamicSMemPerBlock(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefSetArray(
hTexRef: CUtexref,
hArray: CUarray,
@@ -4341,8 +4117,7 @@ pub extern "C" fn cuTexRefSetArray(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefSetMipmappedArray(
hTexRef: CUtexref,
hMipmappedArray: CUmipmappedArray,
@@ -4351,8 +4126,7 @@ pub extern "C" fn cuTexRefSetMipmappedArray(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefSetAddress_v2(
ByteOffset: *mut usize,
hTexRef: CUtexref,
@@ -4362,8 +4136,7 @@ pub extern "C" fn cuTexRefSetAddress_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefSetAddress2D_v3(
hTexRef: CUtexref,
desc: *const CUDA_ARRAY_DESCRIPTOR,
@@ -4373,8 +4146,7 @@ pub extern "C" fn cuTexRefSetAddress2D_v3(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefSetFormat(
hTexRef: CUtexref,
fmt: CUarray_format,
@@ -4383,8 +4155,7 @@ pub extern "C" fn cuTexRefSetFormat(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefSetAddressMode(
hTexRef: CUtexref,
dim: ::std::os::raw::c_int,
@@ -4393,26 +4164,22 @@ pub extern "C" fn cuTexRefSetAddressMode(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefSetFilterMode(hTexRef: CUtexref, fm: CUfilter_mode) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefSetMipmapFilterMode(hTexRef: CUtexref, fm: CUfilter_mode) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefSetMipmapLevelBias(hTexRef: CUtexref, bias: f32) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefSetMipmapLevelClamp(
hTexRef: CUtexref,
minMipmapLevelClamp: f32,
@@ -4421,8 +4188,7 @@ pub extern "C" fn cuTexRefSetMipmapLevelClamp(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefSetMaxAnisotropy(
hTexRef: CUtexref,
maxAniso: ::std::os::raw::c_uint,
@@ -4430,32 +4196,27 @@ pub extern "C" fn cuTexRefSetMaxAnisotropy(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefSetBorderColor(hTexRef: CUtexref, pBorderColor: *mut f32) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefSetFlags(hTexRef: CUtexref, Flags: ::std::os::raw::c_uint) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefGetAddress_v2(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefGetArray(phArray: *mut CUarray, hTexRef: CUtexref) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefGetMipmappedArray(
phMipmappedArray: *mut CUmipmappedArray,
hTexRef: CUtexref,
@@ -4463,8 +4224,7 @@ pub extern "C" fn cuTexRefGetMipmappedArray(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefGetAddressMode(
pam: *mut CUaddress_mode,
hTexRef: CUtexref,
@@ -4473,14 +4233,12 @@ pub extern "C" fn cuTexRefGetAddressMode(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefGetFilterMode(pfm: *mut CUfilter_mode, hTexRef: CUtexref) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefGetFormat(
pFormat: *mut CUarray_format,
pNumChannels: *mut ::std::os::raw::c_int,
@@ -4489,8 +4247,7 @@ pub extern "C" fn cuTexRefGetFormat(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefGetMipmapFilterMode(
pfm: *mut CUfilter_mode,
hTexRef: CUtexref,
@@ -4498,14 +4255,12 @@ pub extern "C" fn cuTexRefGetMipmapFilterMode(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefGetMipmapLevelBias(pbias: *mut f32, hTexRef: CUtexref) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefGetMipmapLevelClamp(
pminMipmapLevelClamp: *mut f32,
pmaxMipmapLevelClamp: *mut f32,
@@ -4514,8 +4269,7 @@ pub extern "C" fn cuTexRefGetMipmapLevelClamp(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefGetMaxAnisotropy(
pmaxAniso: *mut ::std::os::raw::c_int,
hTexRef: CUtexref,
@@ -4523,14 +4277,12 @@ pub extern "C" fn cuTexRefGetMaxAnisotropy(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefGetBorderColor(pBorderColor: *mut f32, hTexRef: CUtexref) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefGetFlags(
pFlags: *mut ::std::os::raw::c_uint,
hTexRef: CUtexref,
@@ -4538,20 +4290,17 @@ pub extern "C" fn cuTexRefGetFlags(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefCreate(pTexRef: *mut CUtexref) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexRefDestroy(hTexRef: CUtexref) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuSurfRefSetArray(
hSurfRef: CUsurfref,
hArray: CUarray,
@@ -4560,14 +4309,12 @@ pub extern "C" fn cuSurfRefSetArray(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuSurfRefGetArray(phArray: *mut CUarray, hSurfRef: CUsurfref) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexObjectCreate(
pTexObject: *mut CUtexObject,
pResDesc: *const CUDA_RESOURCE_DESC,
@@ -4577,14 +4324,12 @@ pub extern "C" fn cuTexObjectCreate(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexObjectDestroy(texObject: CUtexObject) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexObjectGetResourceDesc(
pResDesc: *mut CUDA_RESOURCE_DESC,
texObject: CUtexObject,
@@ -4592,8 +4337,7 @@ pub extern "C" fn cuTexObjectGetResourceDesc(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexObjectGetTextureDesc(
pTexDesc: *mut CUDA_TEXTURE_DESC,
texObject: CUtexObject,
@@ -4601,8 +4345,7 @@ pub extern "C" fn cuTexObjectGetTextureDesc(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuTexObjectGetResourceViewDesc(
pResViewDesc: *mut CUDA_RESOURCE_VIEW_DESC,
texObject: CUtexObject,
@@ -4610,8 +4353,7 @@ pub extern "C" fn cuTexObjectGetResourceViewDesc(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuSurfObjectCreate(
pSurfObject: *mut CUsurfObject,
pResDesc: *const CUDA_RESOURCE_DESC,
@@ -4619,14 +4361,12 @@ pub extern "C" fn cuSurfObjectCreate(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuSurfObjectDestroy(surfObject: CUsurfObject) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuSurfObjectGetResourceDesc(
pResDesc: *mut CUDA_RESOURCE_DESC,
surfObject: CUsurfObject,
@@ -4634,8 +4374,7 @@ pub extern "C" fn cuSurfObjectGetResourceDesc(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceCanAccessPeer(
canAccessPeer: *mut ::std::os::raw::c_int,
dev: CUdevice,
@@ -4644,8 +4383,7 @@ pub extern "C" fn cuDeviceCanAccessPeer(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxEnablePeerAccess(
peerContext: CUcontext,
Flags: ::std::os::raw::c_uint,
@@ -4653,14 +4391,12 @@ pub extern "C" fn cuCtxEnablePeerAccess(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuCtxDisablePeerAccess(peerContext: CUcontext) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuDeviceGetP2PAttribute(
value: *mut ::std::os::raw::c_int,
attrib: CUdevice_P2PAttribute,
@@ -4670,14 +4406,12 @@ pub extern "C" fn cuDeviceGetP2PAttribute(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphicsUnregisterResource(resource: CUgraphicsResource) -> CUresult {
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphicsSubResourceGetMappedArray(
pArray: *mut CUarray,
resource: CUgraphicsResource,
@@ -4687,8 +4421,7 @@ pub extern "C" fn cuGraphicsSubResourceGetMappedArray(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphicsResourceGetMappedMipmappedArray(
pMipmappedArray: *mut CUmipmappedArray,
resource: CUgraphicsResource,
@@ -4696,8 +4429,7 @@ pub extern "C" fn cuGraphicsResourceGetMappedMipmappedArray(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphicsResourceGetMappedPointer_v2(
pDevPtr: *mut CUdeviceptr,
pSize: *mut usize,
@@ -4706,8 +4438,7 @@ pub extern "C" fn cuGraphicsResourceGetMappedPointer_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphicsResourceSetMapFlags_v2(
resource: CUgraphicsResource,
flags: ::std::os::raw::c_uint,
@@ -4715,8 +4446,7 @@ pub extern "C" fn cuGraphicsResourceSetMapFlags_v2(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphicsMapResources(
count: ::std::os::raw::c_uint,
resources: *mut CUgraphicsResource,
@@ -4725,8 +4455,7 @@ pub extern "C" fn cuGraphicsMapResources(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGraphicsUnmapResources(
count: ::std::os::raw::c_uint,
resources: *mut CUgraphicsResource,
@@ -4735,8 +4464,7 @@ pub extern "C" fn cuGraphicsUnmapResources(
r#impl::unimplemented()
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuGetExportTable(
ppExportTable: *mut *const ::std::os::raw::c_void,
pExportTableId: *const CUuuid,
@@ -4744,8 +4472,7 @@ pub extern "C" fn cuGetExportTable(
r#impl::export_table::get(ppExportTable, pExportTableId)
}
-#[no_mangle]
-#[cfg(not(test))]
+#[cfg_attr(not(test), no_mangle)]
pub extern "C" fn cuFuncGetModule(hmod: *mut CUmodule, hfunc: CUfunction) -> CUresult {
r#impl::unimplemented()
}
diff --git a/notcuda/src/impl/function.rs b/notcuda/src/impl/function.rs
new file mode 100644
index 0000000..6f8773e
--- /dev/null
+++ b/notcuda/src/impl/function.rs
@@ -0,0 +1,52 @@
+use ::std::os::raw::{c_uint, c_void};
+use std::ptr;
+
+use super::{context, device, stream::Stream, CUresult};
+
+pub struct Function {
+ pub base: l0::Kernel<'static>,
+ pub arg_size: Vec<usize>,
+}
+
+pub fn launch_kernel(
+ f: *mut Function,
+ grid_dim_x: c_uint,
+ grid_dim_y: c_uint,
+ grid_dim_z: c_uint,
+ block_dim_x: c_uint,
+ block_dim_y: c_uint,
+ block_dim_z: c_uint,
+ shared_mem_bytes: c_uint,
+ strean: *mut Stream,
+ kernel_params: *mut *mut c_void,
+ extra: *mut *mut c_void,
+) -> Result<(), CUresult> {
+ if f == ptr::null_mut() {
+ return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
+ }
+ if shared_mem_bytes != 0 || strean != ptr::null_mut() || extra != ptr::null_mut() {
+ return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED);
+ }
+ let func = unsafe { &*f };
+ for (i, arg_size) in func.arg_size.iter().copied().enumerate() {
+ unsafe {
+ func.base
+ .set_arg_raw(i as u32, arg_size, *kernel_params.add(i))?
+ };
+ }
+ unsafe { &*f }
+ .base
+ .set_group_size(block_dim_x, block_dim_y, block_dim_z)?;
+ device::with_current_exclusive(|dev| {
+ let mut cmd_list = l0::CommandList::new(&mut dev.l0_context, &dev.base)?;
+ cmd_list.append_launch_kernel(
+ &unsafe { &*f }.base,
+ &[grid_dim_x, grid_dim_y, grid_dim_z],
+ None,
+ &mut [],
+ )?;
+ dev.default_queue.execute(cmd_list)?;
+ l0::Result::Ok(())
+ })??;
+ Ok(())
+}
diff --git a/notcuda/src/impl/memory.rs b/notcuda/src/impl/memory.rs
index 52e269d..3f92b5e 100644
--- a/notcuda/src/impl/memory.rs
+++ b/notcuda/src/impl/memory.rs
@@ -46,6 +46,10 @@ unsafe fn memcpy_impl(
Ok(())
}
+pub(crate) fn free_v2(mem: *mut c_void)-> l0::Result<()> {
+ Ok(())
+}
+
#[cfg(test)]
mod tests {
use super::super::test::CudaDriverFns;
diff --git a/notcuda/src/impl/mod.rs b/notcuda/src/impl/mod.rs
index c37b85d..3d31da2 100644
--- a/notcuda/src/impl/mod.rs
+++ b/notcuda/src/impl/mod.rs
@@ -1,4 +1,4 @@
-use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunction, CUmod_st, CUmodule, CUresult};
+use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUfunction, CUmod_st, CUmodule, CUresult, CUstream, CUstream_st};
use std::{ffi::c_void, mem::{self, ManuallyDrop}, os::raw::c_int, sync::Mutex};
#[cfg(test)]
@@ -9,6 +9,8 @@ pub mod device;
pub mod export_table;
pub mod memory;
pub mod module;
+pub mod function;
+pub mod stream;
#[cfg(debug_assertions)]
pub fn unimplemented() -> CUresult {
@@ -242,6 +244,10 @@ impl<'a> CudaRepr for CUmod_st {
type Impl = module::Module;
}
-impl<'a> CudaRepr for CUfunction {
- type Impl = *mut module::Function;
+impl<'a> CudaRepr for CUfunc_st {
+ type Impl = function::Function;
+}
+
+impl<'a> CudaRepr for CUstream_st {
+ type Impl = stream::Stream;
}
diff --git a/notcuda/src/impl/module.rs b/notcuda/src/impl/module.rs
index 06d050d..fc55f33 100644
--- a/notcuda/src/impl/module.rs
+++ b/notcuda/src/impl/module.rs
@@ -1,6 +1,9 @@
-use std::{ffi::c_void, ffi::CStr, mem, os::raw::c_char, ptr, slice, sync::Mutex};
+use std::{
+ collections::HashMap, ffi::c_void, ffi::CStr, ffi::CString, mem, os::raw::c_char, ptr, slice,
+ sync::Mutex,
+};
-use super::{transmute_lifetime, CUresult};
+use super::{function::Function, transmute_lifetime, CUresult};
use ptx;
use super::context;
@@ -9,6 +12,7 @@ pub type Module = Mutex<ModuleData>;
pub struct ModuleData {
base: l0::Module,
+ arg_lens: HashMap<CString, Vec<usize>>,
}
pub enum ModuleCompileError<'a> {
@@ -52,7 +56,7 @@ impl ModuleData {
Ok(_) if errors.len() > 0 => return Err(ModuleCompileError::Parse(errors, None)),
Ok(ast) => ast,
};
- let spirv = ptx::to_spirv(ast)?;
+ let (spirv, all_arg_lens) = ptx::to_spirv(ast)?;
let byte_il = unsafe {
slice::from_raw_parts::<u8>(
spirv.as_ptr() as *const _,
@@ -63,17 +67,19 @@ impl ModuleData {
l0::Module::new_spirv(&mut dev.l0_context, &dev.base, byte_il, None)
});
match module {
- Ok(Ok(module)) => Ok(Mutex::new(Self { base: module })),
+ Ok(Ok(module)) => Ok(Mutex::new(Self {
+ base: module,
+ arg_lens: all_arg_lens
+ .into_iter()
+ .map(|(k, v)| (CString::new(k).unwrap(), v))
+ .collect(),
+ })),
Ok(Err(err)) => Err(ModuleCompileError::from(err)),
Err(err) => Err(ModuleCompileError::from(err)),
}
}
}
-pub struct Function {
- base: l0::Kernel<'static>,
-}
-
pub fn get_function(
hfunc: *mut *mut Function,
hmod: *mut Module,
@@ -83,10 +89,33 @@ pub fn get_function(
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
}
let name = unsafe { CStr::from_ptr(name) };
- let kernel = unsafe { &*hmod }
+ let (mut kernel, args_len) = unsafe { &*hmod }
.try_lock()
- .map(|module| l0::Kernel::new_resident(unsafe { transmute_lifetime(&module.base) }, name))
+ .map(|module| {
+ Result::<_, CUresult>::Ok((
+ l0::Kernel::new_resident(unsafe { transmute_lifetime(&module.base) }, name)?,
+ module
+ .arg_lens
+ .get(name)
+ .ok_or(CUresult::CUDA_ERROR_NOT_FOUND)?
+ .clone(),
+ ))
+ })
.map_err(|_| CUresult::CUDA_ERROR_ILLEGAL_STATE)??;
- unsafe { *hfunc = Box::into_raw(Box::new(Function { base: kernel })) };
+ kernel.set_indirect_access(
+ l0::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE
+ | l0::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST
+ | l0::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED,
+ )?;
+ unsafe {
+ *hfunc = Box::into_raw(Box::new(Function {
+ base: kernel,
+ arg_size: args_len,
+ }))
+ };
+ Ok(())
+}
+
+pub(crate) fn unload(decuda: *mut Module) -> Result<(), CUresult> {
Ok(())
}
diff --git a/notcuda/src/impl/stream.rs b/notcuda/src/impl/stream.rs
new file mode 100644
index 0000000..7410100
--- /dev/null
+++ b/notcuda/src/impl/stream.rs
@@ -0,0 +1,69 @@
+use std::cell::RefCell;
+
+use device::Device;
+
+use super::device;
+
+pub struct Stream {
+ dev: *mut Device,
+}
+
+pub struct DefaultStream {
+ streams: Vec<Option<Stream>>,
+}
+
+impl DefaultStream {
+ fn new() -> Self {
+ DefaultStream {
+ streams: Vec::new(),
+ }
+ }
+}
+
+thread_local! {
+ pub static DEFAULT_STREAM: RefCell<DefaultStream> = RefCell::new(DefaultStream::new());
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::cuda::CUstream;
+
+ use super::super::test::CudaDriverFns;
+ use super::super::CUresult;
+ use std::{ffi::c_void, ptr};
+
+ const CU_STREAM_LEGACY: CUstream = 1 as *mut _;
+ const CU_STREAM_PER_THREAD: CUstream = 2 as *mut _;
+
+ cuda_driver_test!(default_stream_uses_current_ctx_legacy);
+ cuda_driver_test!(default_stream_uses_current_ctx_ptsd);
+
+ fn default_stream_uses_current_ctx_legacy<T: CudaDriverFns>() {
+ default_stream_uses_current_ctx_impl::<T>(CU_STREAM_LEGACY);
+ }
+
+ fn default_stream_uses_current_ctx_ptsd<T: CudaDriverFns>() {
+ default_stream_uses_current_ctx_impl::<T>(CU_STREAM_PER_THREAD);
+ }
+
+ fn default_stream_uses_current_ctx_impl<T: CudaDriverFns>(stream: CUstream) {
+ assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
+ let mut ctx1 = ptr::null_mut();
+ assert_eq!(T::cuCtxCreate_v2(&mut ctx1, 0, 0), CUresult::CUDA_SUCCESS);
+ let mut stream_ctx1 = ptr::null_mut();
+ assert_eq!(
+ T::cuStreamGetCtx(stream, &mut stream_ctx1),
+ CUresult::CUDA_SUCCESS
+ );
+ assert_eq!(ctx1, stream_ctx1);
+ let mut ctx2 = ptr::null_mut();
+ assert_eq!(T::cuCtxCreate_v2(&mut ctx2, 0, 0), CUresult::CUDA_SUCCESS);
+ assert_ne!(ctx1, ctx2);
+ let mut stream_ctx2 = ptr::null_mut();
+ assert_eq!(
+ T::cuStreamGetCtx(stream, &mut stream_ctx2),
+ CUresult::CUDA_SUCCESS
+ );
+ assert_eq!(ctx2, stream_ctx2);
+ }
+}
diff --git a/notcuda/src/impl/test.rs b/notcuda/src/impl/test.rs
index 0ad625b..d4366b7 100644
--- a/notcuda/src/impl/test.rs
+++ b/notcuda/src/impl/test.rs
@@ -1,6 +1,6 @@
#![allow(non_snake_case)]
-use crate::r#impl as notcuda;
+use crate::{cuda::CUcontext, cuda::CUstream, r#impl as notcuda};
use crate::r#impl::CUresult;
use crate::{cuda::CUuuid, r#impl::Encuda};
use ::std::{
@@ -36,14 +36,14 @@ pub trait CudaDriverFns {
fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult;
fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: c_int) -> CUresult;
fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult;
+ fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult;
}
pub struct NotCuda();
impl CudaDriverFns for NotCuda {
fn cuInit(_flags: c_uint) -> CUresult {
- assert!(notcuda::context::is_context_stack_empty());
- notcuda::init().encuda()
+ crate::cuda::cuInit(_flags as _)
}
fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult {
@@ -76,6 +76,10 @@ impl CudaDriverFns for NotCuda {
fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult {
notcuda::device::primary_ctx_get_state(notcuda::device::Index(dev), flags, active).encuda()
}
+
+ fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult {
+ crate::cuda::cuStreamGetCtx(hStream, pctx as _)
+ }
}
pub struct Cuda();
@@ -115,4 +119,8 @@ impl CudaDriverFns for Cuda {
fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult {
unsafe { CUresult(cuda::cuDevicePrimaryCtxGetState(dev, flags, active) as c_uint) }
}
+
+ fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult {
+ unsafe { CUresult(cuda::cuStreamGetCtx(hStream as _, pctx as _) as c_uint) }
+ }
}
diff --git a/ptx/src/ast.rs b/ptx/src/ast.rs
index 77afee6..acefdc1 100644
--- a/ptx/src/ast.rs
+++ b/ptx/src/ast.rs
@@ -320,7 +320,7 @@ pub enum Instruction<P: ArgParams> {
MovVector(MovVectorDetails, Arg2Vec<P>),
Mul(MulDetails, Arg3<P>),
Add(AddDetails, Arg3<P>),
- Setp(SetpData, Arg4<P>),
+ Setp(SetpData, Arg4Setp<P>),
SetpBool(SetpBoolData, Arg5<P>),
Not(NotType, Arg2<P>),
Bra(BraData, Arg1<P>),
@@ -331,9 +331,13 @@ pub enum Instruction<P: ArgParams> {
Ret(RetData),
Call(CallInst<P>),
Abs(AbsDetails, Arg2<P>),
+ Mad(MulDetails, Arg4<P>),
}
#[derive(Copy, Clone)]
+pub struct MadFloatDesc {}
+
+#[derive(Copy, Clone)]
pub struct MovVectorDetails {
pub typ: MovVectorType,
pub length: u8,
@@ -398,6 +402,13 @@ pub struct Arg3<P: ArgParams> {
}
pub struct Arg4<P: ArgParams> {
+ pub dst: P::ID,
+ pub src1: P::Operand,
+ pub src2: P::Operand,
+ pub src3: P::Operand,
+}
+
+pub struct Arg4Setp<P: ArgParams> {
pub dst1: P::ID,
pub dst2: Option<P::ID>,
pub src1: P::Operand,
@@ -503,7 +514,7 @@ sub_scalar_type!(MovVectorType {
pub struct MovDetails {
pub typ: MovType,
- pub src_is_address: bool
+ pub src_is_address: bool,
}
sub_type! {
@@ -518,17 +529,20 @@ pub enum MulDetails {
Float(MulFloatDesc),
}
+#[derive(Copy, Clone)]
pub struct MulIntDesc {
pub typ: IntType,
pub control: MulIntControl,
}
+#[derive(Copy, Clone)]
pub enum MulIntControl {
Low,
High,
Wide,
}
+#[derive(Copy, Clone)]
pub struct MulFloatDesc {
pub typ: FloatType,
pub rounding: Option<RoundingMode>,
diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop
index 208e076..50a6aeb 100644
--- a/ptx/src/ptx.lalrpop
+++ b/ptx/src/ptx.lalrpop
@@ -122,6 +122,7 @@ match {
"cvta",
"debug",
"ld",
+ "mad",
"map_f64_to_f32",
"mov",
"mul",
@@ -149,6 +150,7 @@ ExtendedID : &'input str = {
"cvta",
"debug",
"ld",
+ "mad",
"map_f64_to_f32",
"mov",
"mul",
@@ -442,6 +444,7 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
InstCvta,
InstCall,
InstAbs,
+ InstMad
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
@@ -649,7 +652,7 @@ InstAddMode: ast::AddDetails = {
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-comparison-instructions-setp
// TODO: support f16 setp
InstSetp: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "setp" <d:SetpMode> <a:Arg4> => ast::Instruction::Setp(d, a),
+ "setp" <d:SetpMode> <a:Arg4Setp> => ast::Instruction::Setp(d, a),
"setp" <d:SetpBoolMode> <a:Arg5> => ast::Instruction::SetpBool(d, a),
};
@@ -995,6 +998,13 @@ InstAbs: ast::Instruction<ast::ParsedArgParams<'input>> = {
},
};
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mad
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mad
+InstMad: ast::Instruction<ast::ParsedArgParams<'input>> = {
+ "mad" <d:InstMulMode> <a:Arg4> => ast::Instruction::Mad(d, a),
+ "mad" ".hi" ".sat" ".s32" => todo!()
+};
+
SignedIntType: ast::ScalarType = {
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
@@ -1056,7 +1066,11 @@ Arg3: ast::Arg3<ast::ParsedArgParams<'input>> = {
};
Arg4: ast::Arg4<ast::ParsedArgParams<'input>> = {
- <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> => ast::Arg4{<>}
+ <dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> "," <src3:Operand> => ast::Arg4{<>}
+};
+
+Arg4Setp: ast::Arg4Setp<ast::ParsedArgParams<'input>> = {
+ <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> => ast::Arg4Setp{<>}
};
// TODO: pass src3 negation somewhere
diff --git a/ptx/src/test/mod.rs b/ptx/src/test/mod.rs
index d251884..0339141 100644
--- a/ptx/src/test/mod.rs
+++ b/ptx/src/test/mod.rs
@@ -40,3 +40,10 @@ fn _Z9vectorAddPKfS0_Pfi_ptx() -> Result<(), TranslateError> {
let vector_add = include_str!("_Z9vectorAddPKfS0_Pfi.ptx");
compile_and_assert(vector_add)
}
+
+#[test]
+#[allow(non_snake_case)]
+fn vectorAdd_11_ptx() -> Result<(), TranslateError> {
+ let vector_add = include_str!("vectorAdd_11.ptx");
+ compile_and_assert(vector_add)
+}
diff --git a/ptx/src/test/spirv_run/mad_s32.ptx b/ptx/src/test/spirv_run/mad_s32.ptx
new file mode 100644
index 0000000..a864266
--- /dev/null
+++ b/ptx/src/test/spirv_run/mad_s32.ptx
@@ -0,0 +1,28 @@
+.version 6.5
+.target sm_30
+.address_size 64
+
+.visible .entry mad_s32(
+ .param .u64 input,
+ .param .u64 output
+)
+{
+ .reg .u64 in_addr;
+ .reg .u64 out_addr;
+ .reg .s32 dst;
+ .reg .s32 src1;
+ .reg .s32 src2;
+ .reg .s32 src3;
+
+ ld.param.u64 in_addr, [input];
+ ld.param.u64 out_addr, [output];
+
+ ld.s32 src1, [in_addr];
+ ld.s32 src2, [in_addr+4];
+ ld.s32 src3, [in_addr+8];
+ mad.lo.s32 dst, src1, src2, src3;
+ st.s32 [out_addr], dst;
+ st.s32 [out_addr+4], dst;
+ st.s32 [out_addr+8], dst;
+ ret;
+}
diff --git a/ptx/src/test/spirv_run/mad_s32.spvtxt b/ptx/src/test/spirv_run/mad_s32.spvtxt
new file mode 100644
index 0000000..3a7153d
--- /dev/null
+++ b/ptx/src/test/spirv_run/mad_s32.spvtxt
@@ -0,0 +1,77 @@
+ OpCapability GenericPointer
+ OpCapability Linkage
+ OpCapability Addresses
+ OpCapability Kernel
+ OpCapability Int64
+ OpCapability Int8
+ OpCapability Float64
+ %48 = OpExtInstImport "OpenCL.std"
+ OpMemoryModel Physical64 OpenCL
+ OpEntryPoint Kernel %1 "mad_s32"
+ %void = OpTypeVoid
+ %ulong = OpTypeInt 64 0
+ %51 = OpTypeFunction %void %ulong %ulong
+%_ptr_Function_ulong = OpTypePointer Function %ulong
+ %uint = OpTypeInt 32 0
+%_ptr_Function_uint = OpTypePointer Function %uint
+%_ptr_Generic_uint = OpTypePointer Generic %uint
+ %ulong_4 = OpConstant %ulong 4
+ %ulong_8 = OpConstant %ulong 8
+ %ulong_4_0 = OpConstant %ulong 4
+ %ulong_8_0 = OpConstant %ulong 8
+ %1 = OpFunction %void None %51
+ %10 = OpFunctionParameter %ulong
+ %11 = OpFunctionParameter %ulong
+ %46 = OpLabel
+ %2 = OpVariable %_ptr_Function_ulong Function
+ %3 = OpVariable %_ptr_Function_ulong Function
+ %4 = OpVariable %_ptr_Function_ulong Function
+ %5 = OpVariable %_ptr_Function_ulong Function
+ %6 = OpVariable %_ptr_Function_uint Function
+ %7 = OpVariable %_ptr_Function_uint Function
+ %8 = OpVariable %_ptr_Function_uint Function
+ %9 = OpVariable %_ptr_Function_uint Function
+ OpStore %2 %10
+ OpStore %3 %11
+ %13 = OpLoad %ulong %2
+ %12 = OpCopyObject %ulong %13
+ OpStore %4 %12
+ %15 = OpLoad %ulong %3
+ %14 = OpCopyObject %ulong %15
+ OpStore %5 %14
+ %17 = OpLoad %ulong %4
+ %40 = OpConvertUToPtr %_ptr_Generic_uint %17
+ %16 = OpLoad %uint %40
+ OpStore %7 %16
+ %19 = OpLoad %ulong %4
+ %33 = OpIAdd %ulong %19 %ulong_4
+ %41 = OpConvertUToPtr %_ptr_Generic_uint %33
+ %18 = OpLoad %uint %41
+ OpStore %8 %18
+ %21 = OpLoad %ulong %4
+ %35 = OpIAdd %ulong %21 %ulong_8
+ %42 = OpConvertUToPtr %_ptr_Generic_uint %35
+ %20 = OpLoad %uint %42
+ OpStore %9 %20
+ %23 = OpLoad %uint %7
+ %24 = OpLoad %uint %8
+ %25 = OpLoad %uint %9
+ %56 = OpIMul %uint %23 %24
+ %22 = OpIAdd %uint %25 %56
+ OpStore %6 %22
+ %26 = OpLoad %ulong %5
+ %27 = OpLoad %uint %6
+ %43 = OpConvertUToPtr %_ptr_Generic_uint %26
+ OpStore %43 %27
+ %28 = OpLoad %ulong %5
+ %29 = OpLoad %uint %6
+ %37 = OpIAdd %ulong %28 %ulong_4_0
+ %44 = OpConvertUToPtr %_ptr_Generic_uint %37
+ OpStore %44 %29
+ %30 = OpLoad %ulong %5
+ %31 = OpLoad %uint %6
+ %39 = OpIAdd %ulong %30 %ulong_8_0
+ %45 = OpConvertUToPtr %_ptr_Generic_uint %39
+ OpStore %45 %31
+ OpReturn
+ OpFunctionEnd
diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs
index 78c3375..27dc063 100644
--- a/ptx/src/test/spirv_run/mod.rs
+++ b/ptx/src/test/spirv_run/mod.rs
@@ -8,7 +8,6 @@ use spirv_headers::Word;
use spirv_tools_sys::{
spv_binary, spv_endianness_t, spv_parsed_instruction_t, spv_result_t, spv_target_env,
};
-use std::{collections::hash_map::Entry, cmp};
use std::error;
use std::ffi::{c_void, CStr, CString};
use std::fmt;
@@ -17,6 +16,7 @@ use std::hash::Hash;
use std::mem;
use std::slice;
use std::{borrow::Cow, collections::HashMap, env, fs, path::PathBuf, ptr, str};
+use std::{cmp, collections::hash_map::Entry};
macro_rules! test_ptx {
($fn_name:ident, $input:expr, $output:expr) => {
@@ -65,6 +65,8 @@ test_ptx!(mov_address, [0xDEADu64], [0u64]);
test_ptx!(b64tof64, [111u64], [111u64]);
test_ptx!(implicit_param, [34u32], [34u32]);
test_ptx!(pred_not, [10u64, 11u64], [2u64, 0u64]);
+test_ptx!(mad_s32, [2i32, 3i32, 4i32], [10i32, 10i32, 10i32]);
+test_ptx!(mul_wide, [0x01_00_00_00__01_00_00_00i64], [0x1_00_00_00_00_00_00i64]);
struct DisplayError<T: Debug> {
err: T,
@@ -93,7 +95,7 @@ fn test_ptx_assert<'a, T: From<u8> + ze::SafeRepr + Debug + Copy + PartialEq>(
let mut errors = Vec::new();
let ast = ptx::ModuleParser::new().parse(&mut errors, ptx_text)?;
assert!(errors.len() == 0);
- let spirv = translate::to_spirv(ast)?;
+ let (spirv, _) = translate::to_spirv(ast)?;
let name = CString::new(name)?;
let result =
run_spirv(name.as_c_str(), &spirv, input, output).map_err(|err| DisplayError { err })?;
@@ -127,7 +129,7 @@ fn run_spirv<T: From<u8> + ze::SafeRepr + Copy + Debug>(
kernel.set_indirect_access(
ze::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE,
)?;
- let mut inp_b = ze::DeviceBuffer::<T>::new(&mut ctx, &dev, cmp::max(input.len(),1))?;
+ let mut inp_b = ze::DeviceBuffer::<T>::new(&mut ctx, &dev, cmp::max(input.len(), 1))?;
let mut out_b = ze::DeviceBuffer::<T>::new(&mut ctx, &dev, cmp::max(output.len(), 1))?;
let inp_b_ptr_mut: ze::BufferPtrMut<T> = (&mut inp_b).into();
let event_pool = ze::EventPool::new(&mut ctx, 3, Some(&[&dev]))?;
@@ -157,7 +159,7 @@ fn test_spvtxt_assert<'a>(
let mut errors = Vec::new();
let ast = ptx::ModuleParser::new().parse(&mut errors, ptx_txt)?;
assert!(errors.len() == 0);
- let ptx_mod = translate::to_spirv_module(ast)?;
+ let (ptx_mod, _) = translate::to_spirv_module(ast)?;
let spv_context =
unsafe { spirv_tools::spvContextCreate(spv_target_env::SPV_ENV_UNIVERSAL_1_3) };
assert!(spv_context != ptr::null_mut());
diff --git a/ptx/src/test/spirv_run/mul_wide.ptx b/ptx/src/test/spirv_run/mul_wide.ptx
new file mode 100644
index 0000000..2d6f8a5
--- /dev/null
+++ b/ptx/src/test/spirv_run/mul_wide.ptx
@@ -0,0 +1,24 @@
+.version 6.5
+.target sm_30
+.address_size 64
+
+.visible .entry mul_wide(
+ .param .u64 input,
+ .param .u64 output
+)
+{
+ .reg .u64 in_addr;
+ .reg .u64 out_addr;
+ .reg .s32 inp1;
+ .reg .s32 inp2;
+ .reg .s64 result;
+
+ ld.param.u64 in_addr, [input];
+ ld.param.u64 out_addr, [output];
+
+ ld.global.s32 inp1, [in_addr];
+ ld.global.s32 inp2, [in_addr+4];
+ mul.wide.s32 result, inp1, inp2;
+ st.u64 [out_addr], result;
+ ret;
+}
diff --git a/ptx/src/test/spirv_run/mul_wide.spvtxt b/ptx/src/test/spirv_run/mul_wide.spvtxt
new file mode 100644
index 0000000..274612c
--- /dev/null
+++ b/ptx/src/test/spirv_run/mul_wide.spvtxt
@@ -0,0 +1,64 @@
+ OpCapability GenericPointer
+ OpCapability Linkage
+ OpCapability Addresses
+ OpCapability Kernel
+ OpCapability Int64
+ OpCapability Int8
+ OpCapability Float64
+ %32 = OpExtInstImport "OpenCL.std"
+ OpMemoryModel Physical64 OpenCL
+ OpEntryPoint Kernel %1 "mul_wide"
+ %void = OpTypeVoid
+ %ulong = OpTypeInt 64 0
+ %35 = OpTypeFunction %void %ulong %ulong
+%_ptr_Function_ulong = OpTypePointer Function %ulong
+ %uint = OpTypeInt 32 0
+%_ptr_Function_uint = OpTypePointer Function %uint
+%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
+ %ulong_4 = OpConstant %ulong 4
+ %_struct_40 = OpTypeStruct %uint %uint
+ %v2uint = OpTypeVector %uint 2
+%_ptr_Generic_ulong = OpTypePointer Generic %ulong
+ %1 = OpFunction %void None %35
+ %9 = OpFunctionParameter %ulong
+ %10 = OpFunctionParameter %ulong
+ %30 = OpLabel
+ %2 = OpVariable %_ptr_Function_ulong Function
+ %3 = OpVariable %_ptr_Function_ulong Function
+ %4 = OpVariable %_ptr_Function_ulong Function
+ %5 = OpVariable %_ptr_Function_ulong Function
+ %6 = OpVariable %_ptr_Function_uint Function
+ %7 = OpVariable %_ptr_Function_uint Function
+ %8 = OpVariable %_ptr_Function_ulong Function
+ OpStore %2 %9
+ OpStore %3 %10
+ %12 = OpLoad %ulong %2
+ %11 = OpCopyObject %ulong %12
+ OpStore %4 %11
+ %14 = OpLoad %ulong %3
+ %13 = OpCopyObject %ulong %14
+ OpStore %5 %13
+ %16 = OpLoad %ulong %4
+ %26 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %16
+ %15 = OpLoad %uint %26
+ OpStore %6 %15
+ %18 = OpLoad %ulong %4
+ %25 = OpIAdd %ulong %18 %ulong_4
+ %27 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %25
+ %17 = OpLoad %uint %27
+ OpStore %7 %17
+ %20 = OpLoad %uint %6
+ %21 = OpLoad %uint %7
+ %41 = OpSMulExtended %_struct_40 %20 %21
+ %42 = OpCompositeExtract %uint %41 0
+ %43 = OpCompositeExtract %uint %41 1
+ %45 = OpCompositeConstruct %v2uint %42 %43
+ %19 = OpBitcast %ulong %45
+ OpStore %8 %19
+ %22 = OpLoad %ulong %5
+ %23 = OpLoad %ulong %8
+ %28 = OpCopyObject %ulong %23
+ %29 = OpConvertUToPtr %_ptr_Generic_ulong %22
+ OpStore %29 %28
+ OpReturn
+ OpFunctionEnd
diff --git a/ptx/src/test/vectorAdd_11.ptx b/ptx/src/test/vectorAdd_11.ptx
new file mode 100644
index 0000000..ba0381e
--- /dev/null
+++ b/ptx/src/test/vectorAdd_11.ptx
@@ -0,0 +1,55 @@
+
+
+
+
+
+
+
+
+.version 7.0
+.target sm_80
+.address_size 64
+
+
+
+.visible .entry _Z9vectorAddPKfS0_Pfi(
+.param .u64 _Z9vectorAddPKfS0_Pfi_param_0,
+.param .u64 _Z9vectorAddPKfS0_Pfi_param_1,
+.param .u64 _Z9vectorAddPKfS0_Pfi_param_2,
+.param .u32 _Z9vectorAddPKfS0_Pfi_param_3
+)
+{
+.reg .pred %p<2>;
+.reg .f32 %f<4>;
+.reg .b32 %r<6>;
+.reg .b64 %rd<11>;
+
+
+ld.param.u64 %rd1, [_Z9vectorAddPKfS0_Pfi_param_0];
+ld.param.u64 %rd2, [_Z9vectorAddPKfS0_Pfi_param_1];
+ld.param.u64 %rd3, [_Z9vectorAddPKfS0_Pfi_param_2];
+ld.param.u32 %r2, [_Z9vectorAddPKfS0_Pfi_param_3];
+mov.u32 %r3, %ntid.x;
+mov.u32 %r4, %ctaid.x;
+mov.u32 %r5, %tid.x;
+mad.lo.s32 %r1, %r4, %r3, %r5;
+setp.ge.s32 %p1, %r1, %r2;
+@%p1 bra BB0_2;
+
+cvta.to.global.u64 %rd4, %rd1;
+mul.wide.s32 %rd5, %r1, 4;
+add.s64 %rd6, %rd4, %rd5;
+cvta.to.global.u64 %rd7, %rd2;
+add.s64 %rd8, %rd7, %rd5;
+ld.global.f32 %f1, [%rd8];
+ld.global.f32 %f2, [%rd6];
+add.f32 %f3, %f2, %f1;
+cvta.to.global.u64 %rd9, %rd3;
+add.s64 %rd10, %rd9, %rd5;
+st.global.f32 [%rd10], %f3;
+
+BB0_2:
+ret;
+}
+
+
diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs
index 5b03f0b..a1d4b6a 100644
--- a/ptx/src/translate.rs
+++ b/ptx/src/translate.rs
@@ -28,6 +28,7 @@ enum SpirvType {
Array(SpirvScalarKey, u32),
Pointer(Box<SpirvType>, spirv::StorageClass),
Func(Option<Box<SpirvType>>, Vec<SpirvType>),
+ Struct(Vec<SpirvScalarKey>),
}
impl SpirvType {
@@ -174,6 +175,16 @@ impl TypeWordMap {
.entry(t)
.or_insert_with(|| b.type_function(out_t, in_t))
}
+ SpirvType::Struct(ref underlying) => {
+ let underlying_ids = underlying
+ .iter()
+ .map(|t| self.get_or_add_spirv_scalar(b, *t))
+ .collect::<Vec<_>>();
+ *self
+ .complex
+ .entry(t)
+ .or_insert_with(|| b.type_struct(underlying_ids))
+ }
}
}
@@ -201,7 +212,9 @@ impl TypeWordMap {
}
}
-pub fn to_spirv_module<'a>(ast: ast::Module<'a>) -> Result<dr::Module, TranslateError> {
+pub fn to_spirv_module<'a>(
+ ast: ast::Module<'a>,
+) -> Result<(dr::Module, HashMap<String, Vec<usize>>), TranslateError> {
let mut id_defs = GlobalStringIdResolver::new(1);
let ssa_functions = ast
.functions
@@ -218,17 +231,24 @@ pub fn to_spirv_module<'a>(ast: ast::Module<'a>) -> Result<dr::Module, Translate
emit_memory_model(&mut builder);
let mut map = TypeWordMap::new(&mut builder);
emit_builtins(&mut builder, &mut map, &id_defs);
+ let mut args_len = HashMap::new();
for f in ssa_functions {
let f_body = match f.body {
Some(f) => f,
None => continue,
};
emit_function_body_ops(&mut builder, &mut map, opencl_id, &f.globals)?;
- emit_function_header(&mut builder, &mut map, &id_defs, f.func_directive)?;
+ emit_function_header(
+ &mut builder,
+ &mut map,
+ &id_defs,
+ f.func_directive,
+ &mut args_len,
+ )?;
emit_function_body_ops(&mut builder, &mut map, opencl_id, &f_body)?;
builder.end_function()?;
}
- Ok(builder.module())
+ Ok((builder.module(), args_len))
}
fn emit_builtins(
@@ -263,7 +283,12 @@ fn emit_function_header<'a>(
map: &mut TypeWordMap,
global: &GlobalStringIdResolver<'a>,
func_directive: ast::MethodDecl<ExpandedArgParams>,
+ all_args_lens: &mut HashMap<String, Vec<usize>>,
) -> Result<(), TranslateError> {
+ if let ast::MethodDecl::Kernel(name, args) = &func_directive {
+ let args_lens = args.iter().map(|param| param.v_type.width()).collect();
+ all_args_lens.insert(name.to_string(), args_lens);
+ }
let (ret_type, func_type) = get_function_type(builder, map, &func_directive);
let fn_id = match func_directive {
ast::MethodDecl::Kernel(name, _) => {
@@ -297,9 +322,11 @@ fn emit_function_header<'a>(
Ok(())
}
-pub fn to_spirv<'a>(ast: ast::Module<'a>) -> Result<Vec<u32>, TranslateError> {
- let module = to_spirv_module(ast)?;
- Ok(module.assemble())
+pub fn to_spirv<'a>(
+ ast: ast::Module<'a>,
+) -> Result<(Vec<u32>, HashMap<String, Vec<usize>>), TranslateError> {
+ let (module, all_args_lens) = to_spirv_module(ast)?;
+ Ok((module.assemble(), all_args_lens))
}
fn emit_capabilities(builder: &mut dr::Builder) {
@@ -905,7 +932,7 @@ impl<'a, 'b> ArgumentMapVisitor<NormalizedArgParams, ExpandedArgParams>
ArgumentSemantics::PhysicalPointer => {
let scalar_t = ast::ScalarType::U64;
let id_constant_stmt = self.id_def.new_id(ast::Type::Scalar(scalar_t));
- let result_id = self.id_def.new_id(typ);
+ let result_id = self.id_def.new_id(ast::Type::Scalar(scalar_t));
self.func.push(Statement::Constant(ConstantDefinition {
dst: id_constant_stmt,
typ: scalar_t,
@@ -1314,8 +1341,8 @@ fn emit_function_body_ops(
let type_pred = map.get_or_add_scalar(builder, ast::ScalarType::Pred);
let const_true = builder.constant_true(type_pred);
let const_false = builder.constant_false(type_pred);
- builder.select(result_type, result_id, operand, const_false, const_true)
- },
+ builder.select(result_type, result_id, operand, const_false, const_true)
+ }
_ => builder.not(result_type, result_id, operand),
}?;
}
@@ -1359,6 +1386,12 @@ fn emit_function_body_ops(
builder.copy_object(result_type, Some(*dst), *src)?;
}
},
+ ast::Instruction::Mad(mad, arg) => match mad {
+ ast::MulDetails::Int(ref desc) => {
+ emit_mad_int(builder, map, opencl, desc, arg)?
+ }
+ ast::MulDetails::Float(desc) => emit_mad_float(builder, map, desc, arg)?,
+ },
},
Statement::LoadVar(arg, typ) => {
let type_id = map.get_or_add(builder, SpirvType::from(*typ));
@@ -1385,6 +1418,47 @@ fn emit_function_body_ops(
Ok(())
}
+fn emit_mad_int(
+ builder: &mut dr::Builder,
+ map: &mut TypeWordMap,
+ opencl: spirv::Word,
+ desc: &ast::MulIntDesc,
+ arg: &ast::Arg4<ExpandedArgParams>,
+) -> Result<(), dr::Error> {
+ let inst_type = map.get_or_add(builder, SpirvType::from(ast::ScalarType::from(desc.typ)));
+ match desc.control {
+ ast::MulIntControl::Low => {
+ let mul_result = builder.i_mul(inst_type, None, arg.src1, arg.src2)?;
+ builder.i_add(inst_type, Some(arg.dst), arg.src3, mul_result)?;
+ }
+ ast::MulIntControl::High => {
+ let cl_op = if desc.typ.is_signed() {
+ spirv::CLOp::s_mad_hi
+ } else {
+ spirv::CLOp::u_mad_hi
+ };
+ builder.ext_inst(
+ inst_type,
+ Some(arg.dst),
+ opencl,
+ cl_op as spirv::Word,
+ [arg.src1, arg.src2, arg.src3],
+ )?;
+ }
+ ast::MulIntControl::Wide => todo!(),
+ };
+ Ok(())
+}
+
+fn emit_mad_float(
+ builder: &mut dr::Builder,
+ map: &mut TypeWordMap,
+ desc: &ast::MulFloatDesc,
+ arg: &ast::Arg4<ExpandedArgParams>,
+) -> Result<(), dr::Error> {
+ todo!()
+}
+
fn emit_add_float(
builder: &mut dr::Builder,
map: &mut TypeWordMap,
@@ -1529,7 +1603,7 @@ fn emit_setp(
builder: &mut dr::Builder,
map: &mut TypeWordMap,
setp: &ast::SetpData,
- arg: &ast::Arg4<ExpandedArgParams>,
+ arg: &ast::Arg4Setp<ExpandedArgParams>,
) -> Result<(), dr::Error> {
if setp.flush_to_zero {
todo!()
@@ -1607,6 +1681,7 @@ fn emit_mul_int(
desc: &ast::MulIntDesc,
arg: &ast::Arg3<ExpandedArgParams>,
) -> Result<(), dr::Error> {
+ let instruction_type = ast::ScalarType::from(desc.typ);
let inst_type = map.get_or_add(builder, SpirvType::from(ast::ScalarType::from(desc.typ)));
match desc.control {
ast::MulIntControl::Low => {
@@ -1626,11 +1701,53 @@ fn emit_mul_int(
[arg.src1, arg.src2],
)?;
}
- ast::MulIntControl::Wide => todo!(),
+ ast::MulIntControl::Wide => {
+ let mul_ext_type = SpirvType::Struct(vec![
+ SpirvScalarKey::from(instruction_type),
+ SpirvScalarKey::from(instruction_type),
+ ]);
+ let mul_ext_type_id = map.get_or_add(builder, mul_ext_type);
+ let mul = if desc.typ.is_signed() {
+ builder.s_mul_extended(mul_ext_type_id, None, arg.src1, arg.src2)?
+ } else {
+ builder.u_mul_extended(mul_ext_type_id, None, arg.src1, arg.src2)?
+ };
+ let instr_width = instruction_type.width();
+ let instr_kind = instruction_type.kind();
+ let dst_type = ast::ScalarType::from_parts(instr_width * 2, instr_kind);
+ let dst_type_id = map.get_or_add_scalar(builder, dst_type);
+ struct2_bitcast_to_wide(
+ builder,
+ map,
+ SpirvScalarKey::from(instruction_type),
+ inst_type,
+ arg.dst,
+ dst_type_id,
+ mul,
+ )?;
+ }
}
Ok(())
}
+// Surprisingly, structs can't be bitcast, so we route everything through a vector
+fn struct2_bitcast_to_wide(
+ builder: &mut dr::Builder,
+ map: &mut TypeWordMap,
+ base_type_key: SpirvScalarKey,
+ instruction_type: spirv::Word,
+ dst: spirv::Word,
+ dst_type_id: spirv::Word,
+ src: spirv::Word,
+) -> Result<(), dr::Error> {
+ let low_bits = builder.composite_extract(instruction_type, None, src, [0])?;
+ let high_bits = builder.composite_extract(instruction_type, None, src, [1])?;
+ let vector_type = map.get_or_add(builder, SpirvType::Vector(base_type_key, 2));
+ let vector = builder.composite_construct(vector_type, None, [low_bits, high_bits])?;
+ builder.bitcast(dst_type_id, Some(dst), vector)?;
+ Ok(())
+}
+
fn emit_abs(
builder: &mut dr::Builder,
map: &mut TypeWordMap,
@@ -1844,8 +1961,8 @@ impl PtxSpecialRegister {
fn get_builtin(self) -> spirv::BuiltIn {
match self {
- PtxSpecialRegister::Tid => spirv::BuiltIn::GlobalInvocationId,
- PtxSpecialRegister::Ntid => spirv::BuiltIn::GlobalSize,
+ PtxSpecialRegister::Tid => spirv::BuiltIn::LocalInvocationId,
+ PtxSpecialRegister::Ntid => spirv::BuiltIn::WorkgroupSize,
PtxSpecialRegister::Ctaid => spirv::BuiltIn::WorkgroupId,
PtxSpecialRegister::Nctaid => spirv::BuiltIn::NumWorkgroups,
}
@@ -2492,6 +2609,10 @@ impl<T: ArgParamsEx> ast::Instruction<T> {
let inst_type = ast::Type::Scalar(ast::ScalarType::B64);
ast::Instruction::Cvta(d, a.map(visitor, false, inst_type)?)
}
+ ast::Instruction::Mad(d, a) => {
+ let inst_type = d.get_type();
+ ast::Instruction::Mad(d, a.map(visitor, inst_type)?)
+ }
})
}
}
@@ -2641,7 +2762,8 @@ impl ast::Instruction<ExpandedArgParams> {
| ast::Instruction::St(_, _)
| ast::Instruction::Ret(_)
| ast::Instruction::Abs(_, _)
- | ast::Instruction::Call(_) => None,
+ | ast::Instruction::Call(_)
+ | ast::Instruction::Mad(_, _) => None,
}
}
}
@@ -2741,6 +2863,17 @@ impl<'a> ast::Instruction<ast::ParsedArgParams<'a>> {
}
}
+impl ast::VariableParamType {
+ fn width(self) -> usize {
+ match self {
+ ast::VariableParamType::Scalar(t) => ast::ScalarType::from(t).width() as usize,
+ ast::VariableParamType::Array(t, len) => {
+ (ast::ScalarType::from(t).width() as usize) * (len as usize)
+ }
+ }
+ }
+}
+
impl<T: ArgParamsEx> ast::Arg1<T> {
fn map<U: ArgParamsEx, V: ArgumentMapVisitor<T, U>>(
self,
@@ -3042,6 +3175,53 @@ impl<T: ArgParamsEx> ast::Arg4<T> {
visitor: &mut V,
t: ast::Type,
) -> Result<ast::Arg4<U>, TranslateError> {
+ let dst = visitor.variable(
+ ArgumentDescriptor {
+ op: self.dst,
+ is_dst: true,
+ sema: ArgumentSemantics::Default,
+ },
+ Some(t),
+ )?;
+ let src1 = visitor.operand(
+ ArgumentDescriptor {
+ op: self.src1,
+ is_dst: false,
+ sema: ArgumentSemantics::Default,
+ },
+ t,
+ )?;
+ let src2 = visitor.operand(
+ ArgumentDescriptor {
+ op: self.src2,
+ is_dst: false,
+ sema: ArgumentSemantics::Default,
+ },
+ t,
+ )?;
+ let src3 = visitor.operand(
+ ArgumentDescriptor {
+ op: self.src3,
+ is_dst: false,
+ sema: ArgumentSemantics::Default,
+ },
+ t,
+ )?;
+ Ok(ast::Arg4 {
+ dst,
+ src1,
+ src2,
+ src3,
+ })
+ }
+}
+
+impl<T: ArgParamsEx> ast::Arg4Setp<T> {
+ fn map<U: ArgParamsEx, V: ArgumentMapVisitor<T, U>>(
+ self,
+ visitor: &mut V,
+ t: ast::Type,
+ ) -> Result<ast::Arg4Setp<U>, TranslateError> {
let dst1 = visitor.variable(
ArgumentDescriptor {
op: self.dst1,
@@ -3079,7 +3259,7 @@ impl<T: ArgParamsEx> ast::Arg4<T> {
},
t,
)?;
- Ok(ast::Arg4 {
+ Ok(ast::Arg4Setp {
dst1,
dst2,
src1,