diff options
-rw-r--r-- | zluda/src/impl/dark_api.rs | 13 | ||||
-rw-r--r-- | zluda/tests/context_dark_api_primary_is_unretained.rs | 84 | ||||
-rw-r--r-- | zluda/tests/dark_api.rs | 138 | ||||
-rw-r--r-- | zluda_dark_api/src/lib.rs | 15 | ||||
-rw-r--r-- | zluda_dump/src/dark_api.rs | 73 |
5 files changed, 232 insertions, 91 deletions
diff --git a/zluda/src/impl/dark_api.rs b/zluda/src/impl/dark_api.rs index 8fe4cf3..08ffa17 100644 --- a/zluda/src/impl/dark_api.rs +++ b/zluda/src/impl/dark_api.rs @@ -53,7 +53,7 @@ impl CudaDarkApi for CudaDarkApiZluda { module::load_impl(module.cast(), CUmoduleContent::Fatbin(fatbin)).into_cuda() } - unsafe extern "system" fn get_primary_context( + unsafe extern "system" fn primary_context_allocate( pctx: *mut cuda_types::CUcontext, dev: cuda_types::CUdevice, ) -> CUresult { @@ -188,7 +188,7 @@ impl CudaDarkApi for CudaDarkApiZluda { } unsafe extern "system" fn heap_alloc( - alloc_ptr: *mut *mut zluda_dark_api::HeapAllocRecord, + halloc: *mut *mut zluda_dark_api::HeapAllocRecord, destructor: Option<unsafe extern "system" fn(u32, usize)>, value: usize, ) -> CUresult { @@ -218,7 +218,7 @@ impl CudaDarkApi for CudaDarkApiZluda { } match heap_alloc_impl(destructor, value) { Ok(result) => { - *alloc_ptr = result; + *halloc = result; CUresult::CUDA_SUCCESS } Err(err) => err, @@ -453,6 +453,13 @@ impl CudaDarkApi for CudaDarkApiZluda { *is_wrapped = 0; CUresult::CUDA_SUCCESS } + + unsafe extern "system" fn primary_context_create_with_flags( + dev: CUdevice, + flags: u32, + ) -> CUresult { + todo!() + } } unsafe fn with_context_or_current<T>( diff --git a/zluda/tests/context_dark_api_primary_is_unretained.rs b/zluda/tests/context_dark_api_primary_is_unretained.rs deleted file mode 100644 index 56eaee6..0000000 --- a/zluda/tests/context_dark_api_primary_is_unretained.rs +++ /dev/null @@ -1,84 +0,0 @@ -use crate::common::CudaDriverFns;
-use cuda_types::*;
-use std::mem;
-
-mod common;
-
-cuda_driver_test!(context_dark_api_primary_is_unretained);
-
-unsafe fn context_dark_api_primary_is_unretained<T: CudaDriverFns>(cuda: T) {
- assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
- let dev = CUdevice_v1(0);
- let mut ctx1 = mem::zeroed();
- let mut export_table = mem::zeroed();
- assert_eq!(
- cuda.cuGetExportTable(
- &mut export_table,
- &CUuuid {
- bytes: [
- 0x6b, 0xd5, 0xfb, 0x6c, 0x5b, 0xf4, 0xe7, 0x4a, 0x89, 0x87, 0xd9, 0x39, 0x12,
- 0xfd, 0x9d, 0xf9
- ]
- }
- ),
- CUresult::CUDA_SUCCESS
- );
- let get_primary_ctx = mem::transmute::<
- _,
- unsafe extern "system" fn(*mut CUcontext, CUdevice) -> CUresult,
- >(*(export_table as *mut usize).add(2));
- assert_eq!(get_primary_ctx(&mut ctx1, dev), CUresult::CUDA_SUCCESS);
- let mut api_version = mem::zeroed();
- assert_eq!(
- cuda.cuCtxGetApiVersion(ctx1, &mut api_version),
- CUresult::CUDA_ERROR_INVALID_CONTEXT
- );
- assert_eq!(cuda.cuCtxSetCurrent(ctx1), CUresult::CUDA_SUCCESS);
- let mut device = mem::zeroed();
- assert_eq!(cuda.cuCtxGetDevice(&mut device), CUresult::CUDA_SUCCESS);
- // TODO: re-enable when adding context getters
- /*
- let mut cache_cfg = mem::zeroed();
- assert_eq!(
- cuda.cuCtxGetCacheConfig(&mut cache_cfg),
- CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED
- );
- let mut exec_affinity = mem::zeroed();
- assert_eq!(
- cuda.cuCtxGetExecAffinity(
- &mut exec_affinity,
- CUexecAffinityType::CU_EXEC_AFFINITY_TYPE_SM_COUNT
- ),
- CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED
- );
- let mut flags = mem::zeroed();
- assert_eq!(cuda.cuCtxGetFlags(&mut flags,), CUresult::CUDA_SUCCESS);
- let mut stack = mem::zeroed();
- assert_eq!(
- cuda.cuCtxGetLimit(&mut stack, CUlimit::CU_LIMIT_STACK_SIZE),
- CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED
- );
- let mut shared_mem_cfg = mem::zeroed();
- assert_eq!(
- cuda.cuCtxGetSharedMemConfig(&mut shared_mem_cfg),
- CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED
- );
- let mut lowest_priority = mem::zeroed();
- let mut highest_priority = mem::zeroed();
- assert_eq!(
- cuda.cuCtxGetStreamPriorityRange(&mut lowest_priority, &mut highest_priority),
- CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED
- );
- */
- let mut ctx2 = mem::zeroed();
- assert_eq!(
- cuda.cuDevicePrimaryCtxRetain(&mut ctx2, dev),
- CUresult::CUDA_SUCCESS
- );
- assert_eq!(ctx1, ctx2);
- assert_eq!(
- cuda.cuCtxGetApiVersion(ctx1, &mut api_version),
- CUresult::CUDA_SUCCESS
- );
- assert_eq!(cuda.cuCtxGetDevice(&mut device), CUresult::CUDA_SUCCESS);
-}
diff --git a/zluda/tests/dark_api.rs b/zluda/tests/dark_api.rs index 192167a..c1890fe 100644 --- a/zluda/tests/dark_api.rs +++ b/zluda/tests/dark_api.rs @@ -90,3 +90,141 @@ unsafe fn heap_free<T: CudaDriverFns>(cuda: T) { }
// No-op callback whose `(u32, usize)` parameter list matches the destructor type that the
// dark API `heap_alloc` entry accepts; presumably passed as that destructor by the heap
// tests earlier in this file (confirm against the callers).
unsafe extern "system" fn shutdown(_unknown: u32, _value: usize) {}
+
+cuda_driver_test!(dark_api_primary_context_allocate);
+
+// Exercises the dark API `primary_context_allocate` entry: the handle it returns is checked
+// before and after an ordinary cuDevicePrimaryCtxRetain on the same device.
+unsafe fn dark_api_primary_context_allocate<T: CudaDriverFns>(cuda: T) {
+ assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
+ let device = CUdevice_v1(0);
+ // Fetch the cudart export table from the driver under test.
+ let table_guid = zluda_dark_api::CudartInterface::GUID;
+ let mut table_ptr = mem::zeroed();
+ assert_eq!(
+ cuda.cuGetExportTable(&mut table_ptr, &table_guid),
+ CUresult::CUDA_SUCCESS
+ );
+ let cudart = zluda_dark_api::CudartInterface::new(table_ptr);
+ let mut allocated_ctx = mem::zeroed();
+ assert_eq!(
+ cudart.primary_context_allocate(&mut allocated_ctx, device),
+ CUresult::CUDA_SUCCESS
+ );
+ // Right after allocation: the API version query is rejected for this handle...
+ let mut version = mem::zeroed();
+ assert_ne!(
+ cuda.cuCtxGetApiVersion(allocated_ctx, &mut version),
+ CUresult::CUDA_SUCCESS
+ );
+ // ...and the device reports its primary context as inactive, with no flags.
+ let (mut ctx_flags, mut is_active) = (0, 0);
+ assert_eq!(
+ cuda.cuDevicePrimaryCtxGetState(device, &mut ctx_flags, &mut is_active),
+ CUresult::CUDA_SUCCESS
+ );
+ assert_eq!((ctx_flags, is_active), (0, 0));
+ // The handle can be made current, but memory allocation still fails.
+ assert_eq!(cuda.cuCtxSetCurrent(allocated_ctx), CUresult::CUDA_SUCCESS);
+ assert_ne!(
+ cuda.cuMemAlloc_v2(&mut mem::zeroed(), 4),
+ CUresult::CUDA_SUCCESS
+ );
+ let mut current_device = mem::zeroed();
+ assert_eq!(
+ cuda.cuCtxGetDevice(&mut current_device),
+ CUresult::CUDA_SUCCESS
+ );
+ // TODO: re-enable when adding context getters
+ /*
+ let mut cache_cfg = mem::zeroed();
+ assert_eq!(
+ cuda.cuCtxGetCacheConfig(&mut cache_cfg),
+ CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED
+ );
+ let mut exec_affinity = mem::zeroed();
+ assert_eq!(
+ cuda.cuCtxGetExecAffinity(
+ &mut exec_affinity,
+ CUexecAffinityType::CU_EXEC_AFFINITY_TYPE_SM_COUNT
+ ),
+ CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED
+ );
+ let mut flags = mem::zeroed();
+ assert_eq!(cuda.cuCtxGetFlags(&mut flags,), CUresult::CUDA_SUCCESS);
+ let mut stack = mem::zeroed();
+ assert_eq!(
+ cuda.cuCtxGetLimit(&mut stack, CUlimit::CU_LIMIT_STACK_SIZE),
+ CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED
+ );
+ let mut shared_mem_cfg = mem::zeroed();
+ assert_eq!(
+ cuda.cuCtxGetSharedMemConfig(&mut shared_mem_cfg),
+ CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED
+ );
+ let mut lowest_priority = mem::zeroed();
+ let mut highest_priority = mem::zeroed();
+ assert_eq!(
+ cuda.cuCtxGetStreamPriorityRange(&mut lowest_priority, &mut highest_priority),
+ CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED
+ );
+ */
+ // A regular retain must hand back the very same handle, which then answers queries.
+ let mut retained_ctx = mem::zeroed();
+ assert_eq!(
+ cuda.cuDevicePrimaryCtxRetain(&mut retained_ctx, device),
+ CUresult::CUDA_SUCCESS
+ );
+ assert_eq!(allocated_ctx, retained_ctx);
+ assert_eq!(
+ cuda.cuCtxGetApiVersion(allocated_ctx, &mut version),
+ CUresult::CUDA_SUCCESS
+ );
+ assert_eq!(
+ cuda.cuCtxGetDevice(&mut current_device),
+ CUresult::CUDA_SUCCESS
+ );
+ // Exactly one release succeeds; a second one must be refused.
+ assert_eq!(
+ cuda.cuDevicePrimaryCtxRelease_v2(device),
+ CUresult::CUDA_SUCCESS
+ );
+ assert_ne!(
+ cuda.cuDevicePrimaryCtxRelease_v2(device),
+ CUresult::CUDA_SUCCESS
+ );
+}
+
+cuda_driver_test!(dark_api_primary_context_create_with_flags);
+
+// Calls the dark API `primary_context_create_with_flags` entry with flags == 1 and checks
+// that the device afterwards reports its primary context as active with those flags.
+unsafe fn dark_api_primary_context_create_with_flags<T: CudaDriverFns>(cuda: T) {
+ assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
+ let device = CUdevice_v1(0);
+ // Fetch the cudart export table from the driver under test.
+ let table_guid = zluda_dark_api::CudartInterface::GUID;
+ let mut table_ptr = mem::zeroed();
+ assert_eq!(
+ cuda.cuGetExportTable(&mut table_ptr, &table_guid),
+ CUresult::CUDA_SUCCESS
+ );
+ let cudart = zluda_dark_api::CudartInterface::new(table_ptr);
+ assert_eq!(
+ cudart.primary_context_create_with_flags(device, 1),
+ CUresult::CUDA_SUCCESS
+ );
+ let (mut ctx_flags, mut is_active) = (0, 0);
+ assert_eq!(
+ cuda.cuDevicePrimaryCtxGetState(device, &mut ctx_flags, &mut is_active),
+ CUresult::CUDA_SUCCESS
+ );
+ assert_eq!((ctx_flags, is_active), (1, 1));
+}
+
+cuda_driver_test!(dark_api_primary_context_create_with_flags_fail);
+
+// Checks that the dark API `primary_context_create_with_flags` entry is refused once the
+// device's primary context has already been retained through the public API.
+unsafe fn dark_api_primary_context_create_with_flags_fail<T: CudaDriverFns>(cuda: T) {
+ assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
+ let dev = CUdevice_v1(0);
+ let mut export_table = mem::zeroed();
+ let guid = zluda_dark_api::CudartInterface::GUID;
+ assert_eq!(
+ cuda.cuGetExportTable(&mut export_table, &guid),
+ CUresult::CUDA_SUCCESS
+ );
+ let cudart_interface = zluda_dark_api::CudartInterface::new(export_table);
+ // Activate the primary context first; the returned handle itself is not needed.
+ assert_eq!(
+ cuda.cuDevicePrimaryCtxRetain(&mut mem::zeroed(), dev),
+ CUresult::CUDA_SUCCESS
+ );
+ assert_ne!(
+ cudart_interface.primary_context_create_with_flags(dev, 1),
+ CUresult::CUDA_SUCCESS
+ );
+ // Give back the reference taken above so the device's primary context is not left
+ // retained for other tests that inspect the same device's primary-context state.
+ assert_eq!(
+ cuda.cuDevicePrimaryCtxRelease_v2(dev),
+ CUresult::CUDA_SUCCESS
+ );
+}
diff --git a/zluda_dark_api/src/lib.rs b/zluda_dark_api/src/lib.rs index 3d0766e..ebe2c98 100644 --- a/zluda_dark_api/src/lib.rs +++ b/zluda_dark_api/src/lib.rs @@ -203,7 +203,18 @@ dark_api_table!( module: *mut CUmodule, fatbinc_wrapper: *const FatbincWrapper ) -> CUresult, - 2 => get_primary_context(pctx: *mut CUcontext, dev: CUdevice) -> CUresult, + // Allocate primary context and return the context handle. + // Context allocated this way is not usable in any useful sense: + // * Primary context refcount is 0 + // * Can't be queried for its properties through cuCtxGet... + // * Can't be used to allocate memory + // * It can be used for context stack manipulation (cuCtxSetCurrent(...) etc) + // and one can query for device with cuCtxGetDevice(...) + #[dump] + 2 => primary_context_allocate(pctx: *mut CUcontext, dev: CUdevice) -> CUresult, + // This fails if the primary context is already active + #[dump] + 4 => primary_context_create_with_flags(dev: CUdevice, flags: u32) -> CUresult, #[dump] 6 => get_module_from_cubin_ex1( module: *mut CUmodule, @@ -287,7 +298,7 @@ dark_api_table!( 0 => SIZE_OF, #[dump] 1 => heap_alloc( - alloc_ptr: *mut *mut HeapAllocRecord, + halloc: *mut *mut HeapAllocRecord, // destructor is called only on CUDA exit, on Windows // that is DLL unload from DllMain destructor: Option<unsafe extern "system" fn(u32, usize)>, diff --git a/zluda_dump/src/dark_api.rs b/zluda_dump/src/dark_api.rs index 1bdceb9..23d8c9b 100644 --- a/zluda_dump/src/dark_api.rs +++ b/zluda_dump/src/dark_api.rs @@ -945,11 +945,39 @@ impl CudaDarkApiDump for CudaDarkApiDumpFns { unsafe fn heap_alloc_impl(
guid: &[u8; 16],
idx: usize,
- alloc_ptr: *mut *mut zluda_dark_api::HeapAllocRecord,
+ halloc: *mut *mut zluda_dark_api::HeapAllocRecord,
destructor: Option<unsafe extern "system" fn(u32, usize)>,
value: usize,
) -> CUresult {
- todo!()
+ // Hands an argument formatter to the dark-API logger, then forwards the call to the
+ // entry stored at `idx` in the overridden table for `guid` and returns its result.
+ // NOTE(review): `value` is forwarded but not written by the formatter; confirm that
+ // leaving it out of the log is intended.
+ let arguments_writer = Box::new(move |writer: &mut dyn std::io::Write| {
+ writer.write_all(b"(halloc: ")?;
+ format::CudaDisplay::write(&halloc.cast::<*mut c_void>(), "", 0, writer)?;
+ writer.write_all(b", destructor: ")?;
+ format::CudaDisplay::write(
+ &mem::transmute::<_, *const c_void>(destructor),
+ "",
+ 0,
+ writer,
+ )?;
+ write!(writer, ")")
+ });
+ let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
+ let mut fn_logger = global_state.log_factory.get_logger_dark_api(
+ CUuuid { bytes: *guid },
+ idx,
+ Some(arguments_writer),
+ );
+ let cuda_state = &mut global_state.delayed_state.unwrap_mut().cuda_state;
+ // SAFETY (assumed, not checkable here): `overrides[guid].1` points at the original
+ // table and slot `idx` holds a function with the `heap_alloc` signature.
+ let original_ptr = cuda_state.dark_api.overrides[guid].1.add(idx);
+ // The pointer type must list all three `heap_alloc` parameters; omitting `destructor`
+ // would make the callee read `value` in the destructor slot.
+ let original_fn = mem::transmute::<
+ _,
+ unsafe extern "system" fn(
+ *mut *mut zluda_dark_api::HeapAllocRecord,
+ Option<unsafe extern "system" fn(u32, usize)>,
+ usize,
+ ) -> CUresult,
+ >(*original_ptr);
+ let original_result = original_fn(halloc, destructor, value);
+ fn_logger.result = Some(original_result);
+ original_result
}
unsafe fn heap_free_impl(
@@ -958,6 +986,47 @@ impl CudaDarkApiDump for CudaDarkApiDumpFns { halloc: *mut zluda_dark_api::HeapAllocRecord,
value: *mut usize,
) -> CUresult {
+ let arguments_writer = Box::new(move |writer: &mut dyn std::io::Write| {
+ writer.write_all(b"(halloc: ")?;
+ format::CudaDisplay::write(&halloc.cast::<c_void>(), "", 0, writer)?;
+ writer.write_all(b", value: ")?;
+ format::CudaDisplay::write(&value, "", 0, writer)?;
+ write!(writer, ")")
+ });
+ let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
+ let mut fn_logger = global_state.log_factory.get_logger_dark_api(
+ CUuuid {
+ bytes: guid.clone(),
+ },
+ idx,
+ Some(arguments_writer),
+ );
+ let cuda_state = &mut global_state.delayed_state.unwrap_mut().cuda_state;
+ let original_ptr = cuda_state.dark_api.overrides[guid].1.add(idx);
+ let original_fn = mem::transmute::<
+ _,
+ unsafe extern "system" fn(*mut zluda_dark_api::HeapAllocRecord, *mut usize) -> CUresult,
+ >(*original_ptr);
+ let original_result = original_fn(halloc, value);
+ fn_logger.result = Some(original_result);
+ original_result
+ }
+
+ unsafe fn primary_context_allocate_impl(
+ guid: &[u8; 16],
+ idx: usize,
+ pctx: *mut CUcontext,
+ dev: CUdevice,
+ ) -> CUresult {
+ // Same log-and-forward shape as the heap hooks in this impl: hand an argument
+ // formatter to the dark-API logger, call the entry stored at `idx` in the overridden
+ // table for `guid`, record its result and return it unchanged.
+ let arguments_writer = Box::new(move |writer: &mut dyn std::io::Write| {
+ writer.write_all(b"(pctx: ")?;
+ format::CudaDisplay::write(&pctx.cast::<c_void>(), "", 0, writer)?;
+ write!(writer, ", dev: {})", dev.0)
+ });
+ let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
+ let mut fn_logger = global_state.log_factory.get_logger_dark_api(
+ CUuuid { bytes: *guid },
+ idx,
+ Some(arguments_writer),
+ );
+ let cuda_state = &mut global_state.delayed_state.unwrap_mut().cuda_state;
+ // SAFETY (assumed, not checkable here): `overrides[guid].1` points at the original
+ // table and slot `idx` holds a function with the signature spelled out below.
+ let original_ptr = cuda_state.dark_api.overrides[guid].1.add(idx);
+ let original_fn = mem::transmute::<
+ _,
+ unsafe extern "system" fn(*mut CUcontext, CUdevice) -> CUresult,
+ >(*original_ptr);
+ let original_result = original_fn(pctx, dev);
+ fn_logger.result = Some(original_result);
+ original_result
+ }
+
+ unsafe fn primary_context_create_with_flags_impl(
+ guid: &[u8; 16],
+ idx: usize,
+ dev: CUdevice,
+ flags: u32,
+ ) -> CUresult {
- todo!()
+ // Same log-and-forward shape as the heap hooks in this impl: hand an argument
+ // formatter to the dark-API logger, call the entry stored at `idx` in the overridden
+ // table for `guid`, record its result and return it unchanged.
+ let arguments_writer = Box::new(move |writer: &mut dyn std::io::Write| {
+ write!(writer, "(dev: {}, flags: {})", dev.0, flags)
+ });
+ let global_state = &mut *super::GLOBAL_STATE.lock().unwrap();
+ let mut fn_logger = global_state.log_factory.get_logger_dark_api(
+ CUuuid { bytes: *guid },
+ idx,
+ Some(arguments_writer),
+ );
+ let cuda_state = &mut global_state.delayed_state.unwrap_mut().cuda_state;
+ // SAFETY (assumed, not checkable here): `overrides[guid].1` points at the original
+ // table and slot `idx` holds a function with the signature spelled out below.
+ let original_ptr = cuda_state.dark_api.overrides[guid].1.add(idx);
+ let original_fn = mem::transmute::<
+ _,
+ unsafe extern "system" fn(CUdevice, u32) -> CUresult,
+ >(*original_ptr);
+ let original_result = original_fn(dev, flags);
+ fn_logger.result = Some(original_result);
+ original_result
}
}
|