about | summary | refs | log | tree | commit | diff | homepage
path: root/zluda_dump/src/lib.rs
diff options
context:
space:
mode:
author: Andrzej Janik <[email protected]> 2022-01-07 04:20:33 +0100
committer: Andrzej Janik <[email protected]> 2022-01-07 04:20:33 +0100
commit: 869efbe0e2597d130a973d38d0d9bbdb70a67874 (patch)
tree: 347bee64790847828ab412c4ccc4bfe66f8520c6 /zluda_dump/src/lib.rs
parent: 9390db962bf228902c5c63ed2d2861d820d18210 (diff)
download: ZLUDA-869efbe0e2597d130a973d38d0d9bbdb70a67874.tar.gz
download: ZLUDA-869efbe0e2597d130a973d38d0d9bbdb70a67874.zip
Move zluda_dump to the new CUDA infrastructure
Diffstat (limited to 'zluda_dump/src/lib.rs')
-rw-r--r-- zluda_dump/src/lib.rs 1260
1 file changed, 75 insertions, 1185 deletions
diff --git a/zluda_dump/src/lib.rs b/zluda_dump/src/lib.rs
index ecfd1ac..d79c391 100644
--- a/zluda_dump/src/lib.rs
+++ b/zluda_dump/src/lib.rs
@@ -1,107 +1,76 @@
+use cuda_types::{
+ CUdevice, CUdevice_attribute, CUfunction, CUjit_option, CUmodule, CUresult, CUuuid,
+};
+use paste::paste;
+use std::io;
use std::{
- collections::{hash_map, BTreeMap, HashMap},
- env,
- error::Error,
- ffi::{c_void, CStr},
- fs,
- io::{self, prelude::*},
- mem,
- os::raw::{c_int, c_uint, c_ulong, c_ushort},
- path::PathBuf,
- ptr::NonNull,
- rc::Rc,
- slice,
+ collections::HashMap, env, error::Error, ffi::c_void, fs, path::PathBuf, ptr::NonNull, rc::Rc,
sync::Mutex,
};
-use std::{fs::File, ptr};
-
-use cuda::{
- CUdevice, CUdevice_attribute, CUdeviceptr, CUfunction, CUjitInputType, CUjit_option,
- CUlinkState, CUmodule, CUresult, CUstream, CUuuid,
-};
-use ptx::ast;
-use regex::Regex;
#[macro_use]
extern crate lazy_static;
-const CU_LAUNCH_PARAM_END: *mut c_void = 0 as *mut _;
-const CU_LAUNCH_PARAM_BUFFER_POINTER: *mut c_void = 1 as *mut _;
-const CU_LAUNCH_PARAM_BUFFER_SIZE: *mut c_void = 2 as *mut _;
-
macro_rules! extern_redirect {
- (pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;) => {
+ ($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path) => {
#[no_mangle]
- pub extern "system" fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
+ pub extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
let original_fn = |fn_ptr| {
let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) };
typed_fn($( $arg_id ),*)
};
- let get_formatted_args = |fn_logger: &mut crate::log::FunctionLogger, result: CUresult| {
- let arg_count = (count_tts!($($arg_id),*) + 1) / 2;
- fn_logger.begin_writing_arguments(arg_count);
- $(
- fn_logger.write_single_argument(result, $arg_id);
- )*
- fn_logger.end_writing_arguments();
- };
+ let get_formatted_args = Box::new(move |writer: &mut dyn std::io::Write| {
+ (paste! { format :: [<write_ $fn_name>] }) (
+ writer
+ $(,$arg_id)*
+ )
+ });
crate::handle_cuda_function_call(stringify!($fn_name), original_fn, get_formatted_args)
}
};
}
-macro_rules! count_tts {
- () => {0usize};
- ($_head:tt $($tail:tt)*) => {1usize + count_tts!($($tail)*)};
-}
-
macro_rules! extern_redirect_with_post {
- (
- pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;
- $post_fn:path ;
- ) => {
+ ($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path) => {
#[no_mangle]
pub extern "system" fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
let original_fn = |fn_ptr| {
let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) };
typed_fn($( $arg_id ),*)
};
- let get_formatted_args = |fn_logger: &mut crate::log::FunctionLogger, result: CUresult| {
- let arg_count = (count_tts!($($arg_id),*) + 1) / 2;
- fn_logger.begin_writing_arguments(arg_count);
- $(
- fn_logger.write_single_argument(result, $arg_id);
- )*
- fn_logger.end_writing_arguments();
- };
+ let get_formatted_args = Box::new(move |writer: &mut dyn std::io::Write| {
+ (paste! { format :: [<write_ $fn_name>] }) (
+ writer
+ $(,$arg_id)*
+ )
+ });
crate::handle_cuda_function_call_with_probes(
stringify!($fn_name),
|| (), original_fn,
get_formatted_args,
- move |logger, state, _, cuda_result| $post_fn ( $( $arg_id ),* , logger, state, cuda_result )
+ move |logger, state, _, cuda_result| paste! { [<$fn_name _Post>] } ( $( $arg_id ),* , logger, state, cuda_result )
)
}
};
}
-macro_rules! extern_redirect_with {
- (
- pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;
- $receiver:path ;
- ) => {
- #[no_mangle]
- pub extern "system" fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
- let original_fn = |fn_ptr| {
- let typed_fn = unsafe { std::mem::transmute::<_, extern "system" fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr) };
- typed_fn($( $arg_id ),*)
- };
- crate::handle_cuda_function_call(stringify!($fn_name), original_fn)
- }
- };
-}
+use cuda_base::cuda_function_declarations;
+cuda_function_declarations!(
+ cuda_types,
+ extern_redirect,
+ extern_redirect_with_post,
+ [
+ cuModuleLoad,
+ cuModuleLoadData,
+ cuModuleLoadDataEx,
+ cuGetExportTable,
+ cuModuleGetFunction,
+ cuDeviceGetAttribute,
+ cuDeviceComputeCapability,
+ cuModuleLoadFatBinary
+ ]
+);
-#[allow(warnings)]
-mod cuda;
mod dark_api;
mod format;
mod log;
@@ -110,20 +79,6 @@ mod log;
mod os;
mod trace;
-pub static mut LIBCUDA_HANDLE: *mut c_void = ptr::null_mut();
-pub static mut PENDING_LINKING: Option<HashMap<CUlinkState, Vec<ModuleDump>>> = None;
-pub static mut LINKED_CUBINS: Option<HashMap<*mut c_void, ModuleDump>> = None;
-pub static mut MODULES: Option<HashMap<CUmodule, ModuleDump>> = None;
-pub static mut MODULE_DUMP_COUNTER: usize = 0;
-pub static mut KERNELS: Option<HashMap<CUfunction, KernelDump>> = None;
-static mut BUFFERS: Option<BTreeMap<usize, (usize, AllocLocation)>> = None;
-pub static mut LAUNCH_COUNTER: usize = 0;
-pub static mut KERNEL_PATTERN: Option<Regex> = None;
-pub static mut OVERRIDE_COMPUTE_CAPABILITY_MAJOR: Option<i32> = None;
-pub static mut KERNEL_INDEX_MINIMUM: usize = 0;
-pub static mut KERNEL_INDEX_MAXIMUM: usize = usize::MAX;
-static mut LOG_FACTORY: Option<log::Factory> = None;
-
lazy_static! {
static ref GLOBAL_STATE: Mutex<GlobalState> = Mutex::new(GlobalState::new());
}
@@ -179,9 +134,11 @@ struct GlobalDelayedState {
impl GlobalDelayedState {
fn new<'a>(
func: &'static str,
+ arguments_writer: Box<dyn FnMut(&mut dyn std::io::Write) -> std::io::Result<()>>,
factory: &'a mut log::Factory,
) -> (LateInit<Self>, log::FunctionLogger<'a>) {
- let (mut fn_logger, settings) = factory.get_first_logger_and_init_settings(func);
+ let (mut fn_logger, settings) =
+ factory.get_first_logger_and_init_settings(func, arguments_writer);
let maybe_libcuda_handle = unsafe { os::load_cuda_library(&settings.libcuda_path) };
let libcuda_handle = match NonNull::new(maybe_libcuda_handle) {
Some(h) => h,
@@ -285,16 +242,22 @@ pub struct ModuleDump {
fn handle_cuda_function_call(
func: &'static str,
original_cuda_fn: impl FnOnce(NonNull<c_void>) -> CUresult,
- print_arguments_fn: impl FnOnce(&mut crate::log::FunctionLogger, CUresult),
+ arguments_writer: Box<dyn FnMut(&mut dyn std::io::Write) -> std::io::Result<()>>,
) -> CUresult {
- handle_cuda_function_call_with_probes(func, || (), original_cuda_fn, print_arguments_fn, |_| ())
+ handle_cuda_function_call_with_probes(
+ func,
+ || (),
+ original_cuda_fn,
+ arguments_writer,
+ |_, _, _, _| (),
+ )
}
fn handle_cuda_function_call_with_probes<T, PostFn>(
func: &'static str,
pre_probe: impl FnOnce() -> T,
original_cuda_fn: impl FnOnce(NonNull<c_void>) -> CUresult,
- print_arguments_fn: impl FnOnce(&mut crate::log::FunctionLogger, CUresult),
+ arguments_writer: Box<dyn FnMut(&mut dyn std::io::Write) -> std::io::Result<()>>,
post_probe: PostFn,
) -> CUresult
where
@@ -307,14 +270,15 @@ where
// extract any useful trace or logging anyway
let mut global_state = &mut *global_state_mutex.lock().unwrap();
let (mut logger, delayed_state) = match global_state.delayed_state {
- LateInit::Success(ref mut delayed_state) => {
- (global_state.log_factory.get_logger(func), delayed_state)
- }
+ LateInit::Success(ref mut delayed_state) => (
+ global_state.log_factory.get_logger(func, arguments_writer),
+ delayed_state,
+ ),
// There's no libcuda to load, so we might as well panic
LateInit::Error => panic!(),
LateInit::Unitialized => {
let (new_delayed_state, logger) =
- GlobalDelayedState::new(func, &mut global_state.log_factory);
+ GlobalDelayedState::new(func, arguments_writer, &mut global_state.log_factory);
global_state.delayed_state = new_delayed_state;
(logger, global_state.delayed_state.as_mut().unwrap())
}
@@ -326,7 +290,6 @@ where
let pre_result = pre_probe();
let cu_result = original_cuda_fn(fn_ptr);
logger.result = Some(cu_result);
- print_arguments_fn(&mut logger, cu_result);
post_probe(
&mut logger,
&mut delayed_state.cuda_state,
@@ -349,72 +312,6 @@ pub struct KernelDump {
arguments: Option<Vec<usize>>,
}
-// We are doing dlopen here instead of just using LD_PRELOAD,
-// it's because CUDA Runtime API does dlopen to open libcuda.so, which ignores LD_PRELOAD
-pub unsafe fn init_libcuda_handle(func: &'static str) {
- if LIBCUDA_HANDLE == ptr::null_mut() {
- let mut log_factory = log::Factory::new();
- let (logger, settings) = log_factory.get_first_logger_and_init_settings(func);
- MODULES = Some(HashMap::new());
- KERNELS = Some(HashMap::new());
- BUFFERS = Some(BTreeMap::new());
- let libcuda_handle = ptr::null_mut();
- assert_ne!(libcuda_handle, ptr::null_mut());
- LIBCUDA_HANDLE = libcuda_handle;
- match env::var("ZLUDA_DUMP_KERNEL") {
- Ok(kernel_filter) => match Regex::new(&kernel_filter) {
- Ok(r) => KERNEL_PATTERN = Some(r),
- Err(err) => {
- os_log!("Error parsing ZLUDA_DUMP_KERNEL: {}", err);
- }
- },
- Err(_) => (),
- }
- if let Ok(kernel_min_str) = env::var("ZLUDA_DUMP_MIN_INDEX") {
- match kernel_min_str.parse::<usize>() {
- Ok(kernel_min_value) => KERNEL_INDEX_MINIMUM = kernel_min_value,
- Err(err) => {
- os_log!("Error parsing ZLUDA_DUMP_MIN_INDEX: {}", err);
- }
- }
- }
- if let Ok(kernel_max_str) = env::var("ZLUDA_DUMP_MAX_INDEX") {
- match kernel_max_str.parse::<usize>() {
- Ok(kernel_max_value) => KERNEL_INDEX_MAXIMUM = kernel_max_value,
- Err(err) => {
- os_log!("Error parsing ZLUDA_DUMP_MAX_INDEX: {}", err);
- }
- }
- }
- match env::var("ZLUDA_OVERRIDE_COMPUTE_CAPABILITY_MAJOR") {
- Ok(cc_override) => match str::parse::<i32>(&cc_override) {
- Ok(ver) => OVERRIDE_COMPUTE_CAPABILITY_MAJOR = Some(ver),
- Err(err) => {
- os_log!(
- "Error parsing ZLUDA_OVERRIDE_COMPUTE_CAPABILITY_MAJOR: {}",
- err
- );
- }
- },
- Err(_) => (),
- }
- drop(logger);
- }
-}
-
-#[allow(non_snake_case)]
-pub unsafe fn cuModuleLoadData(
- module: *mut CUmodule,
- raw_image: *const ::std::os::raw::c_void,
- cont: impl FnOnce(*mut CUmodule, *const c_void) -> CUresult,
-) -> CUresult {
- let result = cont(module, raw_image);
- if result == CUresult::CUDA_SUCCESS {
- record_module_image_raw(*module, raw_image);
- }
- result
-}
-
#[allow(non_snake_case)]
pub(crate) fn cuModuleLoad_Post(
module: *mut CUmodule,
@@ -457,1013 +354,6 @@ pub(crate) fn cuModuleLoadDataEx_Post(
cuModuleLoadData_Post(module, raw_image, fn_logger, state, result)
}
-unsafe fn record_module_image_raw(module: CUmodule, raw_image: *const ::std::os::raw::c_void) {
- if *(raw_image as *const u32) == 0x464c457f {
- os_log!("Unsupported ELF module image: {:?}", raw_image);
- return;
- }
- let image = to_str(raw_image);
- match image {
- None => os_log!("Malformed module image: {:?}", raw_image),
- Some(image) => record_module_image_with_module(module, raw_image, image),
- };
-}
-
-unsafe fn record_module_image_with_module(
- module: CUmodule,
- raw_image: *const ::std::os::raw::c_void,
- image: &str,
-) {
- match record_module_image_impl(raw_image, image) {
- Ok(dump) => {
- MODULES
- .get_or_insert_with(|| HashMap::new())
- .insert(module, dump);
- }
- Err(e) => {
- os_log!("{}", e);
- }
- }
-}
-
-unsafe fn record_module_image_with_linker(
- link_obj: CUlinkState,
- raw_image: *const ::std::os::raw::c_void,
- image: &str,
-) {
- match record_module_image_impl(raw_image, image) {
- Ok(dump) => {
- match PENDING_LINKING
- .get_or_insert_with(|| HashMap::new())
- .entry(link_obj)
- {
- hash_map::Entry::Occupied(mut vec) => {
- vec.get_mut().push(dump);
- }
- hash_map::Entry::Vacant(e) => {
- e.insert(vec![dump]);
- }
- };
- }
- Err(e) => {
- os_log!("{}", e);
- }
- }
-}
-
-unsafe fn record_module_image_impl(
- raw_image: *const ::std::os::raw::c_void,
- image: &str,
-) -> Result<ModuleDump, Box<dyn Error>> {
- if !image.contains(&".version") {
- return Err(format!(
- "Malformed module image (no `.version`): {:?}",
- raw_image
- ))?;
- }
- let mut errors = Vec::new();
- let ast = ptx::ModuleParser::new().parse(&mut errors, image);
- let kernels_args = match (&*errors, ast) {
- (&[], Ok(ast)) => {
- let kernels_args = ast
- .directives
- .iter()
- .filter_map(directive_to_kernel)
- .collect::<HashMap<_, _>>();
- Some(kernels_args)
- }
- (err_vec, res) => {
- // Don't print errors - it's usually too verbose to be useful
- os_log!(
- "{} errors when parsing module image: {:?}",
- err_vec.len() + res.iter().len(),
- raw_image
- );
- None
- }
- };
- let dump = ModuleDump {
- content: Rc::new(image.to_string()),
- kernels_args,
- };
- if let Err(e) = try_dump_module_image(image) {
- return Err(format!(
- "Errors when saving module image: {:?}, {}",
- raw_image, e
- ))?;
- }
- Ok(dump)
-}
-
-unsafe fn try_dump_module_image(image: &str) -> Result<(), Box<dyn Error>> {
- let mut dump_path = get_dump_dir()?;
- dump_path.push(format!("module_{:04}.ptx", MODULE_DUMP_COUNTER));
- MODULE_DUMP_COUNTER += 1;
- let mut file = File::create(dump_path)?;
- file.write_all(image.as_bytes())?;
- Ok(())
-}
-
-unsafe fn to_str<T>(image: *const T) -> Option<&'static str> {
- let ptr = image as *const u8;
- let mut offset = 0;
- loop {
- let c = *ptr.add(offset);
- if !c.is_ascii() {
- return None;
- }
- if c == 0 {
- return Some(std::str::from_utf8_unchecked(slice::from_raw_parts(
- ptr, offset,
- )));
- }
- offset += 1;
- }
-}
-
-fn directive_to_kernel(dir: &ast::Directive<ast::ParsedArgParams>) -> Option<(String, Vec<usize>)> {
- match dir {
- ast::Directive::Method(
- _,
- ast::Function {
- func_directive:
- ast::MethodDeclaration {
- name: ast::MethodName::Kernel(name),
- input_arguments,
- ..
- },
- ..
- },
- ) => {
- let arg_sizes = input_arguments
- .iter()
- .map(|arg| ast::Type::from(arg.v_type.clone()).size_of())
- .collect();
- Some((name.to_string(), arg_sizes))
- }
- _ => None,
- }
-}
-
-#[allow(non_snake_case)]
-pub unsafe fn cuModuleLoadDataEx(
- module: *mut CUmodule,
- image: *const c_void,
- numOptions: c_uint,
- options: *mut CUjit_option,
- optionValues: *mut *mut c_void,
- cont: impl FnOnce(
- *mut CUmodule,
- *const c_void,
- c_uint,
- *mut CUjit_option,
- *mut *mut c_void,
- ) -> CUresult,
-) -> CUresult {
- let result = cont(module, image, numOptions, options, optionValues);
- if result == CUresult::CUDA_SUCCESS {
- record_module_image_raw(*module, image);
- }
- result
-}
-
-#[allow(non_snake_case)]
-unsafe fn cuModuleGetFunction(
- hfunc: *mut CUfunction,
- hmod: CUmodule,
- name: *const ::std::os::raw::c_char,
- cont: impl FnOnce(*mut CUfunction, CUmodule, *const ::std::os::raw::c_char) -> CUresult,
-) -> CUresult {
- let result = cont(hfunc, hmod, name);
- if result != CUresult::CUDA_SUCCESS {
- return result;
- }
- if let Some(modules) = &MODULES {
- if let Some(module_dump) = modules.get(&hmod) {
- if let Some(kernel) = to_str(name) {
- let kernel_args = if let Some(kernels) = &module_dump.kernels_args {
- if let Some(args) = kernels.get(kernel) {
- Some(args.clone())
- } else {
- None
- }
- } else {
- None
- };
- KERNELS.as_mut().unwrap().insert(
- *hfunc,
- KernelDump {
- module_content: module_dump.content.clone(),
- name: kernel.to_string(),
- arguments: kernel_args,
- },
- );
- } else {
- os_log!("Malformed name at: {:?}", hfunc);
- }
- } else {
- os_log!("Unknown module: {:?}", hmod);
- }
- } else {
- os_log!("Unknown module: {:?}", hmod);
- }
- CUresult::CUDA_SUCCESS
-}
-
-#[allow(non_snake_case)]
-pub unsafe fn cuMemAlloc(
- dptr: *mut CUdeviceptr,
- bytesize: usize,
- cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
-) -> CUresult {
- cuMemAlloc_impl(false, dptr, bytesize, cont)
-}
-
-#[allow(non_snake_case)]
-pub unsafe fn cuMemAlloc_v2(
- dptr: *mut CUdeviceptr,
- bytesize: usize,
- cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
-) -> CUresult {
- cuMemAlloc_impl(true, dptr, bytesize, cont)
-}
-
-#[allow(non_snake_case)]
-pub unsafe fn cuMemAlloc_impl(
- is_v2: bool,
- dptr: *mut CUdeviceptr,
- bytesize: usize,
- cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
-) -> CUresult {
- let result = cont(dptr, bytesize);
- assert_eq!(result, CUresult::CUDA_SUCCESS);
- let start = (*dptr).0 as usize;
- let location = if is_v2 {
- AllocLocation::DeviceV2
- } else {
- AllocLocation::Device
- };
- BUFFERS
- .as_mut()
- .unwrap()
- .insert(start, (bytesize, location));
- CUresult::CUDA_SUCCESS
-}
-
-#[allow(non_snake_case)]
-pub unsafe fn cuMemHostAlloc(
- pp: *mut *mut c_void,
- bytesize: usize,
- flags: c_uint,
- cont: impl FnOnce(*mut *mut c_void, usize, c_uint) -> CUresult,
-) -> CUresult {
- let result = cont(pp, bytesize, flags);
- assert_eq!(result, CUresult::CUDA_SUCCESS);
- let start = (*pp) as usize;
- BUFFERS
- .as_mut()
- .unwrap()
- .insert(start, (bytesize, AllocLocation::Host));
- CUresult::CUDA_SUCCESS
-}
-
-#[allow(non_snake_case)]
-pub unsafe fn cuLaunchKernel(
- f: CUfunction,
- gridDimX: ::std::os::raw::c_uint,
- gridDimY: ::std::os::raw::c_uint,
- gridDimZ: ::std::os::raw::c_uint,
- blockDimX: ::std::os::raw::c_uint,
- blockDimY: ::std::os::raw::c_uint,
- blockDimZ: ::std::os::raw::c_uint,
- sharedMemBytes: ::std::os::raw::c_uint,
- hStream: CUstream,
- kernelParams: *mut *mut ::std::os::raw::c_void,
- extra: *mut *mut ::std::os::raw::c_void,
- cont: impl FnOnce(
- CUfunction,
- ::std::os::raw::c_uint,
- ::std::os::raw::c_uint,
- ::std::os::raw::c_uint,
- ::std::os::raw::c_uint,
- ::std::os::raw::c_uint,
- ::std::os::raw::c_uint,
- ::std::os::raw::c_uint,
- CUstream,
- *mut *mut ::std::os::raw::c_void,
- *mut *mut ::std::os::raw::c_void,
- ) -> CUresult,
-) -> CUresult {
- let mut error;
- let dump_env = match create_dump_dir(f, LAUNCH_COUNTER) {
- Ok(dump_env) => dump_env,
- Err(err) => {
- os_log!("Error when creating the dump directory: {}", err);
- None
- }
- };
- if let Some(dump_env) = &dump_env {
- dump_pre_data(
- gridDimX,
- gridDimY,
- gridDimZ,
- blockDimX,
- blockDimY,
- blockDimZ,
- sharedMemBytes,
- kernelParams,
- extra,
- dump_env,
- )
- .unwrap_or_else(|err| os_log!("{}", err));
- };
- error = cont(
- f,
- gridDimX,
- gridDimY,
- gridDimZ,
- blockDimX,
- blockDimY,
- blockDimZ,
- sharedMemBytes,
- hStream,
- kernelParams,
- extra,
- );
- assert_eq!(error, CUresult::CUDA_SUCCESS);
- error = cuda::cuStreamSynchronize(hStream);
- assert_eq!(error, CUresult::CUDA_SUCCESS);
- if let Some((_, kernel_dump)) = &dump_env {
- dump_arguments(
- kernelParams,
- extra,
- "post",
- &kernel_dump.name,
- LAUNCH_COUNTER,
- kernel_dump.arguments.as_ref().map(|vec| &vec[..]),
- )
- .unwrap_or_else(|err| os_log!("{}", err));
- }
- LAUNCH_COUNTER += 1;
- CUresult::CUDA_SUCCESS
-}
-
-#[allow(non_snake_case)]
-fn dump_launch_arguments(
- gridDimX: u32,
- gridDimY: u32,
- gridDimZ: u32,
- blockDimX: u32,
- blockDimY: u32,
- blockDimZ: u32,
- sharedMemBytes: u32,
- dump_dir: &PathBuf,
-) -> Result<(), Box<dyn Error>> {
- let mut module_file_path = dump_dir.clone();
- module_file_path.push("launch.txt");
- let mut module_file = File::create(module_file_path)?;
- write!(&mut module_file, "{}\n", gridDimX)?;
- write!(&mut module_file, "{}\n", gridDimY)?;
- write!(&mut module_file, "{}\n", gridDimZ)?;
- write!(&mut module_file, "{}\n", blockDimX)?;
- write!(&mut module_file, "{}\n", blockDimY)?;
- write!(&mut module_file, "{}\n", blockDimZ)?;
- write!(&mut module_file, "{}\n", sharedMemBytes)?;
- Ok(())
-}
-
-unsafe fn should_dump_kernel(counter: usize, name: &str) -> bool {
- if counter < KERNEL_INDEX_MINIMUM {
- return false;
- }
- if counter > KERNEL_INDEX_MAXIMUM {
- return false;
- }
- match &KERNEL_PATTERN {
- Some(pattern) => pattern.is_match(name),
- None => true,
- }
-}
-
-unsafe fn create_dump_dir(
- f: CUfunction,
- counter: usize,
-) -> Result<Option<(PathBuf, &'static KernelDump)>, Box<dyn Error>> {
- match KERNELS.as_ref().and_then(|kernels| kernels.get(&f)) {
- Some(kernel_dump) => {
- if !should_dump_kernel(counter, &kernel_dump.name) {
- return Ok(None);
- }
- let mut dump_dir = get_dump_dir()?;
- dump_dir.push(format!("{:04}_{}", counter, kernel_dump.name));
- fs::create_dir_all(&dump_dir)?;
- Ok(Some((dump_dir, kernel_dump)))
- }
- None => Err(format!("Unknown kernel: {:?}", f))?,
- }
-}
-
-#[allow(non_snake_case)]
-unsafe fn dump_pre_data(
- gridDimX: ::std::os::raw::c_uint,
- gridDimY: ::std::os::raw::c_uint,
- gridDimZ: ::std::os::raw::c_uint,
- blockDimX: ::std::os::raw::c_uint,
- blockDimY: ::std::os::raw::c_uint,
- blockDimZ: ::std::os::raw::c_uint,
- sharedMemBytes: ::std::os::raw::c_uint,
- kernelParams: *mut *mut ::std::os::raw::c_void,
- extra: *mut *mut ::std::os::raw::c_void,
- (dump_dir, kernel_dump): &(PathBuf, &'static KernelDump),
-) -> Result<(), Box<dyn Error>> {
- dump_launch_arguments(
- gridDimX,
- gridDimY,
- gridDimZ,
- blockDimX,
- blockDimY,
- blockDimZ,
- sharedMemBytes,
- dump_dir,
- )?;
- let mut module_file_path = dump_dir.clone();
- module_file_path.push("module.ptx");
- let mut module_file = File::create(module_file_path)?;
- module_file.write_all(kernel_dump.module_content.as_bytes())?;
- dump_arguments(
- kernelParams,
- extra,
- "pre",
- &kernel_dump.name,
- LAUNCH_COUNTER,
- kernel_dump.arguments.as_ref().map(|vec| &vec[..]),
- )?;
- Ok(())
-}
-
-fn dump_arguments(
- kernel_params: *mut *mut ::std::os::raw::c_void,
- extra: *mut *mut ::std::os::raw::c_void,
- prefix: &str,
- kernel_name: &str,
- counter: usize,
- args: Option<&[usize]>,
-) -> Result<(), Box<dyn Error>> {
- let args = match args {
- None => return Ok(()),
- Some(a) => a,
- };
- let mut dump_dir = get_dump_dir()?;
- dump_dir.push(format!("{:04}_{}", counter, kernel_name));
- dump_dir.push(prefix);
- if dump_dir.exists() {
- fs::remove_dir_all(&dump_dir)?;
- }
- fs::create_dir_all(&dump_dir)?;
- if kernel_params != ptr::null_mut() {
- for (i, arg_len) in args.iter().enumerate() {
- unsafe { dump_argument_to_file(&dump_dir, i, *arg_len, *kernel_params.add(i))? };
- }
- } else {
- let mut offset = 0;
- let mut buffer_ptr = None;
- let mut buffer_size = None;
- loop {
- match unsafe { *extra.add(offset) } {
- CU_LAUNCH_PARAM_END => break,
- CU_LAUNCH_PARAM_BUFFER_POINTER => {
- buffer_ptr = Some(unsafe { *extra.add(offset + 1) as *mut u8 });
- }
- CU_LAUNCH_PARAM_BUFFER_SIZE => {
- buffer_size = Some(unsafe { *(*extra.add(offset + 1) as *mut usize) });
- }
- _ => return Err("Malformed `extra` parameter to kernel launch")?,
- }
- offset += 2;
- }
- match (buffer_size, buffer_ptr) {
- (Some(buffer_size), Some(buffer_ptr)) => {
- let sum_of_kernel_argument_sizes = args.iter().fold(0, |offset, size_of_arg| {
- size_of_arg + round_up_to_multiple(offset, *size_of_arg)
- });
- if buffer_size < sum_of_kernel_argument_sizes {
- return Err("Malformed `extra` parameter to kernel launch")?;
- }
- let mut offset = 0;
- for (i, arg_size) in args.iter().enumerate() {
- let buffer_offset = round_up_to_multiple(offset, *arg_size);
- unsafe {
- dump_argument_to_file(
- &dump_dir,
- i,
- *arg_size,
- buffer_ptr.add(buffer_offset) as *const _,
- )?
- };
- offset = buffer_offset + *arg_size;
- }
- }
- _ => return Err("Malformed `extra` parameter to kernel launch")?,
- }
- }
- Ok(())
-}
-
-fn round_up_to_multiple(x: usize, multiple: usize) -> usize {
- ((x + multiple - 1) / multiple) * multiple
-}
-
-unsafe fn dump_argument_to_file(
- dump_dir: &PathBuf,
- i: usize,
- arg_len: usize,
- ptr: *const c_void,
-) -> Result<(), Box<dyn Error>> {
- // Don't check if arg_len == sizeof(void*), there are libraries
- // which for some reason pass 32 pointers (4 bytes) in 8 byte arguments
- match get_buffer_length(*(ptr as *mut usize)) {
- Some((start, len, location)) => {
- let mut output = vec![0u8; len];
- let memcpy_fn = match location {
- AllocLocation::Device => |src, dst: usize, len| {
- let error = cuda::cuMemcpyDtoH(dst as *mut _, CUdeviceptr(src), len);
- assert_eq!(error, CUresult::CUDA_SUCCESS);
- },
- AllocLocation::DeviceV2 => |src, dst: usize, len| {
- let error = cuda::cuMemcpyDtoH_v2(dst as *mut _, CUdeviceptr(src), len);
- assert_eq!(error, CUresult::CUDA_SUCCESS);
- },
- AllocLocation::Host => |src, dst: usize, len| {
- ptr::copy_nonoverlapping(src as *mut u8, dst as *mut u8, len);
- },
- };
- memcpy_fn(start, output.as_mut_ptr() as usize, len);
- let mut path = dump_dir.clone();
- path.push(format!("arg_{:03}.buffer", i));
- let mut file = File::create(path)?;
- file.write_all(&mut output)?;
- }
- None => {
- let mut path = dump_dir.clone();
- path.push(format!("arg_{:03}", i));
- let mut file = File::create(path)?;
- file.write_all(slice::from_raw_parts(ptr as *mut u8, arg_len))?;
- }
- }
- Ok(())
-}
-
-unsafe fn get_buffer_length(ptr: usize) -> Option<(usize, usize, AllocLocation)> {
- BUFFERS
- .as_mut()
- .unwrap()
- .range(..=ptr)
- .next_back()
- .and_then(|(start, (len, loc))| {
- let end = *start + *len;
- if ptr < end {
- Some((ptr, end - ptr, *loc))
- } else {
- None
- }
- })
-}
-
-fn get_dump_dir() -> Result<PathBuf, Box<dyn Error>> {
- let dir = env::var("ZLUDA_DUMP_DIR")?;
- let mut main_dir = PathBuf::from(dir);
- let current_exe = env::current_exe()?;
- main_dir.push(current_exe.file_name().unwrap());
- fs::create_dir_all(&main_dir)?;
- Ok(main_dir)
-}
-
-// TODO make this more common with ZLUDA implementation
-const CUDART_INTERFACE_GUID: CUuuid = CUuuid {
- bytes: [
- 0x6b, 0xd5, 0xfb, 0x6c, 0x5b, 0xf4, 0xe7, 0x4a, 0x89, 0x87, 0xd9, 0x39, 0x12, 0xfd, 0x9d,
- 0xf9,
- ],
-};
-
-static mut OVERRIDEN_INTERFACE_VTABLES: Option<HashMap<Box<CUuuid>, Vec<*const c_void>>> = None;
-
-static mut ORIGINAL_GET_MODULE_FROM_CUBIN: Option<
- unsafe extern "system" fn(
- result: *mut CUmodule,
- fatbinc_wrapper: *const FatbincWrapper,
- ) -> CUresult,
-> = None;
-static mut ORIGINAL_GET_MODULE_FROM_CUBIN_EXT1: Option<
- unsafe extern "system" fn(
- result: *mut CUmodule,
- fatbinc_wrapper: *const FatbincWrapper,
- ptr1: *mut c_void,
- ptr2: *mut c_void,
- _unknown: usize,
- ) -> CUresult,
-> = None;
-static mut ORIGINAL_GET_MODULE_FROM_CUBIN_EXT2: Option<
- unsafe extern "system" fn(
- fatbinc_wrapper: *const FatbinHeader,
- result: *mut CUmodule,
- ptr1: *mut c_void,
- ptr2: *mut c_void,
- _unknown: usize,
- ) -> CUresult,
-> = None;
-
-unsafe extern "system" fn report_unknown_export_table_call(
- export_table: *const CUuuid,
- idx: usize,
-) {
- let guid = (*export_table).bytes;
- os_log!("Call to an unsupported export table function: {{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}::{}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15], idx);
-}
-
-#[allow(non_snake_case)]
-pub unsafe fn cuGetExportTable(
- ppExportTable: *mut *const ::std::os::raw::c_void,
- pExportTableId: *const CUuuid,
- cont: impl FnOnce(*mut *const ::std::os::raw::c_void, *const CUuuid) -> CUresult,
-) -> CUresult {
- if ppExportTable == ptr::null_mut() || pExportTableId == ptr::null() {
- return CUresult::CUDA_ERROR_INVALID_VALUE;
- }
- let guid = (*pExportTableId).bytes;
- os_log!("Requested export table id: {{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15]);
- override_export_table(ppExportTable, pExportTableId, cont)
-}
-
-unsafe fn override_export_table(
- export_table_ptr: *mut *const ::std::os::raw::c_void,
- export_table_id: *const CUuuid,
- cont: impl FnOnce(*mut *const ::std::os::raw::c_void, *const CUuuid) -> CUresult,
-) -> CUresult {
- let overrides_map = OVERRIDEN_INTERFACE_VTABLES.get_or_insert_with(|| HashMap::new());
- if let Some(override_table) = overrides_map.get(&*export_table_id) {
- *export_table_ptr = override_table.as_ptr() as *const _;
- return CUresult::CUDA_SUCCESS;
- }
- let base_result = cont(export_table_ptr, export_table_id);
- if base_result != CUresult::CUDA_SUCCESS {
- return base_result;
- }
- let export_table = (*export_table_ptr) as *mut *const c_void;
- let boxed_guid = Box::new(*export_table_id);
- let byte_length: usize = *(export_table as *const usize);
- let mut override_table = Vec::new();
- if byte_length < 0x10000 {
- override_table.push(byte_length as *const _);
- let length = byte_length / mem::size_of::<usize>();
- for i in 1..length {
- let current_fn = export_table.add(i);
- if (*current_fn as usize) == usize::max_value() {
- override_table.push(usize::max_value() as *const _);
- break;
- }
- override_table.push(get_export_override_fn(*current_fn, &*boxed_guid, i));
- }
- } else {
- let mut i = 0;
- loop {
- let current_fn = export_table.add(i);
- if (*current_fn as usize) == usize::max_value() {
- override_table.push(usize::max_value() as *const _);
- break;
- }
- override_table.push(get_export_override_fn(*current_fn, &*boxed_guid, i));
- i += 1;
- }
- }
- *export_table_ptr = override_table.as_ptr() as *const _;
- overrides_map.insert(boxed_guid, override_table);
- CUresult::CUDA_SUCCESS
-}
-
-const TOOLS_RUNTIME_CALLBACK_HOOKS_GUID: CUuuid = CUuuid {
- bytes: [
- 0xa0, 0x94, 0x79, 0x8c, 0x2e, 0x74, 0x2e, 0x74, 0x93, 0xf2, 0x08, 0x00, 0x20, 0x0c, 0x0a,
- 0x66,
- ],
-};
-
-const CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID: CUuuid = CUuuid {
- bytes: [
- 0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, 0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95,
- 0x93,
- ],
-};
-
-const CTX_CREATE_BYPASS_GUID: CUuuid = CUuuid {
- bytes: [
- 0x0C, 0xA5, 0x0B, 0x8C, 0x10, 0x04, 0x92, 0x9A, 0x89, 0xA7, 0xD0, 0xDF, 0x10, 0xE7, 0x72,
- 0x86,
- ],
-};
-
-const HEAP_ACCESS_GUID: CUuuid = CUuuid {
- bytes: [
- 0x19, 0x5B, 0xCB, 0xF4, 0xD6, 0x7D, 0x02, 0x4A, 0xAC, 0xC5, 0x1D, 0x29, 0xCE, 0xA6, 0x31,
- 0xAE,
- ],
-};
-
-const DEVICE_EXTENDED_RT_GUID: CUuuid = CUuuid {
- bytes: [
- 0xB1u8, 0x05, 0x41, 0xE1, 0xF7, 0xC7, 0xC7, 0x4A, 0x9F, 0x64, 0xF2, 0x23, 0xBE, 0x99, 0xF1,
- 0xE2,
- ],
-};
-
-unsafe fn get_export_override_fn(
- original_fn: *const c_void,
- guid: *const CUuuid,
- idx: usize,
-) -> *const c_void {
- match (*guid, idx) {
- (TOOLS_RUNTIME_CALLBACK_HOOKS_GUID, 2)
- | (TOOLS_RUNTIME_CALLBACK_HOOKS_GUID, 6)
- | (CUDART_INTERFACE_GUID, 2)
- | (CUDART_INTERFACE_GUID, 7)
- | (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 0)
- | (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 1)
- | (CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID, 2)
- | (CTX_CREATE_BYPASS_GUID, 1)
- | (HEAP_ACCESS_GUID, 1)
- | (HEAP_ACCESS_GUID, 2)
- | (DEVICE_EXTENDED_RT_GUID, 5)
- | (DEVICE_EXTENDED_RT_GUID, 13) => original_fn,
- (CUDART_INTERFACE_GUID, 1) => {
- ORIGINAL_GET_MODULE_FROM_CUBIN = mem::transmute(original_fn);
- get_module_from_cubin as *const _
- }
- (CUDART_INTERFACE_GUID, 6) => {
- ORIGINAL_GET_MODULE_FROM_CUBIN_EXT1 = mem::transmute(original_fn);
- get_module_from_cubin_ext1 as *const _
- }
- (CUDART_INTERFACE_GUID, 8) => {
- ORIGINAL_GET_MODULE_FROM_CUBIN_EXT2 = mem::transmute(original_fn);
- get_module_from_cubin_ext2 as *const _
- }
- _ => os::get_thunk(original_fn, report_unknown_export_table_call, guid, idx),
- }
-}
-
-const FATBINC_MAGIC: c_uint = 0x466243B1;
-const FATBINC_VERSION: c_uint = 0x1;
-
-#[repr(C)]
-struct FatbincWrapper {
- magic: c_uint,
- version: c_uint,
- data: *const FatbinHeader,
- filename_or_fatbins: *const c_void,
-}
-
-const FATBIN_MAGIC: c_uint = 0xBA55ED50;
-const LEGACY_FATBIN_MAGIC: c_uint = 0x1EE55A01;
-const FATBIN_VERSION: c_ushort = 0x01;
-
-#[repr(C, align(8))]
-struct FatbinHeader {
- magic: c_uint,
- version: c_ushort,
- header_size: c_ushort,
- files_size: c_ulong, // excluding frame header, size of all blocks framed by this frame
-}
-
-const FATBIN_FILE_HEADER_KIND_PTX: c_ushort = 0x01;
-const FATBIN_FILE_HEADER_VERSION_CURRENT: c_ushort = 0x101;
-
-// assembly file header is a bit different, but we don't care
-#[repr(C)]
-#[derive(Debug)]
-struct FatbinFileHeader {
- kind: c_ushort,
- version: c_ushort,
- header_size: c_uint,
- padded_payload_size: c_uint,
- unknown0: c_uint, // check if it's written into separately
- payload_size: c_uint,
- unknown1: c_uint,
- unknown2: c_uint,
- sm_version: c_uint,
- bit_width: c_uint,
- unknown3: c_uint,
- unknown4: c_ulong,
- unknown5: c_ulong,
- uncompressed_payload: c_ulong,
-}
-
-unsafe fn get_module_from_cubin_impl(
- module: *mut CUmodule,
- fatbinc_wrapper: *const FatbincWrapper,
- get_module_base: impl FnOnce() -> CUresult,
-) -> CUresult {
- if module == ptr::null_mut()
- || (*fatbinc_wrapper).magic != FATBINC_MAGIC
- || (*fatbinc_wrapper).version != FATBINC_VERSION
- {
- return CUresult::CUDA_ERROR_INVALID_VALUE;
- }
- get_module_from_cubin_unwrapped(module, (*fatbinc_wrapper).data, get_module_base)
-}
-
-unsafe fn get_module_from_cubin_unwrapped(
- module: *mut CUmodule,
- fatbin_header: *const FatbinHeader,
- get_module_base: impl FnOnce() -> CUresult,
-) -> CUresult {
- if (*fatbin_header).magic != FATBIN_MAGIC || (*fatbin_header).version != FATBIN_VERSION {
- return CUresult::CUDA_ERROR_INVALID_VALUE;
- }
- let file = (fatbin_header as *const u8).add((*fatbin_header).header_size as usize);
- let end = file.add((*fatbin_header).files_size as usize);
- let mut ptx_files = get_ptx_files(file, end);
- ptx_files.sort_unstable_by_key(|f| c_uint::max_value() - (**f).sm_version);
- let mut maybe_kernel_text = None;
- for file in ptx_files {
- match decompress_kernel_module(file) {
- None => continue,
- Some(vec) => {
- maybe_kernel_text = Some(vec);
- break;
- }
- };
- }
- let result = get_module_base();
- if result != CUresult::CUDA_SUCCESS {
- return result;
- }
- if let Some(text) = maybe_kernel_text {
- match CStr::from_bytes_with_nul(&text) {
- Ok(cstr) => match cstr.to_str() {
- Ok(utf8_str) => {
- record_module_image_with_module(*module, text.as_ptr() as _, utf8_str)
- }
- Err(_) => {}
- },
- Err(_) => {}
- }
- } else {
- os_log!("Unsupported runtime module: {:?}", *module);
- }
- result
-}
-
-unsafe extern "system" fn get_module_from_cubin(
- module: *mut CUmodule,
- fatbinc_wrapper: *const FatbincWrapper,
-) -> CUresult {
- get_module_from_cubin_impl(module, fatbinc_wrapper, || {
- ORIGINAL_GET_MODULE_FROM_CUBIN.unwrap()(module, fatbinc_wrapper)
- })
-}
-
-unsafe extern "system" fn get_module_from_cubin_ext1(
- module: *mut CUmodule,
- fatbinc_wrapper: *const FatbincWrapper,
- ptr1: *mut c_void,
- ptr2: *mut c_void,
- _unknown: usize,
-) -> CUresult {
- get_module_from_cubin_impl(module, fatbinc_wrapper, || {
- ORIGINAL_GET_MODULE_FROM_CUBIN_EXT1.unwrap()(module, fatbinc_wrapper, ptr1, ptr2, _unknown)
- })
-}
-
-unsafe extern "system" fn get_module_from_cubin_ext2(
- fatbin_header: *const FatbinHeader,
- module: *mut CUmodule,
- ptr1: *mut c_void,
- ptr2: *mut c_void,
- _unknown: usize,
-) -> CUresult {
- get_module_from_cubin_unwrapped(module, fatbin_header, || {
- ORIGINAL_GET_MODULE_FROM_CUBIN_EXT2.unwrap()(fatbin_header, module, ptr1, ptr2, _unknown)
- })
-}
-
-unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
- let mut index = file;
- let mut result = Vec::new();
- while index < end {
- let file = index as *const FatbinFileHeader;
- if (*file).kind == FATBIN_FILE_HEADER_KIND_PTX
- && (*file).version == FATBIN_FILE_HEADER_VERSION_CURRENT
- {
- result.push(file)
- }
- index = index.add((*file).header_size as usize + (*file).padded_payload_size as usize);
- }
- result
-}
-
-const MAX_PTX_MODULE_DECOMPRESSION_BOUND: usize = 16 * 1024 * 1024;
-
-unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option<Vec<u8>> {
- let decompressed_size = usize::max(1024, (*file).uncompressed_payload as usize);
- let mut decompressed_vec = vec![0u8; decompressed_size];
- loop {
- match lz4_sys::LZ4_decompress_safe(
- (file as *const u8).add((*file).header_size as usize) as *const _,
- decompressed_vec.as_mut_ptr() as *mut _,
- (*file).payload_size as c_int,
- decompressed_vec.len() as c_int,
- ) {
- error if error < 0 => {
- let new_size = decompressed_vec.len() * 2;
- if new_size > MAX_PTX_MODULE_DECOMPRESSION_BOUND {
- return None;
- }
- decompressed_vec.resize(decompressed_vec.len() * 2, 0);
- }
- real_decompressed_size => {
- decompressed_vec.truncate(real_decompressed_size as usize);
- if decompressed_vec.last().copied().unwrap_or(1) != 0 {
- decompressed_vec.push(0);
- }
- return Some(decompressed_vec);
- }
- }
- }
-}
-
-#[allow(non_snake_case)]
-pub unsafe fn cuDeviceGetAttribute(
- pi: *mut ::std::os::raw::c_int,
- attrib: CUdevice_attribute,
- dev: CUdevice,
- cont: impl FnOnce(*mut ::std::os::raw::c_int, CUdevice_attribute, CUdevice) -> CUresult,
-) -> CUresult {
- if attrib == CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR {
- if let Some(ver) = OVERRIDE_COMPUTE_CAPABILITY_MAJOR {
- *pi = ver;
- return CUresult::CUDA_SUCCESS;
- }
- }
- cont(pi, attrib, dev)
-}
-
-#[allow(non_snake_case)]
-pub unsafe fn cuLinkAddData(
- state: CUlinkState,
- type_: CUjitInputType,
- data: *mut ::std::os::raw::c_void,
- size: usize,
- name: *const ::std::os::raw::c_char,
- numOptions: ::std::os::raw::c_uint,
- options: *mut CUjit_option,
- optionValues: *mut *mut ::std::os::raw::c_void,
- cont: impl FnOnce(
- CUlinkState,
- CUjitInputType,
- *mut ::std::os::raw::c_void,
- usize,
- *const ::std::os::raw::c_char,
- ::std::os::raw::c_uint,
- *mut CUjit_option,
- *mut *mut ::std::os::raw::c_void,
- ) -> CUresult,
-) -> CUresult {
- if let Some(image) = to_str(data) {
- record_module_image_with_linker(state, data, image)
- } else {
- os_log!("PTX module not a string: {:?}", data);
- }
- cont(
- state,
- type_,
- data,
- size,
- name,
- numOptions,
- options,
- optionValues,
- )
-}
-
-#[allow(non_snake_case)]
-pub unsafe fn cuLinkAddFile(
- state: CUlinkState,
- type_: CUjitInputType,
- path: *const ::std::os::raw::c_char,
- numOptions: ::std::os::raw::c_uint,
- options: *mut CUjit_option,
- optionValues: *mut *mut ::std::os::raw::c_void,
- cont: impl FnOnce(
- CUlinkState,
- CUjitInputType,
- *const ::std::os::raw::c_char,
- ::std::os::raw::c_uint,
- *mut CUjit_option,
- *mut *mut ::std::os::raw::c_void,
- ) -> CUresult,
-) -> CUresult {
- cont(state, type_, path, numOptions, options, optionValues)
-}
-
#[allow(non_snake_case)]
pub(crate) fn cuGetExportTable_Post(
ppExportTable: *mut *const ::std::os::raw::c_void,
@@ -1480,34 +370,34 @@ pub(crate) fn cuGetExportTable_Post(
#[allow(non_snake_case)]
pub(crate) fn cuModuleGetFunction_Post(
- hfunc: *mut CUfunction,
- hmod: CUmodule,
- name: *const ::std::os::raw::c_char,
- fn_logger: &mut log::FunctionLogger,
- state: &mut trace::StateTracker,
- result: CUresult,
+ _hfunc: *mut CUfunction,
+ _hmod: CUmodule,
+ _name: *const ::std::os::raw::c_char,
+ _fn_logger: &mut log::FunctionLogger,
+ _state: &mut trace::StateTracker,
+ _result: CUresult,
) {
}
#[allow(non_snake_case)]
pub(crate) fn cuDeviceGetAttribute_Post(
- pi: *mut ::std::os::raw::c_int,
- attrib: CUdevice_attribute,
- dev: CUdevice,
- fn_logger: &mut log::FunctionLogger,
- state: &mut trace::StateTracker,
- result: CUresult,
+ _pi: *mut ::std::os::raw::c_int,
+ _attrib: CUdevice_attribute,
+ _dev: CUdevice,
+ _fn_logger: &mut log::FunctionLogger,
+ _state: &mut trace::StateTracker,
+ _result: CUresult,
) {
}
#[allow(non_snake_case)]
pub(crate) fn cuDeviceComputeCapability_Post(
major: *mut ::std::os::raw::c_int,
- minor: *mut ::std::os::raw::c_int,
- dev: CUdevice,
- fn_logger: &mut log::FunctionLogger,
+ _minor: *mut ::std::os::raw::c_int,
+ _dev: CUdevice,
+ _fn_logger: &mut log::FunctionLogger,
state: &mut trace::StateTracker,
- result: CUresult,
+ _result: CUresult,
) {
if let Some(major_ver_override) = state.override_cc_major {
unsafe { *major = major_ver_override as i32 };
@@ -1516,10 +406,10 @@ pub(crate) fn cuDeviceComputeCapability_Post(
#[allow(non_snake_case)]
pub(crate) fn cuModuleLoadFatBinary_Post(
- module: *mut CUmodule,
- fatCubin: *const ::std::os::raw::c_void,
- fn_logger: &mut log::FunctionLogger,
- state: &mut trace::StateTracker,
+ _module: *mut CUmodule,
+ _fatCubin: *const ::std::os::raw::c_void,
+ _fn_logger: &mut log::FunctionLogger,
+ _state: &mut trace::StateTracker,
result: CUresult,
) {
if result == CUresult::CUDA_SUCCESS {