Nobody expects the Red Teamv3

Too many changes to list, but broadly: * Remove Intel GPU support from the compiler * Add AMD GPU support to the compiler * Remove Intel GPU host code * Add AMD GPU host code * More device instructions. From 40 to 68 * More host functions. From 48 to 184 * Add proof of concept implementation of OptiX framework * Add minimal support of cuDNN, cuBLAS, cuSPARSE, cuFFT, NCCL, NVML * Improve ZLUDA launcher for Windows
author: Andrzej Janik <[email protected]> 2021-02-27 20:55:19 +0100
committer: Andrzej Janik <[email protected]> 2024-02-11 20:45:51 +0100
commit: 1b9ba2b2333746c5e2b05a2bf24fa6ec3828dcdf (patch)
tree: 0b77ca4a41d4f232bd181e2bddc886475c608784 /rocblas-sys
parent: 60d2124a16a7a2a1a6be3707247afe82892a4163 (diff)
download: ZLUDA-1b9ba2b2333746c5e2b05a2bf24fa6ec3828dcdf.tar.gz
ZLUDA-1b9ba2b2333746c5e2b05a2bf24fa6ec3828dcdf.zip
5 files changed, 10115 insertions, 0 deletions
diff --git a/rocblas-sys/Cargo.toml b/rocblas-sys/Cargo.toml
new file mode 100644
index 0000000..d3623db
--- /dev/null
+++ b/rocblas-sys/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "rocblas-sys"
+version = "0.0.0"
+authors = ["Andrzej Janik <[email protected]>"]
+edition = "2018"
+links = "rocblas"
+
+[lib]
diff --git a/rocblas-sys/README b/rocblas-sys/README
new file mode 100644
index 0000000..e6e0567
--- /dev/null
+++ b/rocblas-sys/README
@@ -0,0 +1 @@
+bindgen /opt/rocm/include/rocblas/rocblas.h -o src/rocblas.rs --no-layout-tests --size_t-is-usize --default-enum-style=newtype --no-derive-debug --allowlist-function "rocblas_.*" --allowlist-var "ROCBLAS_.*" --must-use-type rocblas_status -- -I/opt/rocm/include
+\ No newline at end of file
diff --git a/rocblas-sys/build.rs b/rocblas-sys/build.rs
new file mode 100644
index 0000000..cd0dd1b
--- /dev/null
+++ b/rocblas-sys/build.rs
@@ -0,0 +1,14 @@
+// Build script: tells Cargo how to link this crate against the rocBLAS shared library.
+fn main() {
+    // Emitting any rerun-if directive disables Cargo's default "any file changed" trigger, so list both inputs.
+    println!("cargo:rerun-if-changed=build.rs");
+    println!("cargo:rerun-if-env-changed=ROCM_PATH");
+    println!("cargo:rustc-link-lib=dylib=rocblas");
+    // An optional ROCM_PATH adds `<ROCM_PATH>/lib` as an extra search directory ahead of the default.
+    if let Some(root) = std::env::var_os("ROCM_PATH").filter(|root| !root.is_empty()) {
+        let lib_dir = std::path::Path::new(&root).join("lib");
+        println!("cargo:rustc-link-search=native={}", lib_dir.display());
+    }
+    // The original hard-coded location is always emitted, so existing setups keep working.
+    println!("cargo:rustc-link-search=native=/opt/rocm/lib/");
+}
diff --git a/rocblas-sys/src/lib.rs b/rocblas-sys/src/lib.rs
new file mode 100644
index 0000000..dff261c
--- /dev/null
+++ b/rocblas-sys/src/lib.rs
@@ -0,0 +1,3 @@
+#![allow(warnings)] // bindgen output keeps the C naming of rocBLAS, which would otherwise flood the build with lint warnings
+mod rocblas; // generated bindings (see the bindgen command in README)
+pub use rocblas::*; // re-export every binding at the crate root
+\ No newline at end of file
diff --git a/rocblas-sys/src/rocblas.rs b/rocblas-sys/src/rocblas.rs
new file mode 100644
index 0000000..6fba5d2
--- /dev/null
+++ b/rocblas-sys/src/rocblas.rs
@@ -0,0 +1,10099 @@
+/* automatically generated by rust-bindgen 0.64.0 */
+
+#[doc = " \\brief Struct to represent a 16 bit Brain floating-point number."]
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct rocblas_bfloat16 {
+    pub data: u16,
+}
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct _rocblas_handle {
+    _unused: [u8; 0],
+}
+#[doc = " \\brief rocblas_handle is a structure holding the rocblas library context.\n It must be initialized using rocblas_create_handle(),\n and the returned handle must be passed\n to all subsequent library function calls.\n It should be destroyed at the end using rocblas_destroy_handle()."]
+pub type rocblas_handle = *mut _rocblas_handle;
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct ihipStream_t {
+    _unused: [u8; 0],
+}
+#[doc = " \\brief Forward declaration of hipStream_t"]
+pub type hipStream_t = *mut ihipStream_t;
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct ihipEvent_t {
+    _unused: [u8; 0],
+}
+#[doc = " \\brief Forward declaration of hipEvent_t"]
+pub type hipEvent_t = *mut ihipEvent_t;
+#[doc = " \\brief Opaque base class for device memory allocation"]
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct rocblas_device_malloc_base {
+    _unused: [u8; 0],
+}
+pub type rocblas_int = i32;
+pub type rocblas_stride = i64;
+#[doc = " \\brief Structure definition for rocblas_half"]
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct rocblas_half {
+    pub data: u16,
+}
+#[doc = " \\brief Struct to represent a complex number with single precision real and imaginary parts."]
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct rocblas_float_complex {
+    pub x: f32,
+    pub y: f32,
+}
+#[doc = " \\brief Struct to represent a complex number with double precision real and imaginary parts."]
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct rocblas_double_complex {
+    pub x: f64,
+    pub y: f64,
+}
+impl rocblas_operation_ {
+    #[doc = "< Operate with the matrix."]
+    pub const rocblas_operation_none: rocblas_operation_ = rocblas_operation_(111);
+}
+impl rocblas_operation_ {
+    #[doc = "< Operate with the transpose of the matrix."]
+    pub const rocblas_operation_transpose: rocblas_operation_ = rocblas_operation_(112);
+}
+impl rocblas_operation_ {
+    pub const rocblas_operation_conjugate_transpose: rocblas_operation_ = rocblas_operation_(113);
+}
+#[repr(transparent)]
+#[doc = " \\brief Used to specify whether the matrix is to be transposed or not."]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_operation_(pub ::std::os::raw::c_uint);
+#[doc = " \\brief Used to specify whether the matrix is to be transposed or not."]
+pub use self::rocblas_operation_ as rocblas_operation;
+impl rocblas_fill_ {
+    #[doc = "< Upper triangle."]
+    pub const rocblas_fill_upper: rocblas_fill_ = rocblas_fill_(121);
+}
+impl rocblas_fill_ {
+    #[doc = "< Lower triangle."]
+    pub const rocblas_fill_lower: rocblas_fill_ = rocblas_fill_(122);
+}
+impl rocblas_fill_ {
+    pub const rocblas_fill_full: rocblas_fill_ = rocblas_fill_(123);
+}
+#[repr(transparent)]
+#[doc = " \\brief Used by the Hermitian, symmetric and triangular matrix\n routines to specify whether the upper, or lower triangle is being referenced."]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_fill_(pub ::std::os::raw::c_uint);
+#[doc = " \\brief Used by the Hermitian, symmetric and triangular matrix\n routines to specify whether the upper, or lower triangle is being referenced."]
+pub use self::rocblas_fill_ as rocblas_fill;
+impl rocblas_diagonal_ {
+    #[doc = "< Non-unit triangular."]
+    pub const rocblas_diagonal_non_unit: rocblas_diagonal_ = rocblas_diagonal_(131);
+}
+impl rocblas_diagonal_ {
+    #[doc = "< Unit triangular."]
+    pub const rocblas_diagonal_unit: rocblas_diagonal_ = rocblas_diagonal_(132);
+}
+#[repr(transparent)]
+#[doc = " \\brief It is used by the triangular matrix routines to specify whether the\n matrix is unit triangular."]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_diagonal_(pub ::std::os::raw::c_uint);
+#[doc = " \\brief It is used by the triangular matrix routines to specify whether the\n matrix is unit triangular."]
+pub use self::rocblas_diagonal_ as rocblas_diagonal;
+impl rocblas_side_ {
+    #[doc = "< Multiply general matrix by symmetric,\nHermitian, or triangular matrix on the left."]
+    pub const rocblas_side_left: rocblas_side_ = rocblas_side_(141);
+}
+impl rocblas_side_ {
+    #[doc = "< Multiply general matrix by symmetric,\nHermitian, or triangular matrix on the right."]
+    pub const rocblas_side_right: rocblas_side_ = rocblas_side_(142);
+}
+impl rocblas_side_ {
+    pub const rocblas_side_both: rocblas_side_ = rocblas_side_(143);
+}
+#[repr(transparent)]
+#[doc = " \\brief Indicates the side matrix A is located relative to matrix B during multiplication."]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_side_(pub ::std::os::raw::c_uint);
+#[doc = " \\brief Indicates the side matrix A is located relative to matrix B during multiplication."]
+pub use self::rocblas_side_ as rocblas_side;
+impl rocblas_datatype_ {
+    #[doc = "< 16-bit floating point, real"]
+    pub const rocblas_datatype_f16_r: rocblas_datatype_ = rocblas_datatype_(150);
+}
+impl rocblas_datatype_ {
+    #[doc = "< 32-bit floating point, real"]
+    pub const rocblas_datatype_f32_r: rocblas_datatype_ = rocblas_datatype_(151);
+}
+impl rocblas_datatype_ {
+    #[doc = "< 64-bit floating point, real"]
+    pub const rocblas_datatype_f64_r: rocblas_datatype_ = rocblas_datatype_(152);
+}
+impl rocblas_datatype_ {
+    #[doc = "< 16-bit floating point, complex"]
+    pub const rocblas_datatype_f16_c: rocblas_datatype_ = rocblas_datatype_(153);
+}
+impl rocblas_datatype_ {
+    #[doc = "< 32-bit floating point, complex"]
+    pub const rocblas_datatype_f32_c: rocblas_datatype_ = rocblas_datatype_(154);
+}
+impl rocblas_datatype_ {
+    #[doc = "< 64-bit floating point, complex"]
+    pub const rocblas_datatype_f64_c: rocblas_datatype_ = rocblas_datatype_(155);
+}
+impl rocblas_datatype_ {
+    #[doc = "<  8-bit signed integer, real"]
+    pub const rocblas_datatype_i8_r: rocblas_datatype_ = rocblas_datatype_(160);
+}
+impl rocblas_datatype_ {
+    #[doc = "<  8-bit unsigned integer, real"]
+    pub const rocblas_datatype_u8_r: rocblas_datatype_ = rocblas_datatype_(161);
+}
+impl rocblas_datatype_ {
+    #[doc = "< 32-bit signed integer, real"]
+    pub const rocblas_datatype_i32_r: rocblas_datatype_ = rocblas_datatype_(162);
+}
+impl rocblas_datatype_ {
+    #[doc = "< 32-bit unsigned integer, real"]
+    pub const rocblas_datatype_u32_r: rocblas_datatype_ = rocblas_datatype_(163);
+}
+impl rocblas_datatype_ {
+    #[doc = "<  8-bit signed integer, complex"]
+    pub const rocblas_datatype_i8_c: rocblas_datatype_ = rocblas_datatype_(164);
+}
+impl rocblas_datatype_ {
+    #[doc = "<  8-bit unsigned integer, complex"]
+    pub const rocblas_datatype_u8_c: rocblas_datatype_ = rocblas_datatype_(165);
+}
+impl rocblas_datatype_ {
+    #[doc = "< 32-bit signed integer, complex"]
+    pub const rocblas_datatype_i32_c: rocblas_datatype_ = rocblas_datatype_(166);
+}
+impl rocblas_datatype_ {
+    #[doc = "< 32-bit unsigned integer, complex"]
+    pub const rocblas_datatype_u32_c: rocblas_datatype_ = rocblas_datatype_(167);
+}
+impl rocblas_datatype_ {
+    #[doc = "< 16-bit bfloat, real"]
+    pub const rocblas_datatype_bf16_r: rocblas_datatype_ = rocblas_datatype_(168);
+}
+impl rocblas_datatype_ {
+    #[doc = "< 16-bit bfloat, complex"]
+    pub const rocblas_datatype_bf16_c: rocblas_datatype_ = rocblas_datatype_(169);
+}
+impl rocblas_datatype_ {
+    #[doc = "< Invalid datatype value, do not use"]
+    pub const rocblas_datatype_invalid: rocblas_datatype_ = rocblas_datatype_(255);
+}
+#[repr(transparent)]
+#[doc = " \\brief Indicates the precision width of data stored in a blas type."]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_datatype_(pub ::std::os::raw::c_uint);
+#[doc = " \\brief Indicates the precision width of data stored in a blas type."]
+pub use self::rocblas_datatype_ as rocblas_datatype;
+impl rocblas_status_ {
+    #[doc = "< Success"]
+    pub const rocblas_status_success: rocblas_status_ = rocblas_status_(0);
+}
+impl rocblas_status_ {
+    #[doc = "< Handle not initialized, invalid or null"]
+    pub const rocblas_status_invalid_handle: rocblas_status_ = rocblas_status_(1);
+}
+impl rocblas_status_ {
+    #[doc = "< Function is not implemented"]
+    pub const rocblas_status_not_implemented: rocblas_status_ = rocblas_status_(2);
+}
+impl rocblas_status_ {
+    #[doc = "< Invalid pointer argument"]
+    pub const rocblas_status_invalid_pointer: rocblas_status_ = rocblas_status_(3);
+}
+impl rocblas_status_ {
+    #[doc = "< Invalid size argument"]
+    pub const rocblas_status_invalid_size: rocblas_status_ = rocblas_status_(4);
+}
+impl rocblas_status_ {
+    #[doc = "< Failed internal memory allocation, copy or dealloc"]
+    pub const rocblas_status_memory_error: rocblas_status_ = rocblas_status_(5);
+}
+impl rocblas_status_ {
+    #[doc = "< Other internal library failure"]
+    pub const rocblas_status_internal_error: rocblas_status_ = rocblas_status_(6);
+}
+impl rocblas_status_ {
+    #[doc = "< Performance degraded due to low device memory"]
+    pub const rocblas_status_perf_degraded: rocblas_status_ = rocblas_status_(7);
+}
+impl rocblas_status_ {
+    #[doc = "< Unmatched start/stop size query"]
+    pub const rocblas_status_size_query_mismatch: rocblas_status_ = rocblas_status_(8);
+}
+impl rocblas_status_ {
+    #[doc = "< Queried device memory size increased"]
+    pub const rocblas_status_size_increased: rocblas_status_ = rocblas_status_(9);
+}
+impl rocblas_status_ {
+    #[doc = "< Queried device memory size unchanged"]
+    pub const rocblas_status_size_unchanged: rocblas_status_ = rocblas_status_(10);
+}
+impl rocblas_status_ {
+    #[doc = "< Passed argument not valid"]
+    pub const rocblas_status_invalid_value: rocblas_status_ = rocblas_status_(11);
+}
+impl rocblas_status_ {
+    #[doc = "< Nothing preventing function to proceed"]
+    pub const rocblas_status_continue: rocblas_status_ = rocblas_status_(12);
+}
+impl rocblas_status_ {
+    pub const rocblas_status_check_numerics_fail: rocblas_status_ = rocblas_status_(13);
+}
+#[repr(transparent)]
+#[doc = "   @brief rocblas status codes definition"]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_status_(pub ::std::os::raw::c_uint);
+#[doc = "   @brief rocblas status codes definition"]
+pub use self::rocblas_status_ as rocblas_status;
+impl rocblas_pointer_mode_ {
+    #[doc = " \\brief Scalar values affected by this variable are located on the host."]
+    pub const rocblas_pointer_mode_host: rocblas_pointer_mode_ = rocblas_pointer_mode_(0);
+}
+impl rocblas_pointer_mode_ {
+    #[doc = " \\brief Scalar values affected by this variable are located on the device."]
+    pub const rocblas_pointer_mode_device: rocblas_pointer_mode_ = rocblas_pointer_mode_(1);
+}
+#[repr(transparent)]
+#[doc = " \\brief Indicates if scalar pointers are on host or device. This is used for\n    scalars alpha and beta and for scalar function return values."]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_pointer_mode_(pub ::std::os::raw::c_uint);
+#[doc = " \\brief Indicates if scalar pointers are on host or device. This is used for\n    scalars alpha and beta and for scalar function return values."]
+pub use self::rocblas_pointer_mode_ as rocblas_pointer_mode;
+impl rocblas_atomics_mode_ {
+    #[doc = " \\brief Algorithms will refrain from atomics where applicable"]
+    pub const rocblas_atomics_not_allowed: rocblas_atomics_mode_ = rocblas_atomics_mode_(0);
+}
+impl rocblas_atomics_mode_ {
+    #[doc = " \\brief Algorithms will take advantage of atomics where applicable"]
+    pub const rocblas_atomics_allowed: rocblas_atomics_mode_ = rocblas_atomics_mode_(1);
+}
+#[repr(transparent)]
+#[doc = " \\brief Indicates if atomics operations are allowed. Not allowing atomic operations\n    may generally improve determinism and repeatability of results at a cost of performance"]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_atomics_mode_(pub ::std::os::raw::c_uint);
+#[doc = " \\brief Indicates if atomics operations are allowed. Not allowing atomic operations\n    may generally improve determinism and repeatability of results at a cost of performance"]
+pub use self::rocblas_atomics_mode_ as rocblas_atomics_mode;
+impl rocblas_performance_metric_ {
+    #[doc = " \\brief Use Tensile's default performance metric for solution selection"]
+    pub const rocblas_default_performance_metric: rocblas_performance_metric_ =
+        rocblas_performance_metric_(0);
+}
+impl rocblas_performance_metric_ {
+    #[doc = " \\brief Select the solution with the highest GFlops across all compute units"]
+    pub const rocblas_device_efficiency_performance_metric: rocblas_performance_metric_ =
+        rocblas_performance_metric_(1);
+}
+impl rocblas_performance_metric_ {
+    #[doc = " \\brief Select the solution with the highest GFlops per compute unit it uses. This\n may be useful when running multiple small gemm problems simultaneously"]
+    pub const rocblas_cu_efficiency_performance_metric: rocblas_performance_metric_ =
+        rocblas_performance_metric_(2);
+}
+#[repr(transparent)]
+#[doc = " \\brief Indicates which performance metric Tensile uses when selecting the optimal\n    solution for gemm problems."]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_performance_metric_(pub ::std::os::raw::c_uint);
+#[doc = " \\brief Indicates which performance metric Tensile uses when selecting the optimal\n    solution for gemm problems."]
+pub use self::rocblas_performance_metric_ as rocblas_performance_metric;
+impl rocblas_gemm_algo_ {
+    pub const rocblas_gemm_algo_standard: rocblas_gemm_algo_ = rocblas_gemm_algo_(0);
+}
+#[repr(transparent)]
+#[doc = " \\brief Presumably selects the algorithm used by gemm routines; only rocblas_gemm_algo_standard is defined.\n NOTE(review): the header text read \"Indicates if layer is active with bitmask\", which does not describe this enum - confirm against upstream rocBLAS."]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_gemm_algo_(pub ::std::os::raw::c_uint);
+#[doc = " \\brief Presumably selects the algorithm used by gemm routines; only rocblas_gemm_algo_standard is defined.\n NOTE(review): the header text read \"Indicates if layer is active with bitmask\", which does not describe this enum - confirm against upstream rocBLAS."]
+pub use self::rocblas_gemm_algo_ as rocblas_gemm_algo;
+impl rocblas_gemm_flags_ {
+    #[doc = " \\brief Default empty flags"]
+    pub const rocblas_gemm_flags_none: rocblas_gemm_flags_ = rocblas_gemm_flags_(0);
+}
+impl rocblas_gemm_flags_ {
+    #[doc = " \\brief Before ROCm 4.2, this flag is not implemented and rocblas uses packed-Int8x4 by default.\n After ROCm 4.2, setting this flag is necessary if we want packed-Int8x4. Default (0x0) uses unpacked."]
+    pub const rocblas_gemm_flags_pack_int8x4: rocblas_gemm_flags_ = rocblas_gemm_flags_(1);
+}
+impl rocblas_gemm_flags_ {
+    #[doc = " \\brief Select the gemm problem with the highest efficiency per compute unit used. Useful for running multiple smaller problems\n simultaneously. This takes precedence over the performance metric set in rocblas_handle and currently only works for\n gemm_*_ex problems."]
+    pub const rocblas_gemm_flags_use_cu_efficiency: rocblas_gemm_flags_ = rocblas_gemm_flags_(2);
+}
+impl rocblas_gemm_flags_ {
+    #[doc = " \\brief Select an alternate implementation for the MI200 FP16 HPA\n (High Precision Accumulate) GEMM kernel utilizing the BF16 matrix\n instructions with reduced accuracy in cases where computation cannot\n tolerate the FP16 matrix instructions flushing subnormal FP16\n input/output data to zero. See the \"MI200 (gfx90a) Considerations\"\n section for more details."]
+    pub const rocblas_gemm_flags_fp16_alt_impl: rocblas_gemm_flags_ = rocblas_gemm_flags_(4);
+}
+#[repr(transparent)]
+#[doc = " \\brief Control flags passed into gemm algorithms invoked by Tensile Host"]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_gemm_flags_(pub ::std::os::raw::c_uint);
+#[doc = " \\brief Control flags passed into gemm algorithms invoked by Tensile Host"]
+pub use self::rocblas_gemm_flags_ as rocblas_gemm_flags;
+impl rocblas_int8_type_for_hipblas_ {
+    pub const rocblas_int8_type_for_hipblas_default: rocblas_int8_type_for_hipblas_ =
+        rocblas_int8_type_for_hipblas_(0);
+}
+impl rocblas_int8_type_for_hipblas_ {
+    pub const rocblas_int8_type_for_hipblas_int8: rocblas_int8_type_for_hipblas_ =
+        rocblas_int8_type_for_hipblas_(1);
+}
+impl rocblas_int8_type_for_hipblas_ {
+    pub const rocblas_int8_type_for_hipblas_pack_int8x4: rocblas_int8_type_for_hipblas_ =
+        rocblas_int8_type_for_hipblas_(2);
+}
+#[repr(transparent)]
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_int8_type_for_hipblas_(pub ::std::os::raw::c_uint);
+pub use self::rocblas_int8_type_for_hipblas_ as rocblas_int8_type_for_hipblas;
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Create handle"]
+    pub fn rocblas_create_handle(handle: *mut rocblas_handle) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Destroy handle"]
+    pub fn rocblas_destroy_handle(handle: rocblas_handle) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Set stream for handle"]
+    pub fn rocblas_set_stream(handle: rocblas_handle, stream: hipStream_t) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Get stream [0] from handle"]
+    pub fn rocblas_get_stream(handle: rocblas_handle, stream: *mut hipStream_t) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Set rocblas_pointer_mode"]
+    pub fn rocblas_set_pointer_mode(
+        handle: rocblas_handle,
+        pointer_mode: rocblas_pointer_mode,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Get rocblas_pointer_mode"]
+    pub fn rocblas_get_pointer_mode(
+        handle: rocblas_handle,
+        pointer_mode: *mut rocblas_pointer_mode,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Set rocblas_int8_type_for_hipblas"]
+    pub fn rocblas_set_int8_type_for_hipblas(
+        handle: rocblas_handle,
+        int8_type: rocblas_int8_type_for_hipblas,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Get rocblas_int8_type_for_hipblas"]
+    pub fn rocblas_get_int8_type_for_hipblas(
+        handle: rocblas_handle,
+        int8_type: *mut rocblas_int8_type_for_hipblas,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Set rocblas_atomics_mode"]
+    pub fn rocblas_set_atomics_mode(
+        handle: rocblas_handle,
+        atomics_mode: rocblas_atomics_mode,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Get rocblas_atomics_mode"]
+    pub fn rocblas_get_atomics_mode(
+        handle: rocblas_handle,
+        atomics_mode: *mut rocblas_atomics_mode,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Query the preferable supported int8 input layout for gemm\n\\details\nIndicates the supported int8 input layout for gemm according to the device.\nIf the device supports packed-int8x4 (1) only, output flag is rocblas_gemm_flags_pack_int8x4\nand users must bitwise-or your flag with rocblas_gemm_flags_pack_int8x4.\nIf output flag is rocblas_gemm_flags_none (0), then unpacked int8 is preferable and suggested.\n@param[in]\nhandle      [rocblas_handle]\nthe handle of device\n@param[out]\nflag        pointer to rocblas_gemm_flags"]
+    pub fn rocblas_query_int8_layout_flag(
+        handle: rocblas_handle,
+        flag: *mut rocblas_gemm_flags,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[doc = " \\brief  Indicates whether the pointer is on the host or device."]
+    pub fn rocblas_pointer_to_mode(ptr: *mut ::std::os::raw::c_void) -> rocblas_pointer_mode;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Copy vector from host to device\n@param[in]\nn           [rocblas_int]\nnumber of elements in the vector\n@param[in]\nelem_size   [rocblas_int]\nnumber of bytes per element in the vector\n@param[in]\nx           pointer to vector on the host\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of the vector\n@param[out]\ny           pointer to vector on the device\n@param[in]\nincy        [rocblas_int]\nspecifies the increment for the elements of the vector"]
+    pub fn rocblas_set_vector(
+        n: rocblas_int,
+        elem_size: rocblas_int,
+        x: *const ::std::os::raw::c_void,
+        incx: rocblas_int,
+        y: *mut ::std::os::raw::c_void,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Copy vector from device to host\n@param[in]\nn           [rocblas_int]\nnumber of elements in the vector\n@param[in]\nelem_size   [rocblas_int]\nnumber of bytes per element in the vector\n@param[in]\nx           pointer to vector on the device\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of the vector\n@param[out]\ny           pointer to vector on the host\n@param[in]\nincy        [rocblas_int]\nspecifies the increment for the elements of the vector"]
+    pub fn rocblas_get_vector(
+        n: rocblas_int,
+        elem_size: rocblas_int,
+        x: *const ::std::os::raw::c_void,
+        incx: rocblas_int,
+        y: *mut ::std::os::raw::c_void,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Copy matrix from host to device\n@param[in]\nrows        [rocblas_int]\nnumber of rows in matrices\n@param[in]\ncols        [rocblas_int]\nnumber of columns in matrices\n@param[in]\nelem_size   [rocblas_int]\nnumber of bytes per element in the matrix\n@param[in]\na           pointer to matrix on the host\n@param[in]\nlda         [rocblas_int]\nspecifies the leading dimension of A, lda >= rows\n@param[out]\nb           pointer to matrix on the GPU\n@param[in]\nldb         [rocblas_int]\nspecifies the leading dimension of B, ldb >= rows"]
+    pub fn rocblas_set_matrix(
+        rows: rocblas_int,
+        cols: rocblas_int,
+        elem_size: rocblas_int,
+        a: *const ::std::os::raw::c_void,
+        lda: rocblas_int,
+        b: *mut ::std::os::raw::c_void,
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Copy matrix from device to host\n@param[in]\nrows        [rocblas_int]\nnumber of rows in matrices\n@param[in]\ncols        [rocblas_int]\nnumber of columns in matrices\n@param[in]\nelem_size   [rocblas_int]\nnumber of bytes per element in the matrix\n@param[in]\na           pointer to matrix on the GPU\n@param[in]\nlda         [rocblas_int]\nspecifies the leading dimension of A, lda >= rows\n@param[out]\nb           pointer to matrix on the host\n@param[in]\nldb         [rocblas_int]\nspecifies the leading dimension of B, ldb >= rows"]
+    pub fn rocblas_get_matrix(
+        rows: rocblas_int,
+        cols: rocblas_int,
+        elem_size: rocblas_int,
+        a: *const ::std::os::raw::c_void,
+        lda: rocblas_int,
+        b: *mut ::std::os::raw::c_void,
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Asynchronously copy vector from host to device\n\\details\nrocblas_set_vector_async copies a vector from pinned host memory to device memory asynchronously.\nMemory on the host must be allocated with hipHostMalloc or the transfer will be synchronous.\n@param[in]\nn           [rocblas_int]\nnumber of elements in the vector\n@param[in]\nelem_size   [rocblas_int]\nnumber of bytes per element in the vector\n@param[in]\nx           pointer to vector on the host\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of the vector\n@param[out]\ny           pointer to vector on the device\n@param[in]\nincy        [rocblas_int]\nspecifies the increment for the elements of the vector\n@param[in]\nstream      specifies the stream into which this transfer request is queued"]
+    pub fn rocblas_set_vector_async(
+        n: rocblas_int,
+        elem_size: rocblas_int,
+        x: *const ::std::os::raw::c_void,
+        incx: rocblas_int,
+        y: *mut ::std::os::raw::c_void,
+        incy: rocblas_int,
+        stream: hipStream_t,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Asynchronously copy vector from device to host\n\\details\nrocblas_get_vector_async copies a vector from device memory to pinned host memory asynchronously.\nMemory on the host must be allocated with hipHostMalloc or the transfer will be synchronous.\n@param[in]\nn           [rocblas_int]\nnumber of elements in the vector\n@param[in]\nelem_size   [rocblas_int]\nnumber of bytes per element in the vector\n@param[in]\nx           pointer to vector on the device\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of the vector\n@param[out]\ny           pointer to vector on the host\n@param[in]\nincy        [rocblas_int]\nspecifies the increment for the elements of the vector\n@param[in]\nstream      specifies the stream into which this transfer request is queued"]
+    pub fn rocblas_get_vector_async(
+        n: rocblas_int,
+        elem_size: rocblas_int,
+        x: *const ::std::os::raw::c_void,
+        incx: rocblas_int,
+        y: *mut ::std::os::raw::c_void,
+        incy: rocblas_int,
+        stream: hipStream_t,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief Asynchronously copy matrix from host to device\n\\details\nrocblas_set_matrix_async copies a matrix from pinned host memory to device memory asynchronously.\nMemory on the host must be allocated with hipHostMalloc or the transfer will be synchronous.\n@param[in]\nrows        [rocblas_int]\nnumber of rows in matrices\n@param[in]\ncols        [rocblas_int]\nnumber of columns in matrices\n@param[in]\nelem_size   [rocblas_int]\nnumber of bytes per element in the matrix\n@param[in]\na           pointer to matrix on the host\n@param[in]\nlda         [rocblas_int]\nspecifies the leading dimension of A, lda >= rows\n@param[out]\nb           pointer to matrix on the GPU\n@param[in]\nldb         [rocblas_int]\nspecifies the leading dimension of B, ldb >= rows\n@param[in]\nstream      specifies the stream into which this transfer request is queued"]
+    pub fn rocblas_set_matrix_async(
+        rows: rocblas_int,
+        cols: rocblas_int,
+        elem_size: rocblas_int,
+        a: *const ::std::os::raw::c_void,
+        lda: rocblas_int,
+        b: *mut ::std::os::raw::c_void,
+        ldb: rocblas_int,
+        stream: hipStream_t,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief asynchronously copy matrix from device to host\n\\details\nrocblas_get_matrix_async copies a matrix from device memory to pinned host memory asynchronously.\nMemory on the host must be allocated with hipHostMalloc or the transfer will be synchronous.\n@param[in]\nrows        [rocblas_int]\nnumber of rows in matrices\n@param[in]\ncols        [rocblas_int]\nnumber of columns in matrices\n@param[in]\nelem_size   [rocblas_int]\nnumber of bytes per element in the matrix\n@param[in]\na           pointer to matrix on the GPU\n@param[in]\nlda         [rocblas_int]\nspecifies the leading dimension of A, lda >= rows\n@param[out]\nb           pointer to matrix on the host\n@param[in]\nldb         [rocblas_int]\nspecifies the leading dimension of B, ldb >= rows\n@param[in]\nstream      specifies the stream into which this transfer request is queued"]
+    pub fn rocblas_get_matrix_async(
+        rows: rocblas_int,
+        cols: rocblas_int,
+        elem_size: rocblas_int,
+        a: *const ::std::os::raw::c_void,
+        lda: rocblas_int,
+        b: *mut ::std::os::raw::c_void,
+        ldb: rocblas_int,
+        stream: hipStream_t,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " Function to set start/stop event handlers (for internal use only)"]
+    pub fn rocblas_set_start_stop_events(
+        handle: rocblas_handle,
+        startEvent: hipEvent_t,
+        stopEvent: hipEvent_t,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_set_solution_fitness_query(
+        handle: rocblas_handle,
+        fitness: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief specifies the performance metric that solution selection uses\n\\details\nDetermines which performance metric will be used by Tensile when selecting the optimal solution\nfor gemm problems. If a valid solution benchmarked for this performance metric does not exist\nfor a problem, Tensile will default to a solution benchmarked for overall performance instead.\n@param[in]\nhandle      [rocblas_handle]\nthe handle of device\n@param[in]\nmetric      [rocblas_performance_metric]\nthe performance metric to be used"]
+    pub fn rocblas_set_performance_metric(
+        handle: rocblas_handle,
+        metric: rocblas_performance_metric,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " \\brief returns the performance metric being used for solution selection\n\\details\nReturns the performance metric used by Tensile to select the optimal solution for gemm problems.\n@param[in]\nhandle      [rocblas_handle]\nthe handle of device\n@param[out]\nmetric      [rocblas_performance_metric*]\npointer to where the metric will be stored"]
+    pub fn rocblas_get_performance_metric(
+        handle: rocblas_handle,
+        metric: *mut rocblas_performance_metric, // out-pointer: the doc above marks it @param[out]
+    ) -> rocblas_status; // the metric is delivered through `metric`; the return value is only the status
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nscal  scales each element of vector x with scalar alpha:\n\nx := alpha * x\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nalpha     device pointer or host pointer for the scalar alpha.\n@param[inout]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n\n"]
+    pub fn rocblas_sscal( // f32 form; the following *scal bindings repeat this parameter list without docs
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32, // read-only scalar; doc: may be a device or a host pointer
+        x: *mut f32, // doc marks x @param[inout]: x := alpha * x
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dscal( // f64 form; parameter list mirrors rocblas_sscal, which carries the doc text
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *mut f64,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cscal( // rocblas_float_complex form; parameter list mirrors rocblas_sscal
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex, // alpha has the same complex type as x here
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zscal( // rocblas_double_complex form; parameter list mirrors rocblas_sscal
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex, // alpha has the same complex type as x here
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csscal( // mixed-type form of rocblas_sscal's parameter list
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32, // real scalar, unlike rocblas_cscal where alpha is complex
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdscal( // mixed-type form of rocblas_sscal's parameter list
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64, // real scalar, unlike rocblas_zscal where alpha is complex
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nscal_batched  scales each element of vector x_i with scalar alpha, for i = 1, ... , batch_count:\n\nx_i := alpha * x_i,\nwhere (x_i) is the i-th instance of the batch.\n\n@param[in]\nhandle      [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn           [rocblas_int]\nthe number of elements in each x_i.\n@param[in]\nalpha       host pointer or device pointer for the scalar alpha.\n@param[inout]\nx           device array of device pointers storing each vector x_i.\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of batches in x."]
+    pub fn rocblas_sscal_batched( // f32 form; the following *scal_batched bindings repeat this list without docs
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32, // a single scalar pointer, not one per batch entry
+        x: *const *mut f32, // pointer table is const, the vectors it points to are mutable
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dscal_batched( // f64 form; parameter list mirrors rocblas_sscal_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const *mut f64, // const pointer table to mutable f64 vectors
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cscal_batched( // rocblas_float_complex form; mirrors rocblas_sscal_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *mut rocblas_float_complex, // const pointer table to mutable complex vectors
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zscal_batched( // rocblas_double_complex form; mirrors rocblas_sscal_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const *mut rocblas_double_complex, // const pointer table to mutable complex vectors
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csscal_batched( // mixed-type form of rocblas_sscal_batched's parameter list
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32, // real scalar paired with complex vectors
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdscal_batched( // mixed-type form of rocblas_sscal_batched's parameter list
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64, // real scalar paired with complex vectors
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nscal_strided_batched  scales each element of vector x_i with scalar alpha, for i = 1, ... , batch_count:\n\nx_i := alpha * x_i,\nwhere (x_i) is the i-th instance of the batch.\n\n@param[in]\nhandle      [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn           [rocblas_int]\nthe number of elements in each x_i.\n@param[in]\nalpha       host pointer or device pointer for the scalar alpha.\n@param[inout]\nx           device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nstride_x    [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size, for a typical\ncase this means stride_x >= n * incx.\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of batches in x."]
+    pub fn rocblas_sscal_strided_batched( // f32 form; the following *scal_strided_batched bindings omit the doc
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *mut f32, // single base pointer (to x_1), unlike the pointer table of the _batched form
+        incx: rocblas_int,
+        stride_x: rocblas_stride, // uses rocblas_stride, not rocblas_int like incx
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dscal_strided_batched( // f64 form; mirrors rocblas_sscal_strided_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride, // see the stride_x notes in rocblas_sscal_strided_batched's doc
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cscal_strided_batched( // rocblas_float_complex form; mirrors rocblas_sscal_strided_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride, // see the stride_x notes in rocblas_sscal_strided_batched's doc
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zscal_strided_batched( // rocblas_double_complex form; mirrors rocblas_sscal_strided_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride, // see the stride_x notes in rocblas_sscal_strided_batched's doc
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csscal_strided_batched( // mixed-type form of rocblas_sscal_strided_batched's list
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32, // real scalar paired with a complex vector
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdscal_strided_batched( // mixed-type form of rocblas_sscal_strided_batched's list
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64, // real scalar paired with a complex vector
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ncopy  copies each element x[i] into y[i], for  i = 1 , ... , n:\n\ny := x\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x to be copied to y.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[out]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_scopy( // f32 form; the following *copy bindings repeat this parameter list without docs
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32, // source: read-only pointer, doc marks it @param[in]
+        incx: rocblas_int,
+        y: *mut f32, // destination: doc marks it @param[out]
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dcopy( // f64 form; parameter list mirrors rocblas_scopy, which carries the doc text
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64, // source
+        incx: rocblas_int,
+        y: *mut f64, // destination
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ccopy( // rocblas_float_complex form; parameter list mirrors rocblas_scopy
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex, // source
+        incx: rocblas_int,
+        y: *mut rocblas_float_complex, // destination
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zcopy( // rocblas_double_complex form; parameter list mirrors rocblas_scopy
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex, // source
+        incx: rocblas_int,
+        y: *mut rocblas_double_complex, // destination
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ncopy_batched copies each element x_i[j] into y_i[j], for  j = 1 , ... , n; i = 1 , ... , batch_count:\n\ny_i := x_i,\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i to be copied to y_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[out]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_scopy_batched( // f32 form; the following *copy_batched bindings omit the doc
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f32, // source table: both the table and the vectors are read-only
+        incx: rocblas_int,
+        y: *const *mut f32, // destination table: const table of mutable vectors
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dcopy_batched( // f64 form; parameter list mirrors rocblas_scopy_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f64, // source pointer table
+        incx: rocblas_int,
+        y: *const *mut f64, // destination pointer table
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ccopy_batched( // rocblas_float_complex form; mirrors rocblas_scopy_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex, // source pointer table
+        incx: rocblas_int,
+        y: *const *mut rocblas_float_complex, // destination pointer table
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zcopy_batched( // rocblas_double_complex form; mirrors rocblas_scopy_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex, // source pointer table
+        incx: rocblas_int,
+        y: *const *mut rocblas_double_complex, // destination pointer table
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ncopy_strided_batched copies each element x_i[j] into y_i[j], for  j = 1 , ... , n; i = 1 , ... , batch_count:\n\ny_i := x_i,\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i to be copied to y_i.\n@param[in]\nx         device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx      [rocblas_int]\nspecifies the increments for the elements of vectors x_i.\n@param[in]\nstridex     [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, the user should\ntake care to ensure that stride_x is of appropriate size. For a typical\ncase, this means stride_x >= n * incx.\n@param[out]\ny         device pointer to the first vector (y_1) in the batch.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of vectors y_i.\n@param[in]\nstridey     [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stride_y, However, ensure that stride_y is of appropriate size, for a typical\ncase this means stride_y >= n * incy. stridey should be non zero.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_scopy_strided_batched( // f32 form; the following *copy_strided_batched bindings omit the doc
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32, // source base pointer (x_1)
+        incx: rocblas_int,
+        stridex: rocblas_stride, // the doc text calls this quantity stride_x
+        y: *mut f32, // destination base pointer (y_1)
+        incy: rocblas_int,
+        stridey: rocblas_stride, // the doc text calls this quantity stride_y; doc: should be non zero
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dcopy_strided_batched( // f64 form; mirrors rocblas_scopy_strided_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64, // source base pointer
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut f64, // destination base pointer
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ccopy_strided_batched( // rocblas_float_complex form; mirrors rocblas_scopy_strided_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex, // source base pointer
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_float_complex, // destination base pointer
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zcopy_strided_batched( // rocblas_double_complex form; mirrors rocblas_scopy_strided_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex, // source base pointer
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_double_complex, // destination base pointer
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ndot(u)  performs the dot product of vectors x and y:\n\nresult = x * y;\n\ndotc  performs the dot product of the conjugate of complex vector x and complex vector y.\n\nresult = conjugate (x) * y;\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nresult\ndevice pointer or host pointer to store the dot product.\nreturn is 0.0 if n <= 0.\n"]
+    pub fn rocblas_sdot( // f32 form; the following *dot* bindings repeat this parameter list without docs
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        y: *const f32,
+        incy: rocblas_int,
+        result: *mut f32, // the dot product is written here; the return value is only the status
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddot( // f64 form; parameter list mirrors rocblas_sdot, which carries the doc text
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        y: *const f64,
+        incy: rocblas_int,
+        result: *mut f64, // output location for the dot product
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hdot( // rocblas_half form; parameter list mirrors rocblas_sdot
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_half,
+        incx: rocblas_int,
+        y: *const rocblas_half,
+        incy: rocblas_int,
+        result: *mut rocblas_half, // result has the same element type as the inputs
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_bfdot( // rocblas_bfloat16 form; parameter list mirrors rocblas_sdot
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_bfloat16,
+        incx: rocblas_int,
+        y: *const rocblas_bfloat16,
+        incy: rocblas_int,
+        result: *mut rocblas_bfloat16, // result has the same element type as the inputs
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotu( // rocblas_float_complex "dot(u)" form per the doc on rocblas_sdot: result = x * y
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotu( // rocblas_double_complex "dot(u)" form per the doc on rocblas_sdot: result = x * y
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotc( // rocblas_float_complex "dotc" form per the doc on rocblas_sdot: conjugate(x) * y
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex, // the operand that the doc says is conjugated
+        incx: rocblas_int,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotc( // rocblas_double_complex "dotc" form per the doc on rocblas_sdot: conjugate(x) * y
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex, // the operand that the doc says is conjugated
+        incx: rocblas_int,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ndot_batched(u) performs a batch of dot products of vectors x and y:\n\nresult_i = x_i * y_i;\n\ndotc_batched  performs a batch of dot products of the conjugate of complex vector x and complex vector y\n\nresult_i = conjugate (x_i) * y_i;\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[inout]\nresult\ndevice array or host array of batch_count size to store the dot products of each batch.\nreturn 0.0 for each element if n <= 0.\n"]
+    pub fn rocblas_sdot_batched( // f32 form; the following *dot*_batched bindings omit the doc
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f32, // read-only table of read-only vectors
+        incx: rocblas_int,
+        y: *const *const f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int, // comes before `result` in this signature
+        result: *mut f32, // doc: array of batch_count results, not a single scalar
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddot_batched( // f64 form; parameter list mirrors rocblas_sdot_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        y: *const *const f64,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut f64, // one result slot per batch entry, per rocblas_sdot_batched's doc
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hdot_batched( // rocblas_half form; parameter list mirrors rocblas_sdot_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_half,
+        incx: rocblas_int,
+        y: *const *const rocblas_half,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_half, // one result slot per batch entry, per rocblas_sdot_batched's doc
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_bfdot_batched( // rocblas_bfloat16 form; parameter list mirrors rocblas_sdot_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_bfloat16,
+        incx: rocblas_int,
+        y: *const *const rocblas_bfloat16,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_bfloat16, // one result slot per batch entry, per rocblas_sdot_batched's doc
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotu_batched( // rocblas_float_complex "dot_batched(u)" form: result_i = x_i * y_i
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotu_batched( // rocblas_double_complex "dot_batched(u)" form: result_i = x_i * y_i
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotc_batched( // rocblas_float_complex "dotc_batched" form: result_i = conjugate(x_i) * y_i
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex, // the operand that the doc says is conjugated
+        incx: rocblas_int,
+        y: *const *const rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotc_batched( // rocblas_double_complex "dotc_batched" form: result_i = conjugate(x_i) * y_i
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex, // the operand that the doc says is conjugated
+        incx: rocblas_int,
+        y: *const *const rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\ndot_strided_batched(u)  performs a batch of dot products of vectors x and y:\n\nresult_i = x_i * y_i;\n\ndotc_strided_batched  performs a batch of dot products of the conjugate of complex vector x and complex vector y\n\nresult_i = conjugate (x_i) * y_i;\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nx         device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex     [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\ny         device pointer to the first vector (y_1) in the batch.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstridey     [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[inout]\nresult\ndevice array or host array of batch_count size to store the dot products of each batch.\nreturn 0.0 for each element if n <= 0.\n"]
+    pub fn rocblas_sdot_strided_batched( // f32 form; the following *dot*_strided_batched bindings omit the doc
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32, // base pointer to x_1; later vectors are located via stridex
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const f32, // base pointer to y_1; later vectors are located via stridey
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int, // comes before `result` in this signature
+        result: *mut f32, // doc: array of batch_count results
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddot_strided_batched( // f64 form; mirrors rocblas_sdot_strided_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut f64, // one result slot per batch entry, per rocblas_sdot_strided_batched's doc
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hdot_strided_batched( // rocblas_half form; mirrors rocblas_sdot_strided_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_half,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_half,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_half, // one result slot per batch entry, per rocblas_sdot_strided_batched's doc
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_bfdot_strided_batched( // rocblas_bfloat16 form; mirrors rocblas_sdot_strided_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_bfloat16,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_bfloat16,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_bfloat16, // one result slot per batch entry, per rocblas_sdot_strided_batched's doc
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotu_strided_batched( // rocblas_float_complex "dot_strided_batched(u)" form: result_i = x_i * y_i
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotu_strided_batched( // rocblas_double_complex "dot_strided_batched(u)" form: result_i = x_i * y_i
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotc_strided_batched( // rocblas_float_complex "dotc_strided_batched" form: conjugate(x_i) * y_i
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex, // the operand that the doc says is conjugated
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotc_strided_batched( // rocblas_double_complex "dotc_strided_batched" form: conjugate(x_i) * y_i
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex, // the operand that the doc says is conjugated
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nswap  interchanges vectors x and y:\n\ny := x;\nx := y\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[inout]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_sswap( // f32 form; the following *swap bindings repeat this parameter list without docs
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f32, // *mut: the doc marks x @param[inout]
+        incx: rocblas_int,
+        y: *mut f32, // *mut: the doc marks y @param[inout]
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dswap( // f64 form; parameter list mirrors rocblas_sswap, which carries the doc text
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f64, // both vectors are mutable pointers
+        incx: rocblas_int,
+        y: *mut f64,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cswap( // rocblas_float_complex form; parameter list mirrors rocblas_sswap
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_float_complex, // both vectors are mutable pointers
+        incx: rocblas_int,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zswap( // rocblas_double_complex form; parameter list mirrors rocblas_sswap
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_double_complex, // both vectors are mutable pointers
+        incx: rocblas_int,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nswap_batched interchanges vectors x_i and y_i, for i = 1 , ... , batch_count:\n\ny_i := x_i;\nx_i := y_i\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[inout]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[inout]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_sswap_batched( // f32 form; the following *swap_batched bindings omit the doc
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut f32, // const pointer table; the vectors it points to are mutable
+        incx: rocblas_int,
+        y: *const *mut f32, // same shape as x
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dswap_batched( // f64 form; parameter list mirrors rocblas_sswap_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut f64, // const pointer table to mutable vectors
+        incx: rocblas_int,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cswap_batched( // rocblas_float_complex form; mirrors rocblas_sswap_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut rocblas_float_complex, // const pointer table to mutable vectors
+        incx: rocblas_int,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zswap_batched( // rocblas_double_complex form; mirrors rocblas_sswap_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut rocblas_double_complex, // const pointer table to mutable vectors
+        incx: rocblas_int,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nswap_strided_batched interchanges vectors x_i and y_i, for i = 1 , ... , batch_count:\n\ny_i := x_i;\nx_i := y_i\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[inout]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical\ncase this means stride_x >= n * incx.\n@param[inout]\ny         device pointer to the first vector y_1.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstridey   [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stride_y. However, ensure that stride_y is of appropriate size. For a typical\ncase this means stride_y >= n * incy. stridey should be non zero.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_sswap_strided_batched( // f32 form; the following *swap_strided_batched bindings omit the doc
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f32, // base pointer to x_1; *mut because the doc marks it @param[inout]
+        incx: rocblas_int,
+        stridex: rocblas_stride, // the doc text calls this quantity stride_x
+        y: *mut f32, // base pointer to y_1; *mut because the doc marks it @param[inout]
+        incy: rocblas_int,
+        stridey: rocblas_stride, // the doc text calls this quantity stride_y
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dswap_strided_batched( // f64 form; mirrors rocblas_sswap_strided_batched
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f64, // base pointer; both vectors are mutable
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_sswap_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_cswap_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_sswap_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_zswap_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\naxpy   computes constant alpha multiplied by vector x, plus vector y:\n\ny := alpha * x + y\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in]\nalpha     device pointer or host pointer to specify the scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_saxpy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        y: *mut f32,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_saxpy (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_daxpy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        y: *mut f64,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_half counterpart of rocblas_saxpy (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_haxpy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_half,
+        x: *const rocblas_half,
+        incx: rocblas_int,
+        y: *mut rocblas_half,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_saxpy (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_caxpy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_saxpy (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_zaxpy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\naxpy_batched compute y := alpha * x + y over a set of batched vectors.\n\n@param[in]\nhandle    rocblas_handle\nhandle to the rocblas library context queue.\n@param[in]\nn         rocblas_int\n@param[in]\nalpha     specifies the scalar alpha.\n@param[in]\nx         pointer storing vector x on the GPU.\n@param[in]\nincx      rocblas_int\nspecifies the increment for the elements of x.\n@param[inout]\ny         pointer storing vector y on the GPU.\n@param[in]\nincy      rocblas_int\nspecifies the increment for the elements of y.\n\n@param[in]\nbatch_count rocblas_int\nnumber of instances in the batch.\n"]
+    pub fn rocblas_haxpy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_half,
+        x: *const *const rocblas_half,
+        incx: rocblas_int,
+        y: *const *mut rocblas_half,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 counterpart of rocblas_haxpy_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_saxpy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: rocblas_int,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_haxpy_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_daxpy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: rocblas_int,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_haxpy_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_caxpy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_haxpy_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_zaxpy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\naxpy_strided_batched compute y := alpha * x + y over a set of strided batched vectors.\n\n@param[in]\nhandle    rocblas_handle\nhandle to the rocblas library context queue.\n@param[in]\nn         rocblas_int.\n@param[in]\nalpha     specifies the scalar alpha.\n@param[in]\nx         pointer storing vector x on the GPU.\n@param[in]\nincx      rocblas_int\nspecifies the increment for the elements of x.\n@param[in]\nstridex   rocblas_stride\nspecifies the increment between vectors of x.\n@param[inout]\ny         pointer storing vector y on the GPU.\n@param[in]\nincy      rocblas_int\nspecifies the increment for the elements of y.\n@param[in]\nstridey   rocblas_stride\nspecifies the increment between vectors of y.\n\n@param[in]\nbatch_count rocblas_int\nnumber of instances in the batch.\n"]
+    pub fn rocblas_haxpy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_half,
+        x: *const rocblas_half,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_half,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 counterpart of rocblas_haxpy_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_saxpy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_haxpy_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_daxpy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_haxpy_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_caxpy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_haxpy_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_zaxpy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nasum computes the sum of the magnitudes of elements of a real vector x,\nor the sum of magnitudes of the real and imaginary parts of elements if x is a complex vector.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x. incx must be > 0.\n@param[inout]\nresult\ndevice pointer or host pointer to store the asum product.\nreturn is 0.0 if n <= 0.\n"]
+    pub fn rocblas_sasum(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_sasum (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_dasum(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" { // Complex-input counterpart of rocblas_sasum: x is rocblas_float_complex while result stays f32; see the doc attribute on rocblas_sasum.
+    #[must_use]
+    pub fn rocblas_scasum(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" { // Complex-input counterpart of rocblas_sasum: x is rocblas_double_complex while result is f64; see the doc attribute on rocblas_sasum.
+    #[must_use]
+    pub fn rocblas_dzasum(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 entry of the asum_batched group; the d/sc/dz bindings that follow share this parameter list and carry no doc of their own.
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nasum_batched computes the sum of the magnitudes of the elements in a batch of real vectors x_i,\nor the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex\nvector, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[out]\nresults\ndevice array or host array of batch_count size for results.\nreturn is 0.0 if n, incx<=0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_sasum_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int, // NOTE: precedes `results` in the signature, although the doc above lists `results` first
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_sasum_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_dasum_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" { // Complex-input counterpart of rocblas_sasum_batched: x holds rocblas_float_complex vectors, results are f32; see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_scasum_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" { // Complex-input counterpart of rocblas_sasum_batched: x holds rocblas_double_complex vectors, results are f64; see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_dzasum_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 entry of the asum_strided_batched group; the d/sc/dz bindings that follow share this parameter list and carry no doc of their own.
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nasum_strided_batched computes the sum of the magnitudes of elements of a real vectors x_i,\nor the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex\nvector, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical\ncase this means stride_x >= n * incx.\n@param[out]\nresults\ndevice pointer or host pointer to array for storing contiguous batch_count results.\nreturn is 0.0 if n, incx<=0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_sasum_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int, // NOTE: precedes `results` in the signature, although the doc above lists `results` first
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_sasum_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_dasum_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" { // Complex-input counterpart of rocblas_sasum_strided_batched: x is rocblas_float_complex, results are f32; see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_scasum_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" { // Complex-input counterpart of rocblas_sasum_strided_batched: x is rocblas_double_complex, results are f64; see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_dzasum_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nnrm2 computes the euclidean norm of a real or complex vector:\n\nresult := sqrt( x'*x ) for real vectors\nresult := sqrt( x**H*x ) for complex vectors\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\nresult\ndevice pointer or host pointer to store the nrm2 product.\nreturn is 0.0 if n, incx<=0."]
+    pub fn rocblas_snrm2(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_snrm2 (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_dnrm2(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" { // Complex-input counterpart of rocblas_snrm2: x is rocblas_float_complex while result stays f32; see the doc attribute on rocblas_snrm2.
+    #[must_use]
+    pub fn rocblas_scnrm2(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" { // Complex-input counterpart of rocblas_snrm2: x is rocblas_double_complex while result is f64; see the doc attribute on rocblas_snrm2.
+    #[must_use]
+    pub fn rocblas_dznrm2(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 entry of the nrm2_batched group; the d/sc/dz bindings that follow share this parameter list and carry no doc of their own.
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nnrm2_batched computes the euclidean norm over a batch of real or complex vectors:\n\nresult := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count\nresult := sqrt( x_i**H*x_i ) for complex vectors x, for i = 1, ..., batch_count\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresults\ndevice pointer or host pointer to array of batch_count size for nrm2 results.\nreturn is 0.0 for each element if n <= 0, incx<=0.\n"]
+    pub fn rocblas_snrm2_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_snrm2_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_dnrm2_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" { // Complex-input counterpart of rocblas_snrm2_batched: x holds rocblas_float_complex vectors, results are f32; see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_scnrm2_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" { // Complex-input counterpart of rocblas_snrm2_batched: x holds rocblas_double_complex vectors, results are f64; see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_dznrm2_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 entry of the nrm2_strided_batched group; the d/sc/dz bindings that follow share this parameter list and carry no doc of their own.
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nnrm2_strided_batched computes the euclidean norm over a batch of real or complex vectors:\n\nresult := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count\nresult := sqrt( x_i**H*x_i ) for complex vectors, for i = 1, ..., batch_count\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each x_i.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical\ncase this means stride_x >= n * incx.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresults\ndevice pointer or host pointer to array for storing contiguous batch_count results.\nreturn is 0.0 for each element if n <= 0, incx<=0.\n"]
+    pub fn rocblas_snrm2_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_snrm2_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_dnrm2_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" { // Complex-input counterpart of rocblas_snrm2_strided_batched: x is rocblas_float_complex, results are f32; see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_scnrm2_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" { // Complex-input counterpart of rocblas_snrm2_strided_batched: x is rocblas_double_complex, results are f64; see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_dznrm2_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namax finds the first index of the element of maximum magnitude of a vector x.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\nresult\ndevice pointer or host pointer to store the amax index.\nreturn is 0 if n, incx<=0."]
+    pub fn rocblas_isamax(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_isamax (same parameter list, result is still a rocblas_int index); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_idamax(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_isamax (same parameter list, result is still a rocblas_int index); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_icamax(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_isamax (same parameter list, result is still a rocblas_int index); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_izamax(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namax_batched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch. Must be > 0.\n@param[out]\nresult\ndevice or host pointer to array of batch_count size for results.\nreturn is 0 if n, incx<=0."]
+    pub fn rocblas_isamax_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_isamax_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_idamax_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_isamax_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_icamax_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_isamax_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_izamax_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 entry of the amax_strided_batched group; the d/c/z bindings that follow share this parameter list and carry no doc of their own.
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namax_strided_batched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nstridex   [rocblas_stride]\nspecifies the pointer increment between one x_i and the next x_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresult\ndevice or host pointer for storing contiguous batch_count results.\nreturn is 0 if n <= 0, incx<=0.\n"]
+    pub fn rocblas_isamax_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_isamax_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_idamax_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_isamax_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_icamax_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_isamax_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_izamax_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namin finds the first index of the element of minimum magnitude of a vector x.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\nresult\ndevice pointer or host pointer to store the amin index.\nreturn is 0 if n, incx<=0."]
+    pub fn rocblas_isamin(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_isamin (same parameter list, result is still a rocblas_int index); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_idamin(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_isamin (same parameter list, result is still a rocblas_int index); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_icamin(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_isamin (same parameter list, result is still a rocblas_int index); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_izamin(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 entry of the amin_batched group; the d/c/z bindings that follow share this parameter list and carry no doc of their own.
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namin_batched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch. Must be > 0.\n@param[out]\nresult\ndevice or host pointers to array of batch_count size for results.\nreturn is 0 if n, incx<=0."]
+    pub fn rocblas_isamin_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_isamin_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_idamin_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_isamin_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_icamin_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_isamin_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_izamin_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 entry of the amin_strided_batched group; the d/c/z bindings that follow share this parameter list and carry no doc of their own.
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\namin_strided_batched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each vector x_i.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nstridex   [rocblas_stride]\nspecifies the pointer increment between one x_i and the next x_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresult\ndevice or host pointer to array for storing contiguous batch_count results.\nreturn is 0 if n <= 0, incx<=0.\n"]
+    pub fn rocblas_isamin_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_isamin_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_idamin_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_isamin_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_icamin_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_isamin_strided_batched (same parameter list); see the doc attribute on that binding.
+    #[must_use]
+    pub fn rocblas_izamin_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 entry of the rot group; the d/c/cs/z/zd bindings that follow keep this parameter list and differ only in the types of x, y, c and s.
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrot applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[inout]\nx       device pointer storing vector x.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of x.\n@param[inout]\ny       device pointer storing vector y.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of y.\n@param[in]\nc       device pointer or host pointer storing scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer storing scalar sine component of the rotation matrix.\n"]
+    pub fn rocblas_srot(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+        y: *mut f32,
+        incy: rocblas_int,
+        c: *const f32,
+        s: *const f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drot( // same parameter list as rocblas_srot, with f64 vectors and scalars
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+        y: *mut f64,
+        incy: rocblas_int,
+        c: *const f64,
+        s: *const f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crot( // same parameter list as rocblas_srot, with complex-f32 vectors
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        c: *const f32, // c is a real f32 here
+        s: *const rocblas_float_complex, // s is complex (contrast with rocblas_csrot)
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csrot( // complex-f32 vectors, but both c and s are real f32
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        c: *const f32,
+        s: *const f32, // real f32, unlike the complex s of rocblas_crot
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrot( // same parameter list as rocblas_srot, with complex-f64 vectors
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        c: *const f64, // c is a real f64 here
+        s: *const rocblas_double_complex, // s is complex (contrast with rocblas_zdrot)
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdrot( // complex-f64 vectors, but both c and s are real f64
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        c: *const f64,
+        s: *const f64, // real f64, unlike the complex s of rocblas_zrot
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrot_batched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batch_count.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in each x_i and y_i vectors.\n@param[inout]\nx       device array of device pointers storing each vector x_i.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[inout]\ny       device array of device pointers storing each vector y_i.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nc       device pointer or host pointer to scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer to scalar sine component of the rotation matrix.\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, i.e. the number of batches.\n"]
+    pub fn rocblas_srot_batched( // f32 variant: x, y, c and s are all f32
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut f32, // const array of pointers to mutable vectors
+        incx: rocblas_int,
+        y: *const *mut f32, // const array of pointers to mutable vectors
+        incy: rocblas_int,
+        c: *const f32, // single pointer, not a per-batch pointer array
+        s: *const f32, // single pointer, not a per-batch pointer array
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drot_batched( // same parameter list as rocblas_srot_batched, with f64 data
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut f64, // const array of pointers to mutable vectors
+        incx: rocblas_int,
+        y: *const *mut f64, // const array of pointers to mutable vectors
+        incy: rocblas_int,
+        c: *const f64,
+        s: *const f64,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crot_batched( // same parameter list as rocblas_srot_batched, complex-f32 vectors
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        c: *const f32, // c is a real f32 here
+        s: *const rocblas_float_complex, // s is complex (contrast with rocblas_csrot_batched)
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csrot_batched( // complex-f32 vectors, but both c and s are real f32
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        c: *const f32,
+        s: *const f32, // real f32, unlike the complex s of rocblas_crot_batched
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrot_batched( // same parameter list as rocblas_srot_batched, complex-f64 vectors
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        c: *const f64, // c is a real f64 here
+        s: *const rocblas_double_complex, // s is complex (contrast with rocblas_zdrot_batched)
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdrot_batched( // complex-f64 vectors, but both c and s are real f64
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        c: *const f64,
+        s: *const f64, // real f64, unlike the complex s of rocblas_zrot_batched
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrot_strided_batched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batch_count.\nScalars c and s may be stored in either host or device memory, location is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in each x_i and y_i vectors.\n@param[inout]\nx       device pointer to the first vector x_1.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[in]\nstride_x [rocblas_stride]\nspecifies the increment from the beginning of x_i to the beginning of x_(i+1).\n@param[inout]\ny       device pointer to the first vector y_1.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nstride_y [rocblas_stride]\nspecifies the increment from the beginning of y_i to the beginning of y_(i+1)\n@param[in]\nc       device pointer or host pointer to scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer to scalar sine component of the rotation matrix.\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, i.e. the number of batches.\n"]
+    pub fn rocblas_srot_strided_batched( // f32 variant: x, y, c and s are all f32
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f32, // single base pointer; batches are reached via stride_x
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut f32, // single base pointer; batches are reached via stride_y
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const f32, // no stride parameter accompanies c or s
+        s: *const f32,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drot_strided_batched( // same parameters as rocblas_srot_strided_batched, f64 data
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut f64,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const f64, // no stride parameter accompanies c or s
+        s: *const f64,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crot_strided_batched( // complex-f32 vectors; real c, complex s
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const f32, // c is a real f32 here
+        s: *const rocblas_float_complex, // s is complex (contrast with the csrot variant)
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csrot_strided_batched( // complex-f32 vectors, but both c and s are real f32
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const f32,
+        s: *const f32, // real f32, unlike the complex s of the crot variant
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrot_strided_batched( // complex-f64 vectors; real c, complex s
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const f64, // c is a real f64 here
+        s: *const rocblas_double_complex, // s is complex (contrast with the zdrot variant)
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdrot_strided_batched( // complex-f64 vectors, but both c and s are real f64
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const f64,
+        s: *const f64, // real f64, unlike the complex s of the zrot variant
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotg creates the Givens rotation matrix for the vector (a b).\nScalars c and s and arrays a and b may be stored in either host or device memory, location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[inout]\na       device pointer or host pointer to input vector element, overwritten with r.\n@param[inout]\nb       device pointer or host pointer to input vector element, overwritten with z.\n@param[inout]\nc       device pointer or host pointer to cosine element of Givens rotation.\n@param[inout]\ns       device pointer or host pointer sine element of Givens rotation.\n"]
+    pub fn rocblas_srotg( // f32 variant; all four data pointers are mutable ([inout] above)
+        handle: rocblas_handle,
+        a: *mut f32,
+        b: *mut f32,
+        c: *mut f32,
+        s: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotg( // same parameter list as rocblas_srotg, with f64 scalars
+        handle: rocblas_handle,
+        a: *mut f64,
+        b: *mut f64,
+        c: *mut f64,
+        s: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crotg( // same parameter list as rocblas_srotg, complex-f32 a, b and s
+        handle: rocblas_handle,
+        a: *mut rocblas_float_complex,
+        b: *mut rocblas_float_complex,
+        c: *mut f32, // c is the only real-valued (f32) argument
+        s: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrotg( // same parameter list as rocblas_srotg, complex-f64 a, b and s
+        handle: rocblas_handle,
+        a: *mut rocblas_double_complex,
+        b: *mut rocblas_double_complex,
+        c: *mut f64, // c is the only real-valued (f64) argument
+        s: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotg_batched creates the Givens rotation matrix for the batched vectors (a_i b_i), for i = 1, ..., batch_count.\na, b, c, and s may be stored in either host or device memory, location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[inout]\na       device array of device pointers storing each single input vector element a_i, overwritten with r_i.\n@param[inout]\nb       device array of device pointers storing each single input vector element b_i, overwritten with z_i.\n@param[inout]\nc       device array of device pointers storing each cosine element of Givens rotation for the batch.\n@param[inout]\ns       device array of device pointers storing each sine element of Givens rotation for the batch.\n@param[in]\nbatch_count [rocblas_int]\nnumber of batches (length of arrays a, b, c, and s).\n"]
+    pub fn rocblas_srotg_batched( // f32 variant; a, b, c, s are const arrays of mutable pointers
+        handle: rocblas_handle,
+        a: *const *mut f32,
+        b: *const *mut f32,
+        c: *const *mut f32,
+        s: *const *mut f32,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotg_batched( // same parameter list as rocblas_srotg_batched, with f64 elements
+        handle: rocblas_handle,
+        a: *const *mut f64,
+        b: *const *mut f64,
+        c: *const *mut f64,
+        s: *const *mut f64,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crotg_batched( // same shape as rocblas_srotg_batched, complex-f32 a, b and s
+        handle: rocblas_handle,
+        a: *const *mut rocblas_float_complex,
+        b: *const *mut rocblas_float_complex,
+        c: *const *mut f32, // c is the only real-valued (f32) argument
+        s: *const *mut rocblas_float_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrotg_batched( // same shape as rocblas_srotg_batched, complex-f64 a, b and s
+        handle: rocblas_handle,
+        a: *const *mut rocblas_double_complex,
+        b: *const *mut rocblas_double_complex,
+        c: *const *mut f64, // c is the only real-valued (f64) argument
+        s: *const *mut rocblas_double_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotg_strided_batched creates the Givens rotation matrix for the strided batched vectors (a_i b_i), for i = 1, ..., batch_count.\na, b, c, and s may be stored in either host or device memory, location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[inout]\na       device strided_batched pointer or host strided_batched pointer to first single input vector element a_1, overwritten with r.\n@param[in]\nstride_a [rocblas_stride]\ndistance between elements of a in batch (distance between a_i and a_(i + 1)).\n@param[inout]\nb       device strided_batched pointer or host strided_batched pointer to first single input vector element b_1, overwritten with z.\n@param[in]\nstride_b [rocblas_stride]\ndistance between elements of b in batch (distance between b_i and b_(i + 1)).\n@param[inout]\nc       device strided_batched pointer or host strided_batched pointer to first cosine element of Givens rotations c_1.\n@param[in]\nstride_c [rocblas_stride]\ndistance between elements of c in batch (distance between c_i and c_(i + 1)).\n@param[inout]\ns       device strided_batched pointer or host strided_batched pointer to sine element of Givens rotations s_1.\n@param[in]\nstride_s [rocblas_stride]\ndistance between elements of s in batch (distance between s_i and s_(i + 1)).\n@param[in]\nbatch_count [rocblas_int]\nnumber of batches (length of arrays a, b, c, and s).\n"]
+    pub fn rocblas_srotg_strided_batched( // f32 variant; each data pointer has its own stride
+        handle: rocblas_handle,
+        a: *mut f32,
+        stride_a: rocblas_stride,
+        b: *mut f32,
+        stride_b: rocblas_stride,
+        c: *mut f32,
+        stride_c: rocblas_stride,
+        s: *mut f32,
+        stride_s: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotg_strided_batched( // same parameters as rocblas_srotg_strided_batched, f64 data
+        handle: rocblas_handle,
+        a: *mut f64,
+        stride_a: rocblas_stride,
+        b: *mut f64,
+        stride_b: rocblas_stride,
+        c: *mut f64,
+        stride_c: rocblas_stride,
+        s: *mut f64,
+        stride_s: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crotg_strided_batched( // complex-f32 a, b and s; each pointer has its own stride
+        handle: rocblas_handle,
+        a: *mut rocblas_float_complex,
+        stride_a: rocblas_stride,
+        b: *mut rocblas_float_complex,
+        stride_b: rocblas_stride,
+        c: *mut f32, // c is the only real-valued (f32) argument
+        stride_c: rocblas_stride,
+        s: *mut rocblas_float_complex,
+        stride_s: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrotg_strided_batched( // complex-f64 a, b and s; each pointer has its own stride
+        handle: rocblas_handle,
+        a: *mut rocblas_double_complex,
+        stride_a: rocblas_stride,
+        b: *mut rocblas_double_complex,
+        stride_b: rocblas_stride,
+        c: *mut f64, // c is the only real-valued (f64) argument
+        stride_c: rocblas_stride,
+        s: *mut rocblas_double_complex,
+        stride_s: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotm applies the modified Givens rotation matrix defined by param to vectors x and y.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[inout]\nx       device pointer storing vector x.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of x.\n@param[inout]\ny       device pointer storing vector y.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of y.\n@param[in]\nparam   device vector or host vector of 5 elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may be stored in either host or device memory,\nlocation is specified by calling rocblas_set_pointer_mode.\n"]
+    pub fn rocblas_srotm( // f32 variant; only real variants (s/d) are declared for rotm here
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+        y: *mut f32,
+        incy: rocblas_int,
+        param: *const f32, // 5 elements per the doc above: flag, H11, H21, H12, H22
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotm( // same parameter list as rocblas_srotm, with f64 data
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+        y: *mut f64,
+        incy: rocblas_int,
+        param: *const f64, // read-only; see the param layout in the rocblas_srotm doc
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotm_batched applies the modified Givens rotation matrix defined by param_i to batched vectors x_i and y_i, for i = 1, ..., batch_count.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[inout]\nx       device array of device pointers storing each vector x_i.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[inout]\ny       device array of device pointers storing each vector y_i.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nparam   device array of device vectors of 5 elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may ONLY be stored on the device for the batched version of this function.\n\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, i.e. the number of batches.\n"]
+    pub fn rocblas_srotm_batched( // f32 variant of the batched modified-Givens application
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut f32, // const array of pointers to mutable vectors
+        incx: rocblas_int,
+        y: *const *mut f32, // const array of pointers to mutable vectors
+        incy: rocblas_int,
+        param: *const *const f32, // per-batch pointer array, read-only at both levels
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotm_batched( // same parameter list as rocblas_srotm_batched, with f64 data
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut f64,
+        incx: rocblas_int,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        param: *const *const f64, // per-batch pointer array, read-only at both levels
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotm_strided_batched applies the modified Givens rotation matrix defined by param_i to strided batched vectors x_i and y_i, for i = 1, ..., batch_count\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[inout]\nx       device pointer pointing to first strided batched vector x_1.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[in]\nstride_x [rocblas_stride]\nspecifies the increment between the beginning of x_i and x_(i + 1)\n@param[inout]\ny       device pointer pointing to first strided batched vector y_1.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nstride_y [rocblas_stride]\nspecifies the increment between the beginning of y_i and y_(i + 1).\n@param[in]\nparam   device pointer pointing to first array of 5 elements defining the rotation (param_1).\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may ONLY be stored on the device for the strided_batched\nversion of this function.\n\n@param[in]\nstride_param [rocblas_stride]\nspecifies the increment between the beginning of param_i and param_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, i.e. the number of batches.\n"]
+    pub fn rocblas_srotm_strided_batched( // f32 variant; x, y and param each carry a stride
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut f32,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        param: *const f32, // read-only; per-batch blocks are stride_param apart
+        stride_param: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotm_strided_batched( // same parameters as rocblas_srotm_strided_batched, f64 data
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut f64,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        param: *const f64, // read-only, paired with stride_param
+        stride_param: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotmg creates the modified Givens rotation matrix for the vector (d1 * x1, d2 * y1).\nParameters may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[inout]\nd1      device pointer or host pointer to input scalar that is overwritten.\n@param[inout]\nd2      device pointer or host pointer to input scalar that is overwritten.\n@param[inout]\nx1      device pointer or host pointer to input scalar that is overwritten.\n@param[in]\ny1      device pointer or host pointer to input scalar.\n@param[out]\nparam   device vector or host vector of five elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may be stored in either host or device memory.\nLocation is specified by calling rocblas_set_pointer_mode.\n"]
+    pub fn rocblas_srotmg( // f32 variant; only real variants (s/d) are declared for rotmg here
+        handle: rocblas_handle,
+        d1: *mut f32,
+        d2: *mut f32,
+        x1: *mut f32,
+        y1: *const f32, // the only read-only data pointer ([in] above)
+        param: *mut f32, // output: [out] in the doc above
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotmg( // same parameter list as rocblas_srotmg, with f64 scalars
+        handle: rocblas_handle,
+        d1: *mut f64,
+        d2: *mut f64,
+        x1: *mut f64,
+        y1: *const f64, // the only read-only data pointer
+        param: *mut f64,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotmg_batched creates the modified Givens rotation matrix for the batched vectors (d1_i * x1_i, d2_i * y1_i), for i = 1, ..., batch_count.\nParameters may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[inout]\nd1      device batched array or host batched array of input scalars that is overwritten.\n@param[inout]\nd2      device batched array or host batched array of input scalars that is overwritten.\n@param[inout]\nx1      device batched array or host batched array of input scalars that is overwritten.\n@param[in]\ny1      device batched array or host batched array of input scalars.\n@param[out]\nparam   device batched array or host batched array of vectors of 5 elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\n\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may be stored in either host or device memory.\nLocation is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nbatch_count [rocblas_int]\nthe number of instances in the batch.\n"]
+    pub fn rocblas_srotmg_batched( // f32 variant; every data argument is a per-batch pointer array
+        handle: rocblas_handle,
+        d1: *const *mut f32,
+        d2: *const *mut f32,
+        x1: *const *mut f32,
+        y1: *const *const f32, // the only argument that is read-only at both levels
+        param: *const *mut f32, // output: [out] in the doc above
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotmg_batched( // same parameter list as rocblas_srotmg_batched, with f64 data
+        handle: rocblas_handle,
+        d1: *const *mut f64,
+        d2: *const *mut f64,
+        x1: *const *mut f64,
+        y1: *const *const f64, // the only argument that is read-only at both levels
+        param: *const *mut f64,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 1 API </b>\n\n\\details\nrotmg_strided_batched creates the modified Givens rotation matrix for the strided batched vectors (d1_i * x1_i, d2_i * y1_i), for i = 1, ..., batch_count.\nParameters may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode:\n\n- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.\n- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[inout]\nd1      device strided_batched array or host strided_batched array of input scalars that is overwritten.\n@param[in]\nstride_d1 [rocblas_stride]\nspecifies the increment between the beginning of d1_i and d1_(i+1).\n@param[inout]\nd2      device strided_batched array or host strided_batched array of input scalars that is overwritten.\n@param[in]\nstride_d2 [rocblas_stride]\nspecifies the increment between the beginning of d2_i and d2_(i+1).\n@param[inout]\nx1      device strided_batched array or host strided_batched array of input scalars that is overwritten.\n@param[in]\nstride_x1 [rocblas_stride]\nspecifies the increment between the beginning of x1_i and x1_(i+1).\n@param[in]\ny1      device strided_batched array or host strided_batched array of input scalars.\n@param[in]\nstride_y1 [rocblas_stride]\nspecifies the increment between the beginning of y1_i and y1_(i+1).\n@param[out]\nparam   device strided_batched array or host strided_batched array of vectors of 5 elements defining the rotation.\n\nparam[0] = flag\nparam[1] = H11\nparam[2] = H21\nparam[3] = H12\nparam[4] = H22\nThe flag parameter defines the form of H:\n\nflag = -1 => H = ( H11 H12 H21 H22 )\nflag =  0 => H = ( 1.0 H12 H21 1.0 )\nflag =  1 => H = ( H11 1.0 -1.0 H22 )\nflag = -2 => H = ( 1.0 0.0 0.0 1.0 )\n\nparam may be stored in either host or device memory.\nLocation is specified by calling rocblas_set_pointer_mode.\n\n@param[in]\nstride_param [rocblas_stride]\nspecifies the increment between the beginning of param_i and param_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nthe number of instances in the batch.\n"]
+    pub fn rocblas_srotmg_strided_batched( // f32 variant; every data pointer is paired with a stride
+        handle: rocblas_handle,
+        d1: *mut f32,
+        stride_d1: rocblas_stride,
+        d2: *mut f32,
+        stride_d2: rocblas_stride,
+        x1: *mut f32,
+        stride_x1: rocblas_stride,
+        y1: *const f32, // the only read-only data pointer ([in] above)
+        stride_y1: rocblas_stride,
+        param: *mut f32, // output: [out] in the doc above
+        stride_param: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotmg_strided_batched( // same parameters as rocblas_srotmg_strided_batched, f64
+        handle: rocblas_handle,
+        d1: *mut f64,
+        stride_d1: rocblas_stride,
+        d2: *mut f64,
+        stride_d2: rocblas_stride,
+        x1: *mut f64,
+        stride_x1: rocblas_stride,
+        y1: *const f64, // the only read-only data pointer
+        stride_y1: rocblas_stride,
+        param: *mut f64,
+        stride_param: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngbmv performs one of the matrix-vector operations:\n\ny := alpha*A*x    + beta*y,   or\ny := alpha*A**T*x + beta*y,   or\ny := alpha*A**H*x + beta*y,\nwhere alpha and beta are scalars, x and y are vectors and A is an\nm by n banded matrix with kl sub-diagonals and ku super-diagonals.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is transposed (conjugated) or not.\n@param[in]\nm         [rocblas_int]\nnumber of rows of matrix A.\n@param[in]\nn         [rocblas_int]\nnumber of columns of matrix A.\n@param[in]\nkl        [rocblas_int]\nnumber of sub-diagonals of A.\n@param[in]\nku        [rocblas_int]\nnumber of super-diagonals of A.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA     device pointer storing banded matrix A.\nLeading (kl + ku + 1) by n part of the matrix contains the coefficients\nof the banded matrix. The leading diagonal resides in row (ku + 1) with\nthe first super-diagonal above on the RHS of row ku. The first sub-diagonal\nresides below on the LHS of row ku + 2. This propagates up and down across\nsub/super-diagonals.\n\nEx: (m = n = 7; ku = 2, kl = 2)\n1 2 3 0 0 0 0             0 0 3 3 3 3 3\n4 1 2 3 0 0 0             0 2 2 2 2 2 2\n5 4 1 2 3 0 0    ---->    1 1 1 1 1 1 1\n0 5 4 1 2 3 0             4 4 4 4 4 4 0\n0 0 5 4 1 2 0             5 5 5 5 5 0 0\n0 0 0 5 4 1 2             0 0 0 0 0 0 0\n0 0 0 0 5 4 1             0 0 0 0 0 0 0\n\nNote that the empty elements which do not correspond to data will not\nbe referenced.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. Must be >= (kl + ku + 1).\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_sgbmv( // f32 variant: alpha, A, x, beta and y are all f32
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *mut f32, // the only mutable pointer ([inout] above)
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_sgbmv: same parameter order, with f64 scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_dgbmv( // no doc attribute of its own; presumably described by the gbmv text on rocblas_sgbmv — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *mut f64, // the only `*mut` parameter
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_sgbmv: same parameter order, complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_cgbmv( // no doc attribute of its own; presumably described by the gbmv text on rocblas_sgbmv — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex, // the only `*mut` parameter
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_sgbmv: same parameter order, complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_zgbmv( // no doc attribute of its own; presumably described by the gbmv text on rocblas_sgbmv — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex, // the only `*mut` parameter
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocBLAS gbmv_batched, f32 variant: one banded matrix-vector product per batch instance (details in the doc attribute below).
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngbmv_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i    + beta*y_i,   or\ny_i := alpha*A_i**T*x_i + beta*y_i,   or\ny_i := alpha*A_i**H*x_i + beta*y_i,\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nm by n banded matrix with kl sub-diagonals and ku super-diagonals,\nfor i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\nm         [rocblas_int]\nnumber of rows of each matrix A_i.\n@param[in]\nn         [rocblas_int]\nnumber of columns of each matrix A_i.\n@param[in]\nkl        [rocblas_int]\nnumber of sub-diagonals of each A_i.\n@param[in]\nku        [rocblas_int]\nnumber of super-diagonals of each A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA     device array of device pointers storing each banded matrix A_i.\nLeading (kl + ku + 1) by n part of the matrix contains the coefficients\nof the banded matrix. The leading diagonal resides in row (ku + 1) with\nthe first super-diagonal above on the RHS of row ku. The first sub-diagonal\nresides below on the LHS of row ku + 2. This propagates up and down across\nsub/super-diagonals.\n\nEx: (m = n = 7; ku = 2, kl = 2)\n1 2 3 0 0 0 0             0 0 3 3 3 3 3\n4 1 2 3 0 0 0             0 2 2 2 2 2 2\n5 4 1 2 3 0 0    ---->    1 1 1 1 1 1 1\n0 5 4 1 2 3 0             4 4 4 4 4 4 0\n0 0 5 4 1 2 0             5 5 5 5 5 0 0\n0 0 0 5 4 1 2             0 0 0 0 0 0 0\n0 0 0 0 5 4 1             0 0 0 0 0 0 0\n\nNote that the empty elements which do not correspond to data will not\nbe referenced.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. 
Must be >= (kl + ku + 1)\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of instances in the batch.\n"]
+    pub fn rocblas_sgbmv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const f32, // host or device pointer, per the doc text
+        A: *const *const f32, // array of per-instance matrix pointers
+        lda: rocblas_int,
+        x: *const *const f32, // array of per-instance input-vector pointers
+        incx: rocblas_int,
+        beta: *const f32, // host or device pointer, per the doc text
+        y: *const *mut f32, // read-only array of pointers to the mutable per-instance vectors
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_sgbmv_batched: same parameter order, with f64 scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_dgbmv_batched( // no doc attribute of its own; presumably described by the text on rocblas_sgbmv_batched — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64, // array of per-instance matrix pointers
+        lda: rocblas_int,
+        x: *const *const f64, // array of per-instance input-vector pointers
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *const *mut f64, // read-only array of pointers to the mutable per-instance vectors
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_sgbmv_batched: same parameter order, complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_cgbmv_batched( // no doc attribute of its own; presumably described by the text on rocblas_sgbmv_batched — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex, // array of per-instance matrix pointers
+        lda: rocblas_int,
+        x: *const *const rocblas_float_complex, // array of per-instance input-vector pointers
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex, // read-only array of pointers to the mutable per-instance vectors
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_sgbmv_batched: same parameter order, complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_zgbmv_batched( // no doc attribute of its own; presumably described by the text on rocblas_sgbmv_batched — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex, // array of per-instance matrix pointers
+        lda: rocblas_int,
+        x: *const *const rocblas_double_complex, // array of per-instance input-vector pointers
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex, // read-only array of pointers to the mutable per-instance vectors
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocBLAS gbmv_strided_batched, f32 variant: batch instances are addressed by base pointer plus stride (details in the doc attribute below).
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngbmv_strided_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i    + beta*y_i,   or\ny_i := alpha*A_i**T*x_i + beta*y_i,   or\ny_i := alpha*A_i**H*x_i + beta*y_i,\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nm by n banded matrix with kl sub-diagonals and ku super-diagonals,\nfor i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\nm         [rocblas_int]\nnumber of rows of matrix A.\n@param[in]\nn         [rocblas_int]\nnumber of columns of matrix A.\n@param[in]\nkl        [rocblas_int]\nnumber of sub-diagonals of A.\n@param[in]\nku        [rocblas_int]\nnumber of super-diagonals of A.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA     device pointer to first banded matrix (A_1).\nLeading (kl + ku + 1) by n part of the matrix contains the coefficients\nof the banded matrix. The leading diagonal resides in row (ku + 1) with\nthe first super-diagonal above on the RHS of row ku. The first sub-diagonal\nresides below on the LHS of row ku + 2. This propagates up and down across\nsub/super-diagonals.\n\nEx: (m = n = 7; ku = 2, kl = 2)\n1 2 3 0 0 0 0             0 0 3 3 3 3 3\n4 1 2 3 0 0 0             0 2 2 2 2 2 2\n5 4 1 2 3 0 0    ---->    1 1 1 1 1 1 1\n0 5 4 1 2 3 0             4 4 4 4 4 4 0\n0 0 5 4 1 2 0             5 5 5 5 5 0 0\n0 0 0 5 4 1 2             0 0 0 0 0 0 0\n0 0 0 0 5 4 1             0 0 0 0 0 0 0\n\nNote that the empty elements which do not correspond to data will not\nbe referenced.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. 
Must be >= (kl + ku + 1).\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nx         device pointer to first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device pointer to first vector (y_1).\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (x_i+1).\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of instances in the batch.\n"]
+    pub fn rocblas_sgbmv_strided_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const f32, // host or device pointer, per the doc text
+        A: *const f32, // first matrix (A_1) of the batch
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const f32, // first input vector (x_1) of the batch
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const f32, // host or device pointer, per the doc text
+        y: *mut f32, // first in/out vector (y_1); the only `*mut` parameter
+        incy: rocblas_int,
+        stride_y: rocblas_stride, // NOTE(review): the doc text says "(y_i) and the next one (x_i+1)"; presumably (y_i+1) is meant
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_sgbmv_strided_batched: same parameter order, with f64 scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_dgbmv_strided_batched( // no doc attribute of its own; presumably described by the text on rocblas_sgbmv_strided_batched — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64, // the only `*mut` parameter
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_sgbmv_strided_batched: same parameter order, complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_cgbmv_strided_batched( // no doc attribute of its own; presumably described by the text on rocblas_sgbmv_strided_batched — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex, // the only `*mut` parameter
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_sgbmv_strided_batched: same parameter order, complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_zgbmv_strided_batched( // no doc attribute of its own; presumably described by the text on rocblas_sgbmv_strided_batched — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex, // the only `*mut` parameter
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocBLAS gemv, f32 variant: general matrix-vector product (details in the doc attribute below).
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngemv performs one of the matrix-vector operations:\n\ny := alpha*A*x    + beta*y,   or\ny := alpha*A**T*x + beta*y,   or\ny := alpha*A**H*x + beta*y,\nwhere alpha and beta are scalars, x and y are vectors and A is an\nm by n matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\nm         [rocblas_int]\nnumber of rows of matrix A.\n@param[in]\nn         [rocblas_int]\nnumber of columns of matrix A.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_sgemv(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32, // host or device pointer, per the doc text
+        A: *const f32,
+        lda: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        beta: *const f32, // host or device pointer, per the doc text
+        y: *mut f32, // in/out vector per the doc text; the only `*mut` parameter
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_sgemv: same parameter order, with f64 scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_dgemv( // no doc attribute of its own; presumably described by the gemv text on rocblas_sgemv — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *mut f64, // the only `*mut` parameter
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_sgemv: same parameter order, complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_cgemv( // no doc attribute of its own; presumably described by the gemv text on rocblas_sgemv — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex, // the only `*mut` parameter
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_sgemv: same parameter order, complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_zgemv( // no doc attribute of its own; presumably described by the gemv text on rocblas_sgemv — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex, // the only `*mut` parameter
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocBLAS gemv_batched, f32 variant: one matrix-vector product per batch instance (details in the doc attribute below).
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngemv_batched performs a batch of matrix-vector operations:\n\ny_i := alpha*A_i*x_i    + beta*y_i,   or\ny_i := alpha*A_i**T*x_i + beta*y_i,   or\ny_i := alpha*A_i**H*x_i + beta*y_i,\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nm by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle      [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntrans       [rocblas_operation]\nindicates whether matrices A_i are tranposed (conjugated) or not.\n@param[in]\nm           [rocblas_int]\nnumber of rows of each matrix A_i.\n@param[in]\nn           [rocblas_int]\nnumber of columns of each matrix A_i.\n@param[in]\nalpha       device pointer or host pointer to scalar alpha.\n@param[in]\nA           device array of device pointers storing each matrix A_i.\n@param[in]\nlda         [rocblas_int]\nspecifies the leading dimension of each matrix A_i.\n@param[in]\nx           device array of device pointers storing each vector x_i.\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[in]\nbeta        device pointer or host pointer to scalar beta.\n@param[inout]\ny           device array of device pointers storing each vector y_i.\n@param[in]\nincy        [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_sgemv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32, // host or device pointer, per the doc text
+        A: *const *const f32, // array of per-instance matrix pointers
+        lda: rocblas_int,
+        x: *const *const f32, // array of per-instance input-vector pointers
+        incx: rocblas_int,
+        beta: *const f32, // host or device pointer, per the doc text
+        y: *const *mut f32, // read-only array of pointers to the mutable per-instance vectors
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_sgemv_batched: same parameter order, with f64 scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_dgemv_batched( // no doc attribute of its own; presumably described by the text on rocblas_sgemv_batched — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64, // array of per-instance matrix pointers
+        lda: rocblas_int,
+        x: *const *const f64, // array of per-instance input-vector pointers
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *const *mut f64, // read-only array of pointers to the mutable per-instance vectors
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_sgemv_batched: same parameter order, complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_cgemv_batched( // no doc attribute of its own; presumably described by the text on rocblas_sgemv_batched — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex, // array of per-instance matrix pointers
+        lda: rocblas_int,
+        x: *const *const rocblas_float_complex, // array of per-instance input-vector pointers
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex, // read-only array of pointers to the mutable per-instance vectors
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_sgemv_batched: same parameter order, complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_zgemv_batched( // no doc attribute of its own; presumably described by the text on rocblas_sgemv_batched — confirm in rocblas.h
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex, // array of per-instance matrix pointers
+        lda: rocblas_int,
+        x: *const *const rocblas_double_complex, // array of per-instance input-vector pointers
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex, // read-only array of pointers to the mutable per-instance vectors
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocBLAS gemv_strided_batched, f32 variant: batch instances are addressed by base pointer plus stride (details in the doc attribute below).
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ngemv_strided_batched performs a batch of matrix-vector operations:\n\ny_i := alpha*A_i*x_i    + beta*y_i,   or\ny_i := alpha*A_i**T*x_i + beta*y_i,   or\ny_i := alpha*A_i**H*x_i + beta*y_i,\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nm by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle      [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA      [rocblas_operation]\nindicates whether matrices A_i are tranposed (conjugated) or not.\n@param[in]\nm           [rocblas_int]\nnumber of rows of matrices A_i.\n@param[in]\nn           [rocblas_int]\nnumber of columns of matrices A_i.\n@param[in]\nalpha       device pointer or host pointer to scalar alpha.\n@param[in]\nA           device pointer to the first matrix (A_1) in the batch.\n@param[in]\nlda         [rocblas_int]\nspecifies the leading dimension of matrices A_i.\n@param[in]\nstrideA     [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nx           device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx        [rocblas_int]\nspecifies the increment for the elements of vectors x_i.\n@param[in]\nstridex     [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. 
When trans equals rocblas_operation_none\nthis typically means stride_x >= n * incx, otherwise stride_x >= m * incx.\n@param[in]\nbeta        device pointer or host pointer to scalar beta.\n@param[inout]\ny           device pointer to the first vector (y_1) in the batch.\n@param[in]\nincy        [rocblas_int]\nspecifies the increment for the elements of vectors y_i.\n@param[in]\nstridey     [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stride_y. However, ensure that stride_y is of appropriate size. When trans equals rocblas_operation_none\nthis typically means stride_y >= m * incy, otherwise stride_y >= n * incy. stridey should be non zero.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_sgemv_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation, // NOTE(review): the doc text also calls this `trans`; presumably the same parameter
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32, // host or device pointer, per the doc text
+        A: *const f32, // first matrix (A_1) of the batch
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const f32, // first input vector (x_1) of the batch
+        incx: rocblas_int,
+        stridex: rocblas_stride, // NOTE(review): the doc text's `stride_x` presumably refers to this parameter
+        beta: *const f32, // host or device pointer, per the doc text
+        y: *mut f32, // first in/out vector (y_1); the only `*mut` parameter
+        incy: rocblas_int,
+        stridey: rocblas_stride, // doc text: should be non-zero; its `stride_y` presumably refers to this parameter
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 counterpart of rocblas_sgemv_strided_batched: same parameter order, with f64 scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_dgemv_strided_batched( // no doc attribute of its own; presumably described by the text on rocblas_sgemv_strided_batched — confirm in rocblas.h
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64, // the only `*mut` parameter
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_float_complex counterpart of rocblas_sgemv_strided_batched: same parameter order, complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_cgemv_strided_batched( // no doc attribute of its own; presumably described by the text on rocblas_sgemv_strided_batched — confirm in rocblas.h
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex, // the only `*mut` parameter
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_sgemv_strided_batched: same parameter order, complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_zgemv_strided_batched( // no doc attribute of its own; presumably described by the text on rocblas_sgemv_strided_batched — confirm in rocblas.h
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex, // the only `*mut` parameter
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocBLAS hbmv, rocblas_float_complex variant: Hermitian band matrix-vector product (details in the doc attribute below).
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhbmv performs the matrix-vector operations:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and A is an\nn by n Hermitian band matrix, with k super-diagonals.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: The upper triangular part of A is being supplied.\n- rocblas_fill_lower: The lower triangular part of A is being supplied.\n@param[in]\nn         [rocblas_int]\nthe order of the matrix A.\n@param[in]\nk         [rocblas_int]\nthe number of super-diagonals of the matrix A. Must be >= 0.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device pointer storing matrix A. Of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe leading (k + 1) by n part of A must contain the upper\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (k + 1), the first super-diagonal on the RHS\nof row k, etc.\nThe top left k by x triangle of A will not be referenced.\nEx (upper, lda = n = 4, k = 1):\nA                             Represented matrix\n(0,0) (5,9) (6,8) (7,7)       (1, 0) (5, 9) (0, 0) (0, 0)\n(1,0) (2,0) (3,0) (4,0)       (5,-9) (2, 0) (6, 8) (0, 0)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (6,-8) (3, 0) (7, 7)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (0, 0) (7,-7) (4, 0)\n\nif uplo == rocblas_fill_lower:\nThe leading (k + 1) by n part of A must contain the lower\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (1), the first sub-diagonal on the LHS of\nrow 2, etc.\nThe bottom right k by k triangle of A will not be referenced.\nEx (lower, lda = 2, n = 4, k = 1):\nA                               Represented matrix\n(1,0) (2,0) (3,0) (4,0)         (1, 0) (5,-9) (0, 0) (0, 0)\n(5,9) (6,8) (7,7) (0,0)         (5, 9) (2, 0) (6,-8) (0, 0)\n(0, 0) (6, 8) (3, 0) 
(7,-7)\n(0, 0) (0, 0) (7, 7) (4, 0)\n\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof A will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. must be >= k + 1.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_chbmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // selects whether the upper or lower triangular band is supplied, per the doc text
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex, // host or device pointer, per the doc text
+        A: *const rocblas_float_complex, // NOTE(review): the doc text says "top left k by x triangle"; presumably "k by k" is meant
+        lda: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex, // host or device pointer, per the doc text
+        y: *mut rocblas_float_complex, // in/out vector per the doc text; the only `*mut` parameter
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_chbmv: same parameter order, double-complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_zhbmv( // no doc attribute of its own; presumably described by the hbmv text on rocblas_chbmv — confirm in rocblas.h
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex, // the only `*mut` parameter
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocBLAS hbmv_batched, rocblas_float_complex variant: one Hermitian band matrix-vector product per batch instance (details in the doc attribute below).
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhbmv_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian band matrix with k super-diagonals, for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: The upper triangular part of each A_i is being supplied.\n- rocblas_fill_lower: The lower triangular part of each A_i is being supplied.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nk         [rocblas_int]\nthe number of super-diagonals of each matrix A_i. Must be >= 0.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix_i A of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe leading (k + 1) by n part of each A_i must contain the upper\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (k + 1), the first super-diagonal on the RHS\nof row k, etc.\nThe top left k by x triangle of each A_i will not be referenced.\nEx (upper, lda = n = 4, k = 1):\nA                             Represented matrix\n(0,0) (5,9) (6,8) (7,7)       (1, 0) (5, 9) (0, 0) (0, 0)\n(1,0) (2,0) (3,0) (4,0)       (5,-9) (2, 0) (6, 8) (0, 0)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (6,-8) (3, 0) (7, 7)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (0, 0) (7,-7) (4, 0)\n\nif uplo == rocblas_fill_lower:\nThe leading (k + 1) by n part of each A_i must contain the lower\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (1), the first sub-diagonal on the LHS of\nrow 2, etc.\nThe bottom right k by k triangle of each A_i will not be referenced.\nEx (lower, lda = 2, n = 4, k = 1):\nA                               Represented matrix\n(1,0) 
(2,0) (3,0) (4,0)         (1, 0) (5,-9) (0, 0) (0, 0)\n(5,9) (6,8) (7,7) (0,0)         (5, 9) (2, 0) (6,-8) (0, 0)\n(0, 0) (6, 8) (3, 0) (7,-7)\n(0, 0) (0, 0) (7, 7) (4, 0)\n\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof each A_i will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. must be >= max(1, n).\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_chbmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // selects whether the upper or lower triangular band is supplied, per the doc text
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex, // host or device pointer, per the doc text
+        A: *const *const rocblas_float_complex, // array of per-instance matrix pointers
+        lda: rocblas_int, // NOTE(review): doc text here says ">= max(1, n)" while the rocblas_chbmv doc says ">= k + 1" — confirm which bound applies
+        x: *const *const rocblas_float_complex, // array of per-instance input-vector pointers
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex, // host or device pointer, per the doc text
+        y: *const *mut rocblas_float_complex, // read-only array of pointers to the mutable per-instance vectors
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_chbmv_batched: same parameter order, double-complex scalars and buffers.
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    pub fn rocblas_zhbmv_batched( // no doc attribute of its own; presumably described by the text on rocblas_chbmv_batched — confirm in rocblas.h
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex, // array of per-instance matrix pointers
+        lda: rocblas_int,
+        x: *const *const rocblas_double_complex, // array of per-instance input-vector pointers
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex, // read-only array of pointers to the mutable per-instance vectors
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocBLAS hbmv_strided_batched, rocblas_float_complex variant: batch instances are addressed by base pointer plus stride (details in the doc attribute below).
+    #[must_use] // warn at call sites that ignore the returned rocblas_status
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhbmv_strided_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian band matrix with k super-diagonals, for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: The upper triangular part of each A_i is being supplied.\n- rocblas_fill_lower: The lower triangular part of each A_i is being supplied.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nk         [rocblas_int]\nthe number of super-diagonals of each matrix A_i. Must be >= 0.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device array pointing to the first matrix A_1. Each A_i is of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe leading (k + 1) by n part of each A_i must contain the upper\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (k + 1), the first super-diagonal on the RHS\nof row k, etc.\nThe top left k by x triangle of each A_i will not be referenced.\nEx (upper, lda = n = 4, k = 1):\nA                             Represented matrix\n(0,0) (5,9) (6,8) (7,7)       (1, 0) (5, 9) (0, 0) (0, 0)\n(1,0) (2,0) (3,0) (4,0)       (5,-9) (2, 0) (6, 8) (0, 0)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (6,-8) (3, 0) (7, 7)\n(0,0) (0,0) (0,0) (0,0)       (0, 0) (0, 0) (7,-7) (4, 0)\n\nif uplo == rocblas_fill_lower:\nThe leading (k + 1) by n part of each A_i must contain the lower\ntriangular band part of the Hermitian matrix, with the leading\ndiagonal in row (1), the first sub-diagonal on the LHS of\nrow 2, etc.\nThe bottom right k by k triangle of each A_i will not be referenced.\nEx (lower, lda = 2, n = 4, k = 1):\nA                               Represented 
matrix\n(1,0) (2,0) (3,0) (4,0)         (1, 0) (5,-9) (0, 0) (0, 0)\n(5,9) (6,8) (7,7) (0,0)         (5, 9) (2, 0) (6,-8) (0, 0)\n(0, 0) (6, 8) (3, 0) (7,-7)\n(0, 0) (0, 0) (7, 7) (4, 0)\n\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof each A_i will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. must be >= max(1, n).\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nx         device array pointing to the first vector y_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device array pointing to the first vector y_1.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_chbmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // selects whether the upper or lower triangular band is supplied, per the doc text
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex, // host or device pointer, per the doc text
+        A: *const rocblas_float_complex, // first matrix (A_1) of the batch
+        lda: rocblas_int, // NOTE(review): doc text here says ">= max(1, n)" while the rocblas_chbmv doc says ">= k + 1" — confirm which bound applies
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex, // NOTE(review): the doc text says "first vector y_1" for x; presumably x_1 is meant
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_float_complex, // host or device pointer, per the doc text
+        y: *mut rocblas_float_complex, // first in/out vector (y_1); the only `*mut` parameter
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // double-precision complex sibling of rocblas_chbmv_strided_batched; presumably covered by the hbmv_strided_batched doc group (`@{`) attached to that function
+    #[must_use] // the returned rocblas_status is meant to be checked, not dropped
+    pub fn rocblas_zhbmv_strided_batched(
+        handle: rocblas_handle, // rocBLAS library context
+        uplo: rocblas_fill, // which triangle (upper/lower) of each band matrix A_i is supplied
+        n: rocblas_int, // order of each matrix A_i
+        k: rocblas_int, // number of super-diagonals of each A_i
+        alpha: *const rocblas_double_complex, // read-only scalar alpha
+        A: *const rocblas_double_complex, // read-only; first matrix of the batch, later ones found via stride_A
+        lda: rocblas_int, // leading dimension of each A_i
+        stride_A: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        x: *const rocblas_double_complex, // read-only; first input vector, later ones found via stride_x
+        incx: rocblas_int, // element increment within each x_i
+        stride_x: rocblas_stride, // distance from the start of x_i to the start of x_i+1
+        beta: *const rocblas_double_complex, // read-only scalar beta
+        y: *mut rocblas_double_complex, // in/out; the only mutable pointer in the signature
+        incy: rocblas_int, // element increment within each y_i
+        stride_y: rocblas_stride, // distance from the start of y_i to the start of y_i+1
+        batch_count: rocblas_int, // number of problem instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // single-precision complex (rocblas_float_complex) hemv binding; full parameter docs are in the #[doc] attribute below
+    #[must_use] // the returned rocblas_status is meant to be checked, not dropped
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhemv performs one of the matrix-vector operations:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and A is an\nn by n Hermitian matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied.\n- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied.\n@param[in]\nn         [rocblas_int]\nthe order of the matrix A.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device pointer storing matrix A. Of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular part of A must contain\nthe upper triangular part of a Hermitian matrix. The lower\ntriangular part of A will not be referenced.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular part of A must contain\nthe lower triangular part of a Hermitian matrix. The upper\ntriangular part of A will not be referenced.\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof A will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. must be >= max(1, n).\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_chemv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex, // read-only scalar
+        A: *const rocblas_float_complex, // read-only matrix
+        lda: rocblas_int,
+        x: *const rocblas_float_complex, // read-only input vector
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex, // read-only scalar
+        y: *mut rocblas_float_complex, // in/out: the only mutable pointer in the signature
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // double-precision complex sibling of rocblas_chemv; presumably covered by the hemv doc group (`@{`) attached to that function
+    #[must_use] // the returned rocblas_status is meant to be checked, not dropped
+    pub fn rocblas_zhemv(
+        handle: rocblas_handle, // rocBLAS library context
+        uplo: rocblas_fill, // which triangle (upper/lower) of the Hermitian matrix A is supplied
+        n: rocblas_int, // order of the matrix A
+        alpha: *const rocblas_double_complex, // read-only scalar alpha
+        A: *const rocblas_double_complex, // read-only matrix A
+        lda: rocblas_int, // leading dimension of A
+        x: *const rocblas_double_complex, // read-only input vector x
+        incx: rocblas_int, // element increment within x
+        beta: *const rocblas_double_complex, // read-only scalar beta
+        y: *mut rocblas_double_complex, // in/out vector y; the only mutable pointer in the signature
+        incy: rocblas_int, // element increment within y
+    ) -> rocblas_status;
+}
+extern "C" { // single-precision complex hemv over a batch given as arrays of per-instance pointers; full parameter docs are in the #[doc] attribute below
+    #[must_use] // the returned rocblas_status is meant to be checked, not dropped
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhemv_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian matrix, for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied.\n- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular part of each A_i must contain\nthe upper triangular part of a Hermitian matrix. The lower\ntriangular part of each A_i will not be referenced.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular part of each A_i must contain\nthe lower triangular part of a Hermitian matrix. The upper\ntriangular part of each A_i will not be referenced.\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof each A_i will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. must be >= max(1, n).\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_chemv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex, // read-only scalar
+        A: *const *const rocblas_float_complex, // read-only array of pointers to read-only matrices
+        lda: rocblas_int,
+        x: *const *const rocblas_float_complex, // read-only array of pointers to read-only vectors
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex, // read-only scalar
+        y: *const *mut rocblas_float_complex, // read-only array of pointers; the pointed-to vectors are mutable (in/out)
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // double-precision complex sibling of rocblas_chemv_batched; presumably covered by the hemv_batched doc group (`@{`) attached to that function
+    #[must_use] // the returned rocblas_status is meant to be checked, not dropped
+    pub fn rocblas_zhemv_batched(
+        handle: rocblas_handle, // rocBLAS library context
+        uplo: rocblas_fill, // which triangle (upper/lower) of each Hermitian matrix A_i is supplied
+        n: rocblas_int, // order of each matrix A_i
+        alpha: *const rocblas_double_complex, // read-only scalar alpha
+        A: *const *const rocblas_double_complex, // read-only array of pointers, one per matrix A_i
+        lda: rocblas_int, // leading dimension of each A_i
+        x: *const *const rocblas_double_complex, // read-only array of pointers, one per input vector x_i
+        incx: rocblas_int, // element increment within each x_i
+        beta: *const rocblas_double_complex, // read-only scalar beta
+        y: *const *mut rocblas_double_complex, // read-only array of pointers; the pointed-to vectors y_i are mutable (in/out)
+        incy: rocblas_int, // element increment within each y_i
+        batch_count: rocblas_int, // number of problem instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhemv_strided_batched performs one of the matrix-vector operations:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian matrix, for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied.\n- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nA         device pointer to the first matrix A_1. Each A_i is of dimension (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular part of each A_i must contain\nthe upper triangular part of a Hermitian matrix. The lower\ntriangular part of each A_i will not be referenced.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular part of each A_i must contain\nthe lower triangular part of a Hermitian matrix. The upper\ntriangular part of each A_i will not be referenced.\nAs a Hermitian matrix, the imaginary part of the main diagonal\nof each A_i will not be referenced and is assumed to be == 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. must be >= max(1, n).\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) to the next (A_i+1).\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device pointer to the first vector y_1.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_chemv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // double-precision complex sibling of rocblas_chemv_strided_batched; presumably covered by the hemv_strided_batched doc group (`@{`) attached to that function
+    #[must_use] // the returned rocblas_status is meant to be checked, not dropped
+    pub fn rocblas_zhemv_strided_batched(
+        handle: rocblas_handle, // rocBLAS library context
+        uplo: rocblas_fill, // which triangle (upper/lower) of each Hermitian matrix A_i is supplied
+        n: rocblas_int, // order of each matrix A_i
+        alpha: *const rocblas_double_complex, // read-only scalar alpha
+        A: *const rocblas_double_complex, // read-only; single pointer, batch members are located via stride_A
+        lda: rocblas_int, // leading dimension of each A_i
+        stride_A: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        x: *const rocblas_double_complex, // read-only; single pointer, batch members are located via stride_x
+        incx: rocblas_int, // element increment within each x_i
+        stride_x: rocblas_stride, // distance from the start of x_i to the start of x_i+1
+        beta: *const rocblas_double_complex, // read-only scalar beta
+        y: *mut rocblas_double_complex, // in/out; single mutable pointer, batch members are located via stride_y
+        incy: rocblas_int, // element increment within each y_i
+        stride_y: rocblas_stride, // distance from the start of y_i to the start of y_i+1
+        batch_count: rocblas_int, // number of problem instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher performs the matrix-vector operations:\n\nA := A + alpha*x*x**H\nwhere alpha is a real scalar, x is a vector, and A is an\nn by n Hermitian matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in A.\n- rocblas_fill_lower: The lower triangular part of A is supplied in A.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\nA         device pointer storing the specified triangular portion of the Hermitian matrix A. Of size (lda * n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe lower triangular portion will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe upper triangular portion will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. Must be at least max(1, n)."]
+    pub fn rocblas_cher(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // double-precision complex sibling of rocblas_cher; presumably covered by the her doc group (`@{`) attached to that function
+    #[must_use] // the returned rocblas_status is meant to be checked, not dropped
+    pub fn rocblas_zher(
+        handle: rocblas_handle, // rocBLAS library context
+        uplo: rocblas_fill, // which triangle (upper/lower) of A is supplied and updated
+        n: rocblas_int, // number of rows and columns of A
+        alpha: *const f64, // read-only real (not complex) scalar
+        x: *const rocblas_double_complex, // read-only input vector x
+        incx: rocblas_int, // element increment within x
+        A: *mut rocblas_double_complex, // in/out Hermitian matrix; the only mutable pointer in the signature
+        lda: rocblas_int, // leading dimension of A
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**H\nwhere alpha is a real scalar, x_i is a vector, and A_i is an\nn by n Hermitian matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in A.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in A.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[inout]\nA         device array of device pointers storing the specified triangular portion of\neach Hermitian matrix A_i of size (lda * n). Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe lower triangular portion of each A_i will not be touched.\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe upper triangular portion of each A_i will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. Must be at least max(1, n).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_cher_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        A: *const *mut rocblas_float_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // double-precision complex sibling of rocblas_cher_batched; presumably covered by the her_batched doc group (`@{`) attached to that function
+    #[must_use] // the returned rocblas_status is meant to be checked, not dropped
+    pub fn rocblas_zher_batched(
+        handle: rocblas_handle, // rocBLAS library context
+        uplo: rocblas_fill, // which triangle (upper/lower) of each A_i is supplied and updated
+        n: rocblas_int, // number of rows and columns of each A_i
+        alpha: *const f64, // read-only real (not complex) scalar
+        x: *const *const rocblas_double_complex, // read-only array of pointers, one per input vector x_i
+        incx: rocblas_int, // element increment within each x_i
+        A: *const *mut rocblas_double_complex, // read-only array of pointers; the pointed-to matrices A_i are mutable (in/out)
+        lda: rocblas_int, // leading dimension of each A_i
+        batch_count: rocblas_int, // number of problem instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**H\nwhere alpha is a real scalar, x_i is a vector, and A_i is an\nn by n Hermitian matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in A.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in A.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[inout]\nA         device pointer storing the specified triangular portion of\neach Hermitian matrix A_i. Points to the first matrix (A_1).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe lower triangular portion of each A_i will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe upper triangular portion of each A_i will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_cher_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // double-precision complex sibling of rocblas_cher_strided_batched; presumably covered by the her_strided_batched doc group (`@{`) attached to that function
+    #[must_use] // the returned rocblas_status is meant to be checked, not dropped
+    pub fn rocblas_zher_strided_batched(
+        handle: rocblas_handle, // rocBLAS library context
+        uplo: rocblas_fill, // which triangle (upper/lower) of each A_i is supplied and updated
+        n: rocblas_int, // number of rows and columns of each A_i
+        alpha: *const f64, // read-only real (not complex) scalar
+        x: *const rocblas_double_complex, // read-only; first vector x_1, later ones found via stride_x
+        incx: rocblas_int, // element increment within each x_i
+        stride_x: rocblas_stride, // distance from the start of x_i to the start of x_i+1
+        A: *mut rocblas_double_complex, // in/out; single mutable pointer, batch members are located via stride_A
+        lda: rocblas_int, // leading dimension of each A_i
+        stride_A: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        batch_count: rocblas_int, // number of problem instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher2 performs the matrix-vector operations:\n\nA := A + alpha*x*y**H + conj(alpha)*y*x**H\nwhere alpha is a complex scalar, x and y are vectors, and A is an\nn by n Hermitian matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied.\n- rocblas_fill_lower: The lower triangular part of A is supplied.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nA         device pointer storing the specified triangular portion of\nthe Hermitian matrix A. Of size (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe lower triangular portion of A will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe upper triangular portion of A will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. Must be at least max(n, 1)."]
+    pub fn rocblas_cher2(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // double-precision complex sibling of rocblas_cher2; presumably covered by the her2 doc group (`@{`) attached to that function
+    #[must_use] // the returned rocblas_status is meant to be checked, not dropped
+    pub fn rocblas_zher2(
+        handle: rocblas_handle, // rocBLAS library context
+        uplo: rocblas_fill, // which triangle (upper/lower) of A is supplied and updated
+        n: rocblas_int, // number of rows and columns of A
+        alpha: *const rocblas_double_complex, // read-only complex scalar
+        x: *const rocblas_double_complex, // read-only input vector x
+        incx: rocblas_int, // element increment within x
+        y: *const rocblas_double_complex, // read-only input vector y
+        incy: rocblas_int, // element increment within y
+        A: *mut rocblas_double_complex, // in/out Hermitian matrix; the only mutable pointer in the signature
+        lda: rocblas_int, // leading dimension of A
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher2_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H\nwhere alpha is a complex scalar, x_i and y_i are vectors, and A_i is an\nn by n Hermitian matrix for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[inout]\nA         device array of device pointers storing the specified triangular portion of\neach Hermitian matrix A_i of size (lda, n).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe lower triangular portion of each A_i will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe upper triangular portion of each A_i will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. Must be at least max(n, 1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_cher2_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *const *mut rocblas_float_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // double-precision complex sibling of rocblas_cher2_batched; presumably covered by the her2_batched doc group (`@{`) attached to that function
+    #[must_use] // the returned rocblas_status is meant to be checked, not dropped
+    pub fn rocblas_zher2_batched(
+        handle: rocblas_handle, // rocBLAS library context
+        uplo: rocblas_fill, // which triangle (upper/lower) of each A_i is supplied and updated
+        n: rocblas_int, // number of rows and columns of each A_i
+        alpha: *const rocblas_double_complex, // read-only complex scalar
+        x: *const *const rocblas_double_complex, // read-only array of pointers, one per input vector x_i
+        incx: rocblas_int, // element increment within each x_i
+        y: *const *const rocblas_double_complex, // read-only array of pointers, one per input vector y_i
+        incy: rocblas_int, // element increment within each y_i
+        A: *const *mut rocblas_double_complex, // read-only array of pointers; the pointed-to matrices A_i are mutable (in/out)
+        lda: rocblas_int, // leading dimension of each A_i
+        batch_count: rocblas_int, // number of problem instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nher2_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H\nwhere alpha is a complex scalar, x_i and y_i are vectors, and A_i is an\nn by n Hermitian matrix for each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nspecifies the stride between the beginning of one vector (x_i) and the next (x_i+1).\n@param[in]\ny         device pointer pointing to the first vector y_1.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstride_y  [rocblas_stride]\nspecifies the stride between the beginning of one vector (y_i) and the next (y_i+1).\n@param[inout]\nA         device pointer pointing to the first matrix (A_1). Stores the specified triangular portion of\neach Hermitian matrix A_i.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe lower triangular portion of each A_i will not be touched.\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe upper triangular portion of each A_i will not be touched.\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. Must be at least max(n, 1).\n@param[in]\nstride_A  [rocblas_stride]\nspecifies the stride between the beginning of one matrix (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_cher2_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // double-precision complex sibling of rocblas_cher2_strided_batched; presumably covered by the her2_strided_batched doc group (`@{`) attached to that function
+    #[must_use] // the returned rocblas_status is meant to be checked, not dropped
+    pub fn rocblas_zher2_strided_batched(
+        handle: rocblas_handle, // rocBLAS library context
+        uplo: rocblas_fill, // which triangle (upper/lower) of each A_i is supplied and updated
+        n: rocblas_int, // number of rows and columns of each A_i
+        alpha: *const rocblas_double_complex, // read-only complex scalar
+        x: *const rocblas_double_complex, // read-only; first vector x_1, later ones found via stride_x
+        incx: rocblas_int, // element increment within each x_i
+        stride_x: rocblas_stride, // distance from the start of x_i to the start of x_i+1
+        y: *const rocblas_double_complex, // read-only; first vector y_1, later ones found via stride_y
+        incy: rocblas_int, // element increment within each y_i
+        stride_y: rocblas_stride, // distance from the start of y_i to the start of y_i+1
+        A: *mut rocblas_double_complex, // in/out; single mutable pointer, batch members are located via stride_A
+        lda: rocblas_int, // leading dimension of each A_i
+        stride_A: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        batch_count: rocblas_int, // number of problem instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpmv performs the matrix-vector operation:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and A is an\nn by n Hermitian matrix, supplied in packed form (see description below).\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied in AP.\n- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe order of the matrix A. Must be >= 0.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe Hermitian matrix A. Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,1),(4,0),(3,2),(5,-1),(6,0)]\n(3,-2) (5, 1) (6, 0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,0), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,-1),(3,-2),(4,0),(5,1),(6,0)]\n(3,-2) (5, 1) (6, 0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_chpmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        AP: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // double-precision complex sibling of rocblas_chpmv; presumably covered by the hpmv doc group (`@{`) attached to that function
+    #[must_use] // the returned rocblas_status is meant to be checked, not dropped
+    pub fn rocblas_zhpmv(
+        handle: rocblas_handle, // rocBLAS library context
+        uplo: rocblas_fill, // which triangle (upper/lower) of the Hermitian matrix is stored in AP
+        n: rocblas_int, // order of the matrix A
+        alpha: *const rocblas_double_complex, // read-only scalar alpha
+        AP: *const rocblas_double_complex, // read-only packed triangle of A (no leading-dimension parameter)
+        x: *const rocblas_double_complex, // read-only input vector x
+        incx: rocblas_int, // element increment within x
+        beta: *const rocblas_double_complex, // read-only scalar beta
+        y: *mut rocblas_double_complex, // in/out vector y; the only mutable pointer in the signature
+        incy: rocblas_int, // element increment within y
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpmv_batched performs the matrix-vector operation:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian matrix, supplied in packed form (see description below),\nfor each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of each Hermitian matrix A_i is supplied in AP.\n- rocblas_fill_lower: the lower triangular part of each Hermitian matrix A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nAP      device pointer of device pointers storing the packed version of the specified triangular\nportion of each Hermitian matrix A_i. Each A_i is of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that each AP_i contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,1),(4,0),(3,2),(5,-1),(6,0)]\n(3,-2) (5, 1) (6, 0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that each AP_i contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,-1),(3,-2),(4,0),(5,1),(6,0)]\n(3,-2) (5, 1) (6, 0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nx         device array of device pointers storing each vector 
x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_chpmv_batched( // batched packed Hermitian matrix-vector product: y_i := alpha*A_i*x_i + beta*y_i
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of each Hermitian A_i is supplied in AP
+        n: rocblas_int, // order of each matrix A_i
+        alpha: *const rocblas_float_complex, // scalar alpha; device or host pointer
+        AP: *const *const rocblas_float_complex, // device array of device pointers to the packed matrices
+        x: *const *const rocblas_float_complex, // device array of device pointers to the vectors x_i
+        incx: rocblas_int, // increment for the elements of each x_i
+        beta: *const rocblas_float_complex, // scalar beta; device or host pointer
+        y: *const *mut rocblas_float_complex, // device array of device pointers to the vectors y_i (in/out)
+        incy: rocblas_int, // increment for the elements of y
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpmv_batched( // rocblas_double_complex form of rocblas_chpmv_batched (hpmv_batched docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of each Hermitian A_i is supplied in AP
+        n: rocblas_int, // order of each matrix A_i
+        alpha: *const rocblas_double_complex, // scalar alpha; device or host pointer
+        AP: *const *const rocblas_double_complex, // device array of device pointers to the packed matrices
+        x: *const *const rocblas_double_complex, // device array of device pointers to the vectors x_i
+        incx: rocblas_int, // increment for the elements of each x_i
+        beta: *const rocblas_double_complex, // scalar beta; device or host pointer
+        y: *const *mut rocblas_double_complex, // device array of device pointers to the vectors y_i (in/out)
+        incy: rocblas_int, // increment for the elements of y
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpmv_strided_batched performs the matrix-vector operation:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an\nn by n Hermitian matrix, supplied in packed form (see description below),\nfor each batch in i = [1, batch_count].\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: the upper triangular part of each Hermitian matrix A_i is supplied in AP.\n- rocblas_fill_lower: the lower triangular part of each Hermitian matrix A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe order of each matrix A_i.\n@param[in]\nalpha     device pointer or host pointer to scalar alpha.\n@param[in]\nAP        device pointer pointing to the beginning of the first matrix (AP_1). Stores the packed\nversion of the specified triangular portion of each Hermitian matrix AP_i of size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that each AP_i contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,1),(4,0),(3,2),(5,-1),(6,0)]\n(3,-2) (5, 1) (6, 0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that each AP_i contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (3, 2)\n(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,-1),(3,-2),(4,0),(5,1),(6,0)]\n(3,-2) (5, 1) (6, 0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nstride_A  [rocblas_stride]\nstride from 
the start of one matrix (AP_i) and the next one (AP_i+1).\n@param[in]\nx         device array pointing to the beginning of the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[inout]\ny         device array pointing to the beginning of the first vector (y_1).\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_chpmv_strided_batched( // strided-batched packed Hermitian product: y_i := alpha*A_i*x_i + beta*y_i
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of each Hermitian A_i is supplied in AP
+        n: rocblas_int, // order of each matrix A_i
+        alpha: *const rocblas_float_complex, // scalar alpha; device or host pointer
+        AP: *const rocblas_float_complex, // device pointer to the first packed matrix (AP_1)
+        stride_A: rocblas_stride, // stride from the start of one packed matrix to the next
+        x: *const rocblas_float_complex, // device pointer to the first vector (x_1)
+        incx: rocblas_int, // increment for the elements of each x_i
+        stride_x: rocblas_stride, // stride from the start of x_i to x_i+1
+        beta: *const rocblas_float_complex, // scalar beta; device or host pointer
+        y: *mut rocblas_float_complex, // device pointer to the first vector (y_1); in/out
+        incy: rocblas_int, // increment for the elements of y
+        stride_y: rocblas_stride, // stride from the start of y_i to y_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpmv_strided_batched( // rocblas_double_complex form of rocblas_chpmv_strided_batched (docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of each Hermitian A_i is supplied in AP
+        n: rocblas_int, // order of each matrix A_i
+        alpha: *const rocblas_double_complex, // scalar alpha; device or host pointer
+        AP: *const rocblas_double_complex, // device pointer to the first packed matrix (AP_1)
+        stride_A: rocblas_stride, // stride from the start of one packed matrix to the next
+        x: *const rocblas_double_complex, // device pointer to the first vector (x_1)
+        incx: rocblas_int, // increment for the elements of each x_i
+        stride_x: rocblas_stride, // stride from the start of x_i to x_i+1
+        beta: *const rocblas_double_complex, // scalar beta; device or host pointer
+        y: *mut rocblas_double_complex, // device pointer to the first vector (y_1); in/out
+        incy: rocblas_int, // increment for the elements of y
+        stride_y: rocblas_stride, // stride from the start of y_i to y_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr performs the matrix-vector operations:\n\nA := A + alpha*x*x**H\nwhere alpha is a real scalar, x is a vector, and A is an\nn by n Hermitian matrix, supplied in packed form.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe Hermitian matrix A. Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0."]
+    pub fn rocblas_chpr( // packed Hermitian rank-1 update: A := A + alpha*x*x**H
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of A is supplied in AP
+        n: rocblas_int, // number of rows and columns of A; must be at least 0
+        alpha: *const f32, // real scalar alpha; device or host pointer
+        x: *const rocblas_float_complex, // device pointer to vector x
+        incx: rocblas_int, // increment for the elements of x
+        AP: *mut rocblas_float_complex, // device pointer to the packed triangle of A (in/out)
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr( // f64 / rocblas_double_complex form of rocblas_chpr (hpr docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of A is supplied in AP
+        n: rocblas_int, // number of rows and columns of A; must be at least 0
+        alpha: *const f64, // real scalar alpha; device or host pointer
+        x: *const rocblas_double_complex, // device pointer to vector x
+        incx: rocblas_int, // increment for the elements of x
+        AP: *mut rocblas_double_complex, // device pointer to the packed triangle of A (in/out)
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**H\nwhere alpha is a real scalar, x_i is a vector, and A_i is an\nn by n Hermitian matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[inout]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). 
Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,0), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_chpr_batched( // batched packed Hermitian rank-1 update: A_i := A_i + alpha*x_i*x_i**H
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of each A_i is supplied in AP
+        n: rocblas_int, // number of rows and columns of each A_i; must be at least 0
+        alpha: *const f32, // real scalar alpha; device or host pointer
+        x: *const *const rocblas_float_complex, // device array of device pointers to the vectors x_i
+        incx: rocblas_int, // increment for the elements of each x_i
+        AP: *const *mut rocblas_float_complex, // device array (at least batch_count long) of device pointers to the packed A_i; in/out
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr_batched( // f64 / rocblas_double_complex form of rocblas_chpr_batched (docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of each A_i is supplied in AP
+        n: rocblas_int, // number of rows and columns of each A_i; must be at least 0
+        alpha: *const f64, // real scalar alpha; device or host pointer
+        x: *const *const rocblas_double_complex, // device array of device pointers to the vectors x_i
+        incx: rocblas_int, // increment for the elements of each x_i
+        AP: *const *mut rocblas_double_complex, // device array (at least batch_count long) of device pointers to the packed A_i; in/out
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**H\nwhere alpha is a real scalar, x_i is a vector, and A_i is an\nn by n Hermitian matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\neach Hermitian matrix A_i. 
Points to the first matrix (A_1).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,0), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_chpr_strided_batched( // strided-batched packed Hermitian rank-1 update: A_i := A_i + alpha*x_i*x_i**H
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of each A_i is supplied in AP
+        n: rocblas_int, // number of rows and columns of each A_i; must be at least 0
+        alpha: *const f32, // real scalar alpha; device or host pointer
+        x: *const rocblas_float_complex, // device pointer to the first vector (x_1)
+        incx: rocblas_int, // increment for the elements of each x_i
+        stride_x: rocblas_stride, // stride from the start of x_i to x_i+1
+        AP: *mut rocblas_float_complex, // points to the first packed matrix (A_1); in/out
+        stride_A: rocblas_stride, // stride from the start of A_i to A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr_strided_batched( // f64 / rocblas_double_complex form of rocblas_chpr_strided_batched (docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of each A_i is supplied in AP
+        n: rocblas_int, // number of rows and columns of each A_i; must be at least 0
+        alpha: *const f64, // real scalar alpha; device or host pointer
+        x: *const rocblas_double_complex, // device pointer to the first vector (x_1)
+        incx: rocblas_int, // increment for the elements of each x_i
+        stride_x: rocblas_stride, // stride from the start of x_i to x_i+1
+        AP: *mut rocblas_double_complex, // points to the first packed matrix (A_1); in/out
+        stride_A: rocblas_stride, // stride from the start of A_i to A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr2 performs the matrix-vector operations:\n\nA := A + alpha*x*y**H + conj(alpha)*y*x**H\nwhere alpha is a complex scalar, x and y are vectors, and A is an\nn by n Hermitian matrix, supplied in packed form.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe Hermitian matrix A. 
Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the Hermitian matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,0), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0."]
+    pub fn rocblas_chpr2( // packed Hermitian rank-2 update: A := A + alpha*x*y**H + conj(alpha)*y*x**H
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of A is supplied in AP
+        n: rocblas_int, // number of rows and columns of A; must be at least 0
+        alpha: *const rocblas_float_complex, // complex scalar alpha; device or host pointer
+        x: *const rocblas_float_complex, // device pointer to vector x
+        incx: rocblas_int, // increment for the elements of x
+        y: *const rocblas_float_complex, // device pointer to vector y
+        incy: rocblas_int, // increment for the elements of y
+        AP: *mut rocblas_float_complex, // device pointer to the packed triangle of A (in/out)
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr2( // rocblas_double_complex form of rocblas_chpr2 (hpr2 docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of A is supplied in AP
+        n: rocblas_int, // number of rows and columns of A; must be at least 0
+        alpha: *const rocblas_double_complex, // complex scalar alpha; device or host pointer
+        x: *const rocblas_double_complex, // device pointer to vector x
+        incx: rocblas_int, // increment for the elements of x
+        y: *const rocblas_double_complex, // device pointer to vector y
+        incy: rocblas_int, // increment for the elements of y
+        AP: *mut rocblas_double_complex, // device pointer to the packed triangle of A (in/out)
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr2_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H\nwhere alpha is a complex scalar, x_i and y_i are vectors, and A_i is an\nn by n Hermitian matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[inout]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). 
Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,0), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_chpr2_batched( // batched packed Hermitian rank-2 update: A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of each A_i is supplied in AP
+        n: rocblas_int, // number of rows and columns of each A_i; must be at least 0
+        alpha: *const rocblas_float_complex, // complex scalar alpha; device or host pointer
+        x: *const *const rocblas_float_complex, // device array of device pointers to the vectors x_i
+        incx: rocblas_int, // increment for the elements of each x_i
+        y: *const *const rocblas_float_complex, // device array of device pointers to the vectors y_i
+        incy: rocblas_int, // increment for the elements of each y_i
+        AP: *const *mut rocblas_float_complex, // device array (at least batch_count long) of device pointers to the packed A_i; in/out
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr2_batched( // rocblas_double_complex form of rocblas_chpr2_batched (docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of each A_i is supplied in AP
+        n: rocblas_int, // number of rows and columns of each A_i; must be at least 0
+        alpha: *const rocblas_double_complex, // complex scalar alpha; device or host pointer
+        x: *const *const rocblas_double_complex, // device array of device pointers to the vectors x_i
+        incx: rocblas_int, // increment for the elements of each x_i
+        y: *const *const rocblas_double_complex, // device array of device pointers to the vectors y_i
+        incy: rocblas_int, // increment for the elements of each y_i
+        AP: *const *mut rocblas_double_complex, // device array (at least batch_count long) of device pointers to the packed A_i; in/out
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nhpr2_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H\nwhere alpha is a complex scalar, x_i and y_i are vectors, and A_i is an\nn by n Hermitian matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\ny         device pointer pointing to the first vector (y_1).\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\neach Hermitian matrix A_i. 
Points to the first matrix (A_1).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]\n(4,-9) (5,-3) (6,0)\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each Hermitian matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,0), etc.\nEx: (rocblas_fill_lower; n = 3)\n(1, 0) (2, 1) (4,9)\n(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]\n(4,-9) (5,-3) (6,0)\nNote that the imaginary part of the diagonal elements are not accessed\nand are assumed to be 0.\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_chpr2_strided_batched( // strided-batched packed Hermitian rank-2 update: A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of each A_i is supplied in AP
+        n: rocblas_int, // number of rows and columns of each A_i; must be at least 0
+        alpha: *const rocblas_float_complex, // complex scalar alpha; device or host pointer
+        x: *const rocblas_float_complex, // device pointer to the first vector (x_1)
+        incx: rocblas_int, // increment for the elements of each x_i
+        stride_x: rocblas_stride, // stride from the start of x_i to x_i+1
+        y: *const rocblas_float_complex, // device pointer to the first vector (y_1)
+        incy: rocblas_int, // increment for the elements of each y_i
+        stride_y: rocblas_stride, // stride from the start of y_i to y_i+1
+        AP: *mut rocblas_float_complex, // points to the first packed matrix (A_1); in/out
+        stride_A: rocblas_stride, // stride from the start of A_i to A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr2_strided_batched( // rocblas_double_complex form of rocblas_chpr2_strided_batched (docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // which triangle of each A_i is supplied in AP
+        n: rocblas_int, // number of rows and columns of each A_i; must be at least 0
+        alpha: *const rocblas_double_complex, // complex scalar alpha; device or host pointer
+        x: *const rocblas_double_complex, // device pointer to the first vector (x_1)
+        incx: rocblas_int, // increment for the elements of each x_i
+        stride_x: rocblas_stride, // stride from the start of x_i to x_i+1
+        y: *const rocblas_double_complex, // device pointer to the first vector (y_1)
+        incy: rocblas_int, // increment for the elements of each y_i
+        stride_y: rocblas_stride, // stride from the start of y_i to y_i+1
+        AP: *mut rocblas_double_complex, // points to the first packed matrix (A_1); in/out
+        stride_A: rocblas_stride, // stride from the start of A_i to A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrmv performs one of the matrix-vector operations:\n\nx = A*x or\nx = A**T*x,\nwhere x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix.\nThe vector x is overwritten.\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of A. m >= 0.\n\n@param[in]\nA         device pointer storing matrix A,\nof dimension ( lda, m ).\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\nlda >= max( 1, m ).\n\n@param[in]\nx         device pointer storing vector x.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
+    pub fn rocblas_strmv( // f32 triangular matrix-vector product, in place: x = A*x or x = A**T*x
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // whether A is upper or lower triangular
+        transA: rocblas_operation, // NOTE(review): undocumented in the header text; presumably selects A vs. A**T
+        diag: rocblas_diagonal, // whether A is assumed to be unit triangular
+        m: rocblas_int, // number of rows of A; m >= 0
+        A: *const f32, // device pointer to matrix A, dimension (lda, m)
+        lda: rocblas_int, // leading dimension of A
+        x: *mut f32, // device pointer to vector x; overwritten with the result
+        incx: rocblas_int, // increment for the elements of x
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrmv( // f64 form of rocblas_strmv (trmv docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // whether A is upper or lower triangular
+        transA: rocblas_operation, // NOTE(review): undocumented in the header text; presumably selects A vs. A**T
+        diag: rocblas_diagonal, // whether A is assumed to be unit triangular
+        m: rocblas_int, // number of rows of A; m >= 0
+        A: *const f64, // device pointer to matrix A, dimension (lda, m)
+        lda: rocblas_int, // leading dimension of A
+        x: *mut f64, // device pointer to vector x; overwritten with the result
+        incx: rocblas_int, // increment for the elements of x
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrmv( // rocblas_float_complex form of rocblas_strmv (trmv docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // whether A is upper or lower triangular
+        transA: rocblas_operation, // NOTE(review): undocumented in the header text; presumably selects the operation applied to A
+        diag: rocblas_diagonal, // whether A is assumed to be unit triangular
+        m: rocblas_int, // number of rows of A; m >= 0
+        A: *const rocblas_float_complex, // device pointer to matrix A, dimension (lda, m)
+        lda: rocblas_int, // leading dimension of A
+        x: *mut rocblas_float_complex, // device pointer to vector x; overwritten with the result
+        incx: rocblas_int, // increment for the elements of x
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrmv( // rocblas_double_complex form of rocblas_strmv (trmv docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // whether A is upper or lower triangular
+        transA: rocblas_operation, // NOTE(review): undocumented in the header text; presumably selects the operation applied to A
+        diag: rocblas_diagonal, // whether A is assumed to be unit triangular
+        m: rocblas_int, // number of rows of A; m >= 0
+        A: *const rocblas_double_complex, // device pointer to matrix A, dimension (lda, m)
+        lda: rocblas_int, // leading dimension of A
+        x: *mut rocblas_double_complex, // device pointer to vector x; overwritten with the result
+        incx: rocblas_int, // increment for the elements of x
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrmv_batched performs one of the matrix-vector operations:\n\nx_i = A_i*x_i or\nx_i = A**T*x_i, 0 < i < batch_count\nwhere x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)\nThe vectors x_i are overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of matrices A_i. m >= 0.\n\n@param[in]\nA         device pointer storing pointer of matrices A_i,\nof dimension ( lda, m )\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A_i.\nlda >= max( 1, m ).\n\n@param[in]\nx         device pointer storing vectors x_i.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of vectors x_i.\n\n@param[in]\nbatch_count [rocblas_int]\nThe number of batched matrices/vectors.\n\n"]
+    pub fn rocblas_strmv_batched( // batched f32 triangular matrix-vector product; each x_i is overwritten
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // whether each A_i is upper or lower triangular
+        transA: rocblas_operation, // NOTE(review): undocumented in the header text; presumably selects A_i vs. its transpose
+        diag: rocblas_diagonal, // whether each A_i is assumed to be unit triangular
+        m: rocblas_int, // number of rows of the matrices A_i; m >= 0
+        A: *const *const f32, // device array of pointers to the matrices A_i, each of dimension (lda, m)
+        lda: rocblas_int, // leading dimension of A_i; lda >= max(1, m)
+        x: *const *mut f32, // device array of pointers to the vectors x_i (overwritten)
+        incx: rocblas_int, // increment for the elements of the vectors x_i
+        batch_count: rocblas_int, // number of batched matrices/vectors
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrmv_batched( // f64 form of rocblas_strmv_batched (trmv_batched docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // whether each A_i is upper or lower triangular
+        transA: rocblas_operation, // NOTE(review): undocumented in the header text; presumably selects A_i vs. its transpose
+        diag: rocblas_diagonal, // whether each A_i is assumed to be unit triangular
+        m: rocblas_int, // number of rows of the matrices A_i; m >= 0
+        A: *const *const f64, // device array of pointers to the matrices A_i, each of dimension (lda, m)
+        lda: rocblas_int, // leading dimension of A_i; lda >= max(1, m)
+        x: *const *mut f64, // device array of pointers to the vectors x_i (overwritten)
+        incx: rocblas_int, // increment for the elements of the vectors x_i
+        batch_count: rocblas_int, // number of batched matrices/vectors
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrmv_batched( // rocblas_float_complex form of rocblas_strmv_batched (docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // whether each A_i is upper or lower triangular
+        transA: rocblas_operation, // NOTE(review): undocumented in the header text; presumably selects the operation applied to A_i
+        diag: rocblas_diagonal, // whether each A_i is assumed to be unit triangular
+        m: rocblas_int, // number of rows of the matrices A_i; m >= 0
+        A: *const *const rocblas_float_complex, // device array of pointers to the matrices A_i, each of dimension (lda, m)
+        lda: rocblas_int, // leading dimension of A_i; lda >= max(1, m)
+        x: *const *mut rocblas_float_complex, // device array of pointers to the vectors x_i (overwritten)
+        incx: rocblas_int, // increment for the elements of the vectors x_i
+        batch_count: rocblas_int, // number of batched matrices/vectors
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrmv_batched( // rocblas_double_complex form of rocblas_strmv_batched (docs live on that declaration)
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // whether each A_i is upper or lower triangular
+        transA: rocblas_operation, // NOTE(review): undocumented in the header text; presumably selects the operation applied to A_i
+        diag: rocblas_diagonal, // whether each A_i is assumed to be unit triangular
+        m: rocblas_int, // number of rows of the matrices A_i; m >= 0
+        A: *const *const rocblas_double_complex, // device array of pointers to the matrices A_i, each of dimension (lda, m)
+        lda: rocblas_int, // leading dimension of A_i; lda >= max(1, m)
+        x: *const *mut rocblas_double_complex, // device array of pointers to the vectors x_i (overwritten)
+        incx: rocblas_int, // increment for the elements of the vectors x_i
+        batch_count: rocblas_int, // number of batched matrices/vectors
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrmv_strided_batched performs one of the matrix-vector operations:\n\nx_i = A_i*x_i or\nx_i = A_i**T*x_i, 0 < i < batch_count\nwhere x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)\nwith strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_{i-1}$).\n\nThe vectors x_i are overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of matrices A_i. m >= 0.\n\n@param[in]\nA         device pointer of the matrix A_0,\nof dimension ( lda, m ).\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A_i.\nlda >= max( 1, m ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_{i + 1}.\n\n@param[in]\nx         device pointer storing the vector x_0.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of one vector x.\n\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one x_i vector to the next x_{i + 1}.\n\n@param[in]\nbatch_count [rocblas_int]\nThe number of batched matrices/vectors.\n\n"]
+    pub fn rocblas_strmv_strided_batched( // strided-batched f32 triangular matrix-vector product; each x_i is overwritten
+        handle: rocblas_handle, // rocblas library context handle
+        uplo: rocblas_fill, // whether each A_i is upper or lower triangular
+        transA: rocblas_operation, // NOTE(review): undocumented in the header text; presumably selects A_i vs. its transpose
+        diag: rocblas_diagonal, // whether each A_i is assumed to be unit triangular
+        m: rocblas_int, // number of rows of the matrices A_i; m >= 0
+        A: *const f32, // device pointer to the first matrix (A_0), dimension (lda, m)
+        lda: rocblas_int, // leading dimension of A_i; lda >= max(1, m)
+        stride_A: rocblas_stride, // stride from the start of one A_i to the next
+        x: *mut f32, // device pointer to the first vector (x_0); overwritten
+        incx: rocblas_int, // increment for the elements of one vector x
+        stride_x: rocblas_stride, // stride from the start of one x_i to the next
+        batch_count: rocblas_int, // number of batched matrices/vectors
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrmv_strided_batched( // f64 counterpart of rocblas_strmv_strided_batched; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const f64, // read-only (const) matrix data
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut f64, // mutable: the strmv_strided_batched doc says the vectors x_i are overwritten
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrmv_strided_batched( // rocblas_float_complex counterpart of rocblas_strmv_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const rocblas_float_complex, // read-only (const) matrix data
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex, // mutable: the strmv_strided_batched doc says the vectors x_i are overwritten
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrmv_strided_batched( // rocblas_double_complex counterpart of rocblas_strmv_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const rocblas_double_complex, // read-only (const) matrix data
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex, // mutable: the strmv_strided_batched doc says the vectors x_i are overwritten
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpmv performs one of the matrix-vector operations:\n\nx = A*x or\nx = A**T*x,\nwhere x is an n element vector and A is an n by n unit, or non-unit,\nupper or lower triangular matrix, supplied in the pack form.\nThe vector x is overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of A. m >= 0.\n\n@param[in]\nA       device pointer storing matrix A,\nof dimension at leat ( m * ( m + 1 ) / 2 ).\n- Before entry with uplo = rocblas_fill_upper, the array A\nmust contain the upper triangular matrix packed sequentially,\ncolumn by column, so that\nA[0] contains a_{0,0}, A[1] and A[2] contain\na_{0,1} and a_{1, 1}, respectively, and so on.\n\n- Before entry with uplo = rocblas_fill_lower, the array A\nmust contain the lower triangular matrix packed sequentially,\ncolumn by column, so that\nA[0] contains a_{0,0}, A[1] and A[2] contain\na_{1,0} and a_{2,0}, respectively, and so on.\n\nNote that when DIAG = rocblas_diagonal_unit, the diagonal elements of A are\nnot referenced, but are assumed to be unity.\n\n@param[in]\nx       device pointer storing vector x.\n\n@param[in]\nincx    [rocblas_int]\nspecifies the increment for the elements of x. incx must not be zero.\n"]
+    pub fn rocblas_stpmv( // f32 member of the tpmv group described by the doc attribute above
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const f32, // packed triangular storage: this signature takes no lda
+        x: *mut f32, // NOTE(review): doc tags x as @param[in] but also says x is overwritten; the pointer is mutable
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtpmv( // f64 counterpart of rocblas_stpmv; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const f64, // read-only; packed form per the rocblas_stpmv doc (no lda parameter)
+        x: *mut f64, // mutable: the rocblas_stpmv doc says x is overwritten
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctpmv( // rocblas_float_complex counterpart of rocblas_stpmv; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const rocblas_float_complex, // read-only; packed form per the rocblas_stpmv doc (no lda parameter)
+        x: *mut rocblas_float_complex, // mutable: the rocblas_stpmv doc says x is overwritten
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztpmv( // rocblas_double_complex counterpart of rocblas_stpmv; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const rocblas_double_complex, // read-only; packed form per the rocblas_stpmv doc (no lda parameter)
+        x: *mut rocblas_double_complex, // mutable: the rocblas_stpmv doc says x is overwritten
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpmv_batched performs one of the matrix-vector operations:\n\nx_i = A_i*x_i or\nx_i = A**T*x_i, 0 < i < batch_count\nwhere x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)\nThe vectors x_i are overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of matrices A_i. m >= 0.\n\n@param[in]\nA         device pointer storing pointer of matrices A_i,\nof dimension ( lda, m ).\n\n@param[in]\nx         device pointer storing vectors x_i.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of vectors x_i.\n\n@param[in]\nbatch_count [rocblas_int]\nThe number of batched matrices/vectors.\n\n"]
+    pub fn rocblas_stpmv_batched( // f32 member of the tpmv_batched group described by the doc attribute above
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const *const f32, // pointer to const pointers to const data. NOTE(review): doc mentions ( lda, m ) but there is no lda parameter here
+        x: *const *mut f32, // const pointer to mutable data pointers; the doc says the vectors x_i are overwritten
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtpmv_batched( // f64 counterpart of rocblas_stpmv_batched; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const *const f64, // pointer to const pointers to const matrix data
+        x: *const *mut f64, // const pointer to mutable data pointers; x_i are overwritten per the rocblas_stpmv_batched doc
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctpmv_batched( // rocblas_float_complex counterpart of rocblas_stpmv_batched; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const *const rocblas_float_complex, // pointer to const pointers to const matrix data
+        x: *const *mut rocblas_float_complex, // const pointer to mutable data pointers; x_i are overwritten per the rocblas_stpmv_batched doc
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztpmv_batched( // rocblas_double_complex counterpart of rocblas_stpmv_batched; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const *const rocblas_double_complex, // pointer to const pointers to const matrix data
+        x: *const *mut rocblas_double_complex, // const pointer to mutable data pointers; x_i are overwritten per the rocblas_stpmv_batched doc
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpmv_strided_batched performs one of the matrix-vector operations:\n\nx_i = A_i*x_i or\nx_i = A**T*x_i, 0 < i < batch_count\nwhere x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)\nwith strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_i$).\nThe vectors x_i are overwritten.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of matrices A_i. m >= 0.\n\n@param[in]\nA         device pointer of the matrix A_0,\nof dimension ( lda, m )\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_{i + 1}.\n\n@param[in]\nx         device pointer storing the vector x_0.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of one vector x.\n\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one x_i vector to the next x_{i + 1}.\n\n@param[in]\nbatch_count [rocblas_int]\nThe number of batched matrices/vectors.\n\n"]
+    pub fn rocblas_stpmv_strided_batched( // f32 member of the tpmv_strided_batched group described above
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const f32, // first matrix A_0 per the doc. NOTE(review): doc mentions ( lda, m ) but there is no lda parameter here
+        stride_A: rocblas_stride,
+        x: *mut f32, // first vector x_0 per the doc; mutable because the x_i are overwritten
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtpmv_strided_batched( // f64 counterpart of rocblas_stpmv_strided_batched; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const f64, // read-only; first matrix of the batch per the rocblas_stpmv_strided_batched doc
+        stride_A: rocblas_stride,
+        x: *mut f64, // mutable: x_i are overwritten per the rocblas_stpmv_strided_batched doc
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctpmv_strided_batched( // rocblas_float_complex counterpart of rocblas_stpmv_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const rocblas_float_complex, // read-only; first matrix of the batch per the rocblas_stpmv_strided_batched doc
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex, // mutable: x_i are overwritten per the rocblas_stpmv_strided_batched doc
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztpmv_strided_batched( // rocblas_double_complex counterpart of rocblas_stpmv_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const rocblas_double_complex, // read-only; first matrix of the batch per the rocblas_stpmv_strided_batched doc
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex, // mutable: x_i are overwritten per the rocblas_stpmv_strided_batched doc
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbmv performs one of the matrix-vector operations:\n\nx := A*x      or\nx := A**T*x   or\nx := A**H*x,\nx is a vectors and A is a banded m by m matrix (see description below).\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: A is an upper banded triangular matrix.\n- rocblas_fill_lower: A is a  lower banded triangular matrix.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether matrix A is tranposed (conjugated) or not.\n@param[in]\ndiag      [rocblas_diagonal]\n- rocblas_diagonal_unit: The main diagonal of A is assumed to consist of only\n1's and is not referenced.\n- rocblas_diagonal_non_unit: No assumptions are made of A's main diagonal.\n@param[in]\nm         [rocblas_int]\nthe number of rows and columns of the matrix represented by A.\n@param[in]\nk         [rocblas_int]\n\nif uplo == rocblas_fill_upper, k specifies the number of super-diagonals\nof the matrix A.\n\nif uplo == rocblas_fill_lower, k specifies the number of sub-diagonals\nof the matrix A.\nk must satisfy k > 0 && k < lda.\n@param[in]\nA         device pointer storing banded triangular matrix A.\n\nif uplo == rocblas_fill_upper:\nThe matrix represented is an upper banded triangular matrix\nwith the main diagonal and k super-diagonals, everything\nelse can be assumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the k'th\nrow, the first super diagonal resides on the RHS of the k-1'th row, etc,\nwith the k'th diagonal on the RHS of the 0'th row.\nEx: (rocblas_fill_upper; m = 5; k = 2)\n1 6 9 0 0              0 0 9 8 7\n0 2 7 8 0              0 6 7 8 9\n0 0 3 8 7     ---->    1 2 3 4 5\n0 0 0 4 9              0 0 0 0 0\n0 0 0 0 5              0 0 0 0 0\n\nif uplo == rocblas_fill_lower:\nThe matrix represnted is a lower banded triangular matrix\nwith the main diagonal and k sub-diagonals, everything 
else can be\nassumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the 0'th row,\nworking up to the k'th diagonal residing on the LHS of the k'th row.\nEx: (rocblas_fill_lower; m = 5; k = 2)\n1 0 0 0 0              1 2 3 4 5\n6 2 0 0 0              6 7 8 9 0\n9 7 3 0 0     ---->    9 8 7 0 0\n0 8 8 4 0              0 0 0 0 0\n0 0 7 9 5              0 0 0 0 0\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A. lda must satisfy lda > k.\n@param[inout]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
+    pub fn rocblas_stbmv( // f32 member of the tbmv group described by the doc attribute above
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        k: rocblas_int, // number of super-/sub-diagonals depending on uplo (per the doc)
+        A: *const f32, // banded matrix in the compacted layout illustrated in the doc
+        lda: rocblas_int, // doc requires lda > k
+        x: *mut f32, // tagged @param[inout] in the doc
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtbmv( // f64 counterpart of rocblas_stbmv; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        k: rocblas_int,
+        A: *const f64, // read-only banded matrix data
+        lda: rocblas_int, // the rocblas_stbmv doc requires lda > k
+        x: *mut f64, // in/out per the rocblas_stbmv doc (@param[inout])
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctbmv( // rocblas_float_complex counterpart of rocblas_stbmv; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_float_complex, // read-only banded matrix data
+        lda: rocblas_int, // the rocblas_stbmv doc requires lda > k
+        x: *mut rocblas_float_complex, // in/out per the rocblas_stbmv doc (@param[inout])
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztbmv( // rocblas_double_complex counterpart of rocblas_stbmv; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_double_complex, // read-only banded matrix data
+        lda: rocblas_int, // the rocblas_stbmv doc requires lda > k
+        x: *mut rocblas_double_complex, // in/out per the rocblas_stbmv doc (@param[inout])
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbmv_batched performs one of the matrix-vector operations:\n\nx_i := A_i*x_i      or\nx_i := A_i**T*x_i   or\nx_i := A_i**H*x_i,\nwhere (A_i, x_i) is the i-th instance of the batch.\nx_i is a vector and A_i is an m by m matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: each A_i is an upper banded triangular matrix.\n- rocblas_fill_lower: each A_i is a  lower banded triangular matrix.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether each matrix A_i is tranposed (conjugated) or not.\n@param[in]\ndiag      [rocblas_diagonal]\n- rocblas_diagonal_unit: The main diagonal of each A_i is assumed to consist of only\n1's and is not referenced.\n- rocblas_diagonal_non_unit: No assumptions are made of each A_i's main diagonal.\n@param[in]\nm         [rocblas_int]\nthe number of rows and columns of the matrix represented by each A_i.\n@param[in]\nk         [rocblas_int]\n\nif uplo == rocblas_fill_upper, k specifies the number of super-diagonals\nof each matrix A_i.\n\nif uplo == rocblas_fill_lower, k specifies the number of sub-diagonals\nof each matrix A_i.\nk must satisfy k > 0 && k < lda.\n@param[in]\nA         device array of device pointers storing each banded triangular matrix A_i.\n\nif uplo == rocblas_fill_upper:\nThe matrix represented is an upper banded triangular matrix\nwith the main diagonal and k super-diagonals, everything\nelse can be assumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the k'th\nrow, the first super diagonal resides on the RHS of the k-1'th row, etc,\nwith the k'th diagonal on the RHS of the 0'th row.\nEx: (rocblas_fill_upper; m = 5; k = 2)\n1 6 9 0 0              0 0 9 8 7\n0 2 7 8 0              0 6 7 8 9\n0 0 3 8 7     ---->    1 2 3 4 5\n0 0 0 4 9              0 0 0 0 0\n0 0 0 0 5              0 0 0 0 
0\n\nif uplo == rocblas_fill_lower:\nThe matrix represnted is a lower banded triangular matrix\nwith the main diagonal and k sub-diagonals, everything else can be\nassumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the 0'th row,\nworking up to the k'th diagonal residing on the LHS of the k'th row.\nEx: (rocblas_fill_lower; m = 5; k = 2)\n1 0 0 0 0              1 2 3 4 5\n6 2 0 0 0              6 7 8 9 0\n9 7 3 0 0     ---->    9 8 7 0 0\n0 8 8 4 0              0 0 0 0 0\n0 0 7 9 5              0 0 0 0 0\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. lda must satisfy lda > k.\n@param[inout]\nx         device array of device pointer storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_stbmv_batched( // f32 member of the tbmv_batched group described by the doc attribute above
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        k: rocblas_int, // number of super-/sub-diagonals depending on uplo (per the doc)
+        A: *const *const f32, // array of pointers, one banded matrix A_i each (per the doc)
+        lda: rocblas_int, // doc requires lda > k
+        x: *const *mut f32, // const pointer to mutable data pointers; x is tagged @param[inout] in the doc
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtbmv_batched( // f64 counterpart of rocblas_stbmv_batched; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        k: rocblas_int,
+        A: *const *const f64, // pointer to const pointers to const matrix data
+        lda: rocblas_int, // the rocblas_stbmv_batched doc requires lda > k
+        x: *const *mut f64, // const pointer to mutable data pointers; in/out per the rocblas_stbmv_batched doc
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctbmv_batched( // rocblas_float_complex counterpart of rocblas_stbmv_batched; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        k: rocblas_int,
+        A: *const *const rocblas_float_complex, // pointer to const pointers to const matrix data
+        lda: rocblas_int, // the rocblas_stbmv_batched doc requires lda > k
+        x: *const *mut rocblas_float_complex, // const pointer to mutable data pointers; in/out per the rocblas_stbmv_batched doc
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztbmv_batched( // rocblas_double_complex counterpart of rocblas_stbmv_batched; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        k: rocblas_int,
+        A: *const *const rocblas_double_complex, // pointer to const pointers to const matrix data
+        lda: rocblas_int, // the rocblas_stbmv_batched doc requires lda > k
+        x: *const *mut rocblas_double_complex, // const pointer to mutable data pointers; in/out per the rocblas_stbmv_batched doc
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbmv_strided_batched performs one of the matrix-vector operations:\n\nx_i := A_i*x_i      or\nx_i := A_i**T*x_i   or\nx_i := A_i**H*x_i,\nwhere (A_i, x_i) is the i-th instance of the batch.\nx_i is a vector and A_i is an m by m matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\n- rocblas_fill_upper: each A_i is an upper banded triangular matrix.\n- rocblas_fill_lower: each A_i is a  lower banded triangular matrix.\n@param[in]\ntrans     [rocblas_operation]\nindicates whether each matrix A_i is tranposed (conjugated) or not.\n@param[in]\ndiag      [rocblas_diagonal]\n- rocblas_diagonal_unit: The main diagonal of each A_i is assumed to consist of only\n1's and is not referenced.\n- rocblas_diagonal_non_unit: No assumptions are made of each A_i's main diagonal.\n@param[in]\nm         [rocblas_int]\nthe number of rows and columns of the matrix represented by each A_i.\n@param[in]\nk         [rocblas_int]\n\nif uplo == rocblas_fill_upper, k specifies the number of super-diagonals\nof each matrix A_i.\n\nif uplo == rocblas_fill_lower, k specifies the number of sub-diagonals\nof each matrix A_i.\nk must satisfy k > 0 && k < lda.\n@param[in]\nA         device array to the first matrix A_i of the batch. 
Stores each banded triangular matrix A_i.\n\nif uplo == rocblas_fill_upper:\nThe matrix represented is an upper banded triangular matrix\nwith the main diagonal and k super-diagonals, everything\nelse can be assumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the k'th\nrow, the first super diagonal resides on the RHS of the k-1'th row, etc,\nwith the k'th diagonal on the RHS of the 0'th row.\nEx: (rocblas_fill_upper; m = 5; k = 2)\n1 6 9 0 0              0 0 9 8 7\n0 2 7 8 0              0 6 7 8 9\n0 0 3 8 7     ---->    1 2 3 4 5\n0 0 0 4 9              0 0 0 0 0\n0 0 0 0 5              0 0 0 0 0\n\nif uplo == rocblas_fill_lower:\nThe matrix represnted is a lower banded triangular matrix\nwith the main diagonal and k sub-diagonals, everything else can be\nassumed to be 0.\nThe matrix is compacted so that the main diagonal resides on the 0'th row,\nworking up to the k'th diagonal residing on the LHS of the k'th row.\nEx: (rocblas_fill_lower; m = 5; k = 2)\n1 0 0 0 0              1 2 3 4 5\n6 2 0 0 0              6 7 8 9 0\n9 7 3 0 0     ---->    9 8 7 0 0\n0 8 8 4 0              0 0 0 0 0\n0 0 7 9 5              0 0 0 0 0\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i. lda must satisfy lda > k.\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_(i + 1).\n@param[inout]\nx         device array to the first vector x_i of the batch.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one x_i matrix to the next x_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_stbmv_strided_batched( // f32 member of the tbmv_strided_batched group described above
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        k: rocblas_int, // number of super-/sub-diagonals depending on uplo (per the doc)
+        A: *const f32, // first matrix of the batch (per the doc); later ones are reached via stride_A
+        lda: rocblas_int, // doc requires lda > k
+        stride_A: rocblas_stride,
+        x: *mut f32, // first vector of the batch; tagged @param[inout] in the doc
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtbmv_strided_batched( // f64 counterpart of rocblas_stbmv_strided_batched; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        k: rocblas_int,
+        A: *const f64, // read-only; first matrix of the batch per the rocblas_stbmv_strided_batched doc
+        lda: rocblas_int, // the rocblas_stbmv_strided_batched doc requires lda > k
+        stride_A: rocblas_stride,
+        x: *mut f64, // in/out per the rocblas_stbmv_strided_batched doc
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctbmv_strided_batched( // rocblas_float_complex counterpart of rocblas_stbmv_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_float_complex, // read-only; first matrix of the batch per the rocblas_stbmv_strided_batched doc
+        lda: rocblas_int, // the rocblas_stbmv_strided_batched doc requires lda > k
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex, // in/out per the rocblas_stbmv_strided_batched doc
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztbmv_strided_batched( // rocblas_double_complex counterpart of rocblas_stbmv_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_double_complex, // read-only; first matrix of the batch per the rocblas_stbmv_strided_batched doc
+        lda: rocblas_int, // the rocblas_stbmv_strided_batched doc requires lda > k
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex, // in/out per the rocblas_stbmv_strided_batched doc
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbsv solves:\n\nA*x = b or\nA**T*x = b or\nA**H*x = b\nwhere x and b are vectors and A is a banded triangular matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none: Solves A*x = b\n- rocblas_operation_transpose: Solves A**T*x = b\n- rocblas_operation_conjugate_transpose: Solves A**H*x = b\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit: A is assumed to be unit triangular (i.e. the diagonal elements\nof A are not used in computations).\n- rocblas_diagonal_non_unit: A is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of b. n >= 0.\n@param[in]\nk         [rocblas_int]\n\nif(uplo == rocblas_fill_upper)\nk specifies the number of super-diagonals of A.\nif(uplo == rocblas_fill_lower)\nk specifies the number of sub-diagonals of A.\nk >= 0.\n\n@param[in]\nA         device pointer storing the matrix A in banded format.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\nlda >= (k + 1).\n\n@param[inout]\nx         device pointer storing input vector b. Overwritten by the output vector x.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
+    pub fn rocblas_stbsv( // f32 member of the tbsv group described by the doc attribute above
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int, // number of super-/sub-diagonals depending on uplo; doc requires k >= 0
+        A: *const f32, // matrix in banded format (per the doc)
+        lda: rocblas_int, // doc requires lda >= k + 1
+        x: *mut f32, // holds b on entry and is overwritten with the solution x (per the doc)
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtbsv( // f64 counterpart of rocblas_stbsv; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const f64, // read-only banded matrix data
+        lda: rocblas_int, // the rocblas_stbsv doc requires lda >= k + 1
+        x: *mut f64, // b on entry, overwritten with x (per the rocblas_stbsv doc)
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctbsv( // rocblas_float_complex counterpart of rocblas_stbsv; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_float_complex, // read-only banded matrix data
+        lda: rocblas_int, // the rocblas_stbsv doc requires lda >= k + 1
+        x: *mut rocblas_float_complex, // b on entry, overwritten with x (per the rocblas_stbsv doc)
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztbsv( // rocblas_double_complex counterpart of rocblas_stbsv; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_double_complex, // read-only banded matrix data
+        lda: rocblas_int, // the rocblas_stbsv doc requires lda >= k + 1
+        x: *mut rocblas_double_complex, // b on entry, overwritten with x (per the rocblas_stbsv doc)
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbsv_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i\nwhere x_i and b_i are vectors and A_i is a banded triangular matrix,\nfor i = [1, batch_count].\n\nThe input vectors b_i are overwritten by the output vectors x_i.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none: Solves A_i*x_i = b_i\n- rocblas_operation_transpose: Solves A_i**T*x_i = b_i\n- rocblas_operation_conjugate_transpose: Solves A_i**H*x_i = b_i\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular (i.e. the diagonal elements\nof each A_i are not used in computations).\n- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of each b_i. n >= 0.\n@param[in]\nk         [rocblas_int]\n\nif(uplo == rocblas_fill_upper)\nk specifies the number of super-diagonals of each A_i.\nif(uplo == rocblas_fill_lower)\nk specifies the number of sub-diagonals of each A_i.\nk >= 0.\n\n@param[in]\nA         device vector of device pointers storing each matrix A_i in banded format.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\nlda >= (k + 1).\n\n@param[inout]\nx         device vector of device pointers storing each input vector b_i. Overwritten by each output\nvector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_stbsv_batched( // f32 member of the tbsv_batched group described by the doc attribute above
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int, // number of super-/sub-diagonals depending on uplo; doc requires k >= 0
+        A: *const *const f32, // vector of pointers, one banded matrix A_i each (per the doc)
+        lda: rocblas_int, // doc requires lda >= k + 1
+        x: *const *mut f32, // const pointer to mutable data pointers; each b_i is overwritten with x_i (per the doc)
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtbsv_batched( // f64 counterpart of rocblas_stbsv_batched; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const *const f64, // pointer to const pointers to const matrix data
+        lda: rocblas_int, // the rocblas_stbsv_batched doc requires lda >= k + 1
+        x: *const *mut f64, // const pointer to mutable data pointers; b_i in, x_i out per the rocblas_stbsv_batched doc
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctbsv_batched( // rocblas_float_complex counterpart of rocblas_stbsv_batched; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const *const rocblas_float_complex, // pointer to const pointers to const matrix data
+        lda: rocblas_int, // the rocblas_stbsv_batched doc requires lda >= k + 1
+        x: *const *mut rocblas_float_complex, // const pointer to mutable data pointers; b_i in, x_i out per the rocblas_stbsv_batched doc
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztbsv_batched( // rocblas_double_complex counterpart of rocblas_stbsv_batched; same parameter list
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const *const rocblas_double_complex, // pointer to const pointers to const matrix data
+        lda: rocblas_int, // the rocblas_stbsv_batched doc requires lda >= k + 1
+        x: *const *mut rocblas_double_complex, // const pointer to mutable data pointers; b_i in, x_i out per the rocblas_stbsv_batched doc
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntbsv_strided_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i\nwhere x_i and b_i are vectors and A_i is a banded triangular matrix,\nfor i = [1, batch_count].\n\nThe input vectors b_i are overwritten by the output vectors x_i.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix.\n- rocblas_fill_lower:  A_i is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n- rocblas_operation_none: Solves A_i*x_i = b_i\n- rocblas_operation_transpose: Solves A_i**T*x_i = b_i\n- rocblas_operation_conjugate_transpose: Solves A_i**H*x_i = b_i\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular (i.e. the diagonal elements\nof each A_i are not used in computations).\n- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of each b_i. n >= 0.\n@param[in]\nk         [rocblas_int]\n\nif(uplo == rocblas_fill_upper)\nk specifies the number of super-diagonals of each A_i.\nif(uplo == rocblas_fill_lower)\nk specifies the number of sub-diagonals of each A_i.\nk >= 0.\n\n@param[in]\nA         device pointer pointing to the first banded matrix A_1.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\nlda >= (k + 1).\n@param[in]\nstride_A  [rocblas_stride]\nspecifies the distance between the start of one matrix (A_i) and the next (A_i+1).\n\n@param[inout]\nx         device pointer pointing to the first input vector b_1. 
Overwritten by output vectors x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nspecifies the distance between the start of one vector (x_i) and the next (x_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_stbsv_strided_batched( // f32 member of the tbsv_strided_batched group described above
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int, // number of super-/sub-diagonals depending on uplo; doc requires k >= 0
+        A: *const f32, // first banded matrix of the batch (per the doc); later ones are reached via stride_A
+        lda: rocblas_int, // doc requires lda >= k + 1
+        stride_A: rocblas_stride,
+        x: *mut f32, // first b vector on entry; overwritten by the output vectors (per the doc)
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `f64` variant of [`rocblas_stbsv_strided_batched`].
+    ///
+    /// Identical parameter list with `f64` in place of `f32`; the parameter descriptions are
+    /// attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_dtbsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_float_complex` variant of [`rocblas_stbsv_strided_batched`].
+    ///
+    /// Identical parameter list with `rocblas_float_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ctbsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_double_complex` variant of [`rocblas_stbsv_strided_batched`].
+    ///
+    /// Identical parameter list with `rocblas_double_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ztbsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// Single-precision (`f32`) triangular solve (`trsv`).
+    ///
+    /// NOTE(review): the text below tags `x` as `@param[in]`, but `x` is declared `*mut f32` and
+    /// the description says the solution overwrites the right-hand side, so treat it as in/out.
+    /// The `d`/`c`/`z` declarations that follow take the same parameter list.
+    ///
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrsv solves:\n\nA*x = b or\nA**T*x = b\nwhere x and b are vectors and A is a triangular matrix.\nThe vector x is overwritten on b.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of b. m >= 0.\n\n@param[in]\nA         device pointer storing matrix A,\nof dimension ( lda, m )\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\nlda = max( 1, m ).\n\n@param[in]\nx         device pointer storing vector x.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
+    pub fn rocblas_strsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `f64` variant of [`rocblas_strsv`].
+    ///
+    /// Identical parameter list with `f64` in place of `f32`; the parameter descriptions are
+    /// attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_dtrsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_float_complex` variant of [`rocblas_strsv`].
+    ///
+    /// Identical parameter list with `rocblas_float_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ctrsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_double_complex` variant of [`rocblas_strsv`].
+    ///
+    /// Identical parameter list with `rocblas_double_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ztrsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// Single-precision (`f32`) batched triangular solve (`trsv_batched`).
+    ///
+    /// `A` and `x` are arrays of per-instance pointers (`*const *const f32` / `*const *mut f32`).
+    /// NOTE(review): the text below tags `x` as `@param[in]`, but the pointed-to vectors are
+    /// `*mut` and described as overwritten, so treat them as in/out.
+    ///
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrsv_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i\nwhere (A_i, x_i, b_i) is the i-th instance of the batch.\nx_i and b_i are vectors and A_i is an\nm by m triangular matrix.\n\nThe vector x is overwritten on b.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of b. m >= 0.\n\n@param[in]\nA         device array of device pointers storing each matrix A_i.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\nlda = max(1, m)\n\n@param[in]\nx         device array of device pointers storing each vector x_i.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_strsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const *const f32,
+        lda: rocblas_int,
+        x: *const *mut f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `f64` variant of [`rocblas_strsv_batched`].
+    ///
+    /// Identical parameter list with `f64` in place of `f32`; the parameter descriptions are
+    /// attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_dtrsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const *const f64,
+        lda: rocblas_int,
+        x: *const *mut f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_float_complex` variant of [`rocblas_strsv_batched`].
+    ///
+    /// Identical parameter list with `rocblas_float_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ctrsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_double_complex` variant of [`rocblas_strsv_batched`].
+    ///
+    /// Identical parameter list with `rocblas_double_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ztrsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// Single-precision (`f32`) strided-batched triangular solve (`trsv_strided_batched`).
+    ///
+    /// NOTE(review): the `@param` entries below list `stride_A` before `lda` and `stride_x` before
+    /// `incx`; the actual argument order is the one in the signature
+    /// (`A, lda, stride_A, x, incx, stride_x`).
+    ///
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntrsv_strided_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i\nwhere (A_i, x_i, b_i) is the i-th instance of the batch.\nx_i and b_i are vectors and A_i is an m by m triangular matrix, for i = 1, ..., batch_count.\n\nThe vector x is overwritten on b.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA     [rocblas_operation]\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm         [rocblas_int]\nm specifies the number of rows of each b_i. m >= 0.\n\n@param[in]\nA         device pointer to the first matrix (A_1) in the batch, of dimension ( lda, m ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_(i + 1).\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\nlda = max( 1, m ).\n\n@param[in, out]\nx         device pointer to the first vector (x_1) in the batch.\n\n@param[in]\nstride_x [rocblas_stride]\nstride from the start of one x_i vector to the next x_(i + 1)\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_strsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `f64` variant of [`rocblas_strsv_strided_batched`].
+    ///
+    /// Identical parameter list with `f64` in place of `f32`; the parameter descriptions are
+    /// attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_dtrsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_float_complex` variant of [`rocblas_strsv_strided_batched`].
+    ///
+    /// Identical parameter list with `rocblas_float_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ctrsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_double_complex` variant of [`rocblas_strsv_strided_batched`].
+    ///
+    /// Identical parameter list with `rocblas_double_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ztrsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// Single-precision (`f32`) packed triangular solve (`tpsv`).
+    ///
+    /// Takes the packed matrix `AP` with no leading-dimension argument. The `d`/`c`/`z`
+    /// declarations that follow take the same parameter list with other element types.
+    ///
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpsv solves:\n\nA*x = b or\nA**T*x = b or\nA**H*x = b\nwhere x and b are vectors and A is a triangular matrix stored in the packed format.\n\nThe input vector b is overwritten by the output vector x.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none: Solves A*x = b\n- rocblas_operation_transpose: Solves A**T*x = b\n- rocblas_operation_conjugate_transpose: Solves A**H*x = b\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:  A is assumed to be unit triangular (i.e. the diagonal elements\nof A are not used in computations).\n- rocblas_diagonal_non_unit: A is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of b. n >= 0.\n\n@param[in]\nAP        device pointer storing the packed version of matrix A,\nof dimension >= (n * (n + 1) / 2).\n\n@param[inout]\nx         device pointer storing vector b on input, overwritten by x on output.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n"]
+    pub fn rocblas_stpsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const f32,
+        x: *mut f32,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `f64` variant of [`rocblas_stpsv`].
+    ///
+    /// Identical parameter list with `f64` in place of `f32`; the parameter descriptions are
+    /// attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_dtpsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const f64,
+        x: *mut f64,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_float_complex` variant of [`rocblas_stpsv`].
+    ///
+    /// Identical parameter list with `rocblas_float_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ctpsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const rocblas_float_complex,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_double_complex` variant of [`rocblas_stpsv`].
+    ///
+    /// Identical parameter list with `rocblas_double_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ztpsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const rocblas_double_complex,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// Single-precision (`f32`) batched packed triangular solve (`tpsv_batched`).
+    ///
+    /// `AP` and `x` are arrays of per-instance pointers (`*const *const f32` / `*const *mut f32`).
+    /// The `d`/`c`/`z` declarations that follow take the same parameter list.
+    ///
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpsv_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i\nwhere x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format,\nfor i in [1, batch_count].\n\nThe input vectors b_i are overwritten by the output vectors x_i.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  each A_i is an upper triangular matrix.\n- rocblas_fill_lower:  each A_i is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none: Solves A*x = b\n- rocblas_operation_transpose: Solves A**T*x = b\n- rocblas_operation_conjugate_transpose: Solves A**H*x = b\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit: Each A_i is assumed to be unit triangular (i.e. the diagonal elements\nof each A_i are not used in computations).\n- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of each b_i. n >= 0.\n\n@param[in]\nAP        device array of device pointers storing the packed versions of each matrix A_i,\nof dimension >= (n * (n + 1) / 2).\n\n@param[inout]\nx         device array of device pointers storing each input vector b_i, overwritten by x_i on output.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of instances in the batch.\n"]
+    pub fn rocblas_stpsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const *const f32,
+        x: *const *mut f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `f64` variant of [`rocblas_stpsv_batched`].
+    ///
+    /// Identical parameter list with `f64` in place of `f32`; the parameter descriptions are
+    /// attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_dtpsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const *const f64,
+        x: *const *mut f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_float_complex` variant of [`rocblas_stpsv_batched`].
+    ///
+    /// Identical parameter list with `rocblas_float_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ctpsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const *const rocblas_float_complex,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_double_complex` variant of [`rocblas_stpsv_batched`].
+    ///
+    /// Identical parameter list with `rocblas_double_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ztpsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const *const rocblas_double_complex,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// Single-precision (`f32`) strided-batched packed triangular solve (`tpsv_strided_batched`).
+    ///
+    /// The `d`/`c`/`z` declarations that follow take the same parameter list with other element
+    /// types and carry no description of their own.
+    ///
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\ntpsv_strided_batched solves:\n\nA_i*x_i = b_i or\nA_i**T*x_i = b_i or\nA_i**H*x_i = b_i\nwhere x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format,\nfor i in [1, batch_count].\n\nThe input vectors b_i are overwritten by the output vectors x_i.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  each A_i is an upper triangular matrix.\n- rocblas_fill_lower:  each A_i is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none: Solves A*x = b\n- rocblas_operation_transpose: Solves A**T*x = b\n- rocblas_operation_conjugate_transpose: Solves A**H*x = b\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular (i.e. the diagonal elements\nof each A_i are not used in computations).\n- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.\n\n@param[in]\nn         [rocblas_int]\nn specifies the number of rows of each b_i. n >= 0.\n\n@param[in]\nAP        device pointer pointing to the first packed matrix A_1,\nof dimension >= (n * (n + 1) / 2).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the beginning of one packed matrix (AP_i) and the next (AP_i+1).\n\n@param[inout]\nx         device pointer pointing to the first input vector b_1. Overwritten by each x_i on output.\n\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the beginning of one vector (x_i) and the next (x_i+1).\n@param[in]\nbatch_count [rocblas_int]\nspecifies the number of instances in the batch.\n"]
+    pub fn rocblas_stpsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const f32,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `f64` variant of [`rocblas_stpsv_strided_batched`].
+    ///
+    /// Identical parameter list with `f64` in place of `f32`; the parameter descriptions are
+    /// attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_dtpsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const f64,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_float_complex` variant of [`rocblas_stpsv_strided_batched`].
+    ///
+    /// Identical parameter list with `rocblas_float_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ctpsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const rocblas_float_complex,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_double_complex` variant of [`rocblas_stpsv_strided_batched`].
+    ///
+    /// Identical parameter list with `rocblas_double_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_ztpsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const rocblas_double_complex,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// Single-precision (`f32`) symmetric matrix-vector product (`symv`).
+    ///
+    /// `alpha` and `beta` are passed by pointer. NOTE(review): the text below tags `y` as
+    /// `@param[out]`, yet `y` appears on the right-hand side of the stated formula, so it
+    /// presumably must hold valid input data as well; confirm against the rocBLAS reference.
+    ///
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsymv performs the matrix-vector operation:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and\nA should contain an upper or lower triangular n by n symmetric matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo     [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced.\n- if rocblas_fill_lower, the upper part of A is not referenced.\n@param[in]\nn         [rocblas_int]\n@param[in]\nalpha\nspecifies the scalar alpha.\n@param[in]\nA         pointer storing matrix A on the GPU\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nx         pointer storing vector x on the GPU.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      specifies the scalar beta\n@param[out]\ny         pointer storing vector y on the GPU.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_ssymv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `f64` variant of [`rocblas_ssymv`].
+    ///
+    /// Identical parameter list with `f64` in place of `f32`; the parameter descriptions are
+    /// attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_dsymv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *mut f64,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_float_complex` variant of [`rocblas_ssymv`].
+    ///
+    /// Identical parameter list with `rocblas_float_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_csymv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_double_complex` variant of [`rocblas_ssymv`].
+    ///
+    /// Identical parameter list with `rocblas_double_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_zsymv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// Single-precision (`f32`) batched symmetric matrix-vector product (`symv_batched`).
+    ///
+    /// `A`, `x` and `y` are arrays of per-instance pointers, while `alpha` and `beta` are single
+    /// scalars passed by pointer. The `d`/`c`/`z` declarations that follow take the same
+    /// parameter list with other element types.
+    ///
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsymv_batched performs the matrix-vector operation:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nn by n symmetric matrix, for i = 1, ..., batch_count.\nA a should contain an upper or lower triangular symmetric matrix\nand the opposing triangular part of A is not referenced.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced.\n- if rocblas_fill_lower, the upper part of A is not referenced.\n@param[in]\nn         [rocblas_int]\nnumber of rows and columns of each matrix A_i.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each matrix A_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[out]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssymv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `f64` variant of [`rocblas_ssymv_batched`].
+    ///
+    /// Identical parameter list with `f64` in place of `f32`; the parameter descriptions are
+    /// attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_dsymv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_float_complex` variant of [`rocblas_ssymv_batched`].
+    ///
+    /// Identical parameter list with `rocblas_float_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_csymv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_double_complex` variant of [`rocblas_ssymv_batched`].
+    ///
+    /// Identical parameter list with `rocblas_double_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_zsymv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// Single-precision (`f32`) strided-batched symmetric matrix-vector product
+    /// (`symv_strided_batched`).
+    ///
+    /// Each of `A`, `x` and `y` is a single base pointer followed by its own stride argument
+    /// (`strideA`, `stridex`, `stridey`). The `d`/`c`/`z` declarations that follow take the same
+    /// parameter list with other element types.
+    ///
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsymv_strided_batched performs the matrix-vector operation:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nn by n symmetric matrix, for i = 1, ..., batch_count.\nA a should contain an upper or lower triangular symmetric matrix\nand the opposing triangular part of A is not referenced.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nnumber of rows and columns of each matrix A_i.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nA         Device pointer to the first matrix A_1 on the GPU.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each matrix A_i.\n@param[in]\nstrideA     [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nx         Device pointer to the first vector x_1 on the GPU.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[in]\nstridex     [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stridex is of appropriate size.\nThis typically means stridex >= n * incx. stridex should be non zero.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[out]\ny         Device pointer to the first vector y_1 on the GPU.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nstridey     [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stride_y. However, ensure that stridey is of appropriate size.\nThis typically means stridey >= n * incy. stridey should be non zero.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssymv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `f64` variant of [`rocblas_ssymv_strided_batched`].
+    ///
+    /// Identical parameter list with `f64` in place of `f32`; the parameter descriptions are
+    /// attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_dsymv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_float_complex` variant of [`rocblas_ssymv_strided_batched`].
+    ///
+    /// Identical parameter list with `rocblas_float_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_csymv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// `rocblas_double_complex` variant of [`rocblas_ssymv_strided_batched`].
+    ///
+    /// Identical parameter list with `rocblas_double_complex` in place of `f32`; the parameter
+    /// descriptions are attached to the `f32` declaration only.
+    #[must_use]
+    pub fn rocblas_zsymv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    /// Single-precision (`f32`) packed symmetric matrix-vector product (`spmv`).
+    ///
+    /// Unlike `symv`, the signature has no `lda` argument. The `f64` declaration that follows
+    /// takes the same parameter list.
+    ///
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspmv performs the matrix-vector operation:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and\nA should contain an upper or lower triangular n by n packed symmetric matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      rocblas_fill\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\n@param[in]\nalpha\nspecifies the scalar alpha.\n@param[in]\nA         pointer storing matrix A on the GPU.\n@param[in]\nx         pointer storing vector x on the GPU.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      specifies the scalar beta.\n@param[out]\ny         pointer storing vector y on the GPU.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_sspmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute was generated here; the spmv text on rocblas_sspmv presumably applies
+    #[must_use]
+    pub fn rocblas_dspmv( // same parameter list as rocblas_sspmv with f64 in place of f32
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *mut f64, // the only mutable pointer in this signature
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 binding for spmv_batched; pointer-array form of spmv
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspmv_batched performs the matrix-vector operation:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nn by n symmetric matrix, for i = 1, ..., batch_count.\nA should contain an upper or lower triangular n by n packed symmetric matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nnumber of rows and columns of each matrix A_i.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[out]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_sspmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32, // array of per-instance matrix pointers
+        x: *const *const f32, // array of per-instance vector pointers
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut f32, // the pointer array is const, the vectors it points at are mutable
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute was generated here; the spmv_batched text on rocblas_sspmv_batched presumably applies
+    #[must_use]
+    pub fn rocblas_dspmv_batched( // same parameter list as rocblas_sspmv_batched with f64 in place of f32
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        x: *const *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *const *mut f64, // the pointer array is const, the vectors it points at are mutable
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 binding for spmv_strided_batched; one base pointer plus a stride per operand
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspmv_strided_batched performs the matrix-vector operation:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nn by n symmetric matrix, for i = 1, ..., batch_count.\nA should contain an upper or lower triangular n by n packed symmetric matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nnumber of rows and columns of each matrix A_i.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nA         Device pointer to the first matrix A_1 on the GPU.\n@param[in]\nstrideA     [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nx         Device pointer to the first vector x_1 on the GPU.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[in]\nstridex     [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stridex. However, ensure that stridex is of appropriate size.\nThis typically means stridex >= n * incx. stridex should be non zero.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[out]\ny         Device pointer to the first vector y_1 on the GPU.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nstridey     [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stridey. 
However, ensure that stridey is of appropriate size.\nThis typically means stridey >= n * incy. stridey should be non zero.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_sspmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32, // packed storage per the doc above; this signature has no lda argument
+        strideA: rocblas_stride,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32, // the only mutable pointer in this signature
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute was generated here; the text on rocblas_sspmv_strided_batched presumably applies
+    #[must_use]
+    pub fn rocblas_dspmv_strided_batched( // same parameter list as rocblas_sspmv_strided_batched with f64 in place of f32
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        strideA: rocblas_stride,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64, // the only mutable pointer in this signature
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 binding for sbmv (symmetric banded matrix-vector product per the doc below)
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsbmv performs the matrix-vector operation:\n\ny := alpha*A*x + beta*y\nwhere alpha and beta are scalars, x and y are n element vectors and\nA should contain an upper or lower triangular n by n symmetric banded matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      rocblas_fill\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\n@param[in]\nk         [rocblas_int]\nspecifies the number of sub- and super-diagonals.\n@param[in]\nalpha\nspecifies the scalar alpha.\n@param[in]\nA         pointer storing matrix A on the GPU.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of matrix A.\n@param[in]\nx         pointer storing vector x on the GPU.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nbeta      specifies the scalar beta.\n@param[out]\ny         pointer storing vector y on the GPU.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n"]
+    pub fn rocblas_ssbmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int, // the doc above gives no description for n; it is the dimension of the n by n matrix A
+        k: rocblas_int, // band width parameter: number of sub- and super-diagonals per the doc above
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute was generated here; the sbmv text on rocblas_ssbmv presumably applies
+    #[must_use]
+    pub fn rocblas_dsbmv( // same parameter list as rocblas_ssbmv with f64 in place of f32
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *mut f64, // the only mutable pointer in this signature
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f64 binding for sbmv_batched; in this file the group doc sits on the f64 declaration, ahead of the f32 one
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsbmv_batched performs the matrix-vector operation:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nn by n symmetric banded matrix, for i = 1, ..., batch_count.\nA should contain an upper or lower triangular n by n symmetric banded matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nnumber of rows and columns of each matrix A_i.\n@param[in]\nk         [rocblas_int]\nspecifies the number of sub- and super-diagonals.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each matrix A_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[out]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_dsbmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64, // array of per-instance matrix pointers
+        lda: rocblas_int,
+        x: *const *const f64, // array of per-instance vector pointers
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *const *mut f64, // the pointer array is const, the vectors it points at are mutable
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute was generated here; the sbmv_batched text on rocblas_dsbmv_batched presumably applies
+    #[must_use]
+    pub fn rocblas_ssbmv_batched( // same parameter list as rocblas_dsbmv_batched with f32 in place of f64
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut f32, // the pointer array is const, the vectors it points at are mutable
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 binding for sbmv_strided_batched; one base pointer plus a stride per operand
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsbmv_strided_batched performs the matrix-vector operation:\n\ny_i := alpha*A_i*x_i + beta*y_i\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha and beta are scalars, x_i and y_i are vectors and A_i is an\nn by n symmetric banded matrix, for i = 1, ..., batch_count.\nA should contain an upper or lower triangular n by n symmetric banded matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nnumber of rows and columns of each matrix A_i.\n@param[in]\nk         [rocblas_int]\nspecifies the number of sub- and super-diagonals.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nA         Device pointer to the first matrix A_1 on the GPU.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each matrix A_i.\n@param[in]\nstrideA     [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nx         Device pointer to the first vector x_1 on the GPU.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[in]\nstridex     [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stridex. However, ensure that stridex is of appropriate size.\nThis typically means stridex >= n * incx. 
stridex should be non zero.\n@param[in]\nbeta      device pointer or host pointer to scalar beta.\n@param[out]\ny         Device pointer to the first vector y_1 on the GPU.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nstridey     [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stridey. However, ensure that stridey is of appropriate size.\nThis typically means stridey >= n * incy. stridey should be non zero.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssbmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int, // number of sub- and super-diagonals per the doc above
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32, // the only mutable pointer in this signature
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute was generated here; the text on rocblas_ssbmv_strided_batched presumably applies
+    #[must_use]
+    pub fn rocblas_dsbmv_strided_batched( // same parameter list as rocblas_ssbmv_strided_batched with f64 in place of f32
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64, // the only mutable pointer in this signature
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 binding for ger; the doc attribute below covers ger, geru and gerc together
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nger,geru,gerc performs the matrix-vector operations:\n\nA := A + alpha*x*y**T , OR\nA := A + alpha*x*y**H for gerc\nwhere alpha is a scalar, x and y are vectors, and A is an\nm by n matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nm         [rocblas_int]\nthe number of rows of the matrix A.\n@param[in]\nn         [rocblas_int]\nthe number of columns of the matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n"]
+    pub fn rocblas_sger(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        y: *const f32,
+        incy: rocblas_int,
+        A: *mut f32, // updated in place (tagged inout in the doc above); x and y are read-only
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute was generated here; the ger text on rocblas_sger presumably applies
+    #[must_use]
+    pub fn rocblas_dger( // same parameter list as rocblas_sger with f64 in place of f32
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        y: *const f64,
+        incy: rocblas_int,
+        A: *mut f64, // the only mutable pointer in this signature
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute here; the ger/geru/gerc text on rocblas_sger gives y**T as the form for everything but gerc
+    #[must_use]
+    pub fn rocblas_cgeru( // geru for rocblas_float_complex; parameter order matches rocblas_sger
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *mut rocblas_float_complex, // the only mutable pointer in this signature
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute here; the ger/geru/gerc text on rocblas_sger gives y**T as the form for everything but gerc
+    #[must_use]
+    pub fn rocblas_zgeru( // geru for rocblas_double_complex; parameter order matches rocblas_sger
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        A: *mut rocblas_double_complex, // the only mutable pointer in this signature
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute here; the ger/geru/gerc text on rocblas_sger gives A := A + alpha*x*y**H for gerc
+    #[must_use]
+    pub fn rocblas_cgerc( // gerc for rocblas_float_complex; signature is identical to rocblas_cgeru apart from the name
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *mut rocblas_float_complex, // the only mutable pointer in this signature
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute here; the ger/geru/gerc text on rocblas_sger gives A := A + alpha*x*y**H for gerc
+    #[must_use]
+    pub fn rocblas_zgerc( // gerc for rocblas_double_complex; signature is identical to rocblas_zgeru apart from the name
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        A: *mut rocblas_double_complex, // the only mutable pointer in this signature
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 binding for ger_batched; the doc attribute below covers the ger, geru and gerc batched forms together
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nger_batched,geru_batched,gerc_batched perform a batch of the matrix-vector operations:\n\nA := A + alpha*x*y**T , OR\nA := A + alpha*x*y**H for gerc\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha is a scalar, x_i and y_i are vectors and A_i is an\nm by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nm         [rocblas_int]\nthe number of rows of each matrix A_i.\n@param[in]\nn         [rocblas_int]\nthe number of columns of each matrix A_i.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each vector x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[inout]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_sger_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const f32, // array of per-instance vector pointers
+        incx: rocblas_int,
+        y: *const *const f32, // array of per-instance vector pointers
+        incy: rocblas_int,
+        A: *const *mut f32, // the pointer array is const, the matrices it points at are mutable
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute was generated here; the ger_batched text on rocblas_sger_batched presumably applies
+    #[must_use]
+    pub fn rocblas_dger_batched( // same parameter list as rocblas_sger_batched with f64 in place of f32
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: rocblas_int,
+        y: *const *const f64,
+        incy: rocblas_int,
+        A: *const *mut f64, // the pointer array is const, the matrices it points at are mutable
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute here; the batched ger/geru/gerc text on rocblas_sger_batched gives y**T for everything but gerc
+    #[must_use]
+    pub fn rocblas_cgeru_batched( // geru_batched for rocblas_float_complex; parameter order matches rocblas_sger_batched
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *const *mut rocblas_float_complex, // the pointer array is const, the matrices it points at are mutable
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute here; the batched ger/geru/gerc text on rocblas_sger_batched gives y**T for everything but gerc
+    #[must_use]
+    pub fn rocblas_zgeru_batched( // geru_batched for rocblas_double_complex; parameter order matches rocblas_sger_batched
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_double_complex,
+        incy: rocblas_int,
+        A: *const *mut rocblas_double_complex, // the pointer array is const, the matrices it points at are mutable
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute here; the batched ger/geru/gerc text on rocblas_sger_batched gives the y**H form for gerc
+    #[must_use]
+    pub fn rocblas_cgerc_batched( // gerc_batched for rocblas_float_complex; signature matches rocblas_cgeru_batched apart from the name
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *const *mut rocblas_float_complex, // the pointer array is const, the matrices it points at are mutable
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute here; the batched ger/geru/gerc text on rocblas_sger_batched gives the y**H form for gerc
+    #[must_use]
+    pub fn rocblas_zgerc_batched( // gerc_batched for rocblas_double_complex; signature matches rocblas_zgeru_batched apart from the name
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_double_complex,
+        incy: rocblas_int,
+        A: *const *mut rocblas_double_complex, // the pointer array is const, the matrices it points at are mutable
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 binding for ger_strided_batched; the doc attribute below covers the ger, geru and gerc strided forms together
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nger_strided_batched,geru_strided_batched,gerc_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*y_i**T, OR\nA_i := A_i + alpha*x_i*y_i**H  for gerc\nwhere (A_i, x_i, y_i) is the i-th instance of the batch.\nalpha is a scalar, x_i and y_i are vectors and A_i is an\nm by n matrix, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nm         [rocblas_int]\nthe number of rows of each matrix A_i.\n@param[in]\nn         [rocblas_int]\nthe number of columns of each matrix A_i.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer to the first vector (x_1) in the batch.\n@param[in]\nincx      [rocblas_int]\nspecifies the increments for the elements of each vector x_i.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical\ncase this means stride_x >= m * incx.\n@param[inout]\ny         device pointer to the first vector (y_1) in the batch.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each vector y_i.\n@param[in]\nstridey   [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\nThere are no restrictions placed on stride_y. However, ensure that stride_y is of appropriate size. For a typical\ncase this means stride_y >= n * incy.\n@param[inout]\nA         device pointer to the first matrix (A_1) in the batch.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstrideA     [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1)\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_sger_strided_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride, // the doc above spells this stride_x in its prose
+        y: *const f32, // NOTE(review): the doc above tags y as inout, yet the pointer is const here; likely an upstream doc slip, confirm
+        incy: rocblas_int,
+        stridey: rocblas_stride, // the doc above spells this stride_y in its prose
+        A: *mut f32, // the only mutable pointer in this signature
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute was generated here; the text on rocblas_sger_strided_batched presumably applies
+    #[must_use]
+    pub fn rocblas_dger_strided_batched( // same parameter list as rocblas_sger_strided_batched with f64 in place of f32
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut f64, // the only mutable pointer in this signature
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute here; the strided ger/geru/gerc text on rocblas_sger_strided_batched gives y_i**T for everything but gerc
+    #[must_use]
+    pub fn rocblas_cgeru_strided_batched( // geru_strided_batched for rocblas_float_complex
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut rocblas_float_complex, // the only mutable pointer in this signature
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute here; the strided ger/geru/gerc text on rocblas_sger_strided_batched gives y_i**T for everything but gerc
+    #[must_use]
+    pub fn rocblas_zgeru_strided_batched( // geru_strided_batched for rocblas_double_complex
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut rocblas_double_complex, // the only mutable pointer in this signature
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute here; the strided ger/geru/gerc text on rocblas_sger_strided_batched gives the y_i**H form for gerc
+    #[must_use]
+    pub fn rocblas_cgerc_strided_batched( // gerc_strided_batched for rocblas_float_complex
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut rocblas_float_complex, // the only mutable pointer in this signature
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute here; the strided ger/geru/gerc text on rocblas_sger_strided_batched gives the y_i**H form for gerc
+    #[must_use]
+    pub fn rocblas_zgerc_strided_batched( // gerc_strided_batched for rocblas_double_complex
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut rocblas_double_complex, // the only mutable pointer in this signature
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // f32 binding for spr (packed symmetric rank-1 update per the doc below)
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr performs the matrix-vector operations:\n\nA := A + alpha*x*x**T\nwhere alpha is a scalar, x is a vector, and A is an\nn by n symmetric matrix, supplied in packed form.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe symmetric matrix A. Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the symmetric matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the symmetric matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0"]
+    pub fn rocblas_sspr(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        AP: *mut f32, // NOTE(review): the lower-fill text above says AP(2) = A(2,1), but its own n = 4 example puts A(2,0) third; looks like an upstream typo, confirm
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute was generated here; the spr text on rocblas_sspr presumably applies
+    #[must_use]
+    pub fn rocblas_dspr( // same parameter list as rocblas_sspr with f64 in place of f32
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        AP: *mut f64, // the only mutable pointer in this signature
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute was generated here; the spr text on rocblas_sspr presumably applies
+    #[must_use]
+    pub fn rocblas_cspr( // same parameter list as rocblas_sspr with rocblas_float_complex in place of f32
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        AP: *mut rocblas_float_complex, // the only mutable pointer in this signature
+    ) -> rocblas_status;
+}
+extern "C" { // no doc attribute was generated here; the spr text on rocblas_sspr presumably applies
+    #[must_use]
+    pub fn rocblas_zspr( // same parameter list as rocblas_sspr with rocblas_double_complex in place of f32
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        AP: *mut rocblas_double_complex, // the only mutable pointer in this signature
+    ) -> rocblas_status;
+}
+extern "C" { // f32 binding for spr_batched; pointer-array form of spr
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**T\nwhere alpha is a scalar, x_i is a vector, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[inout]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach symmetric matrix A_i of at least size ((n * (n + 1)) / 2). 
Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_sspr_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const f32, // array of per-instance vector pointers
+        incx: rocblas_int,
+        AP: *const *mut f32, // NOTE(review): the lower-fill text above says AP(2) = A(2,1), but its own n = 4 example puts A(2,0) third; looks like an upstream typo, confirm
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr_batched( // f64 variant; presumably covered by the doxygen group (`@{`) documented on rocblas_sspr_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is stored in AP
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const f64, // pointer to the scalar alpha
+        x: *const *const f64, // one pointer per input vector x_i
+        incx: rocblas_int, // element increment within each x_i
+        AP: *const *mut f64, // one pointer per packed matrix A_i; the matrices are read and written
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cspr_batched( // single-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_sspr_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is stored in AP
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const rocblas_float_complex, // pointer to the scalar alpha
+        x: *const *const rocblas_float_complex, // one pointer per input vector x_i
+        incx: rocblas_int, // element increment within each x_i
+        AP: *const *mut rocblas_float_complex, // one pointer per packed matrix A_i; the matrices are read and written
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zspr_batched( // double-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_sspr_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is stored in AP
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const rocblas_double_complex, // pointer to the scalar alpha
+        x: *const *const rocblas_double_complex, // one pointer per input vector x_i
+        incx: rocblas_int, // element increment within each x_i
+        AP: *const *mut rocblas_double_complex, // one pointer per packed matrix A_i; the matrices are read and written
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr_strided_batched performs the matrix-vector operations:\n\nA_i := A_i + alpha*x_i*x_i**T\nwhere alpha is a scalar, x_i is a vector, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\neach symmetric matrix A_i. 
Points to the first A_1.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(2) = A(2,0), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_sspr_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        AP: *mut f32,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr_strided_batched( // f64 variant; presumably covered by the doxygen group (`@{`) documented on rocblas_sspr_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is stored in AP
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const f64, // pointer to the scalar alpha
+        x: *const f64, // pointer to the first vector x_1
+        incx: rocblas_int, // element increment within each x_i
+        stride_x: rocblas_stride, // distance from the start of x_i to the start of x_i+1
+        AP: *mut f64, // pointer to the first packed matrix A_1; read and written
+        stride_A: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cspr_strided_batched( // single-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_sspr_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is stored in AP
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const rocblas_float_complex, // pointer to the scalar alpha
+        x: *const rocblas_float_complex, // pointer to the first vector x_1
+        incx: rocblas_int, // element increment within each x_i
+        stride_x: rocblas_stride, // distance from the start of x_i to the start of x_i+1
+        AP: *mut rocblas_float_complex, // pointer to the first packed matrix A_1; read and written
+        stride_A: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zspr_strided_batched( // double-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_sspr_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is stored in AP
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const rocblas_double_complex, // pointer to the scalar alpha
+        x: *const rocblas_double_complex, // pointer to the first vector x_1
+        incx: rocblas_int, // element increment within each x_i
+        stride_x: rocblas_stride, // distance from the start of x_i to the start of x_i+1
+        AP: *mut rocblas_double_complex, // pointer to the first packed matrix A_1; read and written
+        stride_A: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr2 performs the matrix-vector operation:\n\nA := A + alpha*x*y**T + alpha*y*x**T\nwhere alpha is a scalar, x and y are vectors, and A is an\nn by n symmetric matrix, supplied in packed form.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of A is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of A is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\nthe symmetric matrix A. Of at least size ((n * (n + 1)) / 2).\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of the symmetric matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of the symmetric matrix A is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(n) = A(1,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0"]
+    pub fn rocblas_sspr2(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        y: *const f32,
+        incy: rocblas_int,
+        AP: *mut f32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr2( // f64 variant; presumably covered by the doxygen group (`@{`) documented on rocblas_sspr2
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of A is stored in AP
+        n: rocblas_int, // rows and columns of A
+        alpha: *const f64, // pointer to the scalar alpha
+        x: *const f64, // vector x
+        incx: rocblas_int, // element increment within x
+        y: *const f64, // vector y
+        incy: rocblas_int, // element increment within y
+        AP: *mut f64, // packed symmetric matrix A; read and written
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr2_batched performs the matrix-vector operation:\n\nA_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T\nwhere alpha is a scalar, x_i and y_i are vectors, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[inout]\nAP        device array of device pointers storing the packed version of the specified triangular portion of\neach symmetric matrix A_i of at least size ((n * (n + 1)) / 2). 
Array is of at least size batch_count.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(n) = A(1,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_sspr2_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: rocblas_int,
+        y: *const *const f32,
+        incy: rocblas_int,
+        AP: *const *mut f32,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr2_batched( // f64 variant; presumably covered by the doxygen group (`@{`) documented on rocblas_sspr2_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is stored in AP
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const f64, // pointer to the scalar alpha
+        x: *const *const f64, // one pointer per vector x_i
+        incx: rocblas_int, // element increment within each x_i
+        y: *const *const f64, // one pointer per vector y_i
+        incy: rocblas_int, // element increment within each y_i
+        AP: *const *mut f64, // one pointer per packed matrix A_i; the matrices are read and written
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nspr2_strided_batched performs the matrix-vector operation:\n\nA_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T\nwhere alpha is a scalar, x_i and y_i are vectors, and A_i is an\nn by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.\n- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A_i. Must be at least 0.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer pointing to the first vector (x_1).\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\n@param[in]\ny         device pointer pointing to the first vector (y_1).\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstride_y  [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1).\n@param[inout]\nAP        device pointer storing the packed version of the specified triangular portion of\neach symmetric matrix A_i. 
Points to the first A_1.\n\nif uplo == rocblas_fill_upper:\nThe upper triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(0,1)\nAP(2) = A(1,1), etc.\nEx: (rocblas_fill_upper; n = 4)\n1 2 4 7\n2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n4 5 6 9\n7 8 9 0\n\nif uplo == rocblas_fill_lower:\nThe lower triangular portion of each symmetric matrix A_i is supplied.\nThe matrix is compacted so that AP contains the triangular portion\ncolumn-by-column\nso that:\nAP(0) = A(0,0)\nAP(1) = A(1,0)\nAP(n) = A(1,1), etc.\nEx: (rocblas_fill_lower; n = 4)\n1 2 3 4\n2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]\n3 6 8 9\n4 7 9 0\n@param[in]\nstride_A    [rocblas_stride]\nstride from the start of one (A_i) and the next (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch."]
+    pub fn rocblas_sspr2_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *const f32,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        AP: *mut f32,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr2_strided_batched( // f64 variant; presumably covered by the doxygen group (`@{`) documented on rocblas_sspr2_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is stored in AP
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const f64, // pointer to the scalar alpha
+        x: *const f64, // pointer to the first vector x_1
+        incx: rocblas_int, // element increment within each x_i
+        stride_x: rocblas_stride, // distance from the start of x_i to the start of x_i+1
+        y: *const f64, // pointer to the first vector y_1
+        incy: rocblas_int, // element increment within each y_i
+        stride_y: rocblas_stride, // distance from the start of y_i to the start of y_i+1
+        AP: *mut f64, // pointer to the first packed matrix A_1; read and written
+        stride_A: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use] // the returned rocblas_status triggers a warning if the caller ignores it
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr performs the matrix-vector operations:\n\nA := A + alpha*x*x**T\nwhere alpha is a scalar, x is a vector, and A is an\nn by n symmetric matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n"]
+    pub fn rocblas_ssyr( // f32 entry of the syr group; the doc attribute above opens the doxygen group with `@{`
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32, // read-only input vector
+        incx: rocblas_int,
+        A: *mut f32, // the only mutable pointer: A is updated in place (`@param[inout]`)
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr( // f64 variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of A is referenced
+        n: rocblas_int, // rows and columns of A
+        alpha: *const f64, // pointer to the scalar alpha
+        x: *const f64, // vector x
+        incx: rocblas_int, // element increment within x
+        A: *mut f64, // matrix A; read and written
+        lda: rocblas_int, // leading dimension of A
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr( // single-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of A is referenced
+        n: rocblas_int, // rows and columns of A
+        alpha: *const rocblas_float_complex, // pointer to the scalar alpha
+        x: *const rocblas_float_complex, // vector x
+        incx: rocblas_int, // element increment within x
+        A: *mut rocblas_float_complex, // matrix A; read and written
+        lda: rocblas_int, // leading dimension of A
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr( // double-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of A is referenced
+        n: rocblas_int, // rows and columns of A
+        alpha: *const rocblas_double_complex, // pointer to the scalar alpha
+        x: *const rocblas_double_complex, // vector x
+        incx: rocblas_int, // element increment within x
+        A: *mut rocblas_double_complex, // matrix A; read and written
+        lda: rocblas_int, // leading dimension of A
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use] // the returned rocblas_status triggers a warning if the caller ignores it
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr_batched performs a batch of matrix-vector operations:\n\nA[i] := A[i] + alpha*x[i]*x[i]**T\nwhere alpha is a scalar, x is an array of vectors, and A is an array of\nn by n symmetric matrices, for i = 1 , ... , batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[inout]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssyr_batched( // f32 entry of the syr_batched group; the doc attribute above opens the doxygen group with `@{`
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const f32, // pointer-to-pointer: one read-only vector per batch instance
+        incx: rocblas_int,
+        A: *const *mut f32, // the pointer array itself is const; the matrices it points to are mutable
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr_batched( // f64 variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is referenced
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const f64, // pointer to the scalar alpha
+        x: *const *const f64, // one pointer per vector x_i
+        incx: rocblas_int, // element increment within each x_i
+        A: *const *mut f64, // one pointer per matrix A_i; the matrices are read and written
+        lda: rocblas_int, // leading dimension of each A_i
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr_batched( // single-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is referenced
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const rocblas_float_complex, // pointer to the scalar alpha
+        x: *const *const rocblas_float_complex, // one pointer per vector x_i
+        incx: rocblas_int, // element increment within each x_i
+        A: *const *mut rocblas_float_complex, // one pointer per matrix A_i; the matrices are read and written
+        lda: rocblas_int, // leading dimension of each A_i
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr_batched( // double-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is referenced
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const rocblas_double_complex, // pointer to the scalar alpha
+        x: *const *const rocblas_double_complex, // one pointer per vector x_i
+        incx: rocblas_int, // element increment within each x_i
+        A: *const *mut rocblas_double_complex, // one pointer per matrix A_i; the matrices are read and written
+        lda: rocblas_int, // leading dimension of each A_i
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr_strided_batched performs the matrix-vector operations:\n\nA[i] := A[i] + alpha*x[i]*x[i]**T\nwhere alpha is a scalar, x is an array of vectors, and A is an array of\nn by n symmetric matrices, for i = 1 , ... , batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex   [rocblas_stride]\nspecifies the pointer increment between vectors (x_i) and (x_i+1).\n@param[inout]\nA         device pointer to the first matrix A_1.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstrideA   [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssyr_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        A: *mut f32,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr_strided_batched( // f64 variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is referenced
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const f64, // pointer to the scalar alpha
+        x: *const f64, // pointer to the first vector x_1
+        incx: rocblas_int, // element increment within each x_i
+        stridex: rocblas_stride, // pointer increment between x_i and x_i+1
+        A: *mut f64, // pointer to the first matrix A_1; read and written
+        lda: rocblas_int, // leading dimension of each A_i
+        strideA: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr_strided_batched( // single-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is referenced
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const rocblas_float_complex, // pointer to the scalar alpha
+        x: *const rocblas_float_complex, // pointer to the first vector x_1
+        incx: rocblas_int, // element increment within each x_i
+        stridex: rocblas_stride, // pointer increment between x_i and x_i+1
+        A: *mut rocblas_float_complex, // pointer to the first matrix A_1; read and written
+        lda: rocblas_int, // leading dimension of each A_i
+        strideA: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr_strided_batched( // double-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is referenced
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const rocblas_double_complex, // pointer to the scalar alpha
+        x: *const rocblas_double_complex, // pointer to the first vector x_1
+        incx: rocblas_int, // element increment within each x_i
+        stridex: rocblas_stride, // pointer increment between x_i and x_i+1
+        A: *mut rocblas_double_complex, // pointer to the first matrix A_1; read and written
+        lda: rocblas_int, // leading dimension of each A_i
+        strideA: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use] // the returned rocblas_status triggers a warning if the caller ignores it
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr2 performs the matrix-vector operations:\n\nA := A + alpha*x*y**T + alpha*y*x**T\nwhere alpha is a scalar, x and y are vectors, and A is an\nn by n symmetric matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n"]
+    pub fn rocblas_ssyr2( // f32 entry of the syr2 group; the doc attribute above opens the doxygen group with `@{`
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32, // read-only input vector
+        incx: rocblas_int,
+        y: *const f32, // read-only input vector
+        incy: rocblas_int,
+        A: *mut f32, // the only mutable pointer: A is updated in place (`@param[inout]`)
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr2( // f64 variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr2
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of A is referenced
+        n: rocblas_int, // rows and columns of A
+        alpha: *const f64, // pointer to the scalar alpha
+        x: *const f64, // vector x
+        incx: rocblas_int, // element increment within x
+        y: *const f64, // vector y
+        incy: rocblas_int, // element increment within y
+        A: *mut f64, // matrix A; read and written
+        lda: rocblas_int, // leading dimension of A
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr2( // single-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr2
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of A is referenced
+        n: rocblas_int, // rows and columns of A
+        alpha: *const rocblas_float_complex, // pointer to the scalar alpha
+        x: *const rocblas_float_complex, // vector x
+        incx: rocblas_int, // element increment within x
+        y: *const rocblas_float_complex, // vector y
+        incy: rocblas_int, // element increment within y
+        A: *mut rocblas_float_complex, // matrix A; read and written
+        lda: rocblas_int, // leading dimension of A
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr2( // double-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr2
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of A is referenced
+        n: rocblas_int, // rows and columns of A
+        alpha: *const rocblas_double_complex, // pointer to the scalar alpha
+        x: *const rocblas_double_complex, // vector x
+        incx: rocblas_int, // element increment within x
+        y: *const rocblas_double_complex, // vector y
+        incy: rocblas_int, // element increment within y
+        A: *mut rocblas_double_complex, // matrix A; read and written
+        lda: rocblas_int, // leading dimension of A
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use] // the returned rocblas_status triggers a warning if the caller ignores it
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr2_batched performs a batch of matrix-vector operations:\n\nA[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T\nwhere alpha is a scalar, x[i] and y[i] are vectors, and A[i] is a\nn by n symmetric matrix, for i = 1 , ... , batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[inout]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssyr2_batched( // f32 entry of the syr2_batched group; the doc attribute above opens the doxygen group with `@{`
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const f32, // pointer-to-pointer: one read-only vector per batch instance
+        incx: rocblas_int,
+        y: *const *const f32, // pointer-to-pointer: one read-only vector per batch instance
+        incy: rocblas_int,
+        A: *const *mut f32, // the pointer array itself is const; the matrices it points to are mutable
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr2_batched( // f64 variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr2_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is referenced
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const f64, // pointer to the scalar alpha
+        x: *const *const f64, // one pointer per vector x_i
+        incx: rocblas_int, // element increment within each x_i
+        y: *const *const f64, // one pointer per vector y_i
+        incy: rocblas_int, // element increment within each y_i
+        A: *const *mut f64, // one pointer per matrix A_i; the matrices are read and written
+        lda: rocblas_int, // leading dimension of each A_i
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr2_batched( // single-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr2_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is referenced
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const rocblas_float_complex, // pointer to the scalar alpha
+        x: *const *const rocblas_float_complex, // one pointer per vector x_i
+        incx: rocblas_int, // element increment within each x_i
+        y: *const *const rocblas_float_complex, // one pointer per vector y_i
+        incy: rocblas_int, // element increment within each y_i
+        A: *const *mut rocblas_float_complex, // one pointer per matrix A_i; the matrices are read and written
+        lda: rocblas_int, // leading dimension of each A_i
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr2_batched( // double-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr2_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is referenced
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const rocblas_double_complex, // pointer to the scalar alpha
+        x: *const *const rocblas_double_complex, // one pointer per vector x_i
+        incx: rocblas_int, // element increment within each x_i
+        y: *const *const rocblas_double_complex, // one pointer per vector y_i
+        incy: rocblas_int, // element increment within each y_i
+        A: *const *mut rocblas_double_complex, // one pointer per matrix A_i; the matrices are read and written
+        lda: rocblas_int, // leading dimension of each A_i
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 2 API </b>\n\n\\details\nsyr2_strided_batched performs the matrix-vector operations:\n\nA[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T\nwhere alpha is a scalar, x[i] and y[i] are vectors, and A[i] is an\nn by n symmetric matrix, for i = 1 , ... , batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n- if rocblas_fill_upper, the lower part of A is not referenced\n- if rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\nn         [rocblas_int]\nthe number of rows and columns of each matrix A.\n@param[in]\nalpha\ndevice pointer or host pointer to scalar alpha.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex   [rocblas_stride]\nspecifies the pointer increment between vectors (x_i) and (x_i+1).\n@param[in]\ny         device pointer to the first vector y_1.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstridey   [rocblas_stride]\nspecifies the pointer increment between vectors (y_i) and (y_i+1).\n@param[inout]\nA         device pointer to the first matrix A_1.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstrideA   [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssyr2_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut f32,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr2_strided_batched( // f64 variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr2_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is referenced
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const f64, // pointer to the scalar alpha
+        x: *const f64, // pointer to the first vector x_1
+        incx: rocblas_int, // element increment within each x_i
+        stridex: rocblas_stride, // pointer increment between x_i and x_i+1
+        y: *const f64, // pointer to the first vector y_1
+        incy: rocblas_int, // element increment within each y_i
+        stridey: rocblas_stride, // pointer increment between y_i and y_i+1
+        A: *mut f64, // pointer to the first matrix A_1; read and written
+        lda: rocblas_int, // leading dimension of each A_i
+        strideA: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr2_strided_batched( // single-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr2_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is referenced
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const rocblas_float_complex, // pointer to the scalar alpha
+        x: *const rocblas_float_complex, // pointer to the first vector x_1
+        incx: rocblas_int, // element increment within each x_i
+        stridex: rocblas_stride, // pointer increment between x_i and x_i+1
+        y: *const rocblas_float_complex, // pointer to the first vector y_1
+        incy: rocblas_int, // element increment within each y_i
+        stridey: rocblas_stride, // pointer increment between y_i and y_i+1
+        A: *mut rocblas_float_complex, // pointer to the first matrix A_1; read and written
+        lda: rocblas_int, // leading dimension of each A_i
+        strideA: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr2_strided_batched( // double-precision complex variant; presumably covered by the doxygen group (`@{`) documented on rocblas_ssyr2_strided_batched
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each A_i is referenced
+        n: rocblas_int, // rows and columns of each A_i
+        alpha: *const rocblas_double_complex, // pointer to the scalar alpha
+        x: *const rocblas_double_complex, // pointer to the first vector x_1
+        incx: rocblas_int, // element increment within each x_i
+        stridex: rocblas_stride, // pointer increment between x_i and x_i+1
+        y: *const rocblas_double_complex, // pointer to the first vector y_1
+        incy: rocblas_int, // element increment within each y_i
+        stridey: rocblas_stride, // pointer increment between y_i and y_i+1
+        A: *mut rocblas_double_complex, // pointer to the first matrix A_1; read and written
+        lda: rocblas_int, // leading dimension of each A_i
+        strideA: rocblas_stride, // distance from the start of A_i to the start of A_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nhemm performs one of the matrix-matrix operations:\n\nC := alpha*A*B + beta*C if side == rocblas_side_left,\nC := alpha*B*A + beta*C if side == rocblas_side_right,\n\nwhere alpha and beta are scalars, B and C are m by n matrices, and\nA is a Hermitian matrix stored as either upper or lower.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside  [rocblas_side]\n- rocblas_side_left:      C := alpha*A*B + beta*C\n- rocblas_side_right:     C := alpha*B*A + beta*C\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix\n- rocblas_fill_lower:  A is a  lower triangular matrix\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B and C. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B and C. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A and B are not referenced.\n\n@param[in]\nA       pointer storing matrix A on the GPU.\n- A is m by m if side == rocblas_side_left\n- A is n by n if side == rocblas_side_right\nOnly the upper/lower triangular part is accessed.\nThe imaginary component of the diagonal elements is not used.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\notherwise lda >= max( 1, n ).\n\n@param[in]\nB       pointer storing matrix B on the GPU.\nMatrix dimension is m by n\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B. ldb >= max( 1, m ).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       pointer storing matrix C on the GPU.\nMatrix dimension is m by n\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m ).\n"]
+    pub fn rocblas_chemm( // rocblas_float_complex hemm
+        handle: rocblas_handle,
+        side: rocblas_side, // left: C := alpha*A*B + beta*C; right: C := alpha*B*A + beta*C
+        uplo: rocblas_fill, // which triangle of the Hermitian matrix A is accessed
+        m: rocblas_int, // rows of B and C
+        n: rocblas_int, // columns of B and C
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex, // m by m for side left, n by n for side right
+        lda: rocblas_int,
+        B: *const rocblas_float_complex, // m by n
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex, // NOTE(review): doc attribute tags C as @param[in], yet it is *mut and the formula assigns to C
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_chemm; that function's doc attribute describes hemm
+    #[must_use] // dropping the returned rocblas_status produces a compiler warning
+    pub fn rocblas_zhemm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex, // the only mutable pointer argument
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nhemm_batched performs a batch of the matrix-matrix operations:\n\nC_i := alpha*A_i*B_i + beta*C_i if side == rocblas_side_left,\nC_i := alpha*B_i*A_i + beta*C_i if side == rocblas_side_right,\n\nwhere alpha and beta are scalars, B_i and C_i are m by n matrices, and\nA_i is a Hermitian matrix stored as either upper or lower.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside  [rocblas_side]\n- rocblas_side_left:      C_i := alpha*A_i*B_i + beta*C_i\n- rocblas_side_right:     C_i := alpha*B_i*A_i + beta*C_i\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix\n- rocblas_fill_lower:  A_i is a  lower triangular matrix\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i and C_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i and C_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A_i and B_i are not referenced.\n\n@param[in]\nA       device array of device pointers storing each matrix A_i on the GPU.\n- A_i is m by m if side == rocblas_side_left\n- A_i is n by n if side == rocblas_side_right\nOnly the upper/lower triangular part is accessed.\nThe imaginary component of the diagonal elements is not used.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\notherwise lda >= max( 1, n ).\n\n@param[in]\nB       device array of device pointers storing each matrix B_i on the GPU.\nMatrix dimension is m by n\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C_i need not be set before entry.\n\n@param[in]\nC       device array of device pointers storing each matrix C_i on the GPU.\nMatrix dimension is m by n\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C_i. ldc >= max( 1, m ).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_chemm_batched( // rocblas_float_complex hemm over arrays of per-matrix pointers
+        handle: rocblas_handle,
+        side: rocblas_side, // left: C_i := alpha*A_i*B_i + beta*C_i; right: C_i := alpha*B_i*A_i + beta*C_i
+        uplo: rocblas_fill,
+        m: rocblas_int, // rows of B_i and C_i
+        n: rocblas_int, // columns of B_i and C_i
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex, // array of pointers, one per A_i
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex, // array of pointers, one per B_i
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex, // pointer array is const, each pointed-to C_i is mutable; NOTE(review): doc tags C as @param[in]
+        ldc: rocblas_int,
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_chemm_batched; that function's doc attribute describes hemm_batched
+    #[must_use] // dropping the returned rocblas_status produces a compiler warning
+    pub fn rocblas_zhemm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex, // pointer to pointers: one entry per matrix
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex, // pointer array is const, pointed-to matrices are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nhemm_strided_batched performs a batch of the matrix-matrix operations:\n\nC_i := alpha*A_i*B_i + beta*C_i if side == rocblas_side_left,\nC_i := alpha*B_i*A_i + beta*C_i if side == rocblas_side_right,\n\nwhere alpha and beta are scalars, B_i and C_i are m by n matrices, and\nA_i is a Hermitian matrix stored as either upper or lower.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside  [rocblas_side]\n- rocblas_side_left:      C_i := alpha*A_i*B_i + beta*C_i\n- rocblas_side_right:     C_i := alpha*B_i*A_i + beta*C_i\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix\n- rocblas_fill_lower:  A_i is a  lower triangular matrix\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i and C_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i and C_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A_i and B_i are not referenced.\n\n@param[in]\nA       device pointer to first matrix A_1\n- A_i is m by m if side == rocblas_side_left\n- A_i is n by n if side == rocblas_side_right\nOnly the upper/lower triangular part is accessed.\nThe imaginary component of the diagonal elements is not used.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\notherwise lda >= max( 1, n ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       device pointer to first matrix B_1 of dimension (ldb, n) on the GPU\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif side = rocblas_operation_none,  ldb >= max( 1, m ),\notherwise ldb >= max( 1, n ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC        device pointer to first matrix C_1 of dimension (ldc, n) on the GPU.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_chemm_strided_batched( // rocblas_float_complex hemm over matrices laid out at fixed strides
+        handle: rocblas_handle,
+        side: rocblas_side, // left: C_i := alpha*A_i*B_i + beta*C_i; right: C_i := alpha*B_i*A_i + beta*C_i
+        uplo: rocblas_fill,
+        m: rocblas_int, // rows of B_i and C_i
+        n: rocblas_int, // columns of B_i and C_i
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex, // first matrix A_1
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // distance from the start of A_i to A_i+1
+        B: *const rocblas_float_complex, // first matrix B_1
+        ldb: rocblas_int, // NOTE(review): doc keys the ldb bound on "side = rocblas_operation_none"; the hemm and hemm_batched docs state ldb >= max(1, m) - confirm
+        stride_B: rocblas_stride, // distance from the start of B_i to B_i+1
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex, // NOTE(review): doc tags C as @param[in], yet it is *mut and the formula assigns to C_i
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // NOTE(review): doc tags stride_C as @param[inout], but it is not declared as a pointer here
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_chemm_strided_batched; see that function's doc attribute
+    #[must_use] // dropping the returned rocblas_status produces a compiler warning
+    pub fn rocblas_zhemm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex, // the only mutable pointer argument
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nherk performs one of the matrix-matrix operations for a Hermitian rank-k update:\n\nC := alpha*op( A )*op( A )^H + beta*C,\n\nwhere  alpha and beta are scalars, op(A) is an n by k matrix, and\nC is a n x n Hermitian matrix stored as either upper or lower.\n\nop( A ) = A, and A is n by k if transA == rocblas_operation_none\nop( A ) = A^H and A is k by n if transA == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C is an upper triangular matrix\n- rocblas_fill_lower:  C is a  lower triangular matrix\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n- rocblas_operation_none:                 op(A) = A\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       pointer storing matrix A on the GPU.\nMatrix dimension is ( lda, k ) when if transA = rocblas_operation_none, otherwise (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif transA = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       pointer storing matrix C on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n"]
+    pub fn rocblas_cherk( // rocblas_float_complex herk: C := alpha*op( A )*op( A )^H + beta*C
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of C is accessed
+        transA: rocblas_operation, // none: op(A) = A; conjugate_transpose: op(A) = A^H
+        n: rocblas_int, // rows and columns of C
+        k: rocblas_int, // columns of op(A)
+        alpha: *const f32, // real f32 scalar, unlike the complex matrix elements
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        beta: *const f32, // real f32 scalar, unlike the complex matrix elements
+        C: *mut rocblas_float_complex, // NOTE(review): doc tags C as @param[in], yet it is *mut and the formula assigns to C
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_cherk; that function's doc attribute describes herk
+    #[must_use] // dropping the returned rocblas_status produces a compiler warning
+    pub fn rocblas_zherk(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64, // real f64 scalar, not rocblas_double_complex
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        beta: *const f64, // real f64 scalar, not rocblas_double_complex
+        C: *mut rocblas_double_complex, // the only mutable pointer argument
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nherk_batched performs a batch of the matrix-matrix operations for a Hermitian rank-k update:\n\nC_i := alpha*op( A_i )*op( A_i )^H + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A) is an n by k matrix, and\nC_i is a n x n Hermitian matrix stored as either upper or lower.\n\nop( A_i ) = A_i, and A_i is n by k if transA == rocblas_operation_none\nop( A_i ) = A_i^H and A_i is k by n if transA == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_conjugate_transpose: op(A) = A^H\n- rocblas_operation_none:                op(A) = A\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       device array of device pointers storing each matrix_i A of dimension (lda, k)\nwhen transA is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif transA = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       device array of device pointers storing each matrix C_i on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_cherk_batched( // rocblas_float_complex herk over arrays of per-matrix pointers
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each C_i is accessed
+        transA: rocblas_operation, // none: op(A) = A; conjugate_transpose: op(A) = A^H
+        n: rocblas_int, // rows and columns of C_i
+        k: rocblas_int, // columns of op(A)
+        alpha: *const f32, // real f32 scalar, unlike the complex matrix elements
+        A: *const *const rocblas_float_complex, // array of pointers, one per A_i
+        lda: rocblas_int,
+        beta: *const f32, // real f32 scalar, unlike the complex matrix elements
+        C: *const *mut rocblas_float_complex, // pointer array is const, each pointed-to C_i is mutable; NOTE(review): doc tags C as @param[in]
+        ldc: rocblas_int,
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_cherk_batched; see that function's doc attribute
+    #[must_use] // dropping the returned rocblas_status produces a compiler warning
+    pub fn rocblas_zherk_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64, // real f64 scalar, not rocblas_double_complex
+        A: *const *const rocblas_double_complex, // pointer to pointers: one entry per matrix
+        lda: rocblas_int,
+        beta: *const f64, // real f64 scalar, not rocblas_double_complex
+        C: *const *mut rocblas_double_complex, // pointer array is const, pointed-to matrices are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nherk_strided_batched performs a batch of the matrix-matrix operations for a Hermitian rank-k update:\n\nC_i := alpha*op( A_i )*op( A_i )^H + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A) is an n by k matrix, and\nC_i is a n x n Hermitian matrix stored as either upper or lower.\n\nop( A_i ) = A_i, and A_i is n by k if transA == rocblas_operation_none\nop( A_i ) = A_i^H and A_i is k by n if transA == rocblas_operation_conjugate_transpose\n\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_conjugate_transpose: op(A) = A^H\n- rocblas_operation_none:                op(A) = A\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen transA is rocblas_operation_none, otherwise of dimension (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif transA = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_cherk_strided_batched( // rocblas_float_complex herk over matrices laid out at fixed strides
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each C_i is accessed
+        transA: rocblas_operation, // none: op(A) = A; conjugate_transpose: op(A) = A^H
+        n: rocblas_int, // rows and columns of C_i
+        k: rocblas_int, // columns of op(A)
+        alpha: *const f32, // real f32 scalar, unlike the complex matrix elements
+        A: *const rocblas_float_complex, // first matrix A_1
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // distance from the start of A_i to A_i+1
+        beta: *const f32, // real f32 scalar, unlike the complex matrix elements
+        C: *mut rocblas_float_complex, // NOTE(review): doc tags C as @param[in], yet it is *mut and the formula assigns to C_i
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // NOTE(review): doc tags stride_C as @param[inout], but it is not declared as a pointer here
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_cherk_strided_batched; see that function's doc attribute
+    #[must_use] // dropping the returned rocblas_status produces a compiler warning
+    pub fn rocblas_zherk_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64, // real f64 scalar, not rocblas_double_complex
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const f64, // real f64 scalar, not rocblas_double_complex
+        C: *mut rocblas_double_complex, // the only mutable pointer argument
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nher2k performs one of the matrix-matrix operations for a Hermitian rank-2k update:\n\nC := alpha*op( A )*op( B )^H + conj(alpha)*op( B )*op( A )^H + beta*C,\n\nwhere  alpha and beta are scalars, op(A) and op(B) are n by k matrices, and\nC is a n x n Hermitian matrix stored as either upper or lower.\n\nop( A ) = A, op( B ) = B, and A and B are n by k if trans == rocblas_operation_none\nop( A ) = A^H, op( B ) = B^H,  and A and B are k by n if trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C is an upper triangular matrix\n- rocblas_fill_lower:  C is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_conjugate_transpose:  op( A ) = A^H, op( B ) = B^H\n- rocblas_operation_none:                 op( A ) = A, op( B ) = B\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       pointer storing matrix A on the GPU.\nMatrix dimension is ( lda, k ) when if trans = rocblas_operation_none, otherwise (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nB       pointer storing matrix B on the GPU.\nMatrix dimension is ( ldb, k ) when if trans = rocblas_operation_none, otherwise (ldb, n)\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       pointer storing matrix C on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n"]
+    pub fn rocblas_cher2k( // rocblas_float_complex her2k: C := alpha*op( A )*op( B )^H + conj(alpha)*op( B )*op( A )^H + beta*C
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of C is accessed
+        trans: rocblas_operation, // none: op(A) = A, op(B) = B; conjugate_transpose: op(A) = A^H, op(B) = B^H
+        n: rocblas_int, // rows and columns of C
+        k: rocblas_int, // columns of op(A)
+        alpha: *const rocblas_float_complex, // complex scalar
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const f32, // real f32 scalar, unlike the complex alpha
+        C: *mut rocblas_float_complex, // NOTE(review): doc tags C as @param[in], yet it is *mut and the formula assigns to C
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_cher2k; that function's doc attribute describes her2k
+    #[must_use] // dropping the returned rocblas_status produces a compiler warning
+    pub fn rocblas_zher2k(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex, // complex scalar
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const f64, // real f64 scalar, unlike the complex alpha
+        C: *mut rocblas_double_complex, // the only mutable pointer argument
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nher2k_batched performs a batch of the matrix-matrix operations for a Hermitian rank-2k update:\n\nC_i := alpha*op( A_i )*op( B_i )^H + conj(alpha)*op( B_i )*op( A_i )^H + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and\nC_i is a n x n Hermitian matrix stored as either upper or lower.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_conjugate_transpose: op(A) = A^H\n- rocblas_operation_none:                op(A) = A\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       device array of device pointers storing each matrix_i A of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n@param[in]\nB       device array of device pointers storing each matrix_i B of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n).\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       device array of device pointers storing each matrix C_i on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_cher2k_batched( // rocblas_float_complex her2k over arrays of per-matrix pointers
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each C_i is accessed
+        trans: rocblas_operation, // rocblas_operation_none or rocblas_operation_conjugate_transpose
+        n: rocblas_int, // rows and columns of C_i
+        k: rocblas_int, // columns of op(A)
+        alpha: *const rocblas_float_complex, // complex scalar
+        A: *const *const rocblas_float_complex, // array of pointers, one per A_i
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex, // array of pointers, one per B_i
+        ldb: rocblas_int,
+        beta: *const f32, // real f32 scalar, unlike the complex alpha
+        C: *const *mut rocblas_float_complex, // pointer array is const, each pointed-to C_i is mutable; NOTE(review): doc tags C as @param[in]
+        ldc: rocblas_int,
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_cher2k_batched; see that function's doc attribute
+    #[must_use] // dropping the returned rocblas_status produces a compiler warning
+    pub fn rocblas_zher2k_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex, // complex scalar
+        A: *const *const rocblas_double_complex, // pointer to pointers: one entry per matrix
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const f64, // real f64 scalar, unlike the complex alpha
+        C: *const *mut rocblas_double_complex, // pointer array is const, pointed-to matrices are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nher2k_strided_batched performs a batch of the matrix-matrix operations for a Hermitian rank-2k update:\n\nC_i := alpha*op( A_i )*op( B_i )^H + conj(alpha)*op( B_i )*op( A_i )^H + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and\nC_i is a n x n Hermitian matrix stored as either upper or lower.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^H, op( B_i ) = B_i^H\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n).\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_cher2k_strided_batched( // rocblas_float_complex her2k over matrices laid out at fixed strides
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of each C_i is accessed
+        trans: rocblas_operation, // none: op(A_i) = A_i, op(B_i) = B_i; conjugate_transpose: op(A_i) = A_i^H, op(B_i) = B_i^H
+        n: rocblas_int, // rows and columns of C_i
+        k: rocblas_int, // columns of op(A)
+        alpha: *const rocblas_float_complex, // complex scalar
+        A: *const rocblas_float_complex, // first matrix A_1
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // distance from the start of A_i to A_i+1
+        B: *const rocblas_float_complex, // first matrix B_1
+        ldb: rocblas_int,
+        stride_B: rocblas_stride, // distance from the start of B_i to B_i+1
+        beta: *const f32, // real f32 scalar, unlike the complex alpha
+        C: *mut rocblas_float_complex, // NOTE(review): doc tags C as @param[in], yet it is *mut and the formula assigns to C_i
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // NOTE(review): doc tags stride_C as @param[inout], but it is not declared as a pointer here
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // rocblas_double_complex counterpart of rocblas_cher2k_strided_batched; see that function's doc attribute
+    #[must_use] // dropping the returned rocblas_status produces a compiler warning
+    pub fn rocblas_zher2k_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex, // complex scalar
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f64, // real f64 scalar, unlike the complex alpha
+        C: *mut rocblas_double_complex, // the only mutable pointer argument
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI declaration of rocBLAS `herkx` on `rocblas_float_complex` data; the doc attribute below carries the full contract.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nherkx performs one of the matrix-matrix operations for a Hermitian rank-k update:\n\nC := alpha*op( A )*op( B )^H + beta*C,\n\nwhere  alpha and beta are scalars, op(A) and op(B) are n by k matrices, and\nC is a n x n Hermitian matrix stored as either upper or lower.\n\nThis routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be Hermitian.\n\nop( A ) = A, op( B ) = B, and A and B are n by k if trans == rocblas_operation_none\nop( A ) = A^H, op( B ) = B^H,  and A and B are k by n if trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C is an upper triangular matrix\n- rocblas_fill_lower:  C is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_conjugate_transpose:  op( A ) = A^H, op( B ) = B^H\n- rocblas_operation_none:                 op( A ) = A, op( B ) = B\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       pointer storing matrix A on the GPU.\nMatrix dimension is ( lda, k ) when if trans = rocblas_operation_none, otherwise (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n@param[in]\nB       pointer storing matrix B on the GPU.\nMatrix dimension is ( ldb, k ) when if trans = rocblas_operation_none, otherwise (ldb, n)\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       pointer storing matrix C on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n"]
+    pub fn rocblas_cherkx( // NOTE(review): the doc states the Hermitian precondition with `^T` while the update formula uses `^H` — confirm upstream
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex, // complex scalar, passed by pointer
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const f32, // real-valued scalar, unlike the complex `alpha`
+        C: *mut rocblas_float_complex, // NOTE(review): doc tags C as @param[in], yet it is `*mut` and the doc says its diagonal imaginary parts are set to zero
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_cherkx` with `rocblas_double_complex`/`f64` element types; presumably the same herkx operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zherkx(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex, // complex scalar, passed by pointer
+        A: *const rocblas_double_complex, // read-only input
+        lda: rocblas_int,
+        B: *const rocblas_double_complex, // read-only input
+        ldb: rocblas_int,
+        beta: *const f64, // real-valued scalar, unlike the complex `alpha`
+        C: *mut rocblas_double_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI declaration of rocBLAS `herkx_batched` on `rocblas_float_complex` data; the doc attribute below carries the full contract.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nherkx_batched performs a batch of the matrix-matrix operations for a Hermitian rank-k update:\n\nC_i := alpha*op( A_i )*op( B_i )^H + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and\nC_i is a n x n Hermitian matrix stored as either upper or lower.\n\nThis routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be Hermitian.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_conjugate_transpose: op(A) = A^H\n- rocblas_operation_none:                op(A) = A\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       device array of device pointers storing each matrix_i A of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nB       device array of device pointers storing each matrix_i B of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n)\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       device array of device pointers storing each matrix C_i on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_cherkx_batched( // NOTE(review): the doc states the Hermitian precondition with `^T` while the update formula uses `^H` — confirm upstream
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex, // array of read-only matrix pointers, one per batch entry per the doc
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex, // array of read-only matrix pointers, one per batch entry per the doc
+        ldb: rocblas_int,
+        beta: *const f32, // real-valued scalar, unlike the complex `alpha`
+        C: *const *mut rocblas_float_complex, // the pointer array is const, the matrices it points to are mutable; NOTE(review): doc tags C as @param[in] although it is written
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_cherkx_batched` with `rocblas_double_complex`/`f64` element types; presumably the same herkx_batched operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zherkx_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex, // pointer to an array of read-only matrix pointers
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex, // pointer to an array of read-only matrix pointers
+        ldb: rocblas_int,
+        beta: *const f64, // real-valued scalar, unlike the complex `alpha`
+        C: *const *mut rocblas_double_complex, // the pointer array is const, the matrices it points to are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int, // presumably the number of entries in the A/B/C pointer arrays — confirm against rocBLAS docs
+    ) -> rocblas_status;
+}
+extern "C" { // FFI declaration of rocBLAS `herkx_strided_batched` on `rocblas_float_complex` data; the doc attribute below carries the full contract.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nherkx_strided_batched performs a batch of the matrix-matrix operations for a Hermitian rank-k update:\n\nC_i := alpha*op( A_i )*op( B_i )^H + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and\nC_i is a n x n Hermitian matrix stored as either upper or lower.\n\nThis routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be Hermitian.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^H, op( B_i ) = B_i^H\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1)\n\n@param[in]\nB       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n).\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1)\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU.\nThe imaginary component of the diagonal elements are not used but are set to zero unless quick return.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_cherkx_strided_batched( // NOTE(review): the doc states the Hermitian precondition with `^T` while the update formula uses `^H` — confirm upstream
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex, // pointer to the first matrix A_1 per the doc; later matrices are located via `stride_A`
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex, // pointer to the first matrix B_1 per the doc; later matrices are located via `stride_B`
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f32, // real-valued scalar, unlike the complex `alpha`
+        C: *mut rocblas_float_complex, // NOTE(review): doc tags C as @param[in], yet it is `*mut` and the doc says its diagonal imaginary parts are set to zero
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // NOTE(review): doc tags this @param[inout] while stride_A/stride_B are @param[in] — looks like a doc slip, confirm upstream
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_cherkx_strided_batched` with `rocblas_double_complex`/`f64` element types; presumably the same operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zherkx_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex, // single base pointer (not a pointer array), paired with `stride_A`
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex, // single base pointer (not a pointer array), paired with `stride_B`
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f64, // real-valued scalar, unlike the complex `alpha`
+        C: *mut rocblas_double_complex, // the only mutable pointer in the signature, paired with `stride_C`
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI declaration of rocBLAS `symm` on `f32` data; the doc attribute below carries the full contract.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsymm performs one of the matrix-matrix operations:\n\nC := alpha*A*B + beta*C if side == rocblas_side_left,\nC := alpha*B*A + beta*C if side == rocblas_side_right,\n\nwhere alpha and beta are scalars, B and C are m by n matrices, and\nA is a symmetric matrix stored as either upper or lower.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside  [rocblas_side]\n- rocblas_side_left:      C := alpha*A*B + beta*C\n- rocblas_side_right:     C := alpha*B*A + beta*C\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix\n- rocblas_fill_lower:  A is a  lower triangular matrix\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B and C. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B and C. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A and B are not referenced.\n\n@param[in]\nA       pointer storing matrix A on the GPU.\n- A is m by m if side == rocblas_side_left\n- A is n by n if side == rocblas_side_right\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\notherwise lda >= max( 1, n ).\n\n@param[in]\nB       pointer storing matrix B on the GPU.\nMatrix dimension is m by n\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B. ldb >= max( 1, m ).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       pointer storing matrix C on the GPU.\nMatrix dimension is m by n\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m ).\n"]
+    pub fn rocblas_ssymm(
+        handle: rocblas_handle,
+        side: rocblas_side, // selects A*B versus B*A per the doc
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32, // scalar passed by pointer
+        A: *const f32,
+        lda: rocblas_int,
+        B: *const f32,
+        ldb: rocblas_int,
+        beta: *const f32, // scalar passed by pointer
+        C: *mut f32, // NOTE(review): doc tags C as @param[in], yet it is `*mut` and is the left-hand side of the documented update
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssymm` with `f64` in place of `f32`; presumably the same symm operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_dsymm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64, // scalar passed by pointer
+        A: *const f64, // read-only input
+        lda: rocblas_int,
+        B: *const f64, // read-only input
+        ldb: rocblas_int,
+        beta: *const f64, // scalar passed by pointer
+        C: *mut f64, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssymm` with `rocblas_float_complex` in place of `f32`; presumably the same symm operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_csymm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex, // complex scalar passed by pointer
+        A: *const rocblas_float_complex, // read-only input
+        lda: rocblas_int,
+        B: *const rocblas_float_complex, // read-only input
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex, // complex scalar here (the herkx bindings in this file take a real beta)
+        C: *mut rocblas_float_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssymm` with `rocblas_double_complex` in place of `f32`; presumably the same symm operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zsymm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex, // complex scalar passed by pointer
+        A: *const rocblas_double_complex, // read-only input
+        lda: rocblas_int,
+        B: *const rocblas_double_complex, // read-only input
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex, // complex scalar here (the herkx bindings in this file take a real beta)
+        C: *mut rocblas_double_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI declaration of rocBLAS `symm_batched` on `f32` data; the doc attribute below carries the full contract.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsymm_batched performs a batch of the matrix-matrix operations:\n\nC_i := alpha*A_i*B_i + beta*C_i if side == rocblas_side_left,\nC_i := alpha*B_i*A_i + beta*C_i if side == rocblas_side_right,\n\nwhere alpha and beta are scalars, B_i and C_i are m by n matrices, and\nA_i is a symmetric matrix stored as either upper or lower.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside  [rocblas_side]\n- rocblas_side_left:      C_i := alpha*A_i*B_i + beta*C_i\n- rocblas_side_right:     C_i := alpha*B_i*A_i + beta*C_i\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix\n- rocblas_fill_lower:  A_i is a  lower triangular matrix\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i and C_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i and C_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A_i and B_i are not referenced.\n\n@param[in]\nA       device array of device pointers storing each matrix A_i on the GPU.\n- A_i is m by m if side == rocblas_side_left\n- A_i is n by n if side == rocblas_side_right\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\notherwise lda >= max( 1, n ).\n\n@param[in]\nB       device array of device pointers storing each matrix B_i on the GPU.\nMatrix dimension is m by n\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C_i need not be set before entry.\n\n@param[in]\nC       device array of device pointers storing each matrix C_i on the GPU.\nMatrix dimension is m by n.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C_i. ldc >= max( 1, m ).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssymm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side, // selects A_i*B_i versus B_i*A_i per the doc
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32, // array of read-only matrix pointers, one per batch entry per the doc
+        lda: rocblas_int,
+        B: *const *const f32, // array of read-only matrix pointers, one per batch entry per the doc
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *const *mut f32, // the pointer array is const, the matrices it points to are mutable; NOTE(review): doc tags C as @param[in] although it is the update's left-hand side
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssymm_batched` with `f64` in place of `f32`; presumably the same symm_batched operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_dsymm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64, // pointer to an array of read-only matrix pointers
+        lda: rocblas_int,
+        B: *const *const f64, // pointer to an array of read-only matrix pointers
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *const *mut f64, // the pointer array is const, the matrices it points to are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int, // presumably the number of entries in the A/B/C pointer arrays — confirm against rocBLAS docs
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssymm_batched` with `rocblas_float_complex` in place of `f32`; presumably the same symm_batched operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_csymm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex, // pointer to an array of read-only matrix pointers
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex, // pointer to an array of read-only matrix pointers
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex, // complex scalar, same element type as `alpha`
+        C: *const *mut rocblas_float_complex, // the pointer array is const, the matrices it points to are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int, // presumably the number of entries in the A/B/C pointer arrays — confirm against rocBLAS docs
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssymm_batched` with `rocblas_double_complex` in place of `f32`; presumably the same symm_batched operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zsymm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex, // pointer to an array of read-only matrix pointers
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex, // pointer to an array of read-only matrix pointers
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex, // complex scalar, same element type as `alpha`
+        C: *const *mut rocblas_double_complex, // the pointer array is const, the matrices it points to are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int, // presumably the number of entries in the A/B/C pointer arrays — confirm against rocBLAS docs
+    ) -> rocblas_status;
+}
+extern "C" { // FFI declaration of rocBLAS `symm_strided_batched` on `f32` data; the doc attribute below carries the full contract.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsymm_strided_batched performs a batch of the matrix-matrix operations:\n\nC_i := alpha*A_i*B_i + beta*C_i if side == rocblas_side_left,\nC_i := alpha*B_i*A_i + beta*C_i if side == rocblas_side_right,\n\nwhere alpha and beta are scalars, B_i and C_i are m by n matrices, and\nA_i is a symmetric matrix stored as either upper or lower.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside  [rocblas_side]\n- rocblas_side_left:      C_i := alpha*A_i*B_i + beta*C_i\n- rocblas_side_right:     C_i := alpha*B_i*A_i + beta*C_i\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A_i is an upper triangular matrix\n- rocblas_fill_lower:  A_i is a  lower triangular matrix\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i and C_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i and C_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A_i and B_i are not referenced.\n\n@param[in]\nA       device pointer to first matrix A_1\n- A_i is m by m if side == rocblas_side_left\n- A_i is n by n if side == rocblas_side_right\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\notherwise lda >= max( 1, n ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       device pointer to first matrix B_1 of dimension (ldb, n) on the GPU.\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC        device pointer to first matrix C_1 of dimension (ldc, n) on the GPU.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssymm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side, // selects A_i*B_i versus B_i*A_i per the doc
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32, // pointer to the first matrix A_1 per the doc; later matrices are located via `stride_A`
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const f32, // pointer to the first matrix B_1 per the doc; later matrices are located via `stride_B`
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32, // NOTE(review): doc tags C as @param[in], yet it is `*mut` and is the left-hand side of the documented update
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // NOTE(review): doc tags this @param[inout] while stride_A/stride_B are @param[in] — looks like a doc slip, confirm upstream
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssymm_strided_batched` with `f64` in place of `f32`; presumably the same operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_dsymm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64, // single base pointer (not a pointer array), paired with `stride_A`
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const f64, // single base pointer (not a pointer array), paired with `stride_B`
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64, // the only mutable pointer in the signature, paired with `stride_C`
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssymm_strided_batched` with `rocblas_float_complex` in place of `f32`; presumably the same operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_csymm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex, // single base pointer (not a pointer array), paired with `stride_A`
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex, // single base pointer (not a pointer array), paired with `stride_B`
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_float_complex, // complex scalar, same element type as `alpha`
+        C: *mut rocblas_float_complex, // the only mutable pointer in the signature, paired with `stride_C`
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssymm_strided_batched` with `rocblas_double_complex` in place of `f32`; presumably the same operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zsymm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex, // single base pointer (not a pointer array), paired with `stride_A`
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex, // single base pointer (not a pointer array), paired with `stride_B`
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_double_complex, // complex scalar, same element type as `alpha`
+        C: *mut rocblas_double_complex, // the only mutable pointer in the signature, paired with `stride_C`
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI declaration of rocBLAS `syrk` on `f32` data; the doc attribute below carries the full contract.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyrk performs one of the matrix-matrix operations for a symmetric rank-k update:\n\nC := alpha*op( A )*op( A )^T + beta*C,\n\nwhere  alpha and beta are scalars, op(A) is an n by k matrix, and\nC is a symmetric n x n matrix stored as either upper or lower.\n\nop( A ) = A, and A is n by k if transA == rocblas_operation_none\nop( A ) = A^T and A is k by n if transA == rocblas_operation_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C is an upper triangular matrix\n- rocblas_fill_lower:  C is a  lower triangular matrix\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_transpose:           op(A) = A^T\n- rocblas_operation_none:                op(A) = A\n- rocblas_operation_conjugate_transpose: op(A) = A^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types. See cherk\nand zherk.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       pointer storing matrix A on the GPU.\nMatrix dimension is ( lda, k ) when if transA = rocblas_operation_none, otherwise (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif transA = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       pointer storing matrix C on the GPU.\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n"]
+    pub fn rocblas_ssyrk(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation, // the doc lists conjugate_transpose as equivalent to transpose here, and as unsupported for complex types
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32, // scalar passed by pointer
+        A: *const f32, // the single input matrix; the documented update multiplies op(A) by its own transpose
+        lda: rocblas_int,
+        beta: *const f32, // scalar passed by pointer
+        C: *mut f32, // NOTE(review): doc tags C as @param[in], yet it is `*mut` and is the left-hand side of the documented update
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssyrk` with `f64` in place of `f32`; presumably the same syrk operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_dsyrk(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64, // scalar passed by pointer
+        A: *const f64, // read-only input
+        lda: rocblas_int,
+        beta: *const f64, // scalar passed by pointer
+        C: *mut f64, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssyrk` with `rocblas_float_complex` in place of `f32`; presumably the same syrk operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_csyrk(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation, // the `rocblas_ssyrk` doc says conjugate_transpose is not supported for complex types
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex, // complex scalar passed by pointer
+        A: *const rocblas_float_complex, // read-only input
+        lda: rocblas_int,
+        beta: *const rocblas_float_complex, // complex scalar, same element type as `alpha`
+        C: *mut rocblas_float_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssyrk` with `rocblas_double_complex` in place of `f32`; presumably the same syrk operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zsyrk(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation, // the `rocblas_ssyrk` doc says conjugate_transpose is not supported for complex types
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex, // complex scalar passed by pointer
+        A: *const rocblas_double_complex, // read-only input
+        lda: rocblas_int,
+        beta: *const rocblas_double_complex, // complex scalar, same element type as `alpha`
+        C: *mut rocblas_double_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI declaration of rocBLAS `syrk_batched` on `f32` data; the doc attribute below carries the full contract.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyrk_batched performs a batch of the matrix-matrix operations for a symmetric rank-k update:\n\nC_i := alpha*op( A_i )*op( A_i )^T + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) is an n by k matrix, and\nC_i is a symmetric n x n matrix stored as either upper or lower.\n\nop( A_i ) = A_i, and A_i is n by k if transA == rocblas_operation_none\nop( A_i ) = A_i^T and A_i is k by n if transA == rocblas_operation_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_transpose:           op(A) = A^T\n- rocblas_operation_none:                op(A) = A\n- rocblas_operation_conjugate_transpose: op(A) = A^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types. See cherk\nand zherk.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       device array of device pointers storing each matrix_i A of dimension (lda, k)\nwhen transA is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif transA = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       device array of device pointers storing each matrix C_i on the GPU.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssyrk_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation, // the doc lists conjugate_transpose as equivalent to transpose here, and as unsupported for complex types
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32, // array of read-only matrix pointers, one per batch entry per the doc
+        lda: rocblas_int,
+        beta: *const f32,
+        C: *const *mut f32, // the pointer array is const, the matrices it points to are mutable; NOTE(review): doc tags C as @param[in] although it is the update's left-hand side
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssyrk_batched` with `f64` in place of `f32`; presumably the same syrk_batched operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_dsyrk_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64, // pointer to an array of read-only matrix pointers
+        lda: rocblas_int,
+        beta: *const f64,
+        C: *const *mut f64, // the pointer array is const, the matrices it points to are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int, // presumably the number of entries in the A/C pointer arrays — confirm against rocBLAS docs
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssyrk_batched` with `rocblas_float_complex` in place of `f32`; presumably the same syrk_batched operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_csyrk_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation, // the `rocblas_ssyrk_batched` doc says conjugate_transpose is not supported for complex types
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex, // pointer to an array of read-only matrix pointers
+        lda: rocblas_int,
+        beta: *const rocblas_float_complex, // complex scalar, same element type as `alpha`
+        C: *const *mut rocblas_float_complex, // the pointer array is const, the matrices it points to are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int, // presumably the number of entries in the A/C pointer arrays — confirm against rocBLAS docs
+    ) -> rocblas_status;
+}
+extern "C" { // Same parameter list as `rocblas_ssyrk_batched` with `rocblas_double_complex` in place of `f32`; presumably the same syrk_batched operation — see that function's doc attribute.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zsyrk_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation, // the `rocblas_ssyrk_batched` doc says conjugate_transpose is not supported for complex types
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex, // pointer to an array of read-only matrix pointers
+        lda: rocblas_int,
+        beta: *const rocblas_double_complex, // complex scalar, same element type as `alpha`
+        C: *const *mut rocblas_double_complex, // the pointer array is const, the matrices it points to are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int, // presumably the number of entries in the A/C pointer arrays — confirm against rocBLAS docs
+    ) -> rocblas_status;
+}
+extern "C" { // FFI declaration of rocBLAS `syrk_strided_batched` on `f32` data; the doc attribute below carries the full contract.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyrk_strided_batched performs a batch of the matrix-matrix operations for a symmetric rank-k update:\n\nC_i := alpha*op( A_i )*op( A_i )^T + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) is an n by k matrix, and\nC_i is a symmetric n x n matrix stored as either upper or lower.\n\nop( A_i ) = A_i, and A_i is n by k if transA == rocblas_operation_none\nop( A_i ) = A_i^T and A_i is k by n if transA == rocblas_operation_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_transpose:           op(A) = A^T\n- rocblas_operation_none:                op(A) = A\n- rocblas_operation_conjugate_transpose: op(A) = A^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types. See cherk\nand zherk.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen transA is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif transA = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. 
When beta is\nzero then C need not be set before entry.\n\n@param[in]\nC       Device pointer to the first matrix C_1 on the GPU. on the GPU.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[inout]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1)\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssyrk_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation, // the doc lists conjugate_transpose as equivalent to transpose here, and as unsupported for complex types
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32, // pointer to the first matrix A_1 per the doc; later matrices are located via `stride_A`
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32, // NOTE(review): doc tags C as @param[in], yet it is `*mut` and is the left-hand side of the documented update
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // NOTE(review): doc tags this @param[inout] while stride_A is @param[in] — looks like a doc slip, confirm upstream
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `f64` counterpart of `rocblas_ssyrk_strided_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_dsyrk_strided_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `rocblas_float_complex` counterpart of `rocblas_ssyrk_strided_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_csyrk_strided_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation, // shared docs: rocblas_operation_conjugate_transpose is not supported for complex types
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `rocblas_double_complex` counterpart of `rocblas_ssyrk_strided_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zsyrk_strided_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation, // shared docs: rocblas_operation_conjugate_transpose is not supported for complex types
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyr2k performs one of the matrix-matrix operations for a symmetric rank-2k update:\n\nC := alpha*(op( A )*op( B )^T + op( B )*op( A )^T) + beta*C,\n\nwhere  alpha and beta are scalars, op(A) and op(B) are n by k matrix, and\nC is a symmetric n x n matrix stored as either upper or lower.\n\nop( A ) = A, op( B ) = B, and A and B are n by k if trans == rocblas_operation_none\nop( A ) = A^T, op( B ) = B^T, and A and B are k by n if trans == rocblas_operation_transpose\nor for ssyr2k and dsyr2k when trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C is an upper triangular matrix\n- rocblas_fill_lower:  C is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_transpose:           op( A ) = A^T, op( B ) = B^T\n- rocblas_operation_none:                op( A ) = A, op( B ) = B\n- rocblas_operation_conjugate_transpose: op( A ) = A^T, op( B ) = B^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types in csyr2k and zsyr2k.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A) and op(B). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       pointer storing matrix A on the GPU.\nMatrix dimension is ( lda, k ) if trans = rocblas_operation_none, otherwise (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nB       pointer storing matrix B on the GPU.\nMatrix dimension is ( ldb, k ) if trans = rocblas_operation_none, otherwise (ldb, n)\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B.\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[inout]\nC       pointer storing matrix C on the GPU.\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n"]
+    pub fn rocblas_ssyr2k(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        B: *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `f64` counterpart of `rocblas_ssyr2k`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_dsyr2k( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        B: *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *mut f64, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `rocblas_float_complex` counterpart of `rocblas_ssyr2k`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_csyr2k( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation, // shared docs: rocblas_operation_conjugate_transpose is not supported in csyr2k
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `rocblas_double_complex` counterpart of `rocblas_ssyr2k`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zsyr2k( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation, // shared docs: rocblas_operation_conjugate_transpose is not supported in zsyr2k
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyr2k_batched performs a batch of the matrix-matrix operations for a symmetric rank-2k update:\n\nC_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and\nC_i is a symmetric n x n matrix stored as either upper or lower.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == rocblas_operation_transpose\nor for ssyr2k_batched and dsyr2k_batched when trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_transpose:           op( A_i ) = A_i^T, op( B_i ) = B_i^T\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^T, op( B_i ) = B_i^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types in csyr2k_batched and zsyr2k_batched.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       device array of device pointers storing each matrix_i A of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n@param[in]\nB       device array of device pointers storing each matrix_i B of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n).\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[inout]\nC       device array of device pointers storing each matrix C_i on the GPU.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssyr2k_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        B: *const *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `f64` counterpart of `rocblas_ssyr2k_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_dsyr2k_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64, // pointer-to-pointer: shared docs call this a device array of device pointers
+        lda: rocblas_int,
+        B: *const *const f64, // pointer-to-pointer, as for A
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *const *mut f64, // pointer array is const; the matrices it points at are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `rocblas_float_complex` counterpart of `rocblas_ssyr2k_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_csyr2k_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation, // shared docs: rocblas_operation_conjugate_transpose is not supported in csyr2k_batched
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex, // pointer-to-pointer: shared docs call this a device array of device pointers
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex, // pointer-to-pointer, as for A
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex, // pointer array is const; the matrices it points at are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `rocblas_double_complex` counterpart of `rocblas_ssyr2k_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zsyr2k_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation, // shared docs: rocblas_operation_conjugate_transpose is not supported in zsyr2k_batched
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex, // pointer-to-pointer: shared docs call this a device array of device pointers
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex, // pointer-to-pointer, as for A
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex, // pointer array is const; the matrices it points at are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyr2k_strided_batched performs a batch of the matrix-matrix operations for a symmetric rank-2k update:\n\nC_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and\nC_i is a symmetric n x n matrix stored as either upper or lower.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == rocblas_operation_transpose\nor for ssyr2k_strided_batched and dsyr2k_strided_batched when trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_transpose:           op( A_i ) = A_i^T, op( B_i ) = B_i^T\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^T, op( B_i ) = B_i^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types in csyr2k_strided_batched and zsyr2k_strided_batched.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n).\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1)\n\n@param[in]\nB       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n)\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1)\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[inout]\nC       Device pointer to the first matrix C_1 on the GPU.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[in]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssyr2k_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `f64` counterpart of `rocblas_ssyr2k_strided_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_dsyr2k_strided_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // shared docs: stride from the start of A_i to the next matrix
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride, // shared docs: stride from the start of B_i to the next matrix
+        beta: *const f64,
+        C: *mut f64, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // passed by value, so it cannot be written back
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `rocblas_float_complex` counterpart of `rocblas_ssyr2k_strided_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_csyr2k_strided_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation, // shared docs: rocblas_operation_conjugate_transpose is not supported in csyr2k_strided_batched
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // shared docs: stride from the start of A_i to the next matrix
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride, // shared docs: stride from the start of B_i to the next matrix
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // passed by value, so it cannot be written back
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `rocblas_double_complex` counterpart of `rocblas_ssyr2k_strided_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zsyr2k_strided_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation, // shared docs: rocblas_operation_conjugate_transpose is not supported in zsyr2k_strided_batched
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // shared docs: stride from the start of A_i to the next matrix
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride, // shared docs: stride from the start of B_i to the next matrix
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // passed by value, so it cannot be written back
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyrkx performs one of the matrix-matrix operations for a symmetric rank-k update:\n\nC := alpha*op( A )*op( B )^T + beta*C,\n\nwhere  alpha and beta are scalars, op(A) and op(B) are n by k matrix, and\nC is a symmetric n x n matrix stored as either upper or lower.\n\nThis routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be symmetric.\n\nop( A ) = A, op( B ) = B, and A and B are n by k if trans == rocblas_operation_none\nop( A ) = A^T, op( B ) = B^T,  and A and B are k by n if trans == rocblas_operation_transpose\nor for ssyrkx and dsyrkx when trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C is an upper triangular matrix\n- rocblas_fill_lower:  C is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_transpose:           op( A ) = A^T, op( B ) = B^T\n- rocblas_operation_none:                op( A ) = A, op( B ) = B\n- rocblas_operation_conjugate_transpose: op( A ) = A^T, op( B ) = B^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types in csyrkx and zsyrkx.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A) and op(B). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       pointer storing matrix A on the GPU.\nMatrix dimension is ( lda, k ) if trans = rocblas_operation_none, otherwise (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nB       pointer storing matrix B on the GPU.\nMatrix dimension is ( ldb, k ) if trans = rocblas_operation_none, otherwise (ldb, n)\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[inout]\nC       pointer storing matrix C on the GPU.\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n"]
+    pub fn rocblas_ssyrkx(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        B: *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `f64` counterpart of `rocblas_ssyrkx`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_dsyrkx( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        B: *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *mut f64, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `rocblas_float_complex` counterpart of `rocblas_ssyrkx`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_csyrkx( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation, // shared docs: rocblas_operation_conjugate_transpose is not supported in csyrkx
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `rocblas_double_complex` counterpart of `rocblas_ssyrkx`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zsyrkx( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation, // shared docs: rocblas_operation_conjugate_transpose is not supported in zsyrkx
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyrkx_batched performs a batch of the matrix-matrix operations for a symmetric rank-k update:\n\nC_i := alpha*op( A_i )*op( B_i )^T + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and\nC_i is a symmetric n x n matrix stored as either upper or lower.\n\nThis routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^T, op( B_i ) = B_i^T,  and A_i and B_i are k by n if trans == rocblas_operation_transpose\nor for ssyrkx_batched and dsyrkx_batched when trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_transpose:           op( A_i ) = A_i^T, op( B_i ) = B_i^T\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^T, op( B_i ) = B_i^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types in csyrkx_batched and zsyrkx_batched.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       device array of device pointers storing each matrix_i A of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nB       device array of device pointers storing each matrix_i B of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n)\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[inout]\nC       device array of device pointers storing each matrix C_i on the GPU.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssyrkx_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        B: *const *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `f64` counterpart of `rocblas_ssyrkx_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_dsyrkx_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64, // pointer-to-pointer: shared docs call this a device array of device pointers
+        lda: rocblas_int,
+        B: *const *const f64, // pointer-to-pointer, as for A
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *const *mut f64, // pointer array is const; the matrices it points at are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `rocblas_float_complex` counterpart of `rocblas_ssyrkx_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_csyrkx_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation, // shared docs: rocblas_operation_conjugate_transpose is not supported in csyrkx_batched
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex, // pointer-to-pointer: shared docs call this a device array of device pointers
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex, // pointer-to-pointer, as for A
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex, // pointer array is const; the matrices it points at are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `rocblas_double_complex` counterpart of `rocblas_ssyrkx_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_zsyrkx_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation, // shared docs: rocblas_operation_conjugate_transpose is not supported in zsyrkx_batched
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex, // pointer-to-pointer: shared docs call this a device array of device pointers
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex, // pointer-to-pointer, as for A
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex, // pointer array is const; the matrices it points at are mutable
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\nsyrkx_strided_batched performs a batch of the matrix-matrix operations for a symmetric rank-k update:\n\nC_i := alpha*op( A_i )*op( B_i )^T + beta*C_i,\n\nwhere  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrix, and\nC_i is a symmetric n x n matrix stored as either upper or lower.\n\nThis routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.\n\nop( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none\nop( A_i ) = A_i^T, op( B_i ) = B_i^T,  and A_i and B_i are k by n if trans == rocblas_operation_transpose\nor for ssyrkx_strided_batched and dsyrkx_strided_batched when trans == rocblas_operation_conjugate_transpose\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  C_i is an upper triangular matrix\n- rocblas_fill_lower:  C_i is a  lower triangular matrix\n\n@param[in]\ntrans  [rocblas_operation]\n- rocblas_operation_transpose:           op( A_i ) = A_i^T, op( B_i ) = B_i^T\n- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i\n- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^T, op( B_i ) = B_i^T\n\nrocblas_operation_conjugate_transpose is not supported for complex types in csyrkx_strided_batched and zsyrkx_strided_batched.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of rows and columns of C_i. n >= 0.\n\n@param[in]\nk       [rocblas_int]\nk specifies the number of columns of op(A). k >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and A need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (lda, n)\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A_i.\n\nif trans = rocblas_operation_none,  lda >= max( 1, n ),\notherwise lda >= max( 1, k ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)\nwhen trans is rocblas_operation_none, otherwise of dimension (ldb, n).\n\n@param[in]\nldb     [rocblas_int]\nldb specifies the first dimension of B_i.\n\nif trans = rocblas_operation_none,  ldb >= max( 1, n ),\notherwise ldb >= max( 1, k ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n\n@param[in]\nbeta\nbeta specifies the scalar beta. When beta is\nzero then C need not be set before entry.\n\n@param[inout]\nC       Device pointer to the first matrix C_1 on the GPU.\nonly the upper/lower triangular part of each C_i is accessed.\n\n@param[in]\nldc    [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, n ).\n\n@param[in]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_ssyrkx_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `f64` counterpart of `rocblas_ssyrkx_strided_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_dsyrkx_strided_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // shared docs: stride from the start of A_i to the next matrix
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride, // shared docs: stride from the start of B_i to the next matrix
+        beta: *const f64,
+        C: *mut f64, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // passed by value, so it cannot be written back
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // `rocblas_float_complex` counterpart of `rocblas_ssyrkx_strided_batched`; that declaration's doc attribute describes the operation.
+    #[must_use] // the compiler warns when the returned `rocblas_status` is discarded
+    pub fn rocblas_csyrkx_strided_batched( // same parameter names and order as the `f32` form
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation, // shared docs: rocblas_operation_conjugate_transpose is not supported in csyrkx_strided_batched
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // shared docs: stride from the start of A_i to the next matrix
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride, // shared docs: stride from the start of B_i to the next matrix
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex, // the only mutable pointer in the signature
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // passed by value, so it cannot be written back
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `rocblas_double_complex` variant of `rocblas_ssyrkx_strided_batched`: same
+    // parameter order, with alpha, A, B, beta and C typed as complex pointers.
+    // `C` is the only mutable pointer; A and B are read-only inputs.
+    #[must_use]
+    pub fn rocblas_zsyrkx_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm performs one of the matrix-matrix operations:\n\nB := alpha*op( A )*B,   or\nB := alpha*B*op( A ),\n\nwhere  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A )  is one  of\n\nop( A ) = A   or\nop( A ) = A^T   or\nop( A ) = A^H.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix and the strictly lower triangular part of\nA is not referenced. Here k is m when side == rocblas_side_left\nand is n when side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced. Here k is m when  side == rocblas_side_left\nand is n when side == rocblas_side_right.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A) multiplies B from the left or right as follows:\n- rocblas_side_left:       B := alpha*op( A )*B\n- rocblas_side_right:      B := alpha*B*op( A )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A) = A\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A is unit triangular as follows:\n- 
rocblas_diagonal_unit:      A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. When alpha is\nzero then A is not referenced and B need not be set before\nentry.\n\n@param[in]\nA       Device pointer to matrix A on the GPU.\nA has dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[inout]\nB       Device pointer to the first matrix B_0 on the GPU.\nOn entry,  the leading  m by n part of the array  B must\ncontain the matrix  B,  and  on exit  is overwritten  by the\ntransformed matrix.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B. ldb >= max( 1, m ).\n"]
+    // `f32` in-place triangular matrix-matrix multiply, as described by the
+    // `#[doc]` attribute above: B := alpha*op(A)*B or B := alpha*B*op(A).
+    // There is no separate output argument; the result overwrites `B`.
+    pub fn rocblas_strmm(
+        handle: rocblas_handle,
+        side: rocblas_side, // op(A) multiplies B from the left or the right
+        uplo: rocblas_fill, // A is upper or lower triangular
+        transA: rocblas_operation, // op(A) = A, A^T or A^H
+        diag: rocblas_diagonal, // whether A is assumed unit triangular
+        m: rocblas_int, // rows of B, m >= 0
+        n: rocblas_int, // columns of B, n >= 0
+        alpha: *const f32,
+        A: *const f32, // dimension (lda, k); k is m for side_left, n for side_right
+        lda: rocblas_int,
+        B: *mut f32, // m by n input, overwritten by the transformed matrix
+        ldb: rocblas_int, // ldb >= max(1, m)
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `f64` variant of `rocblas_strmm`: same parameter list with alpha, A and B
+    // pointing at `f64` data. The doc attached to `rocblas_strmm` opens a `@{`
+    // group, so it presumably covers this declaration too.
+    // `B` is `*mut` because the product is written back into it.
+    #[must_use]
+    pub fn rocblas_dtrmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        B: *mut f64,
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `rocblas_float_complex` variant of `rocblas_strmm`: same parameter list
+    // with alpha, A and B typed as complex pointers.
+    // `B` is `*mut` because the product is written back into it.
+    #[must_use]
+    pub fn rocblas_ctrmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *mut rocblas_float_complex,
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `rocblas_double_complex` variant of `rocblas_strmm`: same parameter list
+    // with alpha, A and B typed as complex pointers.
+    // `B` is `*mut` because the product is written back into it.
+    #[must_use]
+    pub fn rocblas_ztrmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *mut rocblas_double_complex,
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm_batched performs one of the batched matrix-matrix operations:\n\nB_i := alpha*op( A_i )*B_i,   or\nB_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,\n\nwhere  alpha  is a scalar,  B_i  is an m by n matrix,  A_i  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A_i )  is one  of\n\nop( A_i ) = A_i   or   op( A_i ) = A_i^T   or   op( A_i ) = A_i^H.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A_i) multiplies B_i from the left or right as follows:\n- rocblas_side_left:       B_i := alpha*op( A_i )*B_i\n- rocblas_side_right:      B_i := alpha*B_i*op( A_i )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A_i) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A_i) = A_i\n- rocblas_operation_transpose:      op(A_i) = A_i^T\n- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A_i is unit triangular as follows:\n- rocblas_diagonal_unit:      A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A_i is not referenced and B_i need not be set before\nentry.\n\n@param[in]\nA       Device array of device pointers storing each matrix A_i on the GPU.\nEach A_i is of dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix  and the strictly lower triangular part of\nA is not referenced.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA_i  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[inout]\nB       device array of device pointers storing each matrix B_i on the GPU.\nOn entry,  the leading  m by n part of the array  B_i must\ncontain the matrix  B_i,  and  on exit  is overwritten  by the\ntransformed matrix.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances i in the batch."]
+    // `f32` batched in-place trmm, as described by the `#[doc]` attribute above:
+    // B_i := alpha*op(A_i)*B_i or B_i := alpha*B_i*op(A_i) for each batch index.
+    // `A` and `B` are arrays of per-matrix pointers rather than single matrices.
+    pub fn rocblas_strmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of each B_i, m >= 0
+        n: rocblas_int, // columns of each B_i, n >= 0
+        alpha: *const f32,
+        A: *const *const f32, // device array of device pointers, one per A_i
+        lda: rocblas_int,
+        B: *const *mut f32, // device array of device pointers; each B_i is overwritten
+        ldb: rocblas_int, // ldb >= max(1, m)
+        batch_count: rocblas_int, // number of instances i in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `f64` variant of `rocblas_strmm_batched`: same parameter list with `f64`
+    // element pointers. `A` and `B` are pointer arrays (one entry per batch
+    // instance); only the matrices reached through `B` are mutable.
+    #[must_use]
+    pub fn rocblas_dtrmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        B: *const *mut f64,
+        ldb: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `rocblas_float_complex` variant of `rocblas_strmm_batched`: same parameter
+    // list with complex element pointers. `A` and `B` are pointer arrays (one
+    // entry per batch instance); only the matrices reached through `B` are mutable.
+    #[must_use]
+    pub fn rocblas_ctrmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *mut rocblas_float_complex,
+        ldb: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `rocblas_double_complex` variant of `rocblas_strmm_batched`: same parameter
+    // list with complex element pointers. `A` and `B` are pointer arrays (one
+    // entry per batch instance); only the matrices reached through `B` are mutable.
+    #[must_use]
+    pub fn rocblas_ztrmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *mut rocblas_double_complex,
+        ldb: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm_strided_batched performs one of the strided_batched matrix-matrix operations:\n\nB_i := alpha*op( A_i )*B_i,   or\nB_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,\n\nwhere  alpha  is a scalar,  B_i  is an m by n matrix,  A_i  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A_i )  is one  of\n\nop( A_i ) = A_i   or\nop( A_i ) = A_i^T   or\nop( A_i ) = A_i^H.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A_i) multiplies B_i from the left or right as follows:\n- rocblas_side_left:       B_i := alpha*op( A_i )*B_i\n- rocblas_side_right:      B_i := alpha*B_i*op( A_i )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix\n- rocblas_fill_lower:  A is a  lower triangular matrix\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A_i) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A_i) = A_i\n- rocblas_operation_transpose:      op(A_i) = A_i^T\n- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A_i is unit triangular as follows:\n- rocblas_diagonal_unit:      A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A_i is not referenced and B_i need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_0 on the GPU.\nEach A_i is of dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix  and the strictly lower triangular part of\nA is not referenced.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA_i  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[inout]\nB       Device pointer to the first matrix B_0 on the GPU.\nOn entry,  the leading  m by n part of the array  B_i must\ncontain the matrix  B_i,  and  on exit  is overwritten  by the\ntransformed matrix.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances i in the batch."]
+    // `f32` strided-batched in-place trmm, as described by the `#[doc]`
+    // attribute above. Unlike the `_batched` form, `A` and `B` are single
+    // pointers to the first matrix, and later matrices are located via strides.
+    pub fn rocblas_strmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of each B_i, m >= 0
+        n: rocblas_int, // columns of each B_i, n >= 0
+        alpha: *const f32,
+        A: *const f32, // first matrix A_0
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // start of A_i to start of A_i+1
+        B: *mut f32, // first matrix B_0; each B_i is overwritten with the result
+        ldb: rocblas_int, // ldb >= max(1, m)
+        stride_B: rocblas_stride, // start of B_i to start of B_i+1
+        batch_count: rocblas_int, // number of instances i in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `f64` variant of `rocblas_strmm_strided_batched`: same parameter list with
+    // `f64` element pointers. `A`/`B` address the first matrix of each batch and
+    // `stride_A`/`stride_B` step to the following ones; `B` is updated in place.
+    #[must_use]
+    pub fn rocblas_dtrmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *mut f64,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `rocblas_float_complex` variant of `rocblas_strmm_strided_batched`: same
+    // parameter list with complex element pointers. `A`/`B` address the first
+    // matrix of each batch and the strides step to the following ones; `B` is
+    // updated in place.
+    #[must_use]
+    pub fn rocblas_ctrmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *mut rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `rocblas_double_complex` variant of `rocblas_strmm_strided_batched`: same
+    // parameter list with complex element pointers. `A`/`B` address the first
+    // matrix of each batch and the strides step to the following ones; `B` is
+    // updated in place.
+    #[must_use]
+    pub fn rocblas_ztrmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *mut rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm_outofplace performs one of the matrix-matrix operations:\n\nC := alpha*op( A )*B,   or\nC := alpha*B*op( A ),\n\nwhere  alpha  is a scalar,  B and C are m by n matrices,  A  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A )  is one  of\n\nop( A ) = A   or\nop( A ) = A^T   or\nop( A ) = A^H.\n\nNote that trmm_outofplace can provide in-place functionality in the same way as trmm\nby passing in the same address for both matrices B and C.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A) multiplies B from the left or right as follows:\n- rocblas_side_left:       C := alpha*op( A )*B\n- rocblas_side_right:      C := alpha*B*op( A )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A) = A\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A is unit triangular as follows:\n- rocblas_diagonal_unit:      A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A is not referenced and B need not be set before\nentry.\n\n@param[in]\nA       Device pointer to matrix A on the GPU.\nA has dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix  and the strictly lower triangular part of\nA is not referenced.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[in]\nB       Device pointer to the matrix B on the GPU.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B. ldb >= max( 1, m ).\n\n@param[out]\nC      Device pointer to the matrix C on the GPU.\n\n@param[in]\nldc   [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m).\nIf B and C pointers are to the same matrix then ldc must equal ldb or\nrocblas_status_invalid_size will be returned.\n"]
+    // `f32` out-of-place trmm, as described by the `#[doc]` attribute above:
+    // C := alpha*op(A)*B or C := alpha*B*op(A). `B` is read-only and the result
+    // goes to the separate output `C`. Per that doc, passing the same address
+    // for B and C gives in-place behaviour, and then ldc must equal ldb.
+    pub fn rocblas_strmm_outofplace(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of B, m >= 0
+        n: rocblas_int, // columns of B, n >= 0
+        alpha: *const f32,
+        A: *const f32, // dimension (lda, k); k is m for side_left, n for side_right
+        lda: rocblas_int,
+        B: *const f32, // input matrix
+        ldb: rocblas_int, // ldb >= max(1, m)
+        C: *mut f32, // output matrix
+        ldc: rocblas_int, // ldc >= max(1, m)
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `f64` variant of `rocblas_strmm_outofplace`: same parameter list with
+    // `f64` element pointers. A and B are read-only inputs; the product is
+    // written through the mutable `C` pointer.
+    #[must_use]
+    pub fn rocblas_dtrmm_outofplace(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        B: *const f64,
+        ldb: rocblas_int,
+        C: *mut f64,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `rocblas_float_complex` variant of `rocblas_strmm_outofplace`: same
+    // parameter list with complex element pointers. A and B are read-only
+    // inputs; the product is written through the mutable `C` pointer.
+    #[must_use]
+    pub fn rocblas_ctrmm_outofplace(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `rocblas_double_complex` variant of `rocblas_strmm_outofplace`: same
+    // parameter list with complex element pointers. A and B are read-only
+    // inputs; the product is written through the mutable `C` pointer.
+    #[must_use]
+    pub fn rocblas_ztrmm_outofplace(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm_outofplace_batched performs one of the batched matrix-matrix operations:\n\nC_i := alpha*op( A_i )*B_i,   or\nC_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,\n\nwhere  alpha  is a scalar,  B_i  is an m by n matrix,  A_i  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A_i )  is one  of\n\nop( A_i ) = A_i   or\nop( A_i ) = A_i^T   or\nop( A_i ) = A_i^H.\n\nNote that trmm_outofplace_batched can provide in-place functionality in the same way as trmm_batched\nby passing in the same address for both matrices B and C.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A_i) multiplies B_i from the left or right as follows:\n- rocblas_side_left:       C_i := alpha*op( A_i )*B_i\n- rocblas_side_right:      C_i := alpha*B_i*op( A_i )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A_i) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A_i) = A_i\n- rocblas_operation_transpose:      op(A_i) = A_i^T\n- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A_i is unit triangular as follows:\n- rocblas_diagonal_unit:      A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A_i is not referenced and B_i need not be set before\nentry.\n\n@param[in]\nA       Device array of device pointers storing each matrix A_i on the GPU.\nEach A_i is of dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix  and the strictly lower triangular part of\nA is not referenced.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA_i  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[in]\nB       device array of device pointers storing each matrix B_i on the GPU.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[out]\nC      device array of device pointers storing each matrix C_i on the GPU.\n\n@param[in]\nldc   [rocblas_int]\nldc specifies the first dimension of C. ldc >= max( 1, m).\nIf B and C pointers are to the same matrix then ldc must equal ldb or\nrocblas_status_invalid_size will be returned.\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances i in the batch."]
+    // `f32` batched out-of-place trmm, as described by the `#[doc]` attribute
+    // above: C_i := alpha*op(A_i)*B_i or C_i := alpha*B_i*op(A_i).
+    // `A`, `B` and `C` are arrays of per-matrix pointers; only the matrices
+    // reached through `C` are mutable.
+    pub fn rocblas_strmm_outofplace_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of each B_i, m >= 0
+        n: rocblas_int, // columns of each B_i, n >= 0
+        alpha: *const f32,
+        A: *const *const f32, // device array of device pointers, one per A_i
+        lda: rocblas_int,
+        B: *const *const f32, // device array of device pointers, one per B_i
+        ldb: rocblas_int, // ldb >= max(1, m)
+        C: *const *mut f32, // device array of device pointers, one per output C_i
+        ldc: rocblas_int, // ldc >= max(1, m); must equal ldb when B and C alias
+        batch_count: rocblas_int, // number of instances i in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `f64` variant of `rocblas_strmm_outofplace_batched`: same parameter list
+    // with `f64` element pointers. `A`, `B` and `C` are pointer arrays; only
+    // the matrices reached through `C` are mutable.
+    #[must_use]
+    pub fn rocblas_dtrmm_outofplace_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        B: *const *const f64,
+        ldb: rocblas_int,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `rocblas_float_complex` variant of `rocblas_strmm_outofplace_batched`:
+    // same parameter list with complex element pointers. `A`, `B` and `C` are
+    // pointer arrays; only the matrices reached through `C` are mutable.
+    #[must_use]
+    pub fn rocblas_ctrmm_outofplace_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `rocblas_double_complex` variant of `rocblas_strmm_outofplace_batched`:
+    // same parameter list with complex element pointers. `A`, `B` and `C` are
+    // pointer arrays; only the matrices reached through `C` are mutable.
+    #[must_use]
+    pub fn rocblas_ztrmm_outofplace_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrmm_outofplace_strided_batched performs one of the strided_batched matrix-matrix operations:\n\nC_i := alpha*op( A_i )*B_i,   or\nC_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,\n\nwhere  alpha  is a scalar,  B_i  is an m by n matrix,  A_i  is a unit, or\nnon-unit,  upper or lower triangular matrix  and  op( A_i )  is one  of\n\nop( A_i ) = A_i   or\nop( A_i ) = A_i^T   or\nop( A_i ) = A_i^H.\n\nNote that trmm_outofplace_strided_batched can provide in-place functionality in the same way as trmm_strided_batched\nby passing in the same address for both matrices B and C.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\nSpecifies whether op(A_i) multiplies B_i from the left or right as follows:\n- rocblas_side_left:       C_i := alpha*op( A_i )*B_i\n- rocblas_side_right:      C_i := alpha*B_i*op( A_i )\n\n@param[in]\nuplo    [rocblas_fill]\nSpecifies whether the matrix A is an upper or lower triangular matrix as follows:\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\nSpecifies the form of op(A_i) to be used in the matrix multiplication as follows:\n- rocblas_operation_none:    op(A_i) = A_i\n- rocblas_operation_transpose:      op(A_i) = A_i^T\n- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H\n\n@param[in]\ndiag    [rocblas_diagonal]\nSpecifies whether or not A_i is unit triangular as follows:\n- rocblas_diagonal_unit:      A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B_i. n >= 0.\n\n@param[in]\nalpha\nalpha specifies the scalar alpha. 
When alpha is\nzero then A_i is not referenced and B_i need not be set before\nentry.\n\n@param[in]\nA       Device pointer to the first matrix A_0 on the GPU.\nEach A_i is of dimension ( lda, k ), where k is m\nwhen  side == rocblas_side_left  and\nis  n  when  side == rocblas_side_right.\n\nWhen uplo == rocblas_fill_upper the  leading  k by k\nupper triangular part of the array  A must contain the upper\ntriangular matrix  and the strictly lower triangular part of\nA is not referenced.\n\nWhen uplo == rocblas_fill_lower the  leading  k by k\nlower triangular part of the array  A must contain the lower\ntriangular matrix  and the strictly upper triangular part of\nA is not referenced.\n\nNote that when  diag == rocblas_diagonal_unit  the diagonal elements of\nA_i  are not referenced either,  but are assumed to be  unity.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side == rocblas_side_left,  lda >= max( 1, m ),\nif side == rocblas_side_right, lda >= max( 1, n ).\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nB       Device pointer to the first matrix B_0 on the GPU.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B_i. ldb >= max( 1, m ).\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1).\n\n@param[out]\nC      Device pointer to the first matrix C_0 on the GPU.\n\n@param[in]\nldc   [rocblas_int]\nldc specifies the first dimension of C_i. ldc >= max( 1, m).\nIf B and C pointers are to the same matrix then ldc must equal ldb or\nrocblas_status_invalid_size will be returned.\n\n@param[in]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances i in the batch."]
+    pub fn rocblas_strmm_outofplace_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f64 member of the trmm_outofplace_strided_batched family (documented on rocblas_strmm_outofplace_strided_batched).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_dtrmm_outofplace_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side, // selects alpha*op(A_i)*B_i (left) or alpha*B_i*op(A_i) (right)
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of B_i
+        n: rocblas_int, // columns of B_i
+        alpha: *const f64,
+        A: *const f64, // read-only triangular input, first matrix A_0
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // distance from A_i to A_(i+1)
+        B: *const f64, // read-only input, first matrix B_0
+        ldb: rocblas_int,
+        stride_B: rocblas_stride, // distance from B_i to B_(i+1)
+        C: *mut f64, // output, first matrix C_0
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // distance from C_i to C_(i+1)
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_float_complex member of the trmm_outofplace_strided_batched family (documented on rocblas_strmm_outofplace_strided_batched).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ctrmm_outofplace_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side, // selects alpha*op(A_i)*B_i (left) or alpha*B_i*op(A_i) (right)
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of B_i
+        n: rocblas_int, // columns of B_i
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex, // read-only triangular input, first matrix A_0
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // distance from A_i to A_(i+1)
+        B: *const rocblas_float_complex, // read-only input, first matrix B_0
+        ldb: rocblas_int,
+        stride_B: rocblas_stride, // distance from B_i to B_(i+1)
+        C: *mut rocblas_float_complex, // output, first matrix C_0
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // distance from C_i to C_(i+1)
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_double_complex member of the trmm_outofplace_strided_batched family (documented on rocblas_strmm_outofplace_strided_batched).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ztrmm_outofplace_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side, // selects alpha*op(A_i)*B_i (left) or alpha*B_i*op(A_i) (right)
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of B_i
+        n: rocblas_int, // columns of B_i
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex, // read-only triangular input, first matrix A_0
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // distance from A_i to A_(i+1)
+        B: *const rocblas_double_complex, // read-only input, first matrix B_0
+        ldb: rocblas_int,
+        stride_B: rocblas_stride, // distance from B_i to B_(i+1)
+        C: *mut rocblas_double_complex, // output, first matrix C_0
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // distance from C_i to C_(i+1)
+        batch_count: rocblas_int, // number of instances in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f32 triangular-matrix inverse (trtri); the doc attribute below covers the whole trtri family.
+    #[must_use] // warn when the returned rocblas_status is ignored
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrtri  compute the inverse of a matrix A, namely, invA\nand write the result into invA;\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n\nif rocblas_fill_upper, the lower part of A is not referenced\nif rocblas_fill_lower, the upper part of A is not referenced\n@param[in]\ndiag      [rocblas_diagonal]\n- 'rocblas_diagonal_non_unit', A is non-unit triangular;\n- 'rocblas_diagonal_unit', A is unit triangular;\n@param[in]\nn         [rocblas_int]\nsize of matrix A and invA.\n@param[in]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[out]\ninvA      device pointer storing matrix invA.\n@param[in]\nldinvA    [rocblas_int]\nspecifies the leading dimension of invA."]
+    pub fn rocblas_strtri(
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of A is referenced
+        diag: rocblas_diagonal, // unit or non-unit triangular
+        n: rocblas_int, // size of A and invA
+        A: *const f32, // read-only input matrix
+        lda: rocblas_int,
+        invA: *mut f32, // output: receives the inverse
+        ldinvA: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f64 counterpart of rocblas_strtri (same parameter order; see the trtri doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_dtrtri(
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of A is referenced
+        diag: rocblas_diagonal, // unit or non-unit triangular
+        n: rocblas_int, // size of A and invA
+        A: *const f64, // read-only input matrix
+        lda: rocblas_int,
+        invA: *mut f64, // output: receives the inverse
+        ldinvA: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_float_complex counterpart of rocblas_strtri (same parameter order; see the trtri doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ctrtri(
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of A is referenced
+        diag: rocblas_diagonal, // unit or non-unit triangular
+        n: rocblas_int, // size of A and invA
+        A: *const rocblas_float_complex, // read-only input matrix
+        lda: rocblas_int,
+        invA: *mut rocblas_float_complex, // output: receives the inverse
+        ldinvA: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_double_complex counterpart of rocblas_strtri (same parameter order; see the trtri doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ztrtri(
+        handle: rocblas_handle,
+        uplo: rocblas_fill, // which triangle of A is referenced
+        diag: rocblas_diagonal, // unit or non-unit triangular
+        n: rocblas_int, // size of A and invA
+        A: *const rocblas_double_complex, // read-only input matrix
+        lda: rocblas_int,
+        invA: *mut rocblas_double_complex, // output: receives the inverse
+        ldinvA: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f32 batched triangular-matrix inverse; the doc attribute below covers the whole trtri_batched family.
+    #[must_use] // warn when the returned rocblas_status is ignored
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrtri_batched computes the inverse of A_i and writes it into invA_i where\nA_i and invA_i are the i-th matrices in the batch,\nfor i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n@param[in]\ndiag      [rocblas_diagonal]\n- 'rocblas_diagonal_non_unit', A is non-unit triangular;\n- 'rocblas_diagonal_unit', A is unit triangular;\n@param[in]\nn         [rocblas_int]\n@param[in]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[out]\ninvA      device array of device pointers storing the inverse of each matrix A_i.\nPartial inplace operation is supported. See below:\n- If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store\nthe inverse of the upper triangular matrix, and the strictly lower\ntriangular part of invA is cleared.\n- If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store\nthe inverse of the lower triangular matrix, and the strictly upper\ntriangular part of invA is cleared.\n@param[in]\nldinvA    [rocblas_int]\nspecifies the leading dimension of each invA_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of matrices in the batch."]
+    pub fn rocblas_strtri_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const f32, // array of pointers, one per read-only input matrix A_i
+        lda: rocblas_int,
+        invA: *const *mut f32, // array of pointers, one per output matrix invA_i
+        ldinvA: rocblas_int,
+        batch_count: rocblas_int, // number of matrices in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f64 counterpart of rocblas_strtri_batched (same parameter order; see the trtri_batched doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_dtrtri_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const f64, // array of pointers, one per read-only input matrix A_i
+        lda: rocblas_int,
+        invA: *const *mut f64, // array of pointers, one per output matrix invA_i
+        ldinvA: rocblas_int,
+        batch_count: rocblas_int, // number of matrices in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_float_complex counterpart of rocblas_strtri_batched (same parameter order; see the trtri_batched doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ctrtri_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const rocblas_float_complex, // array of pointers, one per read-only input matrix A_i
+        lda: rocblas_int,
+        invA: *const *mut rocblas_float_complex, // array of pointers, one per output matrix invA_i
+        ldinvA: rocblas_int,
+        batch_count: rocblas_int, // number of matrices in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_double_complex counterpart of rocblas_strtri_batched (same parameter order; see the trtri_batched doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ztrtri_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const rocblas_double_complex, // array of pointers, one per read-only input matrix A_i
+        lda: rocblas_int,
+        invA: *const *mut rocblas_double_complex, // array of pointers, one per output matrix invA_i
+        ldinvA: rocblas_int,
+        batch_count: rocblas_int, // number of matrices in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f32 strided-batched triangular-matrix inverse; the doc attribute below covers the whole trtri_strided_batched family.
+    #[must_use] // warn when the returned rocblas_status is ignored
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrtri_strided_batched compute the inverse of A_i and write into invA_i where\nA_i and invA_i are the i-th matrices in the batch,\nfor i = 1, ..., batch_count.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nuplo      [rocblas_fill]\nspecifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'\n@param[in]\ndiag      [rocblas_diagonal]\n- 'rocblas_diagonal_non_unit', A is non-unit triangular;\n- 'rocblas_diagonal_unit', A is unit triangular;\n@param[in]\nn         [rocblas_int]\n@param[in]\nA         device pointer pointing to address of first matrix A_1.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A.\n@param[in]\nstride_a  [rocblas_stride]\n\"batch stride a\": stride from the start of one A_i matrix to the next A_(i + 1).\n@param[out]\ninvA      device pointer storing the inverses of each matrix A_i.\nPartial inplace operation is supported. See below:\n\n- If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store\nthe inverse of the upper triangular matrix, and the strictly lower\ntriangular part of invA is cleared.\n\n- If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store\nthe inverse of the lower triangular matrix, and the strictly upper\ntriangular part of invA is cleared.\n@param[in]\nldinvA    [rocblas_int]\nspecifies the leading dimension of each invA_i.\n@param[in]\nstride_invA  [rocblas_stride]\n\"batch stride invA\": stride from the start of one invA_i matrix to the next invA_(i + 1).\n@param[in]\nbatch_count  [rocblas_int]\nnumbers of matrices in the batch."]
+    pub fn rocblas_strtri_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f32, // read-only input, first matrix of the batch
+        lda: rocblas_int,
+        stride_a: rocblas_stride, // distance from one A_i to the next
+        invA: *mut f32, // output, receives the inverses
+        ldinvA: rocblas_int,
+        stride_invA: rocblas_stride, // distance from one invA_i to the next
+        batch_count: rocblas_int, // number of matrices in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f64 counterpart of rocblas_strtri_strided_batched (same parameter order; see the doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_dtrtri_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f64, // read-only input, first matrix of the batch
+        lda: rocblas_int,
+        stride_a: rocblas_stride, // distance from one A_i to the next
+        invA: *mut f64, // output, receives the inverses
+        ldinvA: rocblas_int,
+        stride_invA: rocblas_stride, // distance from one invA_i to the next
+        batch_count: rocblas_int, // number of matrices in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_float_complex counterpart of rocblas_strtri_strided_batched (same parameter order; see the doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ctrtri_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_float_complex, // read-only input, first matrix of the batch
+        lda: rocblas_int,
+        stride_a: rocblas_stride, // distance from one A_i to the next
+        invA: *mut rocblas_float_complex, // output, receives the inverses
+        ldinvA: rocblas_int,
+        stride_invA: rocblas_stride, // distance from one invA_i to the next
+        batch_count: rocblas_int, // number of matrices in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_double_complex counterpart of rocblas_strtri_strided_batched (same parameter order; see the doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ztrtri_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_double_complex, // read-only input, first matrix of the batch
+        lda: rocblas_int,
+        stride_a: rocblas_stride, // distance from one A_i to the next
+        invA: *mut rocblas_double_complex, // output, receives the inverses
+        ldinvA: rocblas_int,
+        stride_invA: rocblas_stride, // distance from one invA_i to the next
+        batch_count: rocblas_int, // number of matrices in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f32 triangular solve (trsm); the doc attribute below covers the whole trsm family.
+    #[must_use] // warn when the returned rocblas_status is ignored
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrsm solves:\n\nop(A)*X = alpha*B or  X*op(A) = alpha*B,\n\nwhere alpha is a scalar, X and B are m by n matrices,\n\nA is a triangular matrix and op(A) is one of\n\nop( A ) = A   or   op( A ) = A^T   or   op( A ) = A^H.\n\nThe matrix X is overwritten on B.\n\nNote about memory allocation:\nWhen trsm is launched with a k evenly divisible by the internal block size of 128,\nand is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated\nmemory found in the handle to increase overall performance. This memory can be managed by using\nthe environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory\nused for temporary storage will default to 1 MB and may result in chunking, which in turn may\nreduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set\nto the desired chunk of right hand sides to be used at a time\n(where k is m when rocblas_side_left and is n when rocblas_side_right).\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\n- rocblas_side_left:       op(A)*X = alpha*B\n- rocblas_side_right:      X*op(A) = alpha*B\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a  lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B. 
n >= 0.\n\n@param[in]\nalpha\ndevice pointer or host pointer specifying the scalar alpha. When alpha is\n&zero then A is not referenced and B need not be set before\nentry.\n\n@param[in]\nA       device pointer storing matrix A.\nof dimension ( lda, k ), where k is m\nwhen  rocblas_side_left  and\nis  n  when  rocblas_side_right\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\nif side = rocblas_side_right, lda >= max( 1, n ).\n\n@param[in,out]\nB       device pointer storing matrix B.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B. ldb >= max( 1, m ).\n"]
+    pub fn rocblas_strsm(
+        handle: rocblas_handle,
+        side: rocblas_side, // left: op(A)*X = alpha*B, right: X*op(A) = alpha*B
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of B
+        n: rocblas_int, // columns of B
+        alpha: *const f32,
+        A: *const f32, // read-only triangular matrix
+        lda: rocblas_int,
+        B: *mut f32, // in/out: right-hand side on entry, overwritten with X
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f64 counterpart of rocblas_strsm (same parameter order; see the trsm doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_dtrsm(
+        handle: rocblas_handle,
+        side: rocblas_side, // left: op(A)*X = alpha*B, right: X*op(A) = alpha*B
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of B
+        n: rocblas_int, // columns of B
+        alpha: *const f64,
+        A: *const f64, // read-only triangular matrix
+        lda: rocblas_int,
+        B: *mut f64, // in/out: right-hand side on entry, overwritten with X
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_float_complex counterpart of rocblas_strsm (same parameter order; see the trsm doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ctrsm(
+        handle: rocblas_handle,
+        side: rocblas_side, // left: op(A)*X = alpha*B, right: X*op(A) = alpha*B
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of B
+        n: rocblas_int, // columns of B
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex, // read-only triangular matrix
+        lda: rocblas_int,
+        B: *mut rocblas_float_complex, // in/out: right-hand side on entry, overwritten with X
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_double_complex counterpart of rocblas_strsm (same parameter order; see the trsm doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ztrsm(
+        handle: rocblas_handle,
+        side: rocblas_side, // left: op(A)*X = alpha*B, right: X*op(A) = alpha*B
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of B
+        n: rocblas_int, // columns of B
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex, // read-only triangular matrix
+        lda: rocblas_int,
+        B: *mut rocblas_double_complex, // in/out: right-hand side on entry, overwritten with X
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f32 batched triangular solve; the doc attribute below covers the whole trsm_batched family.
+    #[must_use] // warn when the returned rocblas_status is ignored
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrsm_batched performs the following batched operation:\n\nop(A_i)*X_i = alpha*B_i or\nX_i*op(A_i) = alpha*B_i, for i = 1, ..., batch_count,\n\nwhere alpha is a scalar, X and B are batched m by n matrices,\n\nA is a triangular batched matrix and op(A) is one of\n\nop( A ) = A   or\nop( A ) = A^T   or\nop( A ) = A^H.\n\nEach matrix X_i is overwritten on B_i for i = 1, ..., batch_count.\n\nNote about memory allocation:\nWhen trsm is launched with a k evenly divisible by the internal block size of 128,\nand is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated\nmemory found in the handle to increase overall performance. This memory can be managed by using\nthe environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory\nused for temporary storage will default to 1 MB and may result in chunking, which in turn may\nreduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set\nto the desired chunk of right hand sides to be used at a time\n(where k is m when rocblas_side_left and is n when rocblas_side_right).\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nside    [rocblas_side]\n- rocblas_side_left:       op(A)*X = alpha*B\n- rocblas_side_right:      X*op(A) = alpha*B\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  each A_i is an upper triangular matrix.\n- rocblas_fill_lower:  each A_i is a  lower triangular matrix.\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none:    op(A) = A\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  each A_i is not assumed to be unit triangular.\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of each 
B_i. m >= 0.\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of each B_i. n >= 0.\n@param[in]\nalpha\ndevice pointer or host pointer specifying the scalar alpha. When alpha is\n&zero then A is not referenced and B need not be set before\nentry.\n@param[in]\nA       device array of device pointers storing each matrix A_i on the GPU.\nMatrices are of dimension ( lda, k ), where k is m\nwhen  rocblas_side_left  and is  n  when  rocblas_side_right\nonly the upper/lower triangular part is accessed.\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of each A_i.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\nif side = rocblas_side_right, lda >= max( 1, n ).\n@param[in,out]\nB       device array of device pointers storing each matrix B_i on the GPU.\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of each B_i. ldb >= max( 1, m ).\n@param[in]\nbatch_count [rocblas_int]\nnumber of trsm operations in the batch."]
+    pub fn rocblas_strsm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side, // left: op(A)*X = alpha*B, right: X*op(A) = alpha*B
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of each B_i
+        n: rocblas_int, // columns of each B_i
+        alpha: *const f32,
+        A: *const *const f32, // array of pointers, one per read-only triangular matrix A_i
+        lda: rocblas_int,
+        B: *const *mut f32, // array of pointers, one per in/out matrix B_i (overwritten with X_i)
+        ldb: rocblas_int,
+        batch_count: rocblas_int, // number of trsm operations in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f64 counterpart of rocblas_strsm_batched (same parameter order; see the trsm_batched doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_dtrsm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side, // left: op(A)*X = alpha*B, right: X*op(A) = alpha*B
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of each B_i
+        n: rocblas_int, // columns of each B_i
+        alpha: *const f64,
+        A: *const *const f64, // array of pointers, one per read-only triangular matrix A_i
+        lda: rocblas_int,
+        B: *const *mut f64, // array of pointers, one per in/out matrix B_i (overwritten with X_i)
+        ldb: rocblas_int,
+        batch_count: rocblas_int, // number of trsm operations in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_float_complex counterpart of rocblas_strsm_batched (same parameter order; see the trsm_batched doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ctrsm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side, // left: op(A)*X = alpha*B, right: X*op(A) = alpha*B
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of each B_i
+        n: rocblas_int, // columns of each B_i
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex, // array of pointers, one per read-only triangular matrix A_i
+        lda: rocblas_int,
+        B: *const *mut rocblas_float_complex, // array of pointers, one per in/out matrix B_i (overwritten with X_i)
+        ldb: rocblas_int,
+        batch_count: rocblas_int, // number of trsm operations in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_double_complex counterpart of rocblas_strsm_batched (same parameter order; see the trsm_batched doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ztrsm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side, // left: op(A)*X = alpha*B, right: X*op(A) = alpha*B
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of each B_i
+        n: rocblas_int, // columns of each B_i
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex, // array of pointers, one per read-only triangular matrix A_i
+        lda: rocblas_int,
+        B: *const *mut rocblas_double_complex, // array of pointers, one per in/out matrix B_i (overwritten with X_i)
+        ldb: rocblas_int,
+        batch_count: rocblas_int, // number of trsm operations in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f32 strided-batched triangular solve; the doc attribute below covers the whole trsm_strided_batched family.
+    #[must_use] // warn when the returned rocblas_status is ignored
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ntrsm_strided_batched performs the following strided batched operation:\n\nop(A_i)*X_i = alpha*B_i or\nX_i*op(A_i) = alpha*B_i, for i = 1, ..., batch_count,\n\nwhere alpha is a scalar, X and B are strided batched m by n matrices,\n\nA is a triangular strided batched matrix and op(A) is one of\n\nop( A ) = A   or\nop( A ) = A^T   or\nop( A ) = A^H.\n\nEach matrix X_i is overwritten on B_i for i = 1, ..., batch_count.\n\nNote about memory allocation:\nWhen trsm is launched with a k evenly divisible by the internal block size of 128,\nand is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated\nmemory found in the handle to increase overall performance. This memory can be managed by using\nthe environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory\nused for temporary storage will default to 1 MB and may result in chunking, which in turn may\nreduce performance. 
Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set\nto the desired chunk of right hand sides to be used at a time\n(where k is m when rocblas_side_left and is n when rocblas_side_right).\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nside    [rocblas_side]\n- rocblas_side_left:       op(A)*X = alpha*B.\n- rocblas_side_right:      X*op(A) = alpha*B.\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  each A_i is an upper triangular matrix.\n- rocblas_fill_lower:  each A_i is a  lower triangular matrix.\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:      op(A) = A^T.\n- rocblas_operation_conjugate_transpose:  op(A) = A^H.\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  each A_i is not assumed to be unit triangular.\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of each B_i. m >= 0.\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of each B_i. n >= 0.\n@param[in]\nalpha\ndevice pointer or host pointer specifying the scalar alpha. When alpha is\n&zero then A is not referenced and B need not be set before\nentry.\n@param[in]\nA       device pointer pointing to the first matrix A_1.\nof dimension ( lda, k ), where k is m\nwhen  rocblas_side_left  and\nis  n  when  rocblas_side_right\nonly the upper/lower triangular part is accessed.\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of each A_i.\n\nif side = rocblas_side_left,  lda >= max( 1, m ).\nif side = rocblas_side_right, lda >= max( 1, n ).\n@param[in]\nstride_a [rocblas_stride]\nstride from the start of one A_i matrix to the next A_(i + 1).\n@param[in,out]\nB       device pointer pointing to the first matrix B_1.\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of each B_i. 
ldb >= max( 1, m ).\n@param[in]\nstride_b [rocblas_stride]\nstride from the start of one B_i matrix to the next B_(i + 1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of trsm operations in the batch."]
+    pub fn rocblas_strsm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side, // left: op(A)*X = alpha*B, right: X*op(A) = alpha*B
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of each B_i
+        n: rocblas_int, // columns of each B_i
+        alpha: *const f32,
+        A: *const f32, // read-only, first triangular matrix of the batch
+        lda: rocblas_int,
+        stride_a: rocblas_stride, // distance from one A_i to the next
+        B: *mut f32, // in/out, first B_i of the batch (each overwritten with X_i)
+        ldb: rocblas_int,
+        stride_b: rocblas_stride, // distance from one B_i to the next
+        batch_count: rocblas_int, // number of trsm operations in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f64 counterpart of rocblas_strsm_strided_batched (same parameter order; see the doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_dtrsm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side, // left: op(A)*X = alpha*B, right: X*op(A) = alpha*B
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of each B_i
+        n: rocblas_int, // columns of each B_i
+        alpha: *const f64,
+        A: *const f64, // read-only, first triangular matrix of the batch
+        lda: rocblas_int,
+        stride_a: rocblas_stride, // distance from one A_i to the next
+        B: *mut f64, // in/out, first B_i of the batch (each overwritten with X_i)
+        ldb: rocblas_int,
+        stride_b: rocblas_stride, // distance from one B_i to the next
+        batch_count: rocblas_int, // number of trsm operations in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_float_complex counterpart of rocblas_strsm_strided_batched (same parameter order; see the doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ctrsm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side, // left: op(A)*X = alpha*B, right: X*op(A) = alpha*B
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of each B_i
+        n: rocblas_int, // columns of each B_i
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex, // read-only, first triangular matrix of the batch
+        lda: rocblas_int,
+        stride_a: rocblas_stride, // distance from one A_i to the next
+        B: *mut rocblas_float_complex, // in/out, first B_i of the batch (each overwritten with X_i)
+        ldb: rocblas_int,
+        stride_b: rocblas_stride, // distance from one B_i to the next
+        batch_count: rocblas_int, // number of trsm operations in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_double_complex counterpart of rocblas_strsm_strided_batched (same parameter order; see the doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_ztrsm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side, // left: op(A)*X = alpha*B, right: X*op(A) = alpha*B
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of each B_i
+        n: rocblas_int, // columns of each B_i
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex, // read-only, first triangular matrix of the batch
+        lda: rocblas_int,
+        stride_a: rocblas_stride, // distance from one A_i to the next
+        B: *mut rocblas_double_complex, // in/out, first B_i of the batch (each overwritten with X_i)
+        ldb: rocblas_int,
+        stride_b: rocblas_stride, // distance from one B_i to the next
+        batch_count: rocblas_int, // number of trsm operations in the batch
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f32 general matrix-matrix multiply (gemm); the doc attribute below covers the whole gemm family.
+    #[must_use] // warn when the returned rocblas_status is ignored
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ngemm performs one of the matrix-matrix operations:\n\nC = alpha*op( A )*op( B ) + beta*C,\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B and C are matrices, with\nop( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nnumber of rows of matrices op( A ) and C.\n@param[in]\nn         [rocblas_int]\nnumber of columns of matrices op( B ) and C.\n@param[in]\nk         [rocblas_int]\nnumber of columns of matrix op( A ) and number of rows of matrix op( B ).\n@param[in]\nalpha     device pointer or host pointer specifying the scalar alpha.\n@param[in]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nB         device pointer storing matrix B.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of B.\n@param[in]\nbeta      device pointer or host pointer specifying the scalar beta.\n@param[in, out]\nC         device pointer storing matrix C on the GPU.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C.\n"]
+    pub fn rocblas_sgemm(
+        handle: rocblas_handle,
+        transA: rocblas_operation, // form of op(A)
+        transB: rocblas_operation, // form of op(B)
+        m: rocblas_int, // rows of op(A) and C
+        n: rocblas_int, // columns of op(B) and C
+        k: rocblas_int, // columns of op(A) and rows of op(B)
+        alpha: *const f32,
+        A: *const f32, // read-only input matrix
+        lda: rocblas_int,
+        B: *const f32, // read-only input matrix
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *mut f32, // in/out: scaled by beta and accumulated into
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f64 counterpart of rocblas_sgemm (same parameter order; see the gemm doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_dgemm(
+        handle: rocblas_handle,
+        transA: rocblas_operation, // form of op(A)
+        transB: rocblas_operation, // form of op(B)
+        m: rocblas_int, // rows of op(A) and C
+        n: rocblas_int, // columns of op(B) and C
+        k: rocblas_int, // columns of op(A) and rows of op(B)
+        alpha: *const f64,
+        A: *const f64, // read-only input matrix
+        lda: rocblas_int,
+        B: *const f64, // read-only input matrix
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *mut f64, // in/out: C = alpha*op(A)*op(B) + beta*C
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_half counterpart of rocblas_sgemm (same parameter order; see the gemm doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_hgemm(
+        handle: rocblas_handle,
+        transA: rocblas_operation, // form of op(A)
+        transB: rocblas_operation, // form of op(B)
+        m: rocblas_int, // rows of op(A) and C
+        n: rocblas_int, // columns of op(B) and C
+        k: rocblas_int, // columns of op(A) and rows of op(B)
+        alpha: *const rocblas_half,
+        A: *const rocblas_half, // read-only input matrix
+        lda: rocblas_int,
+        B: *const rocblas_half, // read-only input matrix
+        ldb: rocblas_int,
+        beta: *const rocblas_half,
+        C: *mut rocblas_half, // in/out: C = alpha*op(A)*op(B) + beta*C
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_float_complex counterpart of rocblas_sgemm (same parameter order; see the gemm doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_cgemm(
+        handle: rocblas_handle,
+        transA: rocblas_operation, // form of op(A)
+        transB: rocblas_operation, // form of op(B)
+        m: rocblas_int, // rows of op(A) and C
+        n: rocblas_int, // columns of op(B) and C
+        k: rocblas_int, // columns of op(A) and rows of op(B)
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex, // read-only input matrix
+        lda: rocblas_int,
+        B: *const rocblas_float_complex, // read-only input matrix
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex, // in/out: C = alpha*op(A)*op(B) + beta*C
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: rocblas_double_complex counterpart of rocblas_sgemm (same parameter order; see the gemm doc attribute there).
+    #[must_use] // warn when the returned rocblas_status is ignored
+    pub fn rocblas_zgemm(
+        handle: rocblas_handle,
+        transA: rocblas_operation, // form of op(A)
+        transB: rocblas_operation, // form of op(B)
+        m: rocblas_int, // rows of op(A) and C
+        n: rocblas_int, // columns of op(B) and C
+        k: rocblas_int, // columns of op(A) and rows of op(B)
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex, // read-only input matrix
+        lda: rocblas_int,
+        B: *const rocblas_double_complex, // read-only input matrix
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex, // in/out: C = alpha*op(A)*op(B) + beta*C
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // FFI binding: f32 batched general matrix-matrix multiply; the doc attribute below covers the whole gemm_batched family.
+    #[must_use] // warn when the returned rocblas_status is ignored
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ngemm_batched performs one of the batched matrix-matrix operations:\n\nC_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batch_count,\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B and C are batched matrices, with\n\nop( A ) an m by k by batch_count batched matrix,\nop( B ) a k by n by batch_count batched matrix and\nC an m by n by batch_count batched matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     device pointer or host pointer specifying the scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nB         device array of device pointers storing each matrix B_i.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of each B_i.\n@param[in]\nbeta      device pointer or host pointer specifying the scalar beta.\n@param[in, out]\nC         device array of device pointers storing each matrix C_i.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of each C_i.\n@param[in]\nbatch_count\n[rocblas_int]\nnumber of gemm operations in the batch."]
+    pub fn rocblas_sgemm_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation, // form of op(A_i)
+        transB: rocblas_operation, // form of op(B_i)
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32, // array of pointers, one per read-only matrix A_i
+        lda: rocblas_int,
+        B: *const *const f32, // array of pointers, one per read-only matrix B_i
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *const *mut f32, // array of pointers, one per in/out matrix C_i
+        ldc: rocblas_int,
+        batch_count: rocblas_int, // number of gemm operations in the batch
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgemm_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        B: *const *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hgemm_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_half,
+        A: *const *const rocblas_half,
+        lda: rocblas_int,
+        B: *const *const rocblas_half,
+        ldb: rocblas_int,
+        beta: *const rocblas_half,
+        C: *const *mut rocblas_half,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgemm_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgemm_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ngemm_strided_batched performs one of the strided batched matrix-matrix operations:\n\nC_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batch_count,\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B and C are strided batched matrices, with\nop( A ) an m by k by batch_count strided_batched matrix,\nop( B ) a k by n by batch_count strided_batched matrix and\nC an m by n by batch_count strided_batched matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     device pointer or host pointer specifying the scalar alpha.\n@param[in]\nA         device pointer pointing to the first matrix A_1.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstride_a  [rocblas_stride]\nstride from the start of one A_i matrix to the next A_(i + 1).\n@param[in]\nB         device pointer pointing to the first matrix B_1.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of each B_i.\n@param[in]\nstride_b  [rocblas_stride]\nstride from the start of one B_i matrix to the next B_(i + 1).\n@param[in]\nbeta      device pointer or host pointer specifying the scalar beta.\n@param[in, out]\nC         device pointer pointing to the first matrix C_1.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of each C_i.\n@param[in]\nstride_c  [rocblas_stride]\nstride from the start of one C_i matrix to the next C_(i + 1).\n@param[in]\nbatch_count\n[rocblas_int]\nnumber of gemm operations in the 
batch.\n"]
+    pub fn rocblas_sgemm_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgemm_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hgemm_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_half,
+        A: *const rocblas_half,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const rocblas_half,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_half,
+        C: *mut rocblas_half,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hgemm_kernel_name(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_half,
+        A: *const rocblas_half,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const rocblas_half,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_half,
+        C: *mut rocblas_half,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sgemm_kernel_name(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgemm_kernel_name(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgemm_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgemm_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ndgmm performs one of the matrix-matrix operations:\n\nC = A * diag(x) if side == rocblas_side_right\nC = diag(x) * A if side == rocblas_side_left\n\nwhere C and A are m by n dimensional matrices. diag( x ) is a diagonal matrix\nand x is a vector of dimension n if side == rocblas_side_right and dimension m\nif side == rocblas_side_left.\n\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nside      [rocblas_side]\nspecifies the side of diag(x).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment between values of x.\n@param[in, out]\nC         device pointer storing matrix C.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C.\n"]
+    pub fn rocblas_sdgmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        C: *mut f32,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddgmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        C: *mut f64,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdgmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdgmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ndgmm_batched performs one of the batched matrix-matrix operations:\n\nC_i = A_i * diag(x_i) for i = 0, 1, ... batch_count-1 if side == rocblas_side_right\nC_i = diag(x_i) * A_i for i = 0, 1, ... batch_count-1 if side == rocblas_side_left,\n\nwhere C_i and A_i are m by n dimensional matrices. diag(x_i) is a diagonal matrix\nand x_i is a vector of dimension n if side == rocblas_side_right and dimension m\nif side == rocblas_side_left.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nside      [rocblas_side]\nspecifies the side of diag(x).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nA         device array of device pointers storing each matrix A_i on the GPU.\nEach A_i is of dimension ( lda, n ).\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A_i.\n@param[in]\nx         device array of device pointers storing each vector x_i on the GPU.\nEach x_i is of dimension n if side == rocblas_side_right and dimension\nm if side == rocblas_side_left.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment between values of x_i.\n@param[in, out]\nC         device array of device pointers storing each matrix C_i on the GPU.\nEach C_i is of dimension ( ldc, n ).\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n"]
+    pub fn rocblas_sdgmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const *const f32,
+        lda: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddgmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const *const f64,
+        lda: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdgmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdgmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ndgmm_strided_batched performs one of the batched matrix-matrix operations:\n\nC_i = A_i * diag(x_i)   if side == rocblas_side_right   for i = 0, 1, ... batch_count-1\nC_i = diag(x_i) * A_i   if side == rocblas_side_left    for i = 0, 1, ... batch_count-1,\n\nwhere C_i and A_i are m by n dimensional matrices. diag(x_i) is a diagonal matrix\nand x_i is a vector of dimension n if side == rocblas_side_right and dimension m\nif side == rocblas_side_left.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nside      [rocblas_side]\nspecifies the side of diag(x).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nA         device pointer to the first matrix A_0 on the GPU.\nEach A_i is of dimension ( lda, n ).\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n@param[in]\nx         pointer to the first vector x_0 on the GPU.\nEach x_i is of dimension n if side == rocblas_side_right and dimension\nm if side == rocblas_side_left.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment between values of x.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector(x_i) and the next one (x_i+1).\n@param[in, out]\nC         device pointer to the first matrix C_0 on the GPU.\nEach C_i is of dimension ( ldc, n ).\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C.\n@param[in]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the next one (C_i+1).\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances i in the batch.\n"]
+    pub fn rocblas_sdgmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddgmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdgmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdgmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ngeam performs one of the matrix-matrix operations:\n\nC = alpha*op( A ) + beta*op( B ),\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B and C are matrices, with\nop( A ) an m by n matrix, op( B ) an m by n matrix, and C an m by n matrix.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nalpha     device pointer or host pointer specifying the scalar alpha.\n@param[in]\nA         device pointer storing matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nbeta      device pointer or host pointer specifying the scalar beta.\n@param[in]\nB         device pointer storing matrix B.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of B.\n@param[in, out]\nC         device pointer storing matrix C.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C.\n"]
+    pub fn rocblas_sgeam(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        beta: *const f32,
+        B: *const f32,
+        ldb: rocblas_int,
+        C: *mut f32,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgeam(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        beta: *const f64,
+        B: *const f64,
+        ldb: rocblas_int,
+        C: *mut f64,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgeam(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        beta: *const rocblas_float_complex,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgeam(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        beta: *const rocblas_double_complex,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ngeam_batched performs one of the batched matrix-matrix operations:\n\nC_i = alpha*op( A_i ) + beta*op( B_i )  for i = 0, 1, ... batch_count - 1,\n\nwhere alpha and beta are scalars, and op(A_i), op(B_i) and C_i are m by n matrices\nand op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nalpha     device pointer or host pointer specifying the scalar alpha.\n@param[in]\nA         device array of device pointers storing each matrix A_i on the GPU.\nEach A_i is of dimension ( lda, k ), where k is m\nwhen  transA == rocblas_operation_none and\nis  n  when  transA == rocblas_operation_transpose.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nbeta      device pointer or host pointer specifying the scalar beta.\n@param[in]\nB         device array of device pointers storing each matrix B_i on the GPU.\nEach B_i is of dimension ( ldb, k ), where k is m\nwhen  transB == rocblas_operation_none and\nis  n  when  transB == rocblas_operation_transpose.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of B.\n@param[in, out]\nC         device array of device pointers storing each matrix C_i on the GPU.\nEach C_i is of dimension ( ldc, n ).\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C.\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances i in the batch.\n"]
+    pub fn rocblas_sgeam_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        beta: *const f32,
+        B: *const *const f32,
+        ldb: rocblas_int,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgeam_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        beta: *const f64,
+        B: *const *const f64,
+        ldb: rocblas_int,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgeam_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        beta: *const rocblas_float_complex,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgeam_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        beta: *const rocblas_double_complex,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS Level 3 API </b>\n\n\\details\ngeam_strided_batched performs one of the batched matrix-matrix operations:\n\nC_i = alpha*op( A_i ) + beta*op( B_i )  for i = 0, 1, ... batch_count - 1,\n\nwhere alpha and beta are scalars, and op(A_i), op(B_i) and C_i are m by n matrices\nand op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n\n@param[in]\nalpha     device pointer or host pointer specifying the scalar alpha.\n\n@param[in]\nA         device pointer to the first matrix A_0 on the GPU.\nEach A_i is of dimension ( lda, k ), where k is m\nwhen  transA == rocblas_operation_none and\nis  n  when  transA == rocblas_operation_transpose.\n\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n\n@param[in]\nstride_A  [rocblas_stride]\nstride from the start of one matrix (A_i) and the next one (A_i+1).\n\n@param[in]\nbeta      device pointer or host pointer specifying the scalar beta.\n\n@param[in]\nB         pointer to the first matrix B_0 on the GPU.\nEach B_i is of dimension ( ldb, k ), where k is m\nwhen  transB == rocblas_operation_none and\nis  n  when  transB == rocblas_operation_transpose.\n\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of B.\n\n@param[in]\nstride_B  [rocblas_stride]\nstride from the start of one matrix (B_i) and the next one (B_i+1)\n\n@param[in, out]\nC         pointer to the first matrix C_0 on the GPU.\nEach C_i is of dimension ( ldc, n ).\n\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C.\n\n@param[in]\nstride_C  [rocblas_stride]\nstride from the start of one matrix (C_i) and the 
next one (C_i+1).\n\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances i in the batch.\n"]
+    pub fn rocblas_sgeam_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const f32,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgeam_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const f64,
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgeam_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+extern "C" { // Declaration only: the symbol comes from the `rocblas` dylib that build.rs asks cargo to link.
+    #[must_use] // Dropping the returned rocblas_status without inspecting it produces an unused-result warning.
+    pub fn rocblas_zgeam_strided_batched( // rocblas_double_complex variant. NOTE(review): presumably C_i = alpha*op(A_i) + beta*op(B_i) per batch entry — confirm against rocblas.h.
+        handle: rocblas_handle,
+        transA: rocblas_operation, // NOTE(review): presumably selects op() applied to A — confirm
+        transB: rocblas_operation, // NOTE(review): presumably selects op() applied to B — confirm
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex, // read-only pointer; presumably a single scalar — confirm
+        A: *const rocblas_double_complex, // read-only input buffer
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // NOTE(review): presumably the distance between consecutive A_i — confirm
+        beta: *const rocblas_double_complex, // read-only pointer; presumably a single scalar — confirm
+        B: *const rocblas_double_complex, // read-only input buffer
+        ldb: rocblas_int,
+        stride_B: rocblas_stride, // NOTE(review): presumably the distance between consecutive B_i — confirm
+        C: *mut rocblas_double_complex, // the only writable buffer in this signature
+        ldc: rocblas_int,
+        stride_C: rocblas_stride, // NOTE(review): presumably the distance between consecutive C_i — confirm
+        batch_count: rocblas_int, // NOTE(review): presumably the number of matrices in the batch — confirm
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ngemm_ex performs one of the matrix-matrix operations:\n\nD = alpha*op( A )*op( B ) + beta*C,\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B, C, and D are matrices, with\nop( A ) an m by k matrix, op( B ) a k by n matrix and C and D are m by n matrices.\nC and D may point to the same matrix if their parameters are identical.\n\nSupported types are as follows:\n- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_f16_r = a_type = b_type; rocblas_datatype_f32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_bf16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_bf16_r = a_type = b_type; rocblas_datatype_f32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_i8_r = a_type = b_type; rocblas_datatype_i32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_f32_c  = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f64_c  = a_type = b_type = c_type = d_type = compute_type\n\nTwo int8 datatypes are supported: int8_t and rocblas_int8x4. int8_t is the C99 signed\n8 bit integer. The default is int8_t and it is recommended int8_t be used. rocblas_int8x4\nis a packed datatype. 
The packed int 8 datatype occurs if the user sets:\n\n@code\nflags |= rocblas_gemm_flags_pack_int8x4;\n@endcode\n\nFor this packed int8 datatype matrices A and B are packed into int8x4 in the k dimension.\nThis will impose the following size restrictions on A or B:\n\n- k must be a multiple of 4\n- if transA == rocblas_operation_transpose then lda must be a multiple of 4\n- if transB == rocblas_operation_none then ldb must be a multiple of 4\n- if transA == rocblas_operation_none the matrix A must have each 4 consecutive\nvalues in the k dimension packed\n- if transB == rocblas_operation_transpose the matrix B must have each 4\nconsecutive values in the k dimension packed.\n\nThis packing can be achieved with the following pseudo-code. The code assumes the\noriginal matrices are in A and B, and the packed matrices are A_packed and B_packed.\nThe size of the A_packed and B_packed are the same as the size of the A and B respectively.\n\n@code\nif(transA == rocblas_operation_none)\n{\nint nb = 4;\nfor(int i_m = 0; i_m < m; i_m++)\n{\nfor(int i_k = 0; i_k < k; i_k++)\n{\nA_packed[i_k % nb + (i_m + (i_k / nb) * lda) * nb] = A[i_m + i_k * lda];\n}\n}\n}\nelse\n{\nA_packed = A;\n}\nif(transB == rocblas_operation_transpose)\n{\nint nb = 4;\nfor(int i_n = 0; i_n < n; i_n++)\n{\nfor(int i_k = 0; i_k < k; i_k++)\n{\nB_packed[i_k % nb + (i_n + (i_k / nb) * ldb) * nb] = B[i_n + i_k * ldb];\n}\n}\n}\nelse\n{\nB_packed = B;\n}\n@endcode\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     [const void *]\ndevice pointer or host pointer specifying the scalar alpha. 
Same datatype as compute_type.\n@param[in]\na         [void *]\ndevice pointer storing matrix A.\n@param[in]\na_type    [rocblas_datatype]\nspecifies the datatype of matrix A.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of A.\n@param[in]\nb         [void *]\ndevice pointer storing matrix B.\n@param[in]\nb_type    [rocblas_datatype]\nspecifies the datatype of matrix B.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of B.\n@param[in]\nbeta      [const void *]\ndevice pointer or host pointer specifying the scalar beta. Same datatype as compute_type.\n@param[in]\nc         [void *]\ndevice pointer storing matrix C.\n@param[in]\nc_type    [rocblas_datatype]\nspecifies the datatype of matrix C.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of C.\n@param[out]\nd         [void *]\ndevice pointer storing matrix D.\nIf d and c pointers are to the same matrix then d_type must equal c_type and ldd must equal ldc\nor the respective invalid status will be returned.\n@param[in]\nd_type    [rocblas_datatype]\nspecifies the datatype of matrix D.\n@param[in]\nldd       [rocblas_int]\nspecifies the leading dimension of D.\n@param[in]\ncompute_type\n[rocblas_datatype]\nspecifies the datatype of computation.\n@param[in]\nalgo      [rocblas_gemm_algo]\nenumerant specifying the algorithm type.\n@param[in]\nsolution_index\n[int32_t]\nreserved for future use.\n@param[in]\nflags     [uint32_t]\noptional gemm flags.\n"]
+    pub fn rocblas_gemm_ex(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const ::std::os::raw::c_void,
+        a: *const ::std::os::raw::c_void,
+        a_type: rocblas_datatype,
+        lda: rocblas_int,
+        b: *const ::std::os::raw::c_void,
+        b_type: rocblas_datatype,
+        ldb: rocblas_int,
+        beta: *const ::std::os::raw::c_void,
+        c: *const ::std::os::raw::c_void,
+        c_type: rocblas_datatype,
+        ldc: rocblas_int,
+        d: *mut ::std::os::raw::c_void,
+        d_type: rocblas_datatype,
+        ldd: rocblas_int,
+        compute_type: rocblas_datatype,
+        algo: rocblas_gemm_algo,
+        solution_index: i32,
+        flags: u32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ngemm_batched_ex performs one of the batched matrix-matrix operations:\nD_i = alpha*op(A_i)*op(B_i) + beta*C_i, for i = 1, ..., batch_count.\nwhere op( X ) is one of\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\nalpha and beta are scalars, and A, B, C, and D are batched pointers to matrices, with\nop( A ) an m by k by batch_count batched matrix,\nop( B ) a k by n by batch_count batched matrix and\nC and D are m by n by batch_count batched matrices.\nThe batched matrices are an array of pointers to matrices.\nThe number of pointers to matrices is batch_count.\nC and D may point to the same matrices if their parameters are identical.\n\nSupported types are as follows:\n- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_bf16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_i8_r = a_type = b_type; rocblas_datatype_i32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_f32_c  = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f64_c  = a_type = b_type = c_type = d_type = compute_type\n\nTwo int8 datatypes are supported: int8_t and rocblas_int8x4. int8_t is the C99 signed\n8 bit integer. The default is int8_t and it is recommended int8_t be used. rocblas_int8x4\nis a packed datatype. 
The packed int 8 datatype occurs if the user sets:\n\n@code\nflags |= rocblas_gemm_flags_pack_int8x4;\n@endcode\n\nFor this packed int8 datatype matrices A and B are packed into int8x4 in the k dimension.\nThis will impose the following size restrictions on A or B:\n\n- k must be a multiple of 4\n- if transA == rocblas_operation_transpose then lda must be a multiple of 4\n- if transB == rocblas_operation_none then ldb must be a multiple of 4\n- if transA == rocblas_operation_none the matrix A must have each 4 consecutive\nvalues in the k dimension packed\n- if transB == rocblas_operation_transpose the matrix B must have each 4\nconsecutive values in the k dimension packed.\n\nThis packing can be achieved with the following pseudo-code. The code assumes the\noriginal matrices are in A and B, and the packed matrices are A_packed and B_packed.\nThe size of the A_packed and B_packed are the same as the size of the A and B respectively.\n\n@code\nif(transA == rocblas_operation_none)\n{\nint nb = 4;\nfor(int i_m = 0; i_m < m; i_m++)\n{\nfor(int i_k = 0; i_k < k; i_k++)\n{\nA_packed[i_k % nb + (i_m + (i_k / nb) * lda) * nb] = A[i_m + i_k * lda];\n}\n}\n}\nelse\n{\nA_packed = A;\n}\nif(transB == rocblas_operation_transpose)\n{\nint nb = 4;\nfor(int i_n = 0; i_n < n; i_n++)\n{\nfor(int i_k = 0; i_k < k; i_k++)\n{\nB_packed[i_k % nb + (i_n + (i_k / nb) * ldb) * nb] = B[i_n + i_k * ldb];\n}\n}\n}\nelse\n{\nB_packed = B;\n}\n@endcode\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     [const void *]\ndevice pointer or host pointer specifying the scalar alpha. 
Same datatype as compute_type.\n@param[in]\na         [void *]\ndevice pointer storing array of pointers to each matrix A_i.\n@param[in]\na_type    [rocblas_datatype]\nspecifies the datatype of each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nb         [void *]\ndevice pointer storing array of pointers to each matrix B_i.\n@param[in]\nb_type    [rocblas_datatype]\nspecifies the datatype of each matrix B_i.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of each B_i.\n@param[in]\nbeta      [const void *]\ndevice pointer or host pointer specifying the scalar beta. Same datatype as compute_type.\n@param[in]\nc         [void *]\ndevice array of device pointers to each matrix C_i.\n@param[in]\nc_type    [rocblas_datatype]\nspecifies the datatype of each matrix C_i.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of each C_i.\n@param[out]\nd         [void *]\ndevice array of device pointers to each matrix D_i.\nIf d and c are the same array of matrix pointers then d_type must equal c_type and ldd must equal ldc\nor the respective invalid status will be returned.\n@param[in]\nd_type    [rocblas_datatype]\nspecifies the datatype of each matrix D_i.\n@param[in]\nldd       [rocblas_int]\nspecifies the leading dimension of each D_i.\n@param[in]\nbatch_count\n[rocblas_int]\nnumber of gemm operations in the batch.\n@param[in]\ncompute_type\n[rocblas_datatype]\nspecifies the datatype of computation.\n@param[in]\nalgo      [rocblas_gemm_algo]\nenumerant specifying the algorithm type.\n@param[in]\nsolution_index\n[int32_t]\nreserved for future use.\n@param[in]\nflags     [uint32_t]\noptional gemm flags.\n"]
+    pub fn rocblas_gemm_batched_ex(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const ::std::os::raw::c_void,
+        a: *const ::std::os::raw::c_void,
+        a_type: rocblas_datatype,
+        lda: rocblas_int,
+        b: *const ::std::os::raw::c_void,
+        b_type: rocblas_datatype,
+        ldb: rocblas_int,
+        beta: *const ::std::os::raw::c_void,
+        c: *const ::std::os::raw::c_void,
+        c_type: rocblas_datatype,
+        ldc: rocblas_int,
+        d: *mut ::std::os::raw::c_void,
+        d_type: rocblas_datatype,
+        ldd: rocblas_int,
+        batch_count: rocblas_int,
+        compute_type: rocblas_datatype,
+        algo: rocblas_gemm_algo,
+        solution_index: i32,
+        flags: u32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ngemm_strided_batched_ex performs one of the strided_batched matrix-matrix operations:\n\nD_i = alpha*op(A_i)*op(B_i) + beta*C_i, for i = 1, ..., batch_count\n\nwhere op( X ) is one of\n\nop( X ) = X      or\nop( X ) = X**T   or\nop( X ) = X**H,\n\nalpha and beta are scalars, and A, B, C, and D are strided_batched matrices, with\nop( A ) an m by k by batch_count strided_batched matrix,\nop( B ) a k by n by batch_count strided_batched matrix and\nC and D are m by n by batch_count strided_batched matrices.\nC and D may point to the same matrices if their parameters are identical.\n\nThe strided_batched matrices are multiple matrices separated by a constant stride.\nThe number of matrices is batch_count.\n\nSupported types are as follows:\n- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_bf16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =\ncompute_type\n- rocblas_datatype_i8_r = a_type = b_type; rocblas_datatype_i32_r = c_type = d_type =\ncompute_type\n- rocblas_datatype_f32_c  = a_type = b_type = c_type = d_type = compute_type\n- rocblas_datatype_f64_c  = a_type = b_type = c_type = d_type = compute_type\n\nTwo int8 datatypes are supported: int8_t and rocblas_int8x4. int8_t is the C99 signed\n8 bit integer. The default is int8_t and it is recommended int8_t be used. rocblas_int8x4\nis a packed datatype. 
The packed int 8 datatype occurs if the user sets:\n\n@code\nflags |= rocblas_gemm_flags_pack_int8x4;\n@endcode\n\nFor this packed int8 datatype matrices A and B are packed into int8x4 in the k dimension.\nThis will impose the following size restrictions on A or B:\n\n- k must be a multiple of 4\n- if transA == rocblas_operation_transpose then lda must be a multiple of 4\n- if transB == rocblas_operation_none then ldb must be a multiple of 4\n- if transA == rocblas_operation_none the matrix A must have each 4 consecutive\nvalues in the k dimension packed\n- if transB == rocblas_operation_transpose the matrix B must have each 4\nconsecutive values in the k dimension packed.\n\nThis packing can be achieved with the following pseudo-code. The code assumes the\noriginal matrices are in A and B, and the packed matrices are A_packed and B_packed.\nThe size of the A_packed and B_packed are the same as the size of the A and B respectively.\n\n@code\nif(transA == rocblas_operation_none)\n{\nint nb = 4;\nfor(int i_m = 0; i_m < m; i_m++)\n{\nfor(int i_k = 0; i_k < k; i_k++)\n{\nA_packed[i_k % nb + (i_m + (i_k / nb) * lda) * nb] = A[i_m + i_k * lda];\n}\n}\n}\nelse\n{\nA_packed = A;\n}\nif(transB == rocblas_operation_transpose)\n{\nint nb = 4;\nfor(int i_n = 0; i_n < n; i_n++)\n{\nfor(int i_k = 0; i_k < k; i_k++)\n{\nB_packed[i_k % nb + (i_n + (i_k / nb) * ldb) * nb] = B[i_n + i_k * ldb];\n}\n}\n}\nelse\n{\nB_packed = B;\n}\n@endcode\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\ntransA    [rocblas_operation]\nspecifies the form of op( A ).\n@param[in]\ntransB    [rocblas_operation]\nspecifies the form of op( B ).\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     [const void *]\ndevice pointer or host pointer specifying the scalar alpha. 
Same datatype as compute_type.\n@param[in]\na         [void *]\ndevice pointer pointing to first matrix A_1.\n@param[in]\na_type    [rocblas_datatype]\nspecifies the datatype of each matrix A_i.\n@param[in]\nlda       [rocblas_int]\nspecifies the leading dimension of each A_i.\n@param[in]\nstride_a  [rocblas_stride]\nspecifies stride from start of one A_i matrix to the next A_(i + 1).\n@param[in]\nb         [void *]\ndevice pointer pointing to first matrix B_1.\n@param[in]\nb_type    [rocblas_datatype]\nspecifies the datatype of each matrix B_i.\n@param[in]\nldb       [rocblas_int]\nspecifies the leading dimension of each B_i.\n@param[in]\nstride_b  [rocblas_stride]\nspecifies stride from start of one B_i matrix to the next B_(i + 1).\n@param[in]\nbeta      [const void *]\ndevice pointer or host pointer specifying the scalar beta. Same datatype as compute_type.\n@param[in]\nc         [void *]\ndevice pointer pointing to first matrix C_1.\n@param[in]\nc_type    [rocblas_datatype]\nspecifies the datatype of each matrix C_i.\n@param[in]\nldc       [rocblas_int]\nspecifies the leading dimension of each C_i.\n@param[in]\nstride_c  [rocblas_stride]\nspecifies stride from start of one C_i matrix to the next C_(i + 1).\n@param[out]\nd         [void *]\ndevice pointer storing each matrix D_i.\nIf d and c pointers are to the same matrix then d_type must equal c_type and ldd must equal ldc\nand stride_d must equal stride_c or the respective invalid status will be returned.\n@param[in]\nd_type    [rocblas_datatype]\nspecifies the datatype of each matrix D_i.\n@param[in]\nldd       [rocblas_int]\nspecifies the leading dimension of each D_i.\n@param[in]\nstride_d  [rocblas_stride]\nspecifies stride from start of one D_i matrix to the next D_(i + 1).\n@param[in]\nbatch_count\n[rocblas_int]\nnumber of gemm operations in the batch.\n@param[in]\ncompute_type\n[rocblas_datatype]\nspecifies the datatype of computation.\n@param[in]\nalgo      [rocblas_gemm_algo]\nenumerant specifying 
the algorithm type.\n@param[in]\nsolution_index\n[int32_t]\nreserved for future use.\n@param[in]\nflags     [uint32_t]\noptional gemm flags.\n"]
+    pub fn rocblas_gemm_strided_batched_ex(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const ::std::os::raw::c_void,
+        a: *const ::std::os::raw::c_void,
+        a_type: rocblas_datatype,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        b: *const ::std::os::raw::c_void,
+        b_type: rocblas_datatype,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const ::std::os::raw::c_void,
+        c: *const ::std::os::raw::c_void,
+        c_type: rocblas_datatype,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        d: *mut ::std::os::raw::c_void,
+        d_type: rocblas_datatype,
+        ldd: rocblas_int,
+        stride_d: rocblas_stride,
+        batch_count: rocblas_int,
+        compute_type: rocblas_datatype,
+        algo: rocblas_gemm_algo,
+        solution_index: i32,
+        flags: u32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ngemm_ext2 performs the matrix-matrix operations:\n\nD = alpha * A * B  + beta * C,\n\nalpha and beta are scalars, and A, B, C, and D are matrices, with A an m by k\nmatrix, B a k by n matrix, and C and D are m by n matrices. Each matrix A, B, C, D\nhas independent row and column strides.\n\nThis is a beta feature.\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nm         [rocblas_int]\nmatrix dimension m.\n@param[in]\nn         [rocblas_int]\nmatrix dimension n.\n@param[in]\nk         [rocblas_int]\nmatrix dimension k.\n@param[in]\nalpha     [const void *]\ndevice pointer or host pointer specifying the scalar alpha. Same datatype as compute_type.\n@param[in]\na         [void *]\ndevice pointer storing matrix A.\n@param[in]\na_type    [rocblas_datatype]\nspecifies the datatype of matrix A.\n@param[in]\nrow_stride_a [rocblas_int]\nspecifies the row stride of A.\n@param[in]\ncol_stride_a [rocblas_int]\nspecifies the column stride of A.\n@param[in]\nb         [void *]\ndevice pointer storing matrix B.\n@param[in]\nb_type    [rocblas_datatype]\nspecifies the datatype of matrix B.\n@param[in]\nrow_stride_b    [rocblas_int]\nspecifies the row stride of B.\n@param[in]\ncol_stride_b    [rocblas_int]\nspecifies the column stride of B.\n@param[in]\nbeta      [const void *]\ndevice pointer or host pointer specifying the scalar beta. 
Same datatype as compute_type.\n@param[in]\nc         [void *]\ndevice pointer storing matrix C.\n@param[in]\nc_type    [rocblas_datatype]\nspecifies the datatype of matrix C.\n@param[in]\nrow_stride_c [rocblas_int]\nspecifies the row stride of C.\n@param[in]\ncol_stride_c [rocblas_int]\nspecifies the column stride of C.\n@param[out]\nd         [void *]\ndevice pointer storing matrix D.\n@param[in]\nd_type    [rocblas_datatype]\nspecifies the datatype of matrix D.\n@param[in]\nrow_stride_d [rocblas_int]\nspecifies the row stride of D.\n@param[in]\ncol_stride_d [rocblas_int]\nspecifies the column stride of D.\n@param[in]\ncompute_type\n[rocblas_datatype]\nspecifies the datatype of computation.\n@param[in]\nalgo      [rocblas_gemm_algo]\nenumerant specifying the algorithm type.\n@param[in]\nsolution_index\n[int32_t]\nreserved for future use.\n@param[in]\nflags     [uint32_t]\noptional gemm flags.\n"]
+    pub fn rocblas_gemm_ext2(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const ::std::os::raw::c_void,
+        a: *const ::std::os::raw::c_void,
+        a_type: rocblas_datatype,
+        row_stride_a: rocblas_stride,
+        col_stride_a: rocblas_stride,
+        b: *const ::std::os::raw::c_void,
+        b_type: rocblas_datatype,
+        row_stride_b: rocblas_stride,
+        col_stride_b: rocblas_stride,
+        beta: *const ::std::os::raw::c_void,
+        c: *const ::std::os::raw::c_void,
+        c_type: rocblas_datatype,
+        row_stride_c: rocblas_stride,
+        col_stride_c: rocblas_stride,
+        d: *mut ::std::os::raw::c_void,
+        d_type: rocblas_datatype,
+        row_stride_d: rocblas_stride,
+        col_stride_d: rocblas_stride,
+        compute_type: rocblas_datatype,
+        algo: rocblas_gemm_algo,
+        solution_index: i32,
+        flags: u32,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ntrsm_ex solves:\n\nop(A)*X = alpha*B or X*op(A) = alpha*B,\n\nwhere alpha is a scalar, X and B are m by n matrices,\nA is triangular matrix and op(A) is one of\n\nop( A ) = A   or   op( A ) = A^T   or   op( A ) = A^H.\n\nThe matrix X is overwritten on B.\n\nThis function gives the user the ability to reuse the invA matrix between runs.\nIf invA == NULL, rocblas_trsm_ex will automatically calculate invA on every run.\n\nSetting up invA:\nThe accepted invA matrix consists of the packed 128x128 inverses of the diagonal blocks of\nmatrix A, followed by any smaller diagonal block that remains.\nTo set up invA it is recommended that rocblas_trtri_batched be used with matrix A as the input.\n\nDevice memory of size 128 x k should be allocated for invA ahead of time, where k is m when\nrocblas_side_left and is n when rocblas_side_right. The actual number of elements in invA\nshould be passed as invA_size.\n\nTo begin, rocblas_trtri_batched must be called on the full 128x128-sized diagonal blocks of\nmatrix A. 
Below are the restricted parameters:\n- n = 128\n- ldinvA = 128\n- stride_invA = 128x128\n- batch_count = k / 128,\n\nThen any remaining block may be added:\n- n = k % 128\n- invA = invA + stride_invA * previous_batch_count\n- ldinvA = 128\n- batch_count = 1\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\n- rocblas_side_left:       op(A)*X = alpha*B\n- rocblas_side_right:      X*op(A) = alpha*B\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  A is an upper triangular matrix.\n- rocblas_fill_lower:  A is a lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     A is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of B. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of B. n >= 0.\n\n@param[in]\nalpha   [void *]\ndevice pointer or host pointer specifying the scalar alpha. 
When alpha is\n&zero then A is not referenced, and B need not be set before\nentry.\n\n@param[in]\nA       [void *]\ndevice pointer storing matrix A.\nof dimension ( lda, k ), where k is m\nwhen rocblas_side_left and\nis n when rocblas_side_right\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\nif side = rocblas_side_right, lda >= max( 1, n ).\n\n@param[in, out]\nB       [void *]\ndevice pointer storing matrix B.\nB is of dimension ( ldb, n ).\nBefore entry, the leading m by n part of the array B must\ncontain the right-hand side matrix B, and on exit is\noverwritten by the solution matrix X.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of B. ldb >= max( 1, m ).\n\n@param[in]\ninvA    [void *]\ndevice pointer storing the inverse diagonal blocks of A.\ninvA is of dimension ( ld_invA, k ), where k is m\nwhen rocblas_side_left and\nis n when rocblas_side_right.\nld_invA must be equal to 128.\n\n@param[in]\ninvA_size [rocblas_int]\ninvA_size specifies the number of elements of device memory in invA.\n\n@param[in]\ncompute_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_trsm_ex(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const ::std::os::raw::c_void,
+        A: *const ::std::os::raw::c_void,
+        lda: rocblas_int,
+        B: *mut ::std::os::raw::c_void,
+        ldb: rocblas_int,
+        invA: *const ::std::os::raw::c_void,
+        invA_size: rocblas_int,
+        compute_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ntrsm_batched_ex solves:\n\nop(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i,\n\nfor i = 1, ..., batch_count; and where alpha is a scalar, X and B are arrays of m by n matrices,\nA is an array of triangular matrix and each op(A_i) is one of\n\nop( A_i ) = A_i   or   op( A_i ) = A_i^T   or   op( A_i ) = A_i^H.\n\nEach matrix X_i is overwritten on B_i.\n\nThis function gives the user the ability to reuse the invA matrix between runs.\nIf invA == NULL, rocblas_trsm_batched_ex will automatically calculate each invA_i on every run.\n\nSetting up invA:\nEach accepted invA_i matrix consists of the packed 128x128 inverses of the diagonal blocks of\nmatrix A_i, followed by any smaller diagonal block that remains.\nTo set up each invA_i it is recommended that rocblas_trtri_batched be used with matrix A_i as the input.\ninvA is an array of pointers of batch_count length holding each invA_i.\n\nDevice memory of size 128 x k should be allocated for each invA_i ahead of time, where k is m when\nrocblas_side_left and is n when rocblas_side_right. The actual number of elements in each invA_i\nshould be passed as invA_size.\n\nTo begin, rocblas_trtri_batched must be called on the full 128x128-sized diagonal blocks of each\nmatrix A_i. 
Below are the restricted parameters:\n- n = 128\n- ldinvA = 128\n- stride_invA = 128x128\n- batch_count = k / 128,\n\nThen any remaining block may be added:\n- n = k % 128\n- invA = invA + stride_invA * previous_batch_count\n- ldinvA = 128\n- batch_count = 1\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\n- rocblas_side_left:       op(A)*X = alpha*B\n- rocblas_side_right:      X*op(A) = alpha*B\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  each A_i is an upper triangular matrix.\n- rocblas_fill_lower:  each A_i is a lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  each A_i is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of each B_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of each B_i. n >= 0.\n\n@param[in]\nalpha   [void *]\ndevice pointer or host pointer alpha specifying the scalar alpha. 
When alpha is\n&zero then A is not referenced, and B need not be set before\nentry.\n\n@param[in]\nA       [void *]\ndevice array of device pointers storing each matrix A_i.\neach A_i is of dimension ( lda, k ), where k is m\nwhen rocblas_side_left and\nis n when rocblas_side_right\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of each A_i.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\nif side = rocblas_side_right, lda >= max( 1, n ).\n\n@param[in, out]\nB       [void *]\ndevice array of device pointers storing each matrix B_i.\neach B_i is of dimension ( ldb, n ).\nBefore entry, the leading m by n part of the array B_i must\ncontain the right-hand side matrix B_i, and on exit is\noverwritten by the solution matrix X_i\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of each B_i. ldb >= max( 1, m ).\n\n@param[in]\nbatch_count [rocblas_int]\nspecifies how many batches.\n\n@param[in]\ninvA    [void *]\ndevice array of device pointers storing the inverse diagonal blocks of each A_i.\neach invA_i is of dimension ( ld_invA, k ), where k is m\nwhen rocblas_side_left and\nis n when rocblas_side_right.\nld_invA must be equal to 128.\n\n@param[in]\ninvA_size [rocblas_int]\ninvA_size specifies the number of elements of device memory in each invA_i.\n\n@param[in]\ncompute_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_trsm_batched_ex(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const ::std::os::raw::c_void,
+        A: *const ::std::os::raw::c_void,
+        lda: rocblas_int,
+        B: *mut ::std::os::raw::c_void,
+        ldb: rocblas_int,
+        batch_count: rocblas_int,
+        invA: *const ::std::os::raw::c_void,
+        invA_size: rocblas_int,
+        compute_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ntrsm_strided_batched_ex solves:\n\nop(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i,\n\nfor i = 1, ..., batch_count; and where alpha is a scalar, X and B are strided batched m by n matrices,\nA is a strided batched triangular matrix and op(A_i) is one of\n\nop( A_i ) = A_i   or   op( A_i ) = A_i^T   or   op( A_i ) = A_i^H.\n\nEach matrix X_i is overwritten on B_i.\n\nThis function gives the user the ability to reuse each invA_i matrix between runs.\nIf invA == NULL, rocblas_trsm_batched_ex will automatically calculate each invA_i on every run.\n\nSetting up invA:\nEach accepted invA_i matrix consists of the packed 128x128 inverses of the diagonal blocks of\nmatrix A_i, followed by any smaller diagonal block that remains.\nTo set up invA_i it is recommended that rocblas_trtri_batched be used with matrix A_i as the input.\ninvA is a contiguous piece of memory holding each invA_i.\n\nDevice memory of size 128 x k should be allocated for each invA_i ahead of time, where k is m when\nrocblas_side_left and is n when rocblas_side_right. The actual number of elements in each invA_i\nshould be passed as invA_size.\n\nTo begin, rocblas_trtri_batched must be called on the full 128x128-sized diagonal blocks of each\nmatrix A_i. 
Below are the restricted parameters:\n- n = 128\n- ldinvA = 128\n- stride_invA = 128x128\n- batch_count = k / 128\n\nThen any remaining block may be added:\n- n = k % 128\n- invA = invA + stride_invA * previous_batch_count\n- ldinvA = 128\n- batch_count = 1\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n\n@param[in]\nside    [rocblas_side]\n- rocblas_side_left:       op(A)*X = alpha*B\n- rocblas_side_right:      X*op(A) = alpha*B\n\n@param[in]\nuplo    [rocblas_fill]\n- rocblas_fill_upper:  each A_i is an upper triangular matrix.\n- rocblas_fill_lower:  each A_i is a lower triangular matrix.\n\n@param[in]\ntransA  [rocblas_operation]\n- rocblas_operation_none:    op(A) = A.\n- rocblas_operation_transpose:      op(A) = A^T\n- rocblas_operation_conjugate_transpose:  op(A) = A^H\n\n@param[in]\ndiag    [rocblas_diagonal]\n- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular.\n- rocblas_diagonal_non_unit:  each A_i is not assumed to be unit triangular.\n\n@param[in]\nm       [rocblas_int]\nm specifies the number of rows of each B_i. m >= 0.\n\n@param[in]\nn       [rocblas_int]\nn specifies the number of columns of each B_i. n >= 0.\n\n@param[in]\nalpha   [void *]\ndevice pointer or host pointer specifying the scalar alpha. 
When alpha is\n&zero then A is not referenced, and B need not be set before\nentry.\n\n@param[in]\nA       [void *]\ndevice pointer storing matrix A.\nof dimension ( lda, k ), where k is m\nwhen rocblas_side_left and\nis n when rocblas_side_right\nonly the upper/lower triangular part is accessed.\n\n@param[in]\nlda     [rocblas_int]\nlda specifies the first dimension of A.\n\nif side = rocblas_side_left,  lda >= max( 1, m ),\nif side = rocblas_side_right, lda >= max( 1, n ).\n\n@param[in]\nstride_A [rocblas_stride]\nThe stride between each A matrix.\n\n@param[in, out]\nB       [void *]\ndevice pointer pointing to first matrix B_i.\neach B_i is of dimension ( ldb, n ).\nBefore entry, the leading m by n part of each array B_i must\ncontain the right-hand side of matrix B_i, and on exit is\noverwritten by the solution matrix X_i.\n\n@param[in]\nldb    [rocblas_int]\nldb specifies the first dimension of each B_i. ldb >= max( 1, m ).\n\n@param[in]\nstride_B [rocblas_stride]\nThe stride between each B_i matrix.\n\n@param[in]\nbatch_count [rocblas_int]\nspecifies how many batches.\n\n@param[in]\ninvA    [void *]\ndevice pointer storing the inverse diagonal blocks of each A_i.\ninvA points to the first invA_1.\neach invA_i is of dimension ( ld_invA, k ), where k is m\nwhen rocblas_side_left and\nis n when rocblas_side_right.\nld_invA must be equal to 128.\n\n@param[in]\ninvA_size [rocblas_int]\ninvA_size specifies the number of elements of device memory in each invA_i.\n\n@param[in]\nstride_invA [rocblas_stride]\nThe stride between each invA matrix.\n\n@param[in]\ncompute_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_trsm_strided_batched_ex(
+        handle: rocblas_handle,
+        side: rocblas_side, // left: op(A)*X = alpha*B; right: X*op(A) = alpha*B
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int, // rows of each B_i, m >= 0
+        n: rocblas_int, // columns of each B_i, n >= 0
+        alpha: *const ::std::os::raw::c_void, // host or device pointer to the scalar alpha
+        A: *const ::std::os::raw::c_void, // type-erased device pointer; only one triangle is accessed
+        lda: rocblas_int,
+        stride_A: rocblas_stride, // stride between consecutive A matrices
+        B: *mut ::std::os::raw::c_void, // in/out: right-hand sides B_i, overwritten by the solutions X_i
+        ldb: rocblas_int,
+        stride_B: rocblas_stride, // stride between consecutive B_i matrices
+        batch_count: rocblas_int,
+        invA: *const ::std::os::raw::c_void, // may be NULL; each invA_i is then recomputed on every run
+        invA_size: rocblas_int, // number of elements of device memory in each invA_i
+        stride_invA: rocblas_stride, // stride between consecutive invA matrices
+        compute_type: rocblas_datatype, // datatype of the computation
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\naxpy_ex   computes constant alpha multiplied by vector x, plus vector y.\n\ny := alpha * x + y\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------------------\n| alpha_type | x_type | y_type | execution_type |\n|------------|--------|--------|----------------|\n|  f16_r     | f16_r  |  f16_r |      f16_r     |\n|  f16_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f32_r  |  f32_r |      f32_r     |\n|  f64_r     | f64_r  |  f64_r |      f64_r     |\n|  f32_c     | f32_c  |  f32_c |      f32_c     |\n|  f64_c     | f64_c  |  f64_c |      f64_c     |\n-------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in]\nalpha     device pointer or host pointer to specify the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\ny         device pointer storing vector y.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_axpy_ex(
+        handle: rocblas_handle,
+        n: rocblas_int, // number of elements in x and y
+        alpha: *const ::std::os::raw::c_void, // host or device pointer; datatype given by alpha_type
+        alpha_type: rocblas_datatype,
+        x: *const ::std::os::raw::c_void, // device pointer; datatype given by x_type
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *mut ::std::os::raw::c_void, // in/out device pointer: y := alpha * x + y
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        execution_type: rocblas_datatype, // datatype of the computation
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\naxpy_batched_ex   computes constant alpha multiplied by vector x, plus vector y over\na set of batched vectors.\n\ny := alpha * x + y\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------------------\n| alpha_type | x_type | y_type | execution_type |\n|------------|--------|--------|----------------|\n|  f16_r     | f16_r  |  f16_r |      f16_r     |\n|  f16_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f32_r  |  f32_r |      f32_r     |\n|  f64_r     | f64_r  |  f64_r |      f64_r     |\n|  f32_c     | f32_c  |  f32_c |      f32_c     |\n|  f64_c     | f64_c  |  f64_c |      f64_c     |\n-------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nalpha     device pointer or host pointer to specify the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[inout]\ny         device array of device pointers storing each vector y_i.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_axpy_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int, // number of elements in each x_i and y_i
+        alpha: *const ::std::os::raw::c_void, // host or device pointer; datatype given by alpha_type
+        alpha_type: rocblas_datatype,
+        x: *const ::std::os::raw::c_void, // device array of device pointers, one per x_i
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *mut ::std::os::raw::c_void, // in/out: device array of device pointers, one per y_i
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        batch_count: rocblas_int, // number of instances in the batch
+        execution_type: rocblas_datatype, // datatype of the computation
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\naxpy_strided_batched_ex   computes constant alpha multiplied by vector x, plus vector y over\na set of strided batched vectors.\n\ny := alpha * x + y\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------------------\n| alpha_type | x_type | y_type | execution_type |\n|------------|--------|--------|----------------|\n|  f16_r     | f16_r  |  f16_r |      f16_r     |\n|  f16_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f16_r  |  f16_r |      f32_r     |\n|  f32_r     | f32_r  |  f32_r |      f32_r     |\n|  f64_r     | f64_r  |  f64_r |      f64_r     |\n|  f32_c     | f32_c  |  f32_c |      f32_c     |\n|  f64_c     | f64_c  |  f64_c |      f64_c     |\n-------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nalpha     device pointer or host pointer to specify the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) to the next one (x_i+1).\nThere are no restrictions placed on stridex. However, ensure that stridex is of appropriate size. 
For a typical\ncase this means stridex >= n * incx.\n@param[inout]\ny         device pointer to the first vector y_1.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstridey   [rocblas_stride]\nstride from the start of one vector (y_i) to the next one (y_i+1).\nThere are no restrictions placed on stridey. However, ensure that stridey is of appropriate size. For a typical\ncase this means stridey >= n * incy.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_axpy_strided_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int, // number of elements in each x_i and y_i
+        alpha: *const ::std::os::raw::c_void, // host or device pointer; datatype given by alpha_type
+        alpha_type: rocblas_datatype,
+        x: *const ::std::os::raw::c_void, // device pointer to the first vector x_1
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        stridex: rocblas_stride, // start-to-start distance between x_i and x_i+1; typically >= n * incx
+        y: *mut ::std::os::raw::c_void, // in/out: device pointer to the first vector y_1
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        stridey: rocblas_stride, // start-to-start distance between y_i and y_i+1; typically >= n * incy
+        batch_count: rocblas_int, // number of instances in the batch
+        execution_type: rocblas_datatype, // datatype of the computation
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ndot_ex  performs the dot product of vectors x and y.\n\nresult = x * y;\n\ndotc_ex  performs the dot product of the conjugate of complex vector x and complex vector y\n\nresult = conjugate (x) * y;\n\nCurrently supported datatypes are as follows:\n\n--------------------------------------------------\n| x_type | y_type | result_type | execution_type |\n|--------|--------|-------------|----------------|\n| f16_r  | f16_r  |    f16_r    |     f16_r      |\n| f16_r  | f16_r  |    f16_r    |     f32_r      |\n| bf16_r | bf16_r |    bf16_r   |     f32_r      |\n| f32_r  | f32_r  |    f32_r    |     f32_r      |\n| f64_r  | f64_r  |    f64_r    |     f64_r      |\n| f32_c  | f32_c  |    f32_c    |     f32_c      |\n| f64_c  | f64_c  |    f64_c    |     f64_c      |\n--------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x and y.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\ny         device pointer storing vector y.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of vector y.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of y.\n@param[inout]\nresult\ndevice pointer or host pointer to store the dot product.\nreturn is 0.0 if n <= 0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_dot_ex(
+        handle: rocblas_handle,
+        n: rocblas_int, // number of elements in x and y
+        x: *const ::std::os::raw::c_void, // device pointer; datatype given by x_type
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *const ::std::os::raw::c_void, // device pointer; datatype given by y_type
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        result: *mut ::std::os::raw::c_void, // host or device pointer; receives 0.0 if n <= 0
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype, // datatype of the computation
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dotc_ex( // conjugated dot product: result = conjugate(x) * y
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const ::std::os::raw::c_void, // complex vector whose conjugate is taken
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *const ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        result: *mut ::std::os::raw::c_void, // NOTE(review): presumably same contract as rocblas_dot_ex
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status; // no doc attribute of its own; described in the rocblas_dot_ex doc text
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ndot_batched_ex performs a batch of dot products of vectors x and y.\n\nresult_i = x_i * y_i;\n\ndotc_batched_ex  performs a batch of dot products of the conjugate of complex vector x and complex vector y\n\nresult_i = conjugate (x_i) * y_i;\n\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors, for i = 1, ..., batch_count\n\nCurrently supported datatypes are as follows:\n\n--------------------------------------------------\n| x_type | y_type | result_type | execution_type |\n|--------|--------|-------------|----------------|\n| f16_r  | f16_r  |    f16_r    |     f16_r      |\n| f16_r  | f16_r  |    f16_r    |     f32_r      |\n| bf16_r | bf16_r |    bf16_r   |     f32_r      |\n| f32_r  | f32_r  |    f32_r    |     f32_r      |\n| f64_r  | f64_r  |    f64_r    |     f64_r      |\n| f32_c  | f32_c  |    f32_c    |     f32_c      |\n| f64_c  | f64_c  |    f64_c    |     f64_c      |\n--------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\ny         device array of device pointers storing each vector y_i.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[inout]\nresult\ndevice array or host array of batch_count size to store the dot products of each batch.\nreturn 0.0 for each element if n <= 0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the 
datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_dot_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int, // number of elements in each x_i and y_i
+        x: *const ::std::os::raw::c_void, // device array of device pointers, one per x_i
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *const ::std::os::raw::c_void, // device array of device pointers, one per y_i
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        batch_count: rocblas_int, // number of instances in the batch
+        result: *mut ::std::os::raw::c_void, // host or device array of batch_count results
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype, // datatype of the computation
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dotc_batched_ex( // batched conjugated dot: result_i = conjugate(x_i) * y_i
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const ::std::os::raw::c_void, // complex vectors whose conjugates are taken
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *const ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut ::std::os::raw::c_void, // NOTE(review): presumably as in rocblas_dot_batched_ex
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status; // no doc attribute of its own; described in the rocblas_dot_batched_ex doc text
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\ndot_strided_batched_ex  performs a batch of dot products of vectors x and y.\n\nresult_i = x_i * y_i;\n\ndotc_strided_batched_ex  performs a batch of dot products of the conjugate of complex vector x and complex vector y\n\nresult_i = conjugate (x_i) * y_i;\n\nwhere (x_i, y_i) is the i-th instance of the batch.\nx_i and y_i are vectors, for i = 1, ..., batch_count\n\nCurrently supported datatypes are as follows:\n\n--------------------------------------------------\n| x_type | y_type | result_type | execution_type |\n|--------|--------|-------------|----------------|\n| f16_r  | f16_r  |    f16_r    |     f16_r      |\n| f16_r  | f16_r  |    f16_r    |     f32_r      |\n| bf16_r | bf16_r |    bf16_r   |     f32_r      |\n| f32_r  | f32_r  |    f32_r    |     f32_r      |\n| f64_r  | f64_r  |    f64_r    |     f64_r      |\n| f32_c  | f32_c  |    f32_c    |     f32_c      |\n| f64_c  | f64_c  |    f64_c    |     f64_c      |\n--------------------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in each x_i and y_i.\n@param[in]\nx         device pointer to the first vector (x_1) in the batch.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstride_x    [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1)\n@param[in]\ny         device pointer to the first vector (y_1) in the batch.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy      [rocblas_int]\nspecifies the increment for the elements of each y_i.\n@param[in]\nstride_y    [rocblas_stride]\nstride from the start of one vector (y_i) and the next one (y_i+1)\n@param[in]\nbatch_count [rocblas_int]\nnumber of 
instances in the batch.\n@param[inout]\nresult\ndevice array or host array of batch_count size to store the dot products of each batch.\nreturn 0.0 for each element if n <= 0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_dot_strided_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int, // number of elements in each x_i and y_i
+        x: *const ::std::os::raw::c_void, // device pointer to the first vector (x_1) in the batch
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        stride_x: rocblas_stride, // start-to-start distance between x_i and x_i+1
+        y: *const ::std::os::raw::c_void, // device pointer to the first vector (y_1) in the batch
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        stride_y: rocblas_stride, // start-to-start distance between y_i and y_i+1
+        batch_count: rocblas_int, // number of instances in the batch
+        result: *mut ::std::os::raw::c_void, // host or device array of batch_count results
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype, // datatype of the computation
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dotc_strided_batched_ex( // strided batched conjugated dot: result_i = conjugate(x_i) * y_i
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const ::std::os::raw::c_void, // complex vectors whose conjugates are taken
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *const ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut ::std::os::raw::c_void, // NOTE(review): presumably as in rocblas_dot_strided_batched_ex
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status; // no doc attribute of its own; described in the rocblas_dot_strided_batched_ex doc text
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief BLAS_EX API\n\n\\details\nnrm2_ex computes the euclidean norm of a real or complex vector.\n\nresult := sqrt( x'*x ) for real vectors\nresult := sqrt( x**H*x ) for complex vectors\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------\n|  x_type | result | execution_type |\n|---------|--------|----------------|\n|  f16_r  |  f16_r |     f32_r      |\n|  f32_r  |  f32_r |     f32_r      |\n|  f64_r  |  f64_r |     f64_r      |\n|  f32_c  |  f32_r |     f32_r      |\n|  f64_c  |  f64_r |     f64_r      |\n-------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nx         device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of the vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[inout]\nresults\ndevice pointer or host pointer to store the nrm2 product.\nreturn is 0.0 if n, incx<=0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation."]
+    pub fn rocblas_nrm2_ex(
+        handle: rocblas_handle,
+        n: rocblas_int, // number of elements in x
+        x: *const ::std::os::raw::c_void, // device pointer; datatype given by x_type
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        results: *mut ::std::os::raw::c_void, // host or device pointer; receives 0.0 if n or incx <= 0
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype, // datatype of the computation
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief BLAS_EX API\n\n\\details\nnrm2_batched_ex computes the euclidean norm over a batch of real or complex vectors.\n\nresult := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count\nresult := sqrt( x_i**H*x_i ) for complex vectors x, for i = 1, ..., batch_count\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------\n|  x_type | result | execution_type |\n|---------|--------|----------------|\n|  f16_r  |  f16_r |     f32_r      |\n|  f32_r  |  f32_r |     f32_r      |\n|  f64_r  |  f64_r |     f64_r      |\n|  f32_c  |  f32_r |     f32_r      |\n|  f64_c  |  f64_r |     f64_r      |\n-------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each x_i.\n@param[in]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresults\ndevice pointer or host pointer to array of batch_count size for nrm2 results.\nreturn is 0.0 for each element if n <= 0, incx<=0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_nrm2_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int, // number of elements in each x_i
+        x: *const ::std::os::raw::c_void, // device array of device pointers, one per x_i
+        x_type: rocblas_datatype,
+        incx: rocblas_int, // must be > 0
+        batch_count: rocblas_int, // number of instances in the batch
+        results: *mut ::std::os::raw::c_void, // host or device pointer to batch_count results
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype, // datatype of the computation
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief BLAS_EX API\n\n\\details\nnrm2_strided_batched_ex computes the euclidean norm over a batch of real or complex vectors.\n\nresult := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count\nresult := sqrt( x_i**H*x_i ) for complex vectors, for i = 1, ..., batch_count\n\nCurrently supported datatypes are as follows:\n\n-------------------------------------\n|  x_type | result | execution_type |\n|---------|--------|----------------|\n|  f16_r  |  f16_r |     f32_r      |\n|  f32_r  |  f32_r |     f32_r      |\n|  f64_r  |  f64_r |     f64_r      |\n|  f32_c  |  f32_r |     f32_r      |\n|  f64_c  |  f64_r |     f64_r      |\n-------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nnumber of elements in each x_i.\n@param[in]\nx         device pointer to the first vector x_1.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i. incx must be > 0.\n@param[in]\nstride_x  [rocblas_stride]\nstride from the start of one vector (x_i) and the next one (x_i+1).\nThere are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical\ncase this means stride_x >= n * incx.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[out]\nresults\ndevice pointer or host pointer to array for storing contiguous batch_count results.\nreturn is 0.0 for each element if n <= 0, incx<=0.\n@param[in]\nresult_type [rocblas_datatype]\nspecifies the datatype of the result.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_nrm2_strided_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int, // number of elements in each x_i
+        x: *const ::std::os::raw::c_void, // device pointer to the first vector x_1
+        x_type: rocblas_datatype,
+        incx: rocblas_int, // must be > 0
+        stride_x: rocblas_stride, // start-to-start distance between x_i and x_i+1; typically >= n * incx
+        batch_count: rocblas_int, // number of instances in the batch
+        results: *mut ::std::os::raw::c_void, // host or device pointer to batch_count contiguous results
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype, // datatype of the computation
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nrot_ex applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\nIn the case where cs_type is real:\n\nx := c * x + s * y\ny := c * y - s * x\n\nIn the case where cs_type is complex, the imaginary part of c is ignored:\n\nx := real(c) * x + s * y\ny := real(c) * y - conj(s) * x\n\nCurrently supported datatypes are as follows:\n\n------------------------------------------------\n|  x_type | y_type  | cs_type | execution_type |\n|---------|---------|---------|----------------|\n|  bf16_r |  bf16_r | bf16_r  |  f32_r         |\n|  f16_r  |  f16_r  | f16_r   |  f32_r         |\n|  f32_r  |  f32_r  | f32_r   |  f32_r         |\n|  f64_r  |  f64_r  | f64_r   |  f64_r         |\n|  f32_c  |  f32_c  | f32_c   |  f32_c         |\n|  f32_c  |  f32_c  | f32_r   |  f32_c         |\n|  f64_c  |  f64_c  | f64_c   |  f64_c         |\n|  f64_c  |  f64_c  | f64_r   |  f64_c         |\n------------------------------------------------\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in the x and y vectors.\n@param[inout]\nx       device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of vector x.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of x.\n@param[inout]\ny       device pointer storing vector y.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of vector y.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of y.\n@param[in]\nc       device pointer or host pointer storing scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer storing scalar sine component of the rotation matrix.\n@param[in]\ncs_type 
[rocblas_datatype]\nspecifies the datatype of c and s.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_rot_ex(
+        handle: rocblas_handle,
+        n: rocblas_int, // number of elements in the x and y vectors
+        x: *mut ::std::os::raw::c_void, // in/out device pointer; datatype given by x_type
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *mut ::std::os::raw::c_void, // in/out device pointer; datatype given by y_type
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        c: *const ::std::os::raw::c_void, // cosine scalar; host or device per rocblas_set_pointer_mode
+        s: *const ::std::os::raw::c_void, // sine scalar; host or device per rocblas_set_pointer_mode
+        cs_type: rocblas_datatype, // datatype of both c and s
+        execution_type: rocblas_datatype, // datatype of the computation
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nrot_batched_ex applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batch_count.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\nIn the case where cs_type is real:\n\nx := c * x + s * y\ny := c * y - s * x\n\nIn the case where cs_type is complex, the imaginary part of c is ignored:\n\nx := real(c) * x + s * y\ny := real(c) * y - conj(s) * x\n\nCurrently supported datatypes are as follows:\n\n------------------------------------------------\n|  x_type | y_type  | cs_type | execution_type |\n|---------|---------|---------|----------------|\n|  bf16_r |  bf16_r | bf16_r  |  f32_r         |\n|  f16_r  |  f16_r  | f16_r   |  f32_r         |\n|  f32_r  |  f32_r  | f32_r   |  f32_r         |\n|  f64_r  |  f64_r  | f64_r   |  f64_r         |\n|  f32_c  |  f32_c  | f32_c   |  f32_c         |\n|  f32_c  |  f32_c  | f32_r   |  f32_c         |\n|  f64_c  |  f64_c  | f64_c   |  f64_c         |\n|  f64_c  |  f64_c  | f64_r   |  f64_c         |\n------------------------------------------------\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in each x_i and y_i vectors.\n@param[inout]\nx       device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[inout]\ny       device array of device pointers storing each vector y_i.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each y_i.\n@param[in]\nc       device pointer or host pointer to scalar cosine component of the rotation 
matrix.\n@param[in]\ns       device pointer or host pointer to scalar sine component of the rotation matrix.\n@param[in]\ncs_type [rocblas_datatype]\nspecifies the datatype of c and s.\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, the number of batches.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_rot_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int, // number of elements in each x_i and y_i
+        x: *mut ::std::os::raw::c_void, // in/out: device array of device pointers, one per x_i
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *mut ::std::os::raw::c_void, // in/out: device array of device pointers, one per y_i
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        c: *const ::std::os::raw::c_void, // cosine scalar; host or device per rocblas_set_pointer_mode
+        s: *const ::std::os::raw::c_void, // sine scalar; host or device per rocblas_set_pointer_mode
+        cs_type: rocblas_datatype, // datatype of both c and s
+        batch_count: rocblas_int, // number of x and y arrays, i.e. the number of batches
+        execution_type: rocblas_datatype, // datatype of the computation
+    ) -> rocblas_status;
+}
+extern "C" {
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nrot_strided_batched_ex applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batch_count.\nScalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.\n\nIn the case where cs_type is real:\n\nx := c * x + s * y\ny := c * y - s * x\n\nIn the case where cs_type is complex, the imaginary part of c is ignored:\n\nx := real(c) * x + s * y\ny := real(c) * y - conj(s) * x\n\nCurrently supported datatypes are as follows:\n\n------------------------------------------------\n|  x_type | y_type  | cs_type | execution_type |\n|---------|---------|---------|----------------|\n|  bf16_r |  bf16_r | bf16_r  |  f32_r         |\n|  f16_r  |  f16_r  | f16_r   |  f32_r         |\n|  f32_r  |  f32_r  | f32_r   |  f32_r         |\n|  f64_r  |  f64_r  | f64_r   |  f64_r         |\n|  f32_c  |  f32_c  | f32_c   |  f32_c         |\n|  f32_c  |  f32_c  | f32_r   |  f32_c         |\n|  f64_c  |  f64_c  | f64_c   |  f64_c         |\n|  f64_c  |  f64_c  | f64_r   |  f64_c         |\n------------------------------------------------\n\n@param[in]\nhandle  [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn       [rocblas_int]\nnumber of elements in each x_i and y_i vectors.\n@param[inout]\nx       device pointer to the first vector x_1.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx    [rocblas_int]\nspecifies the increment between elements of each x_i.\n@param[in]\nstride_x [rocblas_stride]\nspecifies the increment from the beginning of x_i to the beginning of x_(i+1)\n@param[inout]\ny       device pointer to the first vector y_1.\n@param[in]\ny_type [rocblas_datatype]\nspecifies the datatype of each vector y_i.\n@param[in]\nincy    [rocblas_int]\nspecifies the increment between elements of each 
y_i.\n@param[in]\nstride_y [rocblas_stride]\nspecifies the increment from the beginning of y_i to the beginning of y_(i+1)\n@param[in]\nc       device pointer or host pointer to scalar cosine component of the rotation matrix.\n@param[in]\ns       device pointer or host pointer to scalar sine component of the rotation matrix.\n@param[in]\ncs_type [rocblas_datatype]\nspecifies the datatype of c and s.\n@param[in]\nbatch_count [rocblas_int]\nthe number of x and y arrays, the number of batches.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_rot_strided_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut ::std::os::raw::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const ::std::os::raw::c_void,
+        s: *const ::std::os::raw::c_void,
+        cs_type: rocblas_datatype,
+        batch_count: rocblas_int,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // Type-erased scal entry point: `alpha` and `x` are untyped pointers whose
+    // element types are selected by `alpha_type` and `x_type`; the doc string
+    // below lists the supported (alpha_type, x_type, execution_type) triples.
+    // The returned `rocblas_status` is `#[must_use]`, so callers must check it.
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nscal_ex  scales each element of vector x with scalar alpha.\n\nx := alpha * x\n\nCurrently supported datatypes are as follows:\n\n----------------------------------------\n| alpha_type | x_type | execution_type |\n|------------|--------|----------------|\n|  f16_r     | f16_r  |     f16_r      |\n|  f16_r     | f16_r  |     f32_r      |\n|  f32_r     | f16_r  |     f32_r      |\n|  f32_r     | f32_r  |     f32_r      |\n|  f64_r     | f64_r  |     f64_r      |\n|  f32_c     | f32_c  |     f32_c      |\n|  f64_c     | f64_c  |     f64_c      |\n|  f32_r     | f32_c  |     f32_c      |\n|  f64_r     | f64_c  |     f64_c      |\n----------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nalpha     device pointer or host pointer for the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[inout]\nx         device pointer storing vector x.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of vector x.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of x.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_scal_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const ::std::os::raw::c_void,
+        alpha_type: rocblas_datatype,
+        x: *mut ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // Batched variant of scal_ex. Although `x` is typed as a plain
+    // `*mut c_void`, the doc string below describes it as a device array of
+    // device pointers (one per vector x_i), not a pointer to element data.
+    // `alpha` is still a single scalar shared by every vector in the batch.
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nscal_batched_ex  scales each element of each vector x_i with scalar alpha.\n\nx_i := alpha * x_i\n\nCurrently supported datatypes are as follows:\n\n----------------------------------------\n| alpha_type | x_type | execution_type |\n|------------|--------|----------------|\n|  f16_r     | f16_r  |     f16_r      |\n|  f16_r     | f16_r  |     f32_r      |\n|  f32_r     | f16_r  |     f32_r      |\n|  f32_r     | f32_r  |     f32_r      |\n|  f64_r     | f64_r  |     f64_r      |\n|  f32_c     | f32_c  |     f32_c      |\n|  f64_c     | f64_c  |     f64_c      |\n|  f32_r     | f32_c  |     f32_c      |\n|  f64_r     | f64_c  |     f64_c      |\n----------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nalpha     device pointer or host pointer for the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[inout]\nx         device array of device pointers storing each vector x_i.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_scal_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const ::std::os::raw::c_void,
+        alpha_type: rocblas_datatype,
+        x: *mut ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // Strided-batched variant of scal_ex: `x` points at the first vector and
+    // every further vector is located `stridex` past the previous one, as
+    // described in the doc string below. The library places no restriction on
+    // `stridex`, so choosing a stride large enough (typically >= n * incx) is
+    // the caller's responsibility.
+    #[must_use]
+    #[doc = " @{\n\\brief <b> BLAS EX API </b>\n\n\\details\nscal_strided_batched_ex  scales each element of vector x with scalar alpha over a set\nof strided batched vectors.\n\nx := alpha * x\n\nCurrently supported datatypes are as follows:\n\n----------------------------------------\n| alpha_type | x_type | execution_type |\n|------------|--------|----------------|\n|  f16_r     | f16_r  |     f16_r      |\n|  f16_r     | f16_r  |     f32_r      |\n|  f32_r     | f16_r  |     f32_r      |\n|  f32_r     | f32_r  |     f32_r      |\n|  f64_r     | f64_r  |     f64_r      |\n|  f32_c     | f32_c  |     f32_c      |\n|  f64_c     | f64_c  |     f64_c      |\n|  f32_r     | f32_c  |     f32_c      |\n|  f64_r     | f64_c  |     f64_c      |\n----------------------------------------\n\n@param[in]\nhandle    [rocblas_handle]\nhandle to the rocblas library context queue.\n@param[in]\nn         [rocblas_int]\nthe number of elements in x.\n@param[in]\nalpha     device pointer or host pointer for the scalar alpha.\n@param[in]\nalpha_type [rocblas_datatype]\nspecifies the datatype of alpha.\n@param[inout]\nx         device pointer to the first vector x_1.\n@param[in]\nx_type [rocblas_datatype]\nspecifies the datatype of each vector x_i.\n@param[in]\nincx      [rocblas_int]\nspecifies the increment for the elements of each x_i.\n@param[in]\nstridex   [rocblas_stride]\nstride from the start of one vector (x_i) to the next one (x_i+1).\nThere are no restrictions placed on stridex. However, ensure that stridex is of appropriate size. For a typical\ncase this means stridex >= n * incx.\n@param[in]\nbatch_count [rocblas_int]\nnumber of instances in the batch.\n@param[in]\nexecution_type [rocblas_datatype]\nspecifies the datatype of computation.\n"]
+    pub fn rocblas_scal_strided_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const ::std::os::raw::c_void,
+        alpha_type: rocblas_datatype,
+        x: *mut ::std::os::raw::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // Returns a raw C string pointer rather than a `rocblas_status`, which is
+    // why this declaration carries no `#[must_use]`.
+    // NOTE(review): the doc string does not say who owns the returned string;
+    // presumably it is static storage that must not be freed — confirm against
+    // the rocBLAS header before wrapping it in a `CStr`.
+    #[doc = " BLAS Auxiliary API\n\n\\details\nrocblas_status_to_string\n\nReturns string representing rocblas_status value\n\n@param[in]\nstatus  [rocblas_status]\nrocBLAS status to convert to string"]
+    pub fn rocblas_status_to_string(status: rocblas_status) -> *const ::std::os::raw::c_char;
+}
+extern "C" {
+    // Optional eager initialization. The function takes no arguments and
+    // returns nothing, so a failure cannot be observed through this binding.
+    #[doc = " \\brief Initialize rocBLAS on the current HIP device, to avoid costly startup time at the first call on that device.\n\\details\n\nCalling `rocblas_initialize()` allows upfront initialization including device specific kernel setup.\nOtherwise this function is automatically called on the first function call that requires these initializations (mainly GEMM).\n"]
+    pub fn rocblas_initialize();
+}
+extern "C" {
+    // Writes the library version into the caller-provided buffer `buf`, whose
+    // capacity is passed in `len` (C `size_t`, mapped to `usize` here).
+    // `rocblas_get_version_string_size` reports the minimum `len` needed for
+    // this call to succeed.
+    #[must_use]
+    #[doc = " \\brief   Loads char* buf with the rocblas library version. size_t len\nis the maximum length of char* buf.\n\\details\n\n@param[in, out]\nbuf             pointer to buffer for version string\n\n@param[in]\nlen             length of buf\n"]
+    pub fn rocblas_get_version_string(
+        buf: *mut ::std::os::raw::c_char,
+        len: usize,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // Out-parameter query: the required buffer length is stored through `len`.
+    // NOTE(review): the doc string does not say whether the reported length
+    // includes the terminating NUL — confirm before sizing a buffer exactly.
+    #[must_use]
+    #[doc = " \\brief   Queries the minimum buffer size for a successful call to\n\\ref rocblas_get_version_string\n\\details\n\n@param[out]\nlen             pointer to size_t for storing the length\n"]
+    pub fn rocblas_get_version_string_size(len: *mut usize) -> rocblas_status;
+}
+extern "C" {
+    // Opens a size-query session on `handle`; the session is closed, and the
+    // collected maximum retrieved, with `rocblas_stop_device_memory_size_query`.
+    // Starting a second query while one is in progress is reported through the
+    // returned status (see doc string below).
+    #[must_use]
+    #[doc = " \\brief\n\\details\nIndicates that subsequent rocBLAS kernel calls should collect the optimal device memory size in bytes for their given kernel arguments\nand keep track of the maximum.\nEach kernel call can reuse temporary device memory on the same stream so the maximum is collected.\nReturns rocblas_status_size_query_mismatch if another size query is already in progress; returns rocblas_status_success otherwise\n@param[in]\nhandle          rocblas handle"]
+    pub fn rocblas_start_device_memory_size_query(handle: rocblas_handle) -> rocblas_status;
+}
+extern "C" {
+    // Closes the size-query session opened by
+    // `rocblas_start_device_memory_size_query` and stores the collected
+    // maximum through the out-parameter `size`. Both `handle` and `size` are
+    // null-checked by the library according to the doc string below.
+    #[must_use]
+    #[doc = " \\brief\n\\details\nStops collecting optimal device memory size information.\nReturns rocblas_status_size_query_mismatch if a collection is not underway; rocblas_status_invalid_handle if handle is nullptr;\nrocblas_status_invalid_pointer if size is nullptr; rocblas_status_success otherwise\n@param[in]\nhandle          rocblas handle\n@param[out]\nsize            maximum of the optimal sizes collected"]
+    pub fn rocblas_stop_device_memory_size_query(
+        handle: rocblas_handle,
+        size: *mut usize,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // NOTE(review): no upstream documentation was emitted for this function.
+    // Judging by its name and the start/stop query functions declared above,
+    // it presumably reports whether a device-memory size query is currently
+    // in progress on `handle` — confirm against the rocBLAS header.
+    // Returns a plain `bool`, so there is no error channel.
+    pub fn rocblas_is_device_memory_size_query(handle: rocblas_handle) -> bool;
+}
+extern "C" {
+    // C-variadic function (`...`): Rust cannot type-check the trailing
+    // arguments, so callers must pass exactly what the C side expects.
+    // NOTE(review): no upstream documentation was emitted. Presumably `count`
+    // is the number of variadic arguments and each one is a `size_t` size —
+    // confirm against the rocBLAS header. The `_impl` suffix suggests this is
+    // normally reached through a C/C++ wrapper rather than called directly.
+    #[must_use]
+    pub fn rocblas_set_optimal_device_memory_size_impl(
+        handle: rocblas_handle,
+        count: usize,
+        ...
+    ) -> rocblas_status;
+}
+extern "C" {
+    // C-variadic function (`...`): Rust cannot type-check the trailing
+    // arguments, so callers must pass exactly what the C side expects.
+    // `res` is an out-parameter receiving a `*mut rocblas_device_malloc_base`.
+    // NOTE(review): no upstream documentation was emitted. Presumably `count`
+    // is the number of variadic `size_t` sizes to allocate, and the object
+    // stored in `res` is later released with `rocblas_device_malloc_free` —
+    // confirm against the rocBLAS header.
+    #[must_use]
+    pub fn rocblas_device_malloc_alloc(
+        handle: rocblas_handle,
+        res: *mut *mut rocblas_device_malloc_base,
+        count: usize,
+        ...
+    ) -> rocblas_status;
+}
+extern "C" {
+    // NOTE(review): no upstream documentation was emitted. Presumably reports
+    // whether the allocation represented by `ptr` succeeded — confirm against
+    // the rocBLAS header. Returns a plain `bool`, so there is no error channel.
+    pub fn rocblas_device_malloc_success(ptr: *mut rocblas_device_malloc_base) -> bool;
+}
+extern "C" {
+    // `res` is an out-parameter receiving an untyped `*mut c_void`.
+    // NOTE(review): no upstream documentation was emitted. Presumably the
+    // value stored in `res` is the device pointer held by the allocation
+    // object `ptr` — confirm against the rocBLAS header.
+    #[must_use]
+    pub fn rocblas_device_malloc_ptr(
+        ptr: *mut rocblas_device_malloc_base,
+        res: *mut *mut ::std::os::raw::c_void,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `res` is an out-parameter receiving an untyped `*mut c_void`.
+    // NOTE(review): no upstream documentation was emitted. Presumably this is
+    // the indexed counterpart of `rocblas_device_malloc_ptr`, selecting the
+    // `index`-th pointer of a multi-part allocation — confirm against the
+    // rocBLAS header, including what happens for an out-of-range `index`.
+    #[must_use]
+    pub fn rocblas_device_malloc_get(
+        ptr: *mut rocblas_device_malloc_base,
+        index: usize,
+        res: *mut *mut ::std::os::raw::c_void,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // NOTE(review): no upstream documentation was emitted. Presumably releases
+    // the allocation object produced by `rocblas_device_malloc_alloc`, after
+    // which `ptr` must not be used again — confirm against the rocBLAS header.
+    #[must_use]
+    pub fn rocblas_device_malloc_free(ptr: *mut rocblas_device_malloc_base) -> rocblas_status;
+}
+extern "C" {
+    // Takes no handle and returns nothing, so a failure cannot be observed
+    // through this binding.
+    // NOTE(review): no upstream documentation was emitted. Presumably sets a
+    // process-wide default device memory size, likely in bytes — confirm the
+    // scope and units against the rocBLAS header.
+    pub fn rocblas_device_malloc_set_default_memory_size(size: usize);
+}
+extern "C" {
+    // Getter counterpart of `rocblas_set_device_memory_size`: the current size
+    // is stored through the out-parameter `size`. Both `handle` and `size` are
+    // null-checked by the library according to the doc string below.
+    #[must_use]
+    #[doc = " \\brief\n\\details\nGets the current device memory size for the handle.\nReturns rocblas_status_invalid_handle if handle is nullptr; rocblas_status_invalid_pointer if size is nullptr; rocblas_status_success otherwise\n@param[in]\nhandle          rocblas handle\n@param[out]\nsize            current device memory size for the handle"]
+    pub fn rocblas_get_device_memory_size(
+        handle: rocblas_handle,
+        size: *mut usize,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // `size == 0` is meaningful here: per the doc string it frees the current
+    // allocation and hands memory management back to rocBLAS.
+    // NOTE(review): the doc string lists `rocblas_status_invalid_pointer if
+    // size is nullptr`, but `size` is passed by value in this signature, so
+    // that clause cannot apply as written; it looks copied from the getter's
+    // documentation upstream.
+    #[must_use]
+    #[doc = " \\brief\n\\details\nChanges the size of allocated device memory at runtime.\n\nAny previously allocated device memory managed by the handle is freed.\n\nIf size > 0 sets the device memory size to the specified size (in bytes).\nIf size == 0, frees the memory allocated so far, and lets rocBLAS manage device memory in the future, expanding it when necessary.\nReturns rocblas_status_invalid_handle if handle is nullptr; rocblas_status_invalid_pointer if size is nullptr; rocblas_status_success otherwise\n@param[in]\nhandle          rocblas handle\n@param[in]\nsize            size of allocated device memory"]
+    pub fn rocblas_set_device_memory_size(handle: rocblas_handle, size: usize) -> rocblas_status;
+}
+extern "C" {
+    // Hands a caller-provided buffer (`addr`, `size`) to the handle as its
+    // workspace; memory previously managed by the handle is freed first.
+    // NOTE(review): the doc string does not state the units of `size` or how
+    // long `addr` must stay valid; presumably bytes and for as long as the
+    // handle uses the workspace — confirm against the rocBLAS documentation.
+    #[must_use]
+    #[doc = " \\brief\n\\details\nSets the device workspace for the handle to use.\n\nAny previously allocated device memory managed by the handle is freed.\n\nReturns rocblas_status_invalid_handle if handle is nullptr; rocblas_status_success otherwise\n@param[in]\nhandle          rocblas handle\n@param[in]\naddr            address of workspace memory\n@param[in]\nsize            size of workspace memory\n"]
+    pub fn rocblas_set_workspace(
+        handle: rocblas_handle,
+        addr: *mut ::std::os::raw::c_void,
+        size: usize,
+    ) -> rocblas_status;
+}
+extern "C" {
+    // Predicate returning a plain `bool` instead of a `rocblas_status`, so an
+    // invalid `handle` cannot be reported as an error; the doc string does not
+    // say what is returned in that case.
+    #[doc = " \\brief\n\\details\nReturns true when device memory in handle is managed by rocBLAS\n@param[in]\nhandle          rocblas handle"]
+    pub fn rocblas_is_managing_device_memory(handle: rocblas_handle) -> bool;
+}
+extern "C" {
+    // Predicate returning a plain `bool` instead of a `rocblas_status`, so an
+    // invalid `handle` cannot be reported as an error; the doc string does not
+    // say what is returned in that case.
+    #[doc = " \\brief\n\\details\nReturns true when device memory in handle is managed by the user\n@param[in]\nhandle          rocblas handle"]
+    pub fn rocblas_is_user_managing_device_memory(handle: rocblas_handle) -> bool;
+}
+extern "C" {
+    // Declared as diverging (`-> !`): the compiler treats any code following a
+    // call as unreachable, so the C function must never return.
+    // NOTE(review): no upstream documentation was emitted. Presumably this
+    // terminates the process abnormally — confirm against the rocBLAS header.
+    pub fn rocblas_abort() -> !;
+}
author	Andrzej Janik <[email protected]>	2021-02-27 20:55:19 +0100
committer	Andrzej Janik <[email protected]>	2024-02-11 20:45:51 +0100
commit	1b9ba2b2333746c5e2b05a2bf24fa6ec3828dcdf (patch)
tree	0b77ca4a41d4f232bd181e2bddc886475c608784 /rocblas-sys
parent	60d2124a16a7a2a1a6be3707247afe82892a4163 (diff)
download	ZLUDA-1b9ba2b2333746c5e2b05a2bf24fa6ec3828dcdf.tar.gz ZLUDA-1b9ba2b2333746c5e2b05a2bf24fa6ec3828dcdf.zip