about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Andrzej Janik <[email protected]> 2024-05-06 00:28:49 +0200
committer: GitHub <[email protected]> 2024-05-06 00:28:49 +0200
commit: 27c0e136777a2db49dbb0caa888d561819230493 (patch)
tree: 06a395462378d64ed504f3ec0a1b877de12ba98c
parent: bdc652f9ebcac9a79849eeee84a391a4ac107913 (diff)
download: ZLUDA-27c0e136777a2db49dbb0caa888d561819230493.tar.gz
ZLUDA-27c0e136777a2db49dbb0caa888d561819230493.zip
Minor codegen improvements (#225)
-rw-r--r-- ptx/src/emit.rs 88
-rw-r--r-- ptx/src/test/spirv_run/abs.ll 14
-rw-r--r-- ptx/src/test/spirv_run/activemask.ll 6
-rw-r--r-- ptx/src/test/spirv_run/add.ll 6
-rw-r--r-- ptx/src/test/spirv_run/add_global.ll 6
-rw-r--r-- ptx/src/test/spirv_run/add_non_coherent.ll 6
-rw-r--r-- ptx/src/test/spirv_run/add_param_ptr.ll 26
-rw-r--r-- ptx/src/test/spirv_run/add_tuning.ll 6
-rw-r--r-- ptx/src/test/spirv_run/addc_cc.ll 54
-rw-r--r-- ptx/src/test/spirv_run/addc_cc2.ll 56
-rw-r--r-- ptx/src/test/spirv_run/alloca_call.ll 26
-rw-r--r-- ptx/src/test/spirv_run/amdgpu_unnamed.ll 32
-rw-r--r-- ptx/src/test/spirv_run/and.ll 10
-rw-r--r-- ptx/src/test/spirv_run/assertfail.ll 32
-rw-r--r-- ptx/src/test/spirv_run/atom_add.ll 14
-rw-r--r-- ptx/src/test/spirv_run/atom_add_f16.ll 14
-rw-r--r-- ptx/src/test/spirv_run/atom_add_float.ll 14
-rw-r--r-- ptx/src/test/spirv_run/atom_cas.ll 20
-rw-r--r-- ptx/src/test/spirv_run/atom_inc.ll 14
-rw-r--r-- ptx/src/test/spirv_run/atom_ld_st.ll 6
-rw-r--r-- ptx/src/test/spirv_run/atom_ld_st_vec.ll 18
-rw-r--r-- ptx/src/test/spirv_run/atom_max_u32.ll 10
-rw-r--r-- ptx/src/test/spirv_run/b64tof64.ll 12
-rw-r--r-- ptx/src/test/spirv_run/barrier.ll 4
-rw-r--r-- ptx/src/test/spirv_run/bfe.ll 14
-rw-r--r-- ptx/src/test/spirv_run/bfi.ll 18
-rw-r--r-- ptx/src/test/spirv_run/bfind.ll 46
-rw-r--r-- ptx/src/test/spirv_run/bfind_shiftamt.ll 40
-rw-r--r-- ptx/src/test/spirv_run/block.ll 6
-rw-r--r-- ptx/src/test/spirv_run/bra.ll 12
-rw-r--r-- ptx/src/test/spirv_run/brev.ll 6
-rw-r--r-- ptx/src/test/spirv_run/call.ll 12
-rw-r--r-- ptx/src/test/spirv_run/call_bug.ll 20
-rw-r--r-- ptx/src/test/spirv_run/call_multi_return.ll 32
-rw-r--r-- ptx/src/test/spirv_run/callprototype.ll 16
-rw-r--r-- ptx/src/test/spirv_run/carry_set_all.ll 210
-rw-r--r-- ptx/src/test/spirv_run/clz.ll 10
-rw-r--r-- ptx/src/test/spirv_run/const.ll 18
-rw-r--r-- ptx/src/test/spirv_run/constant_f32.ll 6
-rw-r--r-- ptx/src/test/spirv_run/constant_negative.ll 6
-rw-r--r-- ptx/src/test/spirv_run/cos.ll 6
-rw-r--r-- ptx/src/test/spirv_run/cvt_clamp.ll 30
-rw-r--r-- ptx/src/test/spirv_run/cvt_f32_f16.ll 6
-rw-r--r-- ptx/src/test/spirv_run/cvt_f32_s32.ll 30
-rw-r--r-- ptx/src/test/spirv_run/cvt_f64_f32.ll 6
-rw-r--r-- ptx/src/test/spirv_run/cvt_rni.ll 14
-rw-r--r-- ptx/src/test/spirv_run/cvt_rzi.ll 14
-rw-r--r-- ptx/src/test/spirv_run/cvt_s16_s8.ll 10
-rw-r--r-- ptx/src/test/spirv_run/cvt_s32_f32.ll 14
-rw-r--r-- ptx/src/test/spirv_run/cvt_s64_s32.ll 6
-rw-r--r-- ptx/src/test/spirv_run/cvt_sat_s_u.ll 20
-rw-r--r-- ptx/src/test/spirv_run/cvt_u32_s16.ll 6
-rw-r--r-- ptx/src/test/spirv_run/cvta.ll 18
-rw-r--r-- ptx/src/test/spirv_run/div_approx.ll 10
-rw-r--r-- ptx/src/test/spirv_run/dp4a.ll 14
-rw-r--r-- ptx/src/test/spirv_run/ex2.ll 30
-rw-r--r-- ptx/src/test/spirv_run/extern_shared.ll 6
-rw-r--r-- ptx/src/test/spirv_run/extern_shared_call.ll 18
-rw-r--r-- ptx/src/test/spirv_run/fma.ll 14
-rw-r--r-- ptx/src/test/spirv_run/func_ptr.ll 12
-rw-r--r-- ptx/src/test/spirv_run/generic.ll 12
-rw-r--r-- ptx/src/test/spirv_run/global_array.ll 12
-rw-r--r-- ptx/src/test/spirv_run/lanemask_lt.ll 12
-rw-r--r-- ptx/src/test/spirv_run/ld_st.ll 6
-rw-r--r-- ptx/src/test/spirv_run/ld_st_implicit.ll 20
-rw-r--r-- ptx/src/test/spirv_run/ld_st_offset.ll 14
-rw-r--r-- ptx/src/test/spirv_run/lg2.ll 6
-rw-r--r-- ptx/src/test/spirv_run/local_align.ll 6
-rw-r--r-- ptx/src/test/spirv_run/mad_hi_cc.ll 58
-rw-r--r-- ptx/src/test/spirv_run/mad_s32.ll 50
-rw-r--r-- ptx/src/test/spirv_run/madc_cc.ll 42
-rw-r--r-- ptx/src/test/spirv_run/max.ll 10
-rw-r--r-- ptx/src/test/spirv_run/membar.ll 6
-rw-r--r-- ptx/src/test/spirv_run/min.ll 10
-rw-r--r-- ptx/src/test/spirv_run/mov.ll 12
-rw-r--r-- ptx/src/test/spirv_run/mov_address.ll 12
-rw-r--r-- ptx/src/test/spirv_run/mov_vector_cast.ll 22
-rw-r--r-- ptx/src/test/spirv_run/mul_ftz.ll 10
-rw-r--r-- ptx/src/test/spirv_run/mul_hi.ll 6
-rw-r--r-- ptx/src/test/spirv_run/mul_lo.ll 6
-rw-r--r-- ptx/src/test/spirv_run/mul_non_ftz.ll 10
-rw-r--r-- ptx/src/test/spirv_run/mul_wide.ll 16
-rw-r--r-- ptx/src/test/spirv_run/multireg.ll 6
-rw-r--r-- ptx/src/test/spirv_run/neg.ll 6
-rw-r--r-- ptx/src/test/spirv_run/non_scalar_ptr_offset.ll 10
-rw-r--r-- ptx/src/test/spirv_run/not.ll 6
-rw-r--r-- ptx/src/test/spirv_run/ntid.ll 12
-rw-r--r-- ptx/src/test/spirv_run/or.ll 10
-rw-r--r-- ptx/src/test/spirv_run/param_ptr.ll 12
-rw-r--r-- ptx/src/test/spirv_run/popc.ll 6
-rw-r--r-- ptx/src/test/spirv_run/pred_not.ll 26
-rw-r--r-- ptx/src/test/spirv_run/prmt.ll 42
-rw-r--r-- ptx/src/test/spirv_run/prmt_non_immediate.ll 24
-rw-r--r-- ptx/src/test/spirv_run/rcp.ll 6
-rw-r--r-- ptx/src/test/spirv_run/reg_local.ll 14
-rw-r--r-- ptx/src/test/spirv_run/rem.ll 10
-rw-r--r-- ptx/src/test/spirv_run/rsqrt.ll 10
-rw-r--r-- ptx/src/test/spirv_run/s64_min.ll 12
-rw-r--r-- ptx/src/test/spirv_run/sad.ll 38
-rw-r--r-- ptx/src/test/spirv_run/selp.ll 10
-rw-r--r-- ptx/src/test/spirv_run/selp_true.ll 10
-rw-r--r-- ptx/src/test/spirv_run/set_f16x2.ll 32
-rw-r--r-- ptx/src/test/spirv_run/setp.ll 26
-rw-r--r-- ptx/src/test/spirv_run/setp_bool.ll 44
-rw-r--r-- ptx/src/test/spirv_run/setp_gt.ll 26
-rw-r--r-- ptx/src/test/spirv_run/setp_leu.ll 26
-rw-r--r-- ptx/src/test/spirv_run/setp_nan.ll 98
-rw-r--r-- ptx/src/test/spirv_run/setp_num.ll 98
-rw-r--r-- ptx/src/test/spirv_run/setp_pred2.ll 26
-rw-r--r-- ptx/src/test/spirv_run/shared_ptr_32.ll 16
-rw-r--r-- ptx/src/test/spirv_run/shared_ptr_take_address.ll 12
-rw-r--r-- ptx/src/test/spirv_run/shared_unify_decl.ll 34
-rw-r--r-- ptx/src/test/spirv_run/shared_unify_extern.ll 34
-rw-r--r-- ptx/src/test/spirv_run/shared_unify_local.ll 36
-rw-r--r-- ptx/src/test/spirv_run/shared_variable.ll 6
-rw-r--r-- ptx/src/test/spirv_run/shf.ll 10
-rw-r--r-- ptx/src/test/spirv_run/shl.ll 10
-rw-r--r-- ptx/src/test/spirv_run/shl_link_hack.ll 10
-rw-r--r-- ptx/src/test/spirv_run/shl_overflow.ll 44
-rw-r--r-- ptx/src/test/spirv_run/shr_s32.ll 16
-rw-r--r-- ptx/src/test/spirv_run/shr_u32.ll 30
-rw-r--r-- ptx/src/test/spirv_run/sign_extend.ll 6
-rw-r--r-- ptx/src/test/spirv_run/sin.ll 6
-rw-r--r-- ptx/src/test/spirv_run/sqrt.ll 6
-rw-r--r-- ptx/src/test/spirv_run/sub.ll 6
-rw-r--r-- ptx/src/test/spirv_run/subc_cc.ll 54
-rw-r--r-- ptx/src/test/spirv_run/vector.ll 74
-rw-r--r-- ptx/src/test/spirv_run/vector4.ll 12
-rw-r--r-- ptx/src/test/spirv_run/vector_extract.ll 48
-rw-r--r-- ptx/src/test/spirv_run/vote_ballot.ll 22
-rw-r--r-- ptx/src/test/spirv_run/vshr.ll 22
-rw-r--r-- ptx/src/test/spirv_run/xor.ll 10
-rw-r--r-- ptx/src/translate.rs 106
133 files changed, 1543 insertions, 1341 deletions
diff --git a/ptx/src/emit.rs b/ptx/src/emit.rs
index 9e62d5b..7388203 100644
--- a/ptx/src/emit.rs
+++ b/ptx/src/emit.rs
@@ -7,7 +7,7 @@ use std::ffi::CStr;
use std::fmt::Display;
use std::io::Write;
use std::ptr::null_mut;
-use std::{convert, iter, mem, ptr};
+use std::{iter, mem, ptr};
use zluda_llvm::core::*;
use zluda_llvm::prelude::*;
use zluda_llvm::zluda::*;
@@ -157,7 +157,7 @@ impl NamedIdGenerator {
if let Some(id) = id {
self.register_result(id, func)
} else {
- func(b"\0".as_ptr() as _)
+ func(LLVM_UNNAMED)
}
}
@@ -505,10 +505,12 @@ fn emit_function_variable(
) -> Result<(), TranslateError> {
let builder = ctx.builder.get();
let llvm_type = get_llvm_type(ctx, &variable.type_)?;
- let addr_space = get_llvm_address_space(&ctx.constants, variable.state_space)?;
- let value = ctx.names.register_result(variable.name, |name| unsafe {
- LLVMZludaBuildAlloca(builder, llvm_type, addr_space, name)
- });
+ let value = emit_alloca(
+ ctx,
+ llvm_type,
+ get_llvm_address_space(&ctx.constants, variable.state_space)?,
+ Some(variable.name),
+ );
match variable.initializer {
None => {}
Some(init) => {
@@ -531,12 +533,27 @@ fn emit_method<'a, 'input>(
let llvm_method = emit_method_declaration(ctx, &method)?;
emit_linkage_for_method(&method, is_kernel, llvm_method);
emit_tuning(ctx, llvm_method, &method.tuning);
- for statement in method.body.iter().flat_map(convert::identity) {
+ let statements = match method.body {
+ Some(statements) => statements,
+ None => return Ok(()),
+ };
+ // Initial BB that holds all the variable declarations
+ let bb_with_variables =
+ unsafe { LLVMAppendBasicBlockInContext(ctx.context.get(), llvm_method, LLVM_UNNAMED) };
+ // Rest of the code
+ let starting_bb =
+ unsafe { LLVMAppendBasicBlockInContext(ctx.context.get(), llvm_method, LLVM_UNNAMED) };
+ unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), starting_bb) };
+ for statement in statements.iter() {
register_basic_blocks(ctx, llvm_method, statement);
}
- for statement in method.body.into_iter().flatten() {
+ for statement in statements.into_iter() {
emit_statement(ctx, is_kernel, statement)?;
}
+ // happens if there is a post-ret trailing label
+ terminate_current_block_if_needed(ctx, None);
+ unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), bb_with_variables) };
+ unsafe { LLVMBuildBr(ctx.builder.get(), starting_bb) };
Ok(())
}
@@ -604,7 +621,6 @@ fn emit_statement(
is_kernel: bool,
statement: crate::translate::ExpandedStatement,
) -> Result<(), TranslateError> {
- start_synthetic_basic_block_if_needed(ctx, &statement);
Ok(match statement {
crate::translate::Statement::Label(label) => emit_label(ctx, label)?,
crate::translate::Statement::Variable(var) => emit_function_variable(ctx, var)?,
@@ -749,27 +765,6 @@ fn emit_ret_value(
Ok(())
}
-fn start_synthetic_basic_block_if_needed(
- ctx: &mut EmitContext,
- statement: &crate::translate::ExpandedStatement,
-) {
- let current_block = unsafe { LLVMGetInsertBlock(ctx.builder.get()) };
- if current_block == ptr::null_mut() {
- return;
- }
- let terminator = unsafe { LLVMGetBasicBlockTerminator(current_block) };
- if terminator == ptr::null_mut() {
- return;
- }
- if let crate::translate::Statement::Label(..) = statement {
- return;
- }
- let new_block =
- unsafe { LLVMCreateBasicBlockInContext(ctx.context.get(), b"\0".as_ptr() as _) };
- unsafe { LLVMInsertExistingBasicBlockAfterInsertBlock(ctx.builder.get(), new_block) };
- unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), new_block) };
-}
-
fn emit_ptr_access(
ctx: &mut EmitContext,
ptr_access: &crate::translate::PtrAccess<crate::translate::ExpandedArgParams>,
@@ -1073,7 +1068,7 @@ fn emit_value_copy(
) -> Result<(), TranslateError> {
let builder = ctx.builder.get();
let type_ = get_llvm_type(ctx, type_)?;
- let temp_value = unsafe { LLVMBuildAlloca(builder, type_, LLVM_UNNAMED) };
+ let temp_value = emit_alloca(ctx, type_, ctx.constants.private_space, None);
unsafe { LLVMBuildStore(builder, src, temp_value) };
ctx.names.register_result(dst, |dst| unsafe {
LLVMBuildLoad2(builder, type_, temp_value, dst)
@@ -1081,6 +1076,28 @@ fn emit_value_copy(
Ok(())
}
+// From "Performance Tips for Frontend Authors" (https://llvm.org/docs/Frontend/PerformanceTips.html):
+// "The SROA (Scalar Replacement Of Aggregates) and Mem2Reg passes only attempt to eliminate alloca
+// instructions that are in the entry basic block. Given SSA is the canonical form expected by much
+// of the optimizer; if allocas can not be eliminated by Mem2Reg or SROA, the optimizer is likely to
+// be less effective than it could be."
+fn emit_alloca(
+ ctx: &mut EmitContext,
+ type_: LLVMTypeRef,
+ addr_space: u32,
+ name: Option<Id>,
+) -> LLVMValueRef {
+ let builder = ctx.builder.get();
+ let current_bb = unsafe { LLVMGetInsertBlock(builder) };
+ let variables_bb = unsafe { LLVMGetFirstBasicBlock(LLVMGetBasicBlockParent(current_bb)) };
+ unsafe { LLVMPositionBuilderAtEnd(builder, variables_bb) };
+ let result = ctx.names.register_result_option(name, |name| unsafe {
+ LLVMZludaBuildAlloca(builder, type_, addr_space, name)
+ });
+ unsafe { LLVMPositionBuilderAtEnd(builder, current_bb) };
+ result
+}
+
fn emit_instruction(
ctx: &mut EmitContext,
is_kernel: bool,
@@ -3494,12 +3511,12 @@ fn emit_store_var(
fn emit_label(ctx: &mut EmitContext, label: Id) -> Result<(), TranslateError> {
let new_block = unsafe { LLVMValueAsBasicBlock(ctx.names.value(label)?) };
- terminate_current_block_if_needed(ctx, new_block);
+ terminate_current_block_if_needed(ctx, Some(new_block));
unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), new_block) };
Ok(())
}
-fn terminate_current_block_if_needed(ctx: &mut EmitContext, new_block: LLVMBasicBlockRef) {
+fn terminate_current_block_if_needed(ctx: &mut EmitContext, new_block: Option<LLVMBasicBlockRef>) {
let current_block = unsafe { LLVMGetInsertBlock(ctx.builder.get()) };
if current_block == ptr::null_mut() {
return;
@@ -3508,7 +3525,10 @@ fn terminate_current_block_if_needed(ctx: &mut EmitContext, new_block: LLVMBasic
if terminator != ptr::null_mut() {
return;
}
- unsafe { LLVMBuildBr(ctx.builder.get(), new_block) };
+ match new_block {
+ Some(new_block) => unsafe { LLVMBuildBr(ctx.builder.get(), new_block) },
+ None => unsafe { LLVMBuildUnreachable(ctx.builder.get()) },
+ };
}
fn emit_method_declaration<'input>(
diff --git a/ptx/src/test/spirv_run/abs.ll b/ptx/src/test/spirv_run/abs.ll
index e086eda..4300790 100644
--- a/ptx/src/test/spirv_run/abs.ll
+++ b/ptx/src/test/spirv_run/abs.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 {
-"37":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"28", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr
store i32 %"29", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"14" to ptr
- %"39" = getelementptr inbounds i8, ptr %"31", i64 4
- %"32" = load i32, ptr %"39", align 4
+ %"38" = getelementptr inbounds i8, ptr %"31", i64 4
+ %"32" = load i32, ptr %"38", align 4
store i32 %"32", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"15" = call i32 @llvm.abs.i32(i32 %"16", i1 false)
@@ -35,8 +37,8 @@ define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"35" = inttoptr i64 %"21" to ptr
- %"41" = getelementptr inbounds i8, ptr %"35", i64 4
- store i32 %"22", ptr %"41", align 4
+ %"40" = getelementptr inbounds i8, ptr %"35", i64 4
+ store i32 %"22", ptr %"40", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/activemask.ll b/ptx/src/test/spirv_run/activemask.ll
index 5ca886c..684f89a 100644
--- a/ptx/src/test/spirv_run/activemask.ll
+++ b/ptx/src/test/spirv_run/activemask.ll
@@ -4,11 +4,13 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__activemask() #0
define protected amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"11", ptr addrspace(4) byref(i64) %"12") #1 {
-"15":
%"6" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"6", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"6", align 1
%"7" = load i64, ptr addrspace(4) %"12", align 8
store i64 %"7", ptr addrspace(5) %"4", align 8
%"8" = call i32 @__zluda_ptx_impl__activemask()
diff --git a/ptx/src/test/spirv_run/add.ll b/ptx/src/test/spirv_run/add.ll
index 6a8ed12..babe5bb 100644
--- a/ptx/src/test/spirv_run/add.ll
+++ b/ptx/src/test/spirv_run/add.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
-"22":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
diff --git a/ptx/src/test/spirv_run/add_global.ll b/ptx/src/test/spirv_run/add_global.ll
index 754623c..7034857 100644
--- a/ptx/src/test/spirv_run/add_global.ll
+++ b/ptx/src/test/spirv_run/add_global.ll
@@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa"
@PI = protected addrspace(1) externally_initialized global float 0x400921FB60000000, align 4
define protected amdgpu_kernel void @add_global(ptr addrspace(4) byref(i64) %"20", ptr addrspace(4) byref(i64) %"21") #0 {
-"24":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"8" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"20", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(4) %"21", align 8
diff --git a/ptx/src/test/spirv_run/add_non_coherent.ll b/ptx/src/test/spirv_run/add_non_coherent.ll
index ab8d0bc..4d97dad 100644
--- a/ptx/src/test/spirv_run/add_non_coherent.ll
+++ b/ptx/src/test/spirv_run/add_non_coherent.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
-"22":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
diff --git a/ptx/src/test/spirv_run/add_param_ptr.ll b/ptx/src/test/spirv_run/add_param_ptr.ll
index 810e9c8..9553fa5 100644
--- a/ptx/src/test/spirv_run/add_param_ptr.ll
+++ b/ptx/src/test/spirv_run/add_param_ptr.ll
@@ -2,32 +2,34 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @add_param_ptr(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 {
-"38":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ %1 = alloca i64, align 8, addrspace(5)
+ %2 = alloca i64, align 8, addrspace(5)
+ br label %3
+
+3: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"31" = ptrtoint ptr addrspace(4) %"26" to i64
- %0 = alloca i64, align 8, addrspace(5)
- store i64 %"31", ptr addrspace(5) %0, align 8
- %"30" = load i64, ptr addrspace(5) %0, align 8
+ store i64 %"31", ptr addrspace(5) %1, align 8
+ %"30" = load i64, ptr addrspace(5) %1, align 8
store i64 %"30", ptr addrspace(5) %"4", align 8
%"33" = ptrtoint ptr addrspace(4) %"27" to i64
- %1 = alloca i64, align 8, addrspace(5)
- store i64 %"33", ptr addrspace(5) %1, align 8
- %"32" = load i64, ptr addrspace(5) %1, align 8
+ store i64 %"33", ptr addrspace(5) %2, align 8
+ %"32" = load i64, ptr addrspace(5) %2, align 8
store i64 %"32", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"34" = inttoptr i64 %"12" to ptr addrspace(4)
- %"40" = getelementptr inbounds i8, ptr addrspace(4) %"34", i64 0
- %"11" = load i64, ptr addrspace(4) %"40", align 8
+ %"39" = getelementptr inbounds i8, ptr addrspace(4) %"34", i64 0
+ %"11" = load i64, ptr addrspace(4) %"39", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"35" = inttoptr i64 %"14" to ptr addrspace(4)
- %"42" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0
- %"13" = load i64, ptr addrspace(4) %"42", align 8
+ %"41" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0
+ %"13" = load i64, ptr addrspace(4) %"41", align 8
store i64 %"13", ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"36" = inttoptr i64 %"16" to ptr
diff --git a/ptx/src/test/spirv_run/add_tuning.ll b/ptx/src/test/spirv_run/add_tuning.ll
index 9ec6795..ac2972c 100644
--- a/ptx/src/test/spirv_run/add_tuning.ll
+++ b/ptx/src/test/spirv_run/add_tuning.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
-"22":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
diff --git a/ptx/src/test/spirv_run/addc_cc.ll b/ptx/src/test/spirv_run/addc_cc.ll
index 3299982..d781744 100644
--- a/ptx/src/test/spirv_run/addc_cc.ll
+++ b/ptx/src/test/spirv_run/addc_cc.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53", ptr addrspace(4) byref(i64) %"54") #0 {
-"68":
%"13" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"13", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -14,6 +12,10 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53",
%"10" = alloca i32, align 4, addrspace(5)
%"11" = alloca i32, align 4, addrspace(5)
%"12" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"13", align 1
%"14" = load i64, ptr addrspace(4) %"53", align 8
store i64 %"14", ptr addrspace(5) %"4", align 8
%"15" = load i64, ptr addrspace(4) %"54", align 8
@@ -24,45 +26,45 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53",
store i32 %"55", ptr addrspace(5) %"9", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"57" = inttoptr i64 %"19" to ptr
- %"70" = getelementptr inbounds i8, ptr %"57", i64 4
- %"58" = load i32, ptr %"70", align 4
+ %"69" = getelementptr inbounds i8, ptr %"57", i64 4
+ %"58" = load i32, ptr %"69", align 4
store i32 %"58", ptr addrspace(5) %"10", align 4
%"21" = load i64, ptr addrspace(5) %"4", align 8
%"59" = inttoptr i64 %"21" to ptr
- %"72" = getelementptr inbounds i8, ptr %"59", i64 8
- %"20" = load i32, ptr %"72", align 4
+ %"71" = getelementptr inbounds i8, ptr %"59", i64 8
+ %"20" = load i32, ptr %"71", align 4
store i32 %"20", ptr addrspace(5) %"11", align 4
%"23" = load i64, ptr addrspace(5) %"4", align 8
%"60" = inttoptr i64 %"23" to ptr
- %"74" = getelementptr inbounds i8, ptr %"60", i64 12
- %"22" = load i32, ptr %"74", align 4
+ %"73" = getelementptr inbounds i8, ptr %"60", i64 12
+ %"22" = load i32, ptr %"73", align 4
store i32 %"22", ptr addrspace(5) %"12", align 4
%"26" = load i32, ptr addrspace(5) %"9", align 4
%"27" = load i32, ptr addrspace(5) %"10", align 4
- %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"26", i32 %"27")
- %"24" = extractvalue { i32, i1 } %0, 0
- %"25" = extractvalue { i32, i1 } %0, 1
+ %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"26", i32 %"27")
+ %"24" = extractvalue { i32, i1 } %2, 0
+ %"25" = extractvalue { i32, i1 } %2, 1
store i32 %"24", ptr addrspace(5) %"6", align 4
store i1 %"25", ptr addrspace(5) %"13", align 1
%"30" = load i1, ptr addrspace(5) %"13", align 1
%"31" = load i32, ptr addrspace(5) %"6", align 4
%"32" = load i32, ptr addrspace(5) %"11", align 4
- %1 = zext i1 %"30" to i32
- %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"31", i32 %"32")
- %3 = extractvalue { i32, i1 } %2, 0
- %4 = extractvalue { i32, i1 } %2, 1
- %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1)
- %"28" = extractvalue { i32, i1 } %5, 0
- %6 = extractvalue { i32, i1 } %5, 1
- %"29" = xor i1 %4, %6
+ %3 = zext i1 %"30" to i32
+ %4 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"31", i32 %"32")
+ %5 = extractvalue { i32, i1 } %4, 0
+ %6 = extractvalue { i32, i1 } %4, 1
+ %7 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %5, i32 %3)
+ %"28" = extractvalue { i32, i1 } %7, 0
+ %8 = extractvalue { i32, i1 } %7, 1
+ %"29" = xor i1 %6, %8
store i32 %"28", ptr addrspace(5) %"7", align 4
store i1 %"29", ptr addrspace(5) %"13", align 1
%"34" = load i1, ptr addrspace(5) %"13", align 1
%"35" = load i32, ptr addrspace(5) %"7", align 4
%"36" = load i32, ptr addrspace(5) %"12", align 4
- %7 = zext i1 %"34" to i32
- %8 = add i32 %"35", %"36"
- %"33" = add i32 %8, %7
+ %9 = zext i1 %"34" to i32
+ %10 = add i32 %"35", %"36"
+ %"33" = add i32 %10, %9
store i32 %"33", ptr addrspace(5) %"8", align 4
%"37" = load i64, ptr addrspace(5) %"5", align 8
%"38" = load i32, ptr addrspace(5) %"6", align 4
@@ -71,13 +73,13 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53",
%"39" = load i64, ptr addrspace(5) %"5", align 8
%"40" = load i32, ptr addrspace(5) %"7", align 4
%"66" = inttoptr i64 %"39" to ptr
- %"76" = getelementptr inbounds i8, ptr %"66", i64 4
- store i32 %"40", ptr %"76", align 4
+ %"75" = getelementptr inbounds i8, ptr %"66", i64 4
+ store i32 %"40", ptr %"75", align 4
%"41" = load i64, ptr addrspace(5) %"5", align 8
%"42" = load i32, ptr addrspace(5) %"8", align 4
%"67" = inttoptr i64 %"41" to ptr
- %"78" = getelementptr inbounds i8, ptr %"67", i64 8
- store i32 %"42", ptr %"78", align 4
+ %"77" = getelementptr inbounds i8, ptr %"67", i64 8
+ store i32 %"42", ptr %"77", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/addc_cc2.ll b/ptx/src/test/spirv_run/addc_cc2.ll
index 836d8d5..cd06ea2 100644
--- a/ptx/src/test/spirv_run/addc_cc2.ll
+++ b/ptx/src/test/spirv_run/addc_cc2.ll
@@ -2,50 +2,52 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @addc_cc2(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 {
-"50":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"40", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
- %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
- %"41" = extractvalue { i32, i1 } %0, 0
- %"12" = extractvalue { i32, i1 } %0, 1
+ %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
+ %"41" = extractvalue { i32, i1 } %2, 0
+ %"12" = extractvalue { i32, i1 } %2, 1
store i32 %"41", ptr addrspace(5) %"6", align 4
store i1 %"12", ptr addrspace(5) %"9", align 1
%"15" = load i1, ptr addrspace(5) %"9", align 1
- %1 = zext i1 %"15" to i32
- %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -4, i32 -4)
- %3 = extractvalue { i32, i1 } %2, 0
- %4 = extractvalue { i32, i1 } %2, 1
- %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1)
- %"42" = extractvalue { i32, i1 } %5, 0
- %6 = extractvalue { i32, i1 } %5, 1
- %"14" = xor i1 %4, %6
+ %3 = zext i1 %"15" to i32
+ %4 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -4, i32 -4)
+ %5 = extractvalue { i32, i1 } %4, 0
+ %6 = extractvalue { i32, i1 } %4, 1
+ %7 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %5, i32 %3)
+ %"42" = extractvalue { i32, i1 } %7, 0
+ %8 = extractvalue { i32, i1 } %7, 1
+ %"14" = xor i1 %6, %8
store i32 %"42", ptr addrspace(5) %"6", align 4
store i1 %"14", ptr addrspace(5) %"9", align 1
%"17" = load i1, ptr addrspace(5) %"9", align 1
- %7 = zext i1 %"17" to i32
- %"43" = add i32 0, %7
+ %9 = zext i1 %"17" to i32
+ %"43" = add i32 0, %9
store i32 %"43", ptr addrspace(5) %"7", align 4
%"20" = load i1, ptr addrspace(5) %"9", align 1
- %8 = zext i1 %"20" to i32
- %9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1)
- %10 = extractvalue { i32, i1 } %9, 0
- %11 = extractvalue { i32, i1 } %9, 1
- %12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %10, i32 %8)
- %"44" = extractvalue { i32, i1 } %12, 0
- %13 = extractvalue { i32, i1 } %12, 1
- %"19" = xor i1 %11, %13
+ %10 = zext i1 %"20" to i32
+ %11 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1)
+ %12 = extractvalue { i32, i1 } %11, 0
+ %13 = extractvalue { i32, i1 } %11, 1
+ %14 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %12, i32 %10)
+ %"44" = extractvalue { i32, i1 } %14, 0
+ %15 = extractvalue { i32, i1 } %14, 1
+ %"19" = xor i1 %13, %15
store i32 %"44", ptr addrspace(5) %"6", align 4
store i1 %"19", ptr addrspace(5) %"9", align 1
%"22" = load i1, ptr addrspace(5) %"9", align 1
- %14 = zext i1 %"22" to i32
- %"45" = add i32 0, %14
+ %16 = zext i1 %"22" to i32
+ %"45" = add i32 0, %16
store i32 %"45", ptr addrspace(5) %"8", align 4
%"23" = load i64, ptr addrspace(5) %"5", align 8
%"24" = load i32, ptr addrspace(5) %"7", align 4
@@ -54,8 +56,8 @@ define protected amdgpu_kernel void @addc_cc2(ptr addrspace(4) byref(i64) %"39",
%"25" = load i64, ptr addrspace(5) %"5", align 8
%"26" = load i32, ptr addrspace(5) %"8", align 4
%"48" = inttoptr i64 %"25" to ptr
- %"52" = getelementptr inbounds i8, ptr %"48", i64 4
- store i32 %"26", ptr %"52", align 4
+ %"51" = getelementptr inbounds i8, ptr %"48", i64 4
+ store i32 %"26", ptr %"51", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/alloca_call.ll b/ptx/src/test/spirv_run/alloca_call.ll
index e6a9d6f..aae7a91 100644
--- a/ptx/src/test/spirv_run/alloca_call.ll
+++ b/ptx/src/test/spirv_run/alloca_call.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 {
-"58":
%"22" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"22", align 1
%"7" = alloca i1, align 1, addrspace(5)
%"8" = alloca double, align 8, addrspace(5)
%"9" = alloca double, align 8, addrspace(5)
@@ -14,6 +12,10 @@ define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr
%"13" = alloca i64, align 8, addrspace(5)
%"46" = alloca i64, align 8, addrspace(5)
%"48" = alloca [4 x i32], align 16, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"22", align 1
%"50" = load i64, ptr addrspace(4) %"42", align 8
store i64 %"50", ptr addrspace(5) %"10", align 8
%"51" = load i64, ptr addrspace(4) %"43", align 8
@@ -29,30 +31,30 @@ define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr
%"30" = load i1, ptr addrspace(5) %"7", align 1
br i1 %"30", label %"6", label %"18"
-"18": ; preds = %"58"
+"18": ; preds = %1
%"31" = load i64, ptr addrspace(5) %"11", align 8
- %"60" = getelementptr inbounds i8, ptr addrspace(5) %"46", i64 0
- store i64 %"31", ptr addrspace(5) %"60", align 8
+ %"59" = getelementptr inbounds i8, ptr addrspace(5) %"46", i64 0
+ store i64 %"31", ptr addrspace(5) %"59", align 8
%"32" = load i64, ptr addrspace(5) %"11", align 8
- %0 = inttoptr i64 %"32" to ptr
- %"21" = call [4 x i32] %0()
+ %2 = inttoptr i64 %"32" to ptr
+ %"21" = call [4 x i32] %2()
store [4 x i32] %"21", ptr addrspace(5) %"48", align 4
- %"62" = getelementptr inbounds i8, ptr addrspace(5) %"48", i64 0
- %"19" = load <2 x double>, ptr addrspace(5) %"62", align 16
+ %"61" = getelementptr inbounds i8, ptr addrspace(5) %"48", i64 0
+ %"19" = load <2 x double>, ptr addrspace(5) %"61", align 16
%"33" = extractelement <2 x double> %"19", i32 0
%"34" = extractelement <2 x double> %"19", i32 1
store double %"33", ptr addrspace(5) %"8", align 8
store double %"34", ptr addrspace(5) %"9", align 8
%"35" = load double, ptr addrspace(5) %"8", align 8
%"36" = load double, ptr addrspace(5) %"9", align 8
- %1 = insertelement <2 x double> undef, double %"35", i32 0
- %"20" = insertelement <2 x double> %1, double %"36", i32 1
+ %3 = insertelement <2 x double> undef, double %"35", i32 0
+ %"20" = insertelement <2 x double> %3, double %"36", i32 1
%"37" = load i64, ptr addrspace(5) %"10", align 8
%"57" = inttoptr i64 %"37" to ptr addrspace(1)
store <2 x double> %"20", ptr addrspace(1) %"57", align 16
br label %"6"
-"6": ; preds = %"18", %"58"
+"6": ; preds = %"18", %1
ret void
}
diff --git a/ptx/src/test/spirv_run/amdgpu_unnamed.ll b/ptx/src/test/spirv_run/amdgpu_unnamed.ll
index 61e3de4..1a1ce58 100644
--- a/ptx/src/test/spirv_run/amdgpu_unnamed.ll
+++ b/ptx/src/test/spirv_run/amdgpu_unnamed.ll
@@ -8,9 +8,7 @@ target triple = "amdgcn-amd-amdhsa"
declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0
define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #1 {
-"73":
%"33" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"33", align 1
%"14" = alloca i64, align 8, addrspace(5)
%"15" = alloca i64, align 8, addrspace(5)
%"16" = alloca i64, align 8, addrspace(5)
@@ -19,10 +17,17 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64)
%"19" = alloca i64, align 8, addrspace(5)
%"20" = alloca i32, align 4, addrspace(5)
%"59" = alloca i64, align 8, addrspace(5)
+ %1 = alloca i64, align 8, addrspace(5)
%"60" = alloca i64, align 8, addrspace(5)
+ %2 = alloca i64, align 8, addrspace(5)
%"61" = alloca i32, align 4, addrspace(5)
%"62" = alloca i64, align 8, addrspace(5)
+ %3 = alloca i64, align 8, addrspace(5)
%"63" = alloca i64, align 8, addrspace(5)
+ br label %4
+
+4: ; preds = %0
+ store i1 false, ptr addrspace(5) %"33", align 1
%"34" = load i64, ptr addrspace(4) %"57", align 8
store i64 %"34", ptr addrspace(5) %"14", align 8
%"35" = load i64, ptr addrspace(4) %"58", align 8
@@ -37,28 +42,25 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64)
%"40" = load i1, ptr addrspace(5) %"18", align 1
br i1 %"40", label %"13", label %"27"
-"27": ; preds = %"73"
- %0 = alloca i64, align 8, addrspace(5)
- store i64 ptrtoint (ptr addrspace(1) @0 to i64), ptr addrspace(5) %0, align 8
- %"66" = load i64, ptr addrspace(5) %0, align 8
+"27": ; preds = %4
+ store i64 ptrtoint (ptr addrspace(1) @0 to i64), ptr addrspace(5) %1, align 8
+ %"66" = load i64, ptr addrspace(5) %1, align 8
store i64 %"66", ptr addrspace(5) %"19", align 8
%"42" = load i64, ptr addrspace(5) %"19", align 8
store i64 %"42", ptr addrspace(5) %"59", align 8
- %1 = alloca i64, align 8, addrspace(5)
- store i64 ptrtoint (ptr addrspace(1) @1 to i64), ptr addrspace(5) %1, align 8
- %"68" = load i64, ptr addrspace(5) %1, align 8
+ store i64 ptrtoint (ptr addrspace(1) @1 to i64), ptr addrspace(5) %2, align 8
+ %"68" = load i64, ptr addrspace(5) %2, align 8
store i64 %"68", ptr addrspace(5) %"19", align 8
%"44" = load i64, ptr addrspace(5) %"19", align 8
store i64 %"44", ptr addrspace(5) %"60", align 8
store i32 1, ptr addrspace(5) %"61", align 4
- %2 = alloca i64, align 8, addrspace(5)
- store i64 ptrtoint (ptr addrspace(1) @2 to i64), ptr addrspace(5) %2, align 8
- %"70" = load i64, ptr addrspace(5) %2, align 8
+ store i64 ptrtoint (ptr addrspace(1) @2 to i64), ptr addrspace(5) %3, align 8
+ %"70" = load i64, ptr addrspace(5) %3, align 8
store i64 %"70", ptr addrspace(5) %"19", align 8
%"46" = load i64, ptr addrspace(5) %"19", align 8
store i64 %"46", ptr addrspace(5) %"62", align 8
- %"75" = getelementptr inbounds i8, ptr addrspace(5) %"63", i64 0
- store i64 1, ptr addrspace(5) %"75", align 8
+ %"74" = getelementptr inbounds i8, ptr addrspace(5) %"63", i64 0
+ store i64 1, ptr addrspace(5) %"74", align 8
%"28" = load i64, ptr addrspace(5) %"59", align 8
%"29" = load i64, ptr addrspace(5) %"60", align 8
%"30" = load i32, ptr addrspace(5) %"61", align 4
@@ -67,7 +69,7 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64)
call void @__zluda_ptx_impl____assertfail(i64 %"28", i64 %"29", i32 %"30", i64 %"31", i64 %"32")
br label %"13"
-"13": ; preds = %"27", %"73"
+"13": ; preds = %"27", %4
%"48" = load i64, ptr addrspace(5) %"16", align 8
%"47" = add i64 %"48", 1
store i64 %"47", ptr addrspace(5) %"17", align 8
diff --git a/ptx/src/test/spirv_run/and.ll b/ptx/src/test/spirv_run/and.ll
index c90f390..7bb262d 100644
--- a/ptx/src/test/spirv_run/and.ll
+++ b/ptx/src/test/spirv_run/and.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"30":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"22", ptr
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
- %"32" = getelementptr inbounds i8, ptr %"25", i64 4
- %"13" = load i32, ptr %"32", align 4
+ %"31" = getelementptr inbounds i8, ptr %"25", i64 4
+ %"13" = load i32, ptr %"31", align 4
store i32 %"13", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"17" = load i32, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/assertfail.ll b/ptx/src/test/spirv_run/assertfail.ll
index 001dbfe..9334859 100644
--- a/ptx/src/test/spirv_run/assertfail.ll
+++ b/ptx/src/test/spirv_run/assertfail.ll
@@ -4,42 +4,44 @@ target triple = "amdgcn-amd-amdhsa"
declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0
define protected amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"62", ptr addrspace(4) byref(i64) %"63") #1 {
-"81":
%"35" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"35", align 1
%"15" = alloca i64, align 8, addrspace(5)
%"16" = alloca i64, align 8, addrspace(5)
%"17" = alloca i64, align 8, addrspace(5)
%"18" = alloca i64, align 8, addrspace(5)
%"19" = alloca i32, align 4, addrspace(5)
+ %1 = alloca i32, align 4, addrspace(5)
%"64" = alloca i64, align 8, addrspace(5)
%"66" = alloca i64, align 8, addrspace(5)
%"68" = alloca i32, align 4, addrspace(5)
%"70" = alloca i64, align 8, addrspace(5)
%"72" = alloca i64, align 8, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"35", align 1
%"36" = load i64, ptr addrspace(4) %"62", align 8
store i64 %"36", ptr addrspace(5) %"15", align 8
%"37" = load i64, ptr addrspace(4) %"63", align 8
store i64 %"37", ptr addrspace(5) %"16", align 8
- %0 = alloca i32, align 4, addrspace(5)
- store i32 0, ptr addrspace(5) %0, align 4
- %"74" = load i32, ptr addrspace(5) %0, align 4
+ store i32 0, ptr addrspace(5) %1, align 4
+ %"74" = load i32, ptr addrspace(5) %1, align 4
store i32 %"74", ptr addrspace(5) %"19", align 4
%"39" = load i64, ptr addrspace(5) %"15", align 8
- %"83" = getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0
- store i64 %"39", ptr addrspace(5) %"83", align 8
+ %"82" = getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0
+ store i64 %"39", ptr addrspace(5) %"82", align 8
%"40" = load i64, ptr addrspace(5) %"15", align 8
- %"85" = getelementptr inbounds i8, ptr addrspace(5) %"66", i64 0
- store i64 %"40", ptr addrspace(5) %"85", align 8
+ %"84" = getelementptr inbounds i8, ptr addrspace(5) %"66", i64 0
+ store i64 %"40", ptr addrspace(5) %"84", align 8
%"41" = load i32, ptr addrspace(5) %"19", align 4
- %"87" = getelementptr inbounds i8, ptr addrspace(5) %"68", i64 0
- store i32 %"41", ptr addrspace(5) %"87", align 4
+ %"86" = getelementptr inbounds i8, ptr addrspace(5) %"68", i64 0
+ store i32 %"41", ptr addrspace(5) %"86", align 4
%"42" = load i64, ptr addrspace(5) %"15", align 8
- %"89" = getelementptr inbounds i8, ptr addrspace(5) %"70", i64 0
- store i64 %"42", ptr addrspace(5) %"89", align 8
+ %"88" = getelementptr inbounds i8, ptr addrspace(5) %"70", i64 0
+ store i64 %"42", ptr addrspace(5) %"88", align 8
%"43" = load i64, ptr addrspace(5) %"15", align 8
- %"91" = getelementptr inbounds i8, ptr addrspace(5) %"72", i64 0
- store i64 %"43", ptr addrspace(5) %"91", align 8
+ %"90" = getelementptr inbounds i8, ptr addrspace(5) %"72", i64 0
+ store i64 %"43", ptr addrspace(5) %"90", align 8
%"30" = load i64, ptr addrspace(5) %"64", align 8
%"31" = load i64, ptr addrspace(5) %"66", align 8
%"32" = load i32, ptr addrspace(5) %"68", align 4
diff --git a/ptx/src/test/spirv_run/atom_add.ll b/ptx/src/test/spirv_run/atom_add.ll
index dff9e0e..6dd159f 100644
--- a/ptx/src/test/spirv_run/atom_add.ll
+++ b/ptx/src/test/spirv_run/atom_add.ll
@@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa"
@"4" = private addrspace(3) global [1024 x i8] undef, align 4
define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 {
-"37":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
@@ -21,8 +23,8 @@ define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28",
store i32 %"12", ptr addrspace(5) %"7", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"31" = inttoptr i64 %"15" to ptr
- %"39" = getelementptr inbounds i8, ptr %"31", i64 4
- %"14" = load i32, ptr %"39", align 4
+ %"38" = getelementptr inbounds i8, ptr %"31", i64 4
+ %"14" = load i32, ptr %"38", align 4
store i32 %"14", ptr addrspace(5) %"8", align 4
%"16" = load i32, ptr addrspace(5) %"7", align 4
store i32 %"16", ptr addrspace(3) @"4", align 4
@@ -38,8 +40,8 @@ define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28",
%"22" = load i64, ptr addrspace(5) %"6", align 8
%"23" = load i32, ptr addrspace(5) %"8", align 4
%"36" = inttoptr i64 %"22" to ptr
- %"41" = getelementptr inbounds i8, ptr %"36", i64 4
- store i32 %"23", ptr %"41", align 4
+ %"40" = getelementptr inbounds i8, ptr %"36", i64 4
+ store i32 %"23", ptr %"40", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/atom_add_f16.ll b/ptx/src/test/spirv_run/atom_add_f16.ll
index e63de90..a8fa430 100644
--- a/ptx/src/test/spirv_run/atom_add_f16.ll
+++ b/ptx/src/test/spirv_run/atom_add_f16.ll
@@ -4,20 +4,22 @@ target triple = "amdgcn-amd-amdhsa"
@"4" = private addrspace(3) global [1024 x i8] undef, align 4
define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 {
-"37":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca half, align 2, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"26", align 8
store i64 %"9", ptr addrspace(5) %"5", align 8
%"10" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"10", ptr addrspace(5) %"6", align 8
%"12" = load i64, ptr addrspace(5) %"5", align 8
%"28" = inttoptr i64 %"12" to ptr
- %"39" = getelementptr inbounds i8, ptr %"28", i64 2
- %"29" = load i16, ptr %"39", align 2
+ %"38" = getelementptr inbounds i8, ptr %"28", i64 2
+ %"29" = load i16, ptr %"38", align 2
%"11" = bitcast i16 %"29" to half
store half %"11", ptr addrspace(5) %"7", align 2
%"14" = load i64, ptr addrspace(5) %"5", align 8
@@ -38,9 +40,9 @@ define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %"
%"20" = load i64, ptr addrspace(5) %"6", align 8
%"21" = load half, ptr addrspace(5) %"7", align 2
%"35" = inttoptr i64 %"20" to ptr
- %"41" = getelementptr inbounds i8, ptr %"35", i64 2
+ %"40" = getelementptr inbounds i8, ptr %"35", i64 2
%"36" = bitcast half %"21" to i16
- store i16 %"36", ptr %"41", align 2
+ store i16 %"36", ptr %"40", align 2
ret void
}
diff --git a/ptx/src/test/spirv_run/atom_add_float.ll b/ptx/src/test/spirv_run/atom_add_float.ll
index 329d198..d0e3c14 100644
--- a/ptx/src/test/spirv_run/atom_add_float.ll
+++ b/ptx/src/test/spirv_run/atom_add_float.ll
@@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa"
@"4" = private addrspace(3) global [1024 x i8] undef, align 4
define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 {
-"37":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"8" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
@@ -21,8 +23,8 @@ define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64)
store float %"12", ptr addrspace(5) %"7", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"31" = inttoptr i64 %"15" to ptr
- %"39" = getelementptr inbounds i8, ptr %"31", i64 4
- %"14" = load float, ptr %"39", align 4
+ %"38" = getelementptr inbounds i8, ptr %"31", i64 4
+ %"14" = load float, ptr %"38", align 4
store float %"14", ptr addrspace(5) %"8", align 4
%"16" = load float, ptr addrspace(5) %"7", align 4
store float %"16", ptr addrspace(3) @"4", align 4
@@ -38,8 +40,8 @@ define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64)
%"22" = load i64, ptr addrspace(5) %"6", align 8
%"23" = load float, ptr addrspace(5) %"8", align 4
%"36" = inttoptr i64 %"22" to ptr
- %"41" = getelementptr inbounds i8, ptr %"36", i64 4
- store float %"23", ptr %"41", align 4
+ %"40" = getelementptr inbounds i8, ptr %"36", i64 4
+ store float %"23", ptr %"40", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/atom_cas.ll b/ptx/src/test/spirv_run/atom_cas.ll
index 2e0475a..a9af2c4 100644
--- a/ptx/src/test/spirv_run/atom_cas.ll
+++ b/ptx/src/test/spirv_run/atom_cas.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 {
-"38":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"29", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"30", align 8
@@ -20,14 +22,14 @@ define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29",
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"15" = load i32, ptr addrspace(5) %"6", align 4
%"32" = inttoptr i64 %"14" to ptr
- %"40" = getelementptr inbounds i8, ptr %"32", i64 4
- %0 = cmpxchg ptr %"40", i32 %"15", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
- %"33" = extractvalue { i32, i1 } %0, 0
+ %"39" = getelementptr inbounds i8, ptr %"32", i64 4
+ %2 = cmpxchg ptr %"39", i32 %"15", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
+ %"33" = extractvalue { i32, i1 } %2, 0
store i32 %"33", ptr addrspace(5) %"6", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"35" = inttoptr i64 %"17" to ptr
- %"42" = getelementptr inbounds i8, ptr %"35", i64 4
- %"16" = load i32, ptr %"42", align 4
+ %"41" = getelementptr inbounds i8, ptr %"35", i64 4
+ %"16" = load i32, ptr %"41", align 4
store i32 %"16", ptr addrspace(5) %"7", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i32, ptr addrspace(5) %"6", align 4
@@ -36,8 +38,8 @@ define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29",
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load i32, ptr addrspace(5) %"7", align 4
%"37" = inttoptr i64 %"20" to ptr
- %"44" = getelementptr inbounds i8, ptr %"37", i64 4
- store i32 %"21", ptr %"44", align 4
+ %"43" = getelementptr inbounds i8, ptr %"37", i64 4
+ store i32 %"21", ptr %"43", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/atom_inc.ll b/ptx/src/test/spirv_run/atom_inc.ll
index 6fdc3c7..212c592 100644
--- a/ptx/src/test/spirv_run/atom_inc.ll
+++ b/ptx/src/test/spirv_run/atom_inc.ll
@@ -6,14 +6,16 @@ declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr, i32) #0
declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_global_inc(ptr addrspace(1), i32) #0
define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #1 {
-"38":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"31", align 8
@@ -37,13 +39,13 @@ define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"30",
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load i32, ptr addrspace(5) %"7", align 4
%"36" = inttoptr i64 %"20" to ptr
- %"48" = getelementptr inbounds i8, ptr %"36", i64 4
- store i32 %"21", ptr %"48", align 4
+ %"47" = getelementptr inbounds i8, ptr %"36", i64 4
+ store i32 %"21", ptr %"47", align 4
%"22" = load i64, ptr addrspace(5) %"5", align 8
%"23" = load i32, ptr addrspace(5) %"8", align 4
%"37" = inttoptr i64 %"22" to ptr
- %"50" = getelementptr inbounds i8, ptr %"37", i64 8
- store i32 %"23", ptr %"50", align 4
+ %"49" = getelementptr inbounds i8, ptr %"37", i64 8
+ store i32 %"23", ptr %"49", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/atom_ld_st.ll b/ptx/src/test/spirv_run/atom_ld_st.ll
index 3b6488c..eb59d31 100644
--- a/ptx/src/test/spirv_run/atom_ld_st.ll
+++ b/ptx/src/test/spirv_run/atom_ld_st.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @atom_ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 {
-"18":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"14", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"15", align 8
diff --git a/ptx/src/test/spirv_run/atom_ld_st_vec.ll b/ptx/src/test/spirv_run/atom_ld_st_vec.ll
index 7ea0fc5..5fa2409 100644
--- a/ptx/src/test/spirv_run/atom_ld_st_vec.ll
+++ b/ptx/src/test/spirv_run/atom_ld_st_vec.ll
@@ -2,33 +2,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @atom_ld_st_vec(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 {
-"23":
%"10" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"10", align 1
%"11" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"20", align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"21" = inttoptr i64 %"13" to ptr
- %0 = load atomic i128, ptr %"21" syncscope("agent-one-as") acquire, align 16
- %"8" = bitcast i128 %0 to <2 x i64>
+ %2 = load atomic i128, ptr %"21" syncscope("agent-one-as") acquire, align 16
+ %"8" = bitcast i128 %2 to <2 x i64>
%"14" = extractelement <2 x i64> %"8", i32 0
%"15" = extractelement <2 x i64> %"8", i32 1
store i64 %"14", ptr addrspace(5) %"6", align 8
store i64 %"15", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"6", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
- %1 = insertelement <2 x i64> undef, i64 %"16", i32 0
- %"9" = insertelement <2 x i64> %1, i64 %"17", i32 1
+ %3 = insertelement <2 x i64> undef, i64 %"16", i32 0
+ %"9" = insertelement <2 x i64> %3, i64 %"17", i32 1
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"22" = inttoptr i64 %"18" to ptr
- %2 = bitcast <2 x i64> %"9" to i128
- store atomic i128 %2, ptr %"22" syncscope("agent-one-as") release, align 16
+ %4 = bitcast <2 x i64> %"9" to i128
+ store atomic i128 %4, ptr %"22" syncscope("agent-one-as") release, align 16
ret void
}
diff --git a/ptx/src/test/spirv_run/atom_max_u32.ll b/ptx/src/test/spirv_run/atom_max_u32.ll
index 64cb430..8135e3d 100644
--- a/ptx/src/test/spirv_run/atom_max_u32.ll
+++ b/ptx/src/test/spirv_run/atom_max_u32.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"30":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
@@ -23,8 +25,8 @@ define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %"
store i32 %"14", ptr %"25", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"16" to ptr
- %"32" = getelementptr inbounds i8, ptr %"26", i64 4
- %"15" = load i32, ptr %"32", align 4
+ %"31" = getelementptr inbounds i8, ptr %"26", i64 4
+ %"15" = load i32, ptr %"31", align 4
store i32 %"15", ptr addrspace(5) %"7", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i32, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/b64tof64.ll b/ptx/src/test/spirv_run/b64tof64.ll
index 5cd7a2c..4a8d9b3 100644
--- a/ptx/src/test/spirv_run/b64tof64.ll
+++ b/ptx/src/test/spirv_run/b64tof64.ll
@@ -2,22 +2,24 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
-"23":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca double, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ %1 = alloca i64, align 8, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load double, ptr addrspace(4) %"17", align 8
store double %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"6", align 8
%"12" = load double, ptr addrspace(5) %"4", align 8
%"20" = bitcast double %"12" to i64
- %0 = alloca i64, align 8, addrspace(5)
- store i64 %"20", ptr addrspace(5) %0, align 8
- %"11" = load i64, ptr addrspace(5) %0, align 8
+ store i64 %"20", ptr addrspace(5) %1, align 8
+ %"11" = load i64, ptr addrspace(5) %1, align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"21" = inttoptr i64 %"14" to ptr
diff --git a/ptx/src/test/spirv_run/barrier.ll b/ptx/src/test/spirv_run/barrier.ll
index e2e65f2..55d0c93 100644
--- a/ptx/src/test/spirv_run/barrier.ll
+++ b/ptx/src/test/spirv_run/barrier.ll
@@ -4,8 +4,10 @@ target triple = "amdgcn-amd-amdhsa"
declare void @__zluda_ptx_impl__barrier_sync(i32) #0
define protected amdgpu_kernel void @barrier() #1 {
-"4":
%"2" = alloca i1, align 1, addrspace(5)
+ br label %1
+
+1: ; preds = %0
store i1 false, ptr addrspace(5) %"2", align 1
call void @__zluda_ptx_impl__barrier_sync(i32 0)
ret void
diff --git a/ptx/src/test/spirv_run/bfe.ll b/ptx/src/test/spirv_run/bfe.ll
index 99fd766..6644c20 100644
--- a/ptx/src/test/spirv_run/bfe.ll
+++ b/ptx/src/test/spirv_run/bfe.ll
@@ -4,14 +4,16 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__bfe_u32(i32, i32, i32) #0
define protected amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 {
-"34":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
@@ -22,13 +24,13 @@ define protected amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"28", ptr
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"15" to ptr
- %"41" = getelementptr inbounds i8, ptr %"31", i64 4
- %"14" = load i32, ptr %"41", align 4
+ %"40" = getelementptr inbounds i8, ptr %"31", i64 4
+ %"14" = load i32, ptr %"40", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"32" = inttoptr i64 %"17" to ptr
- %"43" = getelementptr inbounds i8, ptr %"32", i64 8
- %"16" = load i32, ptr %"43", align 4
+ %"42" = getelementptr inbounds i8, ptr %"32", i64 8
+ %"16" = load i32, ptr %"42", align 4
store i32 %"16", ptr addrspace(5) %"8", align 4
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"20" = load i32, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/bfi.ll b/ptx/src/test/spirv_run/bfi.ll
index bea4ac5..3c6a377 100644
--- a/ptx/src/test/spirv_run/bfi.ll
+++ b/ptx/src/test/spirv_run/bfi.ll
@@ -4,15 +4,17 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__bfi_b32(i32, i32, i32, i32) #0
define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #1 {
-"44":
%"10" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"10", align 1
%"11" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"35", align 8
@@ -23,18 +25,18 @@ define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"34", ptr
store i32 %"13", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"37" = inttoptr i64 %"16" to ptr
- %"52" = getelementptr inbounds i8, ptr %"37", i64 4
- %"15" = load i32, ptr %"52", align 4
+ %"51" = getelementptr inbounds i8, ptr %"37", i64 4
+ %"15" = load i32, ptr %"51", align 4
store i32 %"15", ptr addrspace(5) %"7", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"38" = inttoptr i64 %"18" to ptr
- %"54" = getelementptr inbounds i8, ptr %"38", i64 8
- %"17" = load i32, ptr %"54", align 4
+ %"53" = getelementptr inbounds i8, ptr %"38", i64 8
+ %"17" = load i32, ptr %"53", align 4
store i32 %"17", ptr addrspace(5) %"8", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"39" = inttoptr i64 %"20" to ptr
- %"56" = getelementptr inbounds i8, ptr %"39", i64 12
- %"19" = load i32, ptr %"56", align 4
+ %"55" = getelementptr inbounds i8, ptr %"39", i64 12
+ %"19" = load i32, ptr %"55", align 4
store i32 %"19", ptr addrspace(5) %"9", align 4
%"22" = load i32, ptr addrspace(5) %"6", align 4
%"23" = load i32, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/bfind.ll b/ptx/src/test/spirv_run/bfind.ll
index ebd9fea..a427332 100644
--- a/ptx/src/test/spirv_run/bfind.ll
+++ b/ptx/src/test/spirv_run/bfind.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 {
-"52":
%"12" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"12", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -13,6 +11,10 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", pt
%"9" = alloca i32, align 4, addrspace(5)
%"10" = alloca i32, align 4, addrspace(5)
%"11" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"12", align 1
%"13" = load i64, ptr addrspace(4) %"41", align 8
store i64 %"13", ptr addrspace(5) %"4", align 8
%"14" = load i64, ptr addrspace(4) %"42", align 8
@@ -23,31 +25,31 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", pt
store i32 %"15", ptr addrspace(5) %"6", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"44" = inttoptr i64 %"18" to ptr
- %"54" = getelementptr inbounds i8, ptr %"44", i64 4
- %"17" = load i32, ptr %"54", align 4
+ %"53" = getelementptr inbounds i8, ptr %"44", i64 4
+ %"17" = load i32, ptr %"53", align 4
store i32 %"17", ptr addrspace(5) %"7", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"45" = inttoptr i64 %"20" to ptr
- %"56" = getelementptr inbounds i8, ptr %"45", i64 8
- %"19" = load i32, ptr %"56", align 4
+ %"55" = getelementptr inbounds i8, ptr %"45", i64 8
+ %"19" = load i32, ptr %"55", align 4
store i32 %"19", ptr addrspace(5) %"8", align 4
%"22" = load i32, ptr addrspace(5) %"6", align 4
- %0 = icmp eq i32 %"22", 0
- %1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true)
- %2 = sub i32 31, %1
- %"46" = select i1 %0, i32 -1, i32 %2
+ %2 = icmp eq i32 %"22", 0
+ %3 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true)
+ %4 = sub i32 31, %3
+ %"46" = select i1 %2, i32 -1, i32 %4
store i32 %"46", ptr addrspace(5) %"9", align 4
%"24" = load i32, ptr addrspace(5) %"7", align 4
- %3 = icmp eq i32 %"24", 0
- %4 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true)
- %5 = sub i32 31, %4
- %"47" = select i1 %3, i32 -1, i32 %5
+ %5 = icmp eq i32 %"24", 0
+ %6 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true)
+ %7 = sub i32 31, %6
+ %"47" = select i1 %5, i32 -1, i32 %7
store i32 %"47", ptr addrspace(5) %"10", align 4
%"26" = load i32, ptr addrspace(5) %"8", align 4
- %6 = icmp eq i32 %"26", 0
- %7 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true)
- %8 = sub i32 31, %7
- %"48" = select i1 %6, i32 -1, i32 %8
+ %8 = icmp eq i32 %"26", 0
+ %9 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true)
+ %10 = sub i32 31, %9
+ %"48" = select i1 %8, i32 -1, i32 %10
store i32 %"48", ptr addrspace(5) %"11", align 4
%"27" = load i64, ptr addrspace(5) %"5", align 8
%"28" = load i32, ptr addrspace(5) %"9", align 4
@@ -56,13 +58,13 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", pt
%"29" = load i64, ptr addrspace(5) %"5", align 8
%"30" = load i32, ptr addrspace(5) %"10", align 4
%"50" = inttoptr i64 %"29" to ptr
- %"58" = getelementptr inbounds i8, ptr %"50", i64 4
- store i32 %"30", ptr %"58", align 4
+ %"57" = getelementptr inbounds i8, ptr %"50", i64 4
+ store i32 %"30", ptr %"57", align 4
%"31" = load i64, ptr addrspace(5) %"5", align 8
%"32" = load i32, ptr addrspace(5) %"11", align 4
%"51" = inttoptr i64 %"31" to ptr
- %"60" = getelementptr inbounds i8, ptr %"51", i64 8
- store i32 %"32", ptr %"60", align 4
+ %"59" = getelementptr inbounds i8, ptr %"51", i64 8
+ store i32 %"32", ptr %"59", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/bfind_shiftamt.ll b/ptx/src/test/spirv_run/bfind_shiftamt.ll
index fd21514..9968d85 100644
--- a/ptx/src/test/spirv_run/bfind_shiftamt.ll
+++ b/ptx/src/test/spirv_run/bfind_shiftamt.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 {
-"52":
%"12" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"12", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -13,6 +11,10 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64)
%"9" = alloca i32, align 4, addrspace(5)
%"10" = alloca i32, align 4, addrspace(5)
%"11" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"12", align 1
%"13" = load i64, ptr addrspace(4) %"41", align 8
store i64 %"13", ptr addrspace(5) %"4", align 8
%"14" = load i64, ptr addrspace(4) %"42", align 8
@@ -23,28 +25,28 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64)
store i32 %"15", ptr addrspace(5) %"6", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"44" = inttoptr i64 %"18" to ptr
- %"54" = getelementptr inbounds i8, ptr %"44", i64 4
- %"17" = load i32, ptr %"54", align 4
+ %"53" = getelementptr inbounds i8, ptr %"44", i64 4
+ %"17" = load i32, ptr %"53", align 4
store i32 %"17", ptr addrspace(5) %"7", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"45" = inttoptr i64 %"20" to ptr
- %"56" = getelementptr inbounds i8, ptr %"45", i64 8
- %"19" = load i32, ptr %"56", align 4
+ %"55" = getelementptr inbounds i8, ptr %"45", i64 8
+ %"19" = load i32, ptr %"55", align 4
store i32 %"19", ptr addrspace(5) %"8", align 4
%"22" = load i32, ptr addrspace(5) %"6", align 4
- %0 = icmp eq i32 %"22", 0
- %1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true)
- %"46" = select i1 %0, i32 -1, i32 %1
+ %2 = icmp eq i32 %"22", 0
+ %3 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true)
+ %"46" = select i1 %2, i32 -1, i32 %3
store i32 %"46", ptr addrspace(5) %"9", align 4
%"24" = load i32, ptr addrspace(5) %"7", align 4
- %2 = icmp eq i32 %"24", 0
- %3 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true)
- %"47" = select i1 %2, i32 -1, i32 %3
+ %4 = icmp eq i32 %"24", 0
+ %5 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true)
+ %"47" = select i1 %4, i32 -1, i32 %5
store i32 %"47", ptr addrspace(5) %"10", align 4
%"26" = load i32, ptr addrspace(5) %"8", align 4
- %4 = icmp eq i32 %"26", 0
- %5 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true)
- %"48" = select i1 %4, i32 -1, i32 %5
+ %6 = icmp eq i32 %"26", 0
+ %7 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true)
+ %"48" = select i1 %6, i32 -1, i32 %7
store i32 %"48", ptr addrspace(5) %"11", align 4
%"27" = load i64, ptr addrspace(5) %"5", align 8
%"28" = load i32, ptr addrspace(5) %"9", align 4
@@ -53,13 +55,13 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64)
%"29" = load i64, ptr addrspace(5) %"5", align 8
%"30" = load i32, ptr addrspace(5) %"10", align 4
%"50" = inttoptr i64 %"29" to ptr
- %"58" = getelementptr inbounds i8, ptr %"50", i64 4
- store i32 %"30", ptr %"58", align 4
+ %"57" = getelementptr inbounds i8, ptr %"50", i64 4
+ store i32 %"30", ptr %"57", align 4
%"31" = load i64, ptr addrspace(5) %"5", align 8
%"32" = load i32, ptr addrspace(5) %"11", align 4
%"51" = inttoptr i64 %"31" to ptr
- %"60" = getelementptr inbounds i8, ptr %"51", i64 8
- store i32 %"32", ptr %"60", align 4
+ %"59" = getelementptr inbounds i8, ptr %"51", i64 8
+ store i32 %"32", ptr %"59", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/block.ll b/ptx/src/test/spirv_run/block.ll
index 87dd227..b482fe2 100644
--- a/ptx/src/test/spirv_run/block.ll
+++ b/ptx/src/test/spirv_run/block.ll
@@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"26":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"23", align 8
diff --git a/ptx/src/test/spirv_run/bra.ll b/ptx/src/test/spirv_run/bra.ll
index 6d62cca..4173392 100644
--- a/ptx/src/test/spirv_run/bra.ll
+++ b/ptx/src/test/spirv_run/bra.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 {
-"28":
%"11" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"11", align 1
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
%"10" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"11", align 1
%"12" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"12", ptr addrspace(5) %"7", align 8
%"13" = load i64, ptr addrspace(4) %"25", align 8
@@ -19,19 +21,19 @@ define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"24", ptr
store i64 %"14", ptr addrspace(5) %"9", align 8
br label %"4"
-"4": ; preds = %"28"
+"4": ; preds = %1
%"17" = load i64, ptr addrspace(5) %"9", align 8
%"16" = add i64 %"17", 1
store i64 %"16", ptr addrspace(5) %"10", align 8
br label %"6"
-0: ; No predecessors!
+"5": ; No predecessors!
%"19" = load i64, ptr addrspace(5) %"9", align 8
%"18" = add i64 %"19", 2
store i64 %"18", ptr addrspace(5) %"10", align 8
br label %"6"
-"6": ; preds = %0, %"4"
+"6": ; preds = %"5", %"4"
%"20" = load i64, ptr addrspace(5) %"8", align 8
%"21" = load i64, ptr addrspace(5) %"10", align 8
%"27" = inttoptr i64 %"20" to ptr
diff --git a/ptx/src/test/spirv_run/brev.ll b/ptx/src/test/spirv_run/brev.ll
index a519c2b..d838750 100644
--- a/ptx/src/test/spirv_run/brev.ll
+++ b/ptx/src/test/spirv_run/brev.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
-"20":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
diff --git a/ptx/src/test/spirv_run/call.ll b/ptx/src/test/spirv_run/call.ll
index d89322e..684bb0c 100644
--- a/ptx/src/test/spirv_run/call.ll
+++ b/ptx/src/test/spirv_run/call.ll
@@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define private i64 @incr(i64 %"29") #0 {
-"49":
%"18" = alloca i64, align 8, addrspace(5)
%"17" = alloca i64, align 8, addrspace(5)
%"20" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"20", align 1
%"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5)
%"14" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
store i64 %"29", ptr addrspace(5) %"18", align 8
+ store i1 false, ptr addrspace(5) %"20", align 1
%"30" = load i64, ptr addrspace(5) %"18", align 8
store i64 %"30", ptr addrspace(5) %"43", align 8
%"31" = load i64, ptr addrspace(5) %"43", align 8
@@ -27,14 +29,16 @@ define private i64 @incr(i64 %"29") #0 {
}
define protected amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
-"48":
%"19" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"19", align 1
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"19", align 1
%"21" = load i64, ptr addrspace(4) %"38", align 8
store i64 %"21", ptr addrspace(5) %"7", align 8
%"22" = load i64, ptr addrspace(4) %"39", align 8
diff --git a/ptx/src/test/spirv_run/call_bug.ll b/ptx/src/test/spirv_run/call_bug.ll
index 3ad9146..12c8e2c 100644
--- a/ptx/src/test/spirv_run/call_bug.ll
+++ b/ptx/src/test/spirv_run/call_bug.ll
@@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define private [2 x i32] @incr(i64 %"21") #0 {
-"56":
%"16" = alloca i64, align 8, addrspace(5)
%"15" = alloca [2 x i32], align 4, addrspace(5)
%"19" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"19", align 1
%"42" = alloca [2 x i32], align 4, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5)
%"4" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
store i64 %"21", ptr addrspace(5) %"16", align 8
+ store i1 false, ptr addrspace(5) %"19", align 1
%"22" = load i64, ptr addrspace(5) %"16", align 8
store i64 %"22", ptr addrspace(5) %"43", align 8
%"23" = load i64, ptr addrspace(5) %"43", align 8
@@ -27,15 +29,17 @@ define private [2 x i32] @incr(i64 %"21") #0 {
}
define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 {
-"57":
%"20" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"20", align 1
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
%"10" = alloca i64, align 8, addrspace(5)
%"11" = alloca i64, align 8, addrspace(5)
%"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca [2 x i32], align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"20", align 1
%"29" = load i64, ptr addrspace(4) %"44", align 8
store i64 %"29", ptr addrspace(5) %"8", align 8
%"30" = load i64, ptr addrspace(4) %"45", align 8
@@ -49,11 +53,11 @@ define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"44",
store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"11", align 8
%"17" = load i64, ptr addrspace(5) %"46", align 8
%"35" = load i64, ptr addrspace(5) %"11", align 8
- %0 = inttoptr i64 %"35" to ptr
- %"18" = call [2 x i32] %0(i64 %"17")
+ %2 = inttoptr i64 %"35" to ptr
+ %"18" = call [2 x i32] %2(i64 %"17")
store [2 x i32] %"18", ptr addrspace(5) %"47", align 4
- %"59" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0
- %"36" = load i64, ptr addrspace(5) %"59", align 8
+ %"57" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0
+ %"36" = load i64, ptr addrspace(5) %"57", align 8
store i64 %"36", ptr addrspace(5) %"10", align 8
%"37" = load i64, ptr addrspace(5) %"9", align 8
%"38" = load i64, ptr addrspace(5) %"10", align 8
diff --git a/ptx/src/test/spirv_run/call_multi_return.ll b/ptx/src/test/spirv_run/call_multi_return.ll
index 35cc5e0..5cf701b 100644
--- a/ptx/src/test/spirv_run/call_multi_return.ll
+++ b/ptx/src/test/spirv_run/call_multi_return.ll
@@ -4,16 +4,18 @@ target triple = "amdgcn-amd-amdhsa"
%struct.i64i32 = type { i64, i32 }
define private %struct.i64i32 @"1"(i32 %"39", i32 %"40") #0 {
-"62":
%"18" = alloca i32, align 4, addrspace(5)
%"19" = alloca i32, align 4, addrspace(5)
%"16" = alloca i64, align 8, addrspace(5)
%"17" = alloca i32, align 4, addrspace(5)
%"22" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"22", align 1
%"20" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
store i32 %"39", ptr addrspace(5) %"18", align 4
store i32 %"40", ptr addrspace(5) %"19", align 4
+ store i1 false, ptr addrspace(5) %"22", align 1
%"42" = load i32, ptr addrspace(5) %"18", align 4
%"43" = load i32, ptr addrspace(5) %"19", align 4
%"41" = add i32 %"42", %"43"
@@ -27,15 +29,13 @@ define private %struct.i64i32 @"1"(i32 %"39", i32 %"40") #0 {
store i32 %"46", ptr addrspace(5) %"17", align 4
%"49" = load i64, ptr addrspace(5) %"16", align 8
%"50" = load i32, ptr addrspace(5) %"17", align 4
- %0 = insertvalue %struct.i64i32 undef, i64 %"49", 0
- %1 = insertvalue %struct.i64i32 %0, i32 %"50", 1
- ret %struct.i64i32 %1
+ %2 = insertvalue %struct.i64i32 undef, i64 %"49", 0
+ %3 = insertvalue %struct.i64i32 %2, i32 %"50", 1
+ ret %struct.i64i32 %3
}
define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i64) %"55", ptr addrspace(4) byref(i64) %"56") #0 {
-"61":
%"21" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"21", align 1
%"9" = alloca i64, align 8, addrspace(5)
%"10" = alloca i64, align 8, addrspace(5)
%"11" = alloca i32, align 4, addrspace(5)
@@ -43,6 +43,10 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6
%"13" = alloca i64, align 8, addrspace(5)
%"14" = alloca i64, align 8, addrspace(5)
%"15" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"21", align 1
%"23" = load i64, ptr addrspace(4) %"55", align 8
store i64 %"23", ptr addrspace(5) %"9", align 8
%"24" = load i64, ptr addrspace(4) %"56", align 8
@@ -53,14 +57,14 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6
store i32 %"25", ptr addrspace(5) %"11", align 4
%"28" = load i64, ptr addrspace(5) %"9", align 8
%"58" = inttoptr i64 %"28" to ptr addrspace(1)
- %"64" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 4
- %"27" = load i32, ptr addrspace(1) %"64", align 4
+ %"62" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 4
+ %"27" = load i32, ptr addrspace(1) %"62", align 4
store i32 %"27", ptr addrspace(5) %"12", align 4
%"31" = load i32, ptr addrspace(5) %"11", align 4
%"32" = load i32, ptr addrspace(5) %"12", align 4
- %0 = call %struct.i64i32 @"1"(i32 %"31", i32 %"32")
- %"29" = extractvalue %struct.i64i32 %0, 0
- %"30" = extractvalue %struct.i64i32 %0, 1
+ %2 = call %struct.i64i32 @"1"(i32 %"31", i32 %"32")
+ %"29" = extractvalue %struct.i64i32 %2, 0
+ %"30" = extractvalue %struct.i64i32 %2, 1
store i64 %"29", ptr addrspace(5) %"13", align 8
store i32 %"30", ptr addrspace(5) %"15", align 4
%"34" = load i32, ptr addrspace(5) %"15", align 4
@@ -73,8 +77,8 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6
%"37" = load i64, ptr addrspace(5) %"10", align 8
%"38" = load i64, ptr addrspace(5) %"14", align 8
%"60" = inttoptr i64 %"37" to ptr addrspace(1)
- %"66" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 8
- store i64 %"38", ptr addrspace(1) %"66", align 8
+ %"64" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 8
+ store i64 %"38", ptr addrspace(1) %"64", align 8
ret void
}
diff --git a/ptx/src/test/spirv_run/callprototype.ll b/ptx/src/test/spirv_run/callprototype.ll
index be431ea..9cba37c 100644
--- a/ptx/src/test/spirv_run/callprototype.ll
+++ b/ptx/src/test/spirv_run/callprototype.ll
@@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define private i64 @incr(i64 %"33") #0 {
-"54":
%"20" = alloca i64, align 8, addrspace(5)
%"19" = alloca i64, align 8, addrspace(5)
%"22" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"22", align 1
%"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca i64, align 8, addrspace(5)
%"16" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
store i64 %"33", ptr addrspace(5) %"20", align 8
+ store i1 false, ptr addrspace(5) %"22", align 1
%"34" = load i64, ptr addrspace(5) %"20", align 8
store i64 %"34", ptr addrspace(5) %"47", align 8
%"35" = load i64, ptr addrspace(5) %"47", align 8
@@ -27,15 +29,17 @@ define private i64 @incr(i64 %"33") #0 {
}
define protected amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 {
-"53":
%"21" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"21", align 1
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
%"10" = alloca i64, align 8, addrspace(5)
%"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"21", align 1
%"23" = load i64, ptr addrspace(4) %"42", align 8
store i64 %"23", ptr addrspace(5) %"7", align 8
%"24" = load i64, ptr addrspace(4) %"43", align 8
@@ -49,8 +53,8 @@ define protected amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) %
store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"10", align 8
%"17" = load i64, ptr addrspace(5) %"44", align 8
%"29" = load i64, ptr addrspace(5) %"10", align 8
- %0 = inttoptr i64 %"29" to ptr
- %"18" = call i64 %0(i64 %"17")
+ %2 = inttoptr i64 %"29" to ptr
+ %"18" = call i64 %2(i64 %"17")
store i64 %"18", ptr addrspace(5) %"45", align 8
%"30" = load i64, ptr addrspace(5) %"45", align 8
store i64 %"30", ptr addrspace(5) %"9", align 8
diff --git a/ptx/src/test/spirv_run/carry_set_all.ll b/ptx/src/test/spirv_run/carry_set_all.ll
index 8b412c1..8983b70 100644
--- a/ptx/src/test/spirv_run/carry_set_all.ll
+++ b/ptx/src/test/spirv_run/carry_set_all.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) %"208", ptr addrspace(4) byref(i64) %"209") #0 {
-"268":
%"22" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"22", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -23,147 +21,151 @@ define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) %
%"19" = alloca i32, align 4, addrspace(5)
%"20" = alloca i32, align 4, addrspace(5)
%"21" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"22", align 1
%"37" = load i64, ptr addrspace(4) %"209", align 8
store i64 %"37", ptr addrspace(5) %"5", align 8
- %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0)
- %"210" = extractvalue { i32, i1 } %0, 0
- %"23" = extractvalue { i32, i1 } %0, 1
+ %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0)
+ %"210" = extractvalue { i32, i1 } %2, 0
+ %"23" = extractvalue { i32, i1 } %2, 1
store i32 %"210", ptr addrspace(5) %"6", align 4
%"39" = xor i1 %"23", true
store i1 %"39", ptr addrspace(5) %"22", align 1
%"41" = load i1, ptr addrspace(5) %"22", align 1
- %1 = zext i1 %"41" to i32
- %"211" = add i32 0, %1
+ %3 = zext i1 %"41" to i32
+ %"211" = add i32 0, %3
store i32 %"211", ptr addrspace(5) %"6", align 4
%"42" = load i1, ptr addrspace(5) %"22", align 1
%"24" = xor i1 %"42", true
- %2 = zext i1 %"24" to i32
- %"212" = sub i32 0, %2
+ %4 = zext i1 %"24" to i32
+ %"212" = sub i32 0, %4
store i32 %"212", ptr addrspace(5) %"7", align 4
- %3 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1)
- %"213" = extractvalue { i32, i1 } %3, 0
- %"25" = extractvalue { i32, i1 } %3, 1
+ %5 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1)
+ %"213" = extractvalue { i32, i1 } %5, 0
+ %"25" = extractvalue { i32, i1 } %5, 1
store i32 %"213", ptr addrspace(5) %"8", align 4
%"45" = xor i1 %"25", true
store i1 %"45", ptr addrspace(5) %"22", align 1
%"47" = load i1, ptr addrspace(5) %"22", align 1
- %4 = zext i1 %"47" to i32
- %"214" = add i32 0, %4
+ %6 = zext i1 %"47" to i32
+ %"214" = add i32 0, %6
store i32 %"214", ptr addrspace(5) %"8", align 4
%"48" = load i1, ptr addrspace(5) %"22", align 1
%"26" = xor i1 %"48", true
- %5 = zext i1 %"26" to i32
- %"215" = sub i32 0, %5
+ %7 = zext i1 %"26" to i32
+ %"215" = sub i32 0, %7
store i32 %"215", ptr addrspace(5) %"9", align 4
- %6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
- %"216" = extractvalue { i32, i1 } %6, 0
- %"51" = extractvalue { i32, i1 } %6, 1
+ %8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
+ %"216" = extractvalue { i32, i1 } %8, 0
+ %"51" = extractvalue { i32, i1 } %8, 1
store i32 %"216", ptr addrspace(5) %"10", align 4
store i1 %"51", ptr addrspace(5) %"22", align 1
%"53" = load i1, ptr addrspace(5) %"22", align 1
- %7 = zext i1 %"53" to i32
- %"217" = add i32 0, %7
+ %9 = zext i1 %"53" to i32
+ %"217" = add i32 0, %9
store i32 %"217", ptr addrspace(5) %"10", align 4
%"54" = load i1, ptr addrspace(5) %"22", align 1
%"27" = xor i1 %"54", true
- %8 = zext i1 %"27" to i32
- %"218" = sub i32 0, %8
+ %10 = zext i1 %"27" to i32
+ %"218" = sub i32 0, %10
store i32 %"218", ptr addrspace(5) %"11", align 4
- %9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
- %"219" = extractvalue { i32, i1 } %9, 0
- %"57" = extractvalue { i32, i1 } %9, 1
+ %11 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
+ %"219" = extractvalue { i32, i1 } %11, 0
+ %"57" = extractvalue { i32, i1 } %11, 1
store i32 %"219", ptr addrspace(5) %"12", align 4
store i1 %"57", ptr addrspace(5) %"22", align 1
%"59" = load i1, ptr addrspace(5) %"22", align 1
- %10 = zext i1 %"59" to i32
- %"220" = add i32 0, %10
+ %12 = zext i1 %"59" to i32
+ %"220" = add i32 0, %12
store i32 %"220", ptr addrspace(5) %"12", align 4
%"60" = load i1, ptr addrspace(5) %"22", align 1
%"28" = xor i1 %"60", true
- %11 = zext i1 %"28" to i32
- %"221" = sub i32 0, %11
+ %13 = zext i1 %"28" to i32
+ %"221" = sub i32 0, %13
store i32 %"221", ptr addrspace(5) %"13", align 4
- %12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
- %"222" = extractvalue { i32, i1 } %12, 0
- %"63" = extractvalue { i32, i1 } %12, 1
+ %14 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
+ %"222" = extractvalue { i32, i1 } %14, 0
+ %"63" = extractvalue { i32, i1 } %14, 1
store i32 %"222", ptr addrspace(5) %"14", align 4
store i1 %"63", ptr addrspace(5) %"22", align 1
%"65" = load i1, ptr addrspace(5) %"22", align 1
- %13 = zext i1 %"65" to i32
- %"223" = add i32 0, %13
+ %15 = zext i1 %"65" to i32
+ %"223" = add i32 0, %15
store i32 %"223", ptr addrspace(5) %"14", align 4
%"66" = load i1, ptr addrspace(5) %"22", align 1
%"29" = xor i1 %"66", true
- %14 = zext i1 %"29" to i32
- %"224" = sub i32 0, %14
+ %16 = zext i1 %"29" to i32
+ %"224" = sub i32 0, %16
store i32 %"224", ptr addrspace(5) %"15", align 4
- %15 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
- %"225" = extractvalue { i32, i1 } %15, 0
- %"69" = extractvalue { i32, i1 } %15, 1
+ %17 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
+ %"225" = extractvalue { i32, i1 } %17, 0
+ %"69" = extractvalue { i32, i1 } %17, 1
store i32 %"225", ptr addrspace(5) %"16", align 4
store i1 %"69", ptr addrspace(5) %"22", align 1
%"71" = load i1, ptr addrspace(5) %"22", align 1
- %16 = zext i1 %"71" to i32
- %"226" = add i32 0, %16
+ %18 = zext i1 %"71" to i32
+ %"226" = add i32 0, %18
store i32 %"226", ptr addrspace(5) %"16", align 4
%"72" = load i1, ptr addrspace(5) %"22", align 1
%"30" = xor i1 %"72", true
- %17 = zext i1 %"30" to i32
- %"227" = sub i32 0, %17
+ %19 = zext i1 %"30" to i32
+ %"227" = sub i32 0, %19
store i32 %"227", ptr addrspace(5) %"17", align 4
- %18 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
- %"228" = extractvalue { i32, i1 } %18, 0
- %"75" = extractvalue { i32, i1 } %18, 1
+ %20 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
+ %"228" = extractvalue { i32, i1 } %20, 0
+ %"75" = extractvalue { i32, i1 } %20, 1
store i32 %"228", ptr addrspace(5) %"18", align 4
store i1 %"75", ptr addrspace(5) %"22", align 1
%"76" = load i1, ptr addrspace(5) %"22", align 1
%"31" = xor i1 %"76", true
- %19 = zext i1 %"31" to i32
- %20 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0)
- %21 = extractvalue { i32, i1 } %20, 0
- %22 = extractvalue { i32, i1 } %20, 1
- %23 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %21, i32 %19)
- %"229" = extractvalue { i32, i1 } %23, 0
- %24 = extractvalue { i32, i1 } %23, 1
- %"32" = xor i1 %22, %24
+ %21 = zext i1 %"31" to i32
+ %22 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0)
+ %23 = extractvalue { i32, i1 } %22, 0
+ %24 = extractvalue { i32, i1 } %22, 1
+ %25 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %23, i32 %21)
+ %"229" = extractvalue { i32, i1 } %25, 0
+ %26 = extractvalue { i32, i1 } %25, 1
+ %"32" = xor i1 %24, %26
store i32 %"229", ptr addrspace(5) %"18", align 4
%"78" = xor i1 %"32", true
store i1 %"78", ptr addrspace(5) %"22", align 1
%"80" = load i1, ptr addrspace(5) %"22", align 1
- %25 = zext i1 %"80" to i32
- %"230" = add i32 0, %25
+ %27 = zext i1 %"80" to i32
+ %"230" = add i32 0, %27
store i32 %"230", ptr addrspace(5) %"18", align 4
%"81" = load i1, ptr addrspace(5) %"22", align 1
%"33" = xor i1 %"81", true
- %26 = zext i1 %"33" to i32
- %"231" = sub i32 0, %26
+ %28 = zext i1 %"33" to i32
+ %"231" = sub i32 0, %28
store i32 %"231", ptr addrspace(5) %"19", align 4
- %27 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
- %"232" = extractvalue { i32, i1 } %27, 0
- %"84" = extractvalue { i32, i1 } %27, 1
+ %29 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
+ %"232" = extractvalue { i32, i1 } %29, 0
+ %"84" = extractvalue { i32, i1 } %29, 1
store i32 %"232", ptr addrspace(5) %"20", align 4
store i1 %"84", ptr addrspace(5) %"22", align 1
%"85" = load i1, ptr addrspace(5) %"22", align 1
%"34" = xor i1 %"85", true
- %28 = zext i1 %"34" to i32
- %29 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1)
- %30 = extractvalue { i32, i1 } %29, 0
- %31 = extractvalue { i32, i1 } %29, 1
- %32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %30, i32 %28)
- %"233" = extractvalue { i32, i1 } %32, 0
- %33 = extractvalue { i32, i1 } %32, 1
- %"35" = xor i1 %31, %33
+ %30 = zext i1 %"34" to i32
+ %31 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1)
+ %32 = extractvalue { i32, i1 } %31, 0
+ %33 = extractvalue { i32, i1 } %31, 1
+ %34 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %32, i32 %30)
+ %"233" = extractvalue { i32, i1 } %34, 0
+ %35 = extractvalue { i32, i1 } %34, 1
+ %"35" = xor i1 %33, %35
store i32 %"233", ptr addrspace(5) %"20", align 4
%"87" = xor i1 %"35", true
store i1 %"87", ptr addrspace(5) %"22", align 1
%"89" = load i1, ptr addrspace(5) %"22", align 1
- %34 = zext i1 %"89" to i32
- %"234" = add i32 0, %34
+ %36 = zext i1 %"89" to i32
+ %"234" = add i32 0, %36
store i32 %"234", ptr addrspace(5) %"20", align 4
%"90" = load i1, ptr addrspace(5) %"22", align 1
%"36" = xor i1 %"90", true
- %35 = zext i1 %"36" to i32
- %"235" = sub i32 0, %35
+ %37 = zext i1 %"36" to i32
+ %"235" = sub i32 0, %37
store i32 %"235", ptr addrspace(5) %"21", align 4
%"92" = load i64, ptr addrspace(5) %"5", align 8
%"93" = load i32, ptr addrspace(5) %"6", align 4
@@ -172,78 +174,78 @@ define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) %
%"94" = load i64, ptr addrspace(5) %"5", align 8
%"95" = load i32, ptr addrspace(5) %"8", align 4
%"238" = inttoptr i64 %"94" to ptr
- %"270" = getelementptr inbounds i8, ptr %"238", i64 4
- store i32 %"95", ptr %"270", align 4
+ %"269" = getelementptr inbounds i8, ptr %"238", i64 4
+ store i32 %"95", ptr %"269", align 4
%"96" = load i64, ptr addrspace(5) %"5", align 8
%"97" = load i32, ptr addrspace(5) %"10", align 4
%"240" = inttoptr i64 %"96" to ptr
- %"272" = getelementptr inbounds i8, ptr %"240", i64 8
- store i32 %"97", ptr %"272", align 4
+ %"271" = getelementptr inbounds i8, ptr %"240", i64 8
+ store i32 %"97", ptr %"271", align 4
%"98" = load i64, ptr addrspace(5) %"5", align 8
%"99" = load i32, ptr addrspace(5) %"12", align 4
%"242" = inttoptr i64 %"98" to ptr
- %"274" = getelementptr inbounds i8, ptr %"242", i64 12
- store i32 %"99", ptr %"274", align 4
+ %"273" = getelementptr inbounds i8, ptr %"242", i64 12
+ store i32 %"99", ptr %"273", align 4
%"100" = load i64, ptr addrspace(5) %"5", align 8
%"101" = load i32, ptr addrspace(5) %"14", align 4
%"244" = inttoptr i64 %"100" to ptr
- %"276" = getelementptr inbounds i8, ptr %"244", i64 16
- store i32 %"101", ptr %"276", align 4
+ %"275" = getelementptr inbounds i8, ptr %"244", i64 16
+ store i32 %"101", ptr %"275", align 4
%"102" = load i64, ptr addrspace(5) %"5", align 8
%"103" = load i32, ptr addrspace(5) %"16", align 4
%"246" = inttoptr i64 %"102" to ptr
- %"278" = getelementptr inbounds i8, ptr %"246", i64 20
- store i32 %"103", ptr %"278", align 4
+ %"277" = getelementptr inbounds i8, ptr %"246", i64 20
+ store i32 %"103", ptr %"277", align 4
%"104" = load i64, ptr addrspace(5) %"5", align 8
%"105" = load i32, ptr addrspace(5) %"18", align 4
%"248" = inttoptr i64 %"104" to ptr
- %"280" = getelementptr inbounds i8, ptr %"248", i64 24
- store i32 %"105", ptr %"280", align 4
+ %"279" = getelementptr inbounds i8, ptr %"248", i64 24
+ store i32 %"105", ptr %"279", align 4
%"106" = load i64, ptr addrspace(5) %"5", align 8
%"107" = load i32, ptr addrspace(5) %"20", align 4
%"250" = inttoptr i64 %"106" to ptr
- %"282" = getelementptr inbounds i8, ptr %"250", i64 28
- store i32 %"107", ptr %"282", align 4
+ %"281" = getelementptr inbounds i8, ptr %"250", i64 28
+ store i32 %"107", ptr %"281", align 4
%"108" = load i64, ptr addrspace(5) %"5", align 8
%"109" = load i32, ptr addrspace(5) %"7", align 4
%"252" = inttoptr i64 %"108" to ptr
- %"284" = getelementptr inbounds i8, ptr %"252", i64 32
- store i32 %"109", ptr %"284", align 4
+ %"283" = getelementptr inbounds i8, ptr %"252", i64 32
+ store i32 %"109", ptr %"283", align 4
%"110" = load i64, ptr addrspace(5) %"5", align 8
%"111" = load i32, ptr addrspace(5) %"9", align 4
%"254" = inttoptr i64 %"110" to ptr
- %"286" = getelementptr inbounds i8, ptr %"254", i64 36
- store i32 %"111", ptr %"286", align 4
+ %"285" = getelementptr inbounds i8, ptr %"254", i64 36
+ store i32 %"111", ptr %"285", align 4
%"112" = load i64, ptr addrspace(5) %"5", align 8
%"113" = load i32, ptr addrspace(5) %"11", align 4
%"256" = inttoptr i64 %"112" to ptr
- %"288" = getelementptr inbounds i8, ptr %"256", i64 40
- store i32 %"113", ptr %"288", align 4
+ %"287" = getelementptr inbounds i8, ptr %"256", i64 40
+ store i32 %"113", ptr %"287", align 4
%"114" = load i64, ptr addrspace(5) %"5", align 8
%"115" = load i32, ptr addrspace(5) %"13", align 4
%"258" = inttoptr i64 %"114" to ptr
- %"290" = getelementptr inbounds i8, ptr %"258", i64 44
- store i32 %"115", ptr %"290", align 4
+ %"289" = getelementptr inbounds i8, ptr %"258", i64 44
+ store i32 %"115", ptr %"289", align 4
%"116" = load i64, ptr addrspace(5) %"5", align 8
%"117" = load i32, ptr addrspace(5) %"15", align 4
%"260" = inttoptr i64 %"116" to ptr
- %"292" = getelementptr inbounds i8, ptr %"260", i64 48
- store i32 %"117", ptr %"292", align 4
+ %"291" = getelementptr inbounds i8, ptr %"260", i64 48
+ store i32 %"117", ptr %"291", align 4
%"118" = load i64, ptr addrspace(5) %"5", align 8
%"119" = load i32, ptr addrspace(5) %"17", align 4
%"262" = inttoptr i64 %"118" to ptr
- %"294" = getelementptr inbounds i8, ptr %"262", i64 52
- store i32 %"119", ptr %"294", align 4
+ %"293" = getelementptr inbounds i8, ptr %"262", i64 52
+ store i32 %"119", ptr %"293", align 4
%"120" = load i64, ptr addrspace(5) %"5", align 8
%"121" = load i32, ptr addrspace(5) %"19", align 4
%"264" = inttoptr i64 %"120" to ptr
- %"296" = getelementptr inbounds i8, ptr %"264", i64 56
- store i32 %"121", ptr %"296", align 4
+ %"295" = getelementptr inbounds i8, ptr %"264", i64 56
+ store i32 %"121", ptr %"295", align 4
%"122" = load i64, ptr addrspace(5) %"5", align 8
%"123" = load i32, ptr addrspace(5) %"21", align 4
%"266" = inttoptr i64 %"122" to ptr
- %"298" = getelementptr inbounds i8, ptr %"266", i64 60
- store i32 %"123", ptr %"298", align 4
+ %"297" = getelementptr inbounds i8, ptr %"266", i64 60
+ store i32 %"123", ptr %"297", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/clz.ll b/ptx/src/test/spirv_run/clz.ll
index 31f408d..5a93145 100644
--- a/ptx/src/test/spirv_run/clz.ll
+++ b/ptx/src/test/spirv_run/clz.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
-"20":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
@@ -17,8 +19,8 @@ define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"16", ptr
%"10" = load i32, ptr %"18", align 4
store i32 %"10", ptr addrspace(5) %"6", align 4
%"13" = load i32, ptr addrspace(5) %"6", align 4
- %0 = call i32 @llvm.ctlz.i32(i32 %"13", i1 false)
- store i32 %0, ptr addrspace(5) %"6", align 4
+ %2 = call i32 @llvm.ctlz.i32(i32 %"13", i1 false)
+ store i32 %2, ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load i32, ptr addrspace(5) %"6", align 4
%"19" = inttoptr i64 %"14" to ptr
diff --git a/ptx/src/test/spirv_run/const.ll b/ptx/src/test/spirv_run/const.ll
index 80fcc07..df0de94 100644
--- a/ptx/src/test/spirv_run/const.ll
+++ b/ptx/src/test/spirv_run/const.ll
@@ -4,15 +4,17 @@ target triple = "amdgcn-amd-amdhsa"
@constparams = protected addrspace(4) externally_initialized global [4 x i16] [i16 10, i16 20, i16 30, i16 40], align 8
define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
-"52":
%"11" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"11", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i16, align 2, addrspace(5)
%"8" = alloca i16, align 2, addrspace(5)
%"9" = alloca i16, align 2, addrspace(5)
%"10" = alloca i16, align 2, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"11", align 1
%"12" = load i64, ptr addrspace(4) %"38", align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(4) %"39", align 8
@@ -32,18 +34,18 @@ define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"38", pt
%"20" = load i64, ptr addrspace(5) %"6", align 8
%"21" = load i16, ptr addrspace(5) %"8", align 2
%"46" = inttoptr i64 %"20" to ptr
- %"60" = getelementptr inbounds i8, ptr %"46", i64 2
- store i16 %"21", ptr %"60", align 2
+ %"59" = getelementptr inbounds i8, ptr %"46", i64 2
+ store i16 %"21", ptr %"59", align 2
%"22" = load i64, ptr addrspace(5) %"6", align 8
%"23" = load i16, ptr addrspace(5) %"9", align 2
%"48" = inttoptr i64 %"22" to ptr
- %"62" = getelementptr inbounds i8, ptr %"48", i64 4
- store i16 %"23", ptr %"62", align 2
+ %"61" = getelementptr inbounds i8, ptr %"48", i64 4
+ store i16 %"23", ptr %"61", align 2
%"24" = load i64, ptr addrspace(5) %"6", align 8
%"25" = load i16, ptr addrspace(5) %"10", align 2
%"50" = inttoptr i64 %"24" to ptr
- %"64" = getelementptr inbounds i8, ptr %"50", i64 6
- store i16 %"25", ptr %"64", align 2
+ %"63" = getelementptr inbounds i8, ptr %"50", i64 6
+ store i16 %"25", ptr %"63", align 2
ret void
}
diff --git a/ptx/src/test/spirv_run/constant_f32.ll b/ptx/src/test/spirv_run/constant_f32.ll
index e0309ea..a6558c9 100644
--- a/ptx/src/test/spirv_run/constant_f32.ll
+++ b/ptx/src/test/spirv_run/constant_f32.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
-"21":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"18", align 8
diff --git a/ptx/src/test/spirv_run/constant_negative.ll b/ptx/src/test/spirv_run/constant_negative.ll
index 337689f..c3e7e86 100644
--- a/ptx/src/test/spirv_run/constant_negative.ll
+++ b/ptx/src/test/spirv_run/constant_negative.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
-"21":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"18", align 8
diff --git a/ptx/src/test/spirv_run/cos.ll b/ptx/src/test/spirv_run/cos.ll
index d385e1f..da48297 100644
--- a/ptx/src/test/spirv_run/cos.ll
+++ b/ptx/src/test/spirv_run/cos.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
-"20":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
diff --git a/ptx/src/test/spirv_run/cvt_clamp.ll b/ptx/src/test/spirv_run/cvt_clamp.ll
index f2be477..b610ca9 100644
--- a/ptx/src/test/spirv_run/cvt_clamp.ll
+++ b/ptx/src/test/spirv_run/cvt_clamp.ll
@@ -4,12 +4,14 @@ target triple = "amdgcn-amd-amdhsa"
declare float @__zluda_ptx_impl__cvt_sat_f32_f32(float) #0
define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 {
-"56":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"46", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"47", align 8
@@ -27,8 +29,8 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46"
store float %"15", ptr addrspace(1) %"49", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"50" = inttoptr i64 %"17" to ptr addrspace(1)
- %"61" = getelementptr inbounds i8, ptr addrspace(1) %"50", i64 4
- %"16" = load float, ptr addrspace(1) %"61", align 4
+ %"60" = getelementptr inbounds i8, ptr addrspace(1) %"50", i64 4
+ %"16" = load float, ptr addrspace(1) %"60", align 4
store float %"16", ptr addrspace(5) %"6", align 4
%"19" = load float, ptr addrspace(5) %"6", align 4
%"18" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"19")
@@ -36,12 +38,12 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46"
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load float, ptr addrspace(5) %"6", align 4
%"51" = inttoptr i64 %"20" to ptr addrspace(1)
- %"63" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4
- store float %"21", ptr addrspace(1) %"63", align 4
+ %"62" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4
+ store float %"21", ptr addrspace(1) %"62", align 4
%"23" = load i64, ptr addrspace(5) %"4", align 8
%"52" = inttoptr i64 %"23" to ptr addrspace(1)
- %"65" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 8
- %"22" = load float, ptr addrspace(1) %"65", align 4
+ %"64" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 8
+ %"22" = load float, ptr addrspace(1) %"64", align 4
store float %"22", ptr addrspace(5) %"6", align 4
%"25" = load float, ptr addrspace(5) %"6", align 4
%"24" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"25")
@@ -49,12 +51,12 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46"
%"26" = load i64, ptr addrspace(5) %"5", align 8
%"27" = load float, ptr addrspace(5) %"6", align 4
%"53" = inttoptr i64 %"26" to ptr addrspace(1)
- %"67" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8
- store float %"27", ptr addrspace(1) %"67", align 4
+ %"66" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8
+ store float %"27", ptr addrspace(1) %"66", align 4
%"29" = load i64, ptr addrspace(5) %"4", align 8
%"54" = inttoptr i64 %"29" to ptr addrspace(1)
- %"69" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 12
- %"28" = load float, ptr addrspace(1) %"69", align 4
+ %"68" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 12
+ %"28" = load float, ptr addrspace(1) %"68", align 4
store float %"28", ptr addrspace(5) %"6", align 4
%"31" = load float, ptr addrspace(5) %"6", align 4
%"30" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"31")
@@ -62,8 +64,8 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46"
%"32" = load i64, ptr addrspace(5) %"5", align 8
%"33" = load float, ptr addrspace(5) %"6", align 4
%"55" = inttoptr i64 %"32" to ptr addrspace(1)
- %"71" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12
- store float %"33", ptr addrspace(1) %"71", align 4
+ %"70" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12
+ store float %"33", ptr addrspace(1) %"70", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/cvt_f32_f16.ll b/ptx/src/test/spirv_run/cvt_f32_f16.ll
index e3acdb6..7379876 100644
--- a/ptx/src/test/spirv_run/cvt_f32_f16.ll
+++ b/ptx/src/test/spirv_run/cvt_f32_f16.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_f32_f16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
-"22":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca half, align 2, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
diff --git a/ptx/src/test/spirv_run/cvt_f32_s32.ll b/ptx/src/test/spirv_run/cvt_f32_s32.ll
index 65b00ce..90b0e4a 100644
--- a/ptx/src/test/spirv_run/cvt_f32_s32.ll
+++ b/ptx/src/test/spirv_run/cvt_f32_s32.ll
@@ -10,15 +10,17 @@ declare float @__zluda_ptx_impl__cvt_rp_f32_s32(i32) #0
declare float @__zluda_ptx_impl__cvt_rz_f32_s32(i32) #0
define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #1 {
-"75":
%"10" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"10", align 1
%"11" = load i64, ptr addrspace(4) %"49", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"50", align 8
@@ -29,18 +31,18 @@ define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"4
store i32 %"51", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"53" = inttoptr i64 %"16" to ptr
- %"89" = getelementptr inbounds i8, ptr %"53", i64 4
- %"54" = load i32, ptr %"89", align 4
+ %"88" = getelementptr inbounds i8, ptr %"53", i64 4
+ %"54" = load i32, ptr %"88", align 4
store i32 %"54", ptr addrspace(5) %"7", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"55" = inttoptr i64 %"18" to ptr
- %"91" = getelementptr inbounds i8, ptr %"55", i64 8
- %"56" = load i32, ptr %"91", align 4
+ %"90" = getelementptr inbounds i8, ptr %"55", i64 8
+ %"56" = load i32, ptr %"90", align 4
store i32 %"56", ptr addrspace(5) %"8", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"57" = inttoptr i64 %"20" to ptr
- %"93" = getelementptr inbounds i8, ptr %"57", i64 12
- %"58" = load i32, ptr %"93", align 4
+ %"92" = getelementptr inbounds i8, ptr %"57", i64 12
+ %"58" = load i32, ptr %"92", align 4
store i32 %"58", ptr addrspace(5) %"9", align 4
%"22" = load i32, ptr addrspace(5) %"6", align 4
%"59" = call float @__zluda_ptx_impl__cvt_rn_f32_s32(i32 %"22")
@@ -66,21 +68,21 @@ define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"4
%"31" = load i64, ptr addrspace(5) %"5", align 8
%"32" = load i32, ptr addrspace(5) %"7", align 4
%"69" = inttoptr i64 %"31" to ptr addrspace(1)
- %"95" = getelementptr inbounds i8, ptr addrspace(1) %"69", i64 4
+ %"94" = getelementptr inbounds i8, ptr addrspace(1) %"69", i64 4
%"70" = bitcast i32 %"32" to float
- store float %"70", ptr addrspace(1) %"95", align 4
+ store float %"70", ptr addrspace(1) %"94", align 4
%"33" = load i64, ptr addrspace(5) %"5", align 8
%"34" = load i32, ptr addrspace(5) %"8", align 4
%"71" = inttoptr i64 %"33" to ptr addrspace(1)
- %"97" = getelementptr inbounds i8, ptr addrspace(1) %"71", i64 8
+ %"96" = getelementptr inbounds i8, ptr addrspace(1) %"71", i64 8
%"72" = bitcast i32 %"34" to float
- store float %"72", ptr addrspace(1) %"97", align 4
+ store float %"72", ptr addrspace(1) %"96", align 4
%"35" = load i64, ptr addrspace(5) %"5", align 8
%"36" = load i32, ptr addrspace(5) %"9", align 4
%"73" = inttoptr i64 %"35" to ptr addrspace(1)
- %"99" = getelementptr inbounds i8, ptr addrspace(1) %"73", i64 12
+ %"98" = getelementptr inbounds i8, ptr addrspace(1) %"73", i64 12
%"74" = bitcast i32 %"36" to float
- store float %"74", ptr addrspace(1) %"99", align 4
+ store float %"74", ptr addrspace(1) %"98", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/cvt_f64_f32.ll b/ptx/src/test/spirv_run/cvt_f64_f32.ll
index 96267f4..64b4bb8 100644
--- a/ptx/src/test/spirv_run/cvt_f64_f32.ll
+++ b/ptx/src/test/spirv_run/cvt_f64_f32.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
-"21":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca double, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
diff --git a/ptx/src/test/spirv_run/cvt_rni.ll b/ptx/src/test/spirv_run/cvt_rni.ll
index 5eb6eaa..77d2999 100644
--- a/ptx/src/test/spirv_run/cvt_rni.ll
+++ b/ptx/src/test/spirv_run/cvt_rni.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 {
-"33":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"28", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27",
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"30" = inttoptr i64 %"14" to ptr
- %"35" = getelementptr inbounds i8, ptr %"30", i64 4
- %"13" = load float, ptr %"35", align 4
+ %"34" = getelementptr inbounds i8, ptr %"30", i64 4
+ %"13" = load float, ptr %"34", align 4
store float %"13", ptr addrspace(5) %"7", align 4
%"16" = load float, ptr addrspace(5) %"6", align 4
%"15" = call float @llvm.rint.f32(float %"16")
@@ -35,8 +37,8 @@ define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27",
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load float, ptr addrspace(5) %"7", align 4
%"32" = inttoptr i64 %"21" to ptr
- %"37" = getelementptr inbounds i8, ptr %"32", i64 4
- store float %"22", ptr %"37", align 4
+ %"36" = getelementptr inbounds i8, ptr %"32", i64 4
+ store float %"22", ptr %"36", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/cvt_rzi.ll b/ptx/src/test/spirv_run/cvt_rzi.ll
index 83783d8..e651db5 100644
--- a/ptx/src/test/spirv_run/cvt_rzi.ll
+++ b/ptx/src/test/spirv_run/cvt_rzi.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 {
-"33":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"28", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27",
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"30" = inttoptr i64 %"14" to ptr
- %"35" = getelementptr inbounds i8, ptr %"30", i64 4
- %"13" = load float, ptr %"35", align 4
+ %"34" = getelementptr inbounds i8, ptr %"30", i64 4
+ %"13" = load float, ptr %"34", align 4
store float %"13", ptr addrspace(5) %"7", align 4
%"16" = load float, ptr addrspace(5) %"6", align 4
%"15" = call float @llvm.trunc.f32(float %"16")
@@ -35,8 +37,8 @@ define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27",
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load float, ptr addrspace(5) %"7", align 4
%"32" = inttoptr i64 %"21" to ptr
- %"37" = getelementptr inbounds i8, ptr %"32", i64 4
- store float %"22", ptr %"37", align 4
+ %"36" = getelementptr inbounds i8, ptr %"32", i64 4
+ store float %"22", ptr %"36", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/cvt_s16_s8.ll b/ptx/src/test/spirv_run/cvt_s16_s8.ll
index 841178e..6f49cea 100644
--- a/ptx/src/test/spirv_run/cvt_s16_s8.ll
+++ b/ptx/src/test/spirv_run/cvt_s16_s8.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
-"23":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
@@ -18,8 +20,8 @@ define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"17
%"11" = load i32, ptr addrspace(1) %"19", align 4
store i32 %"11", ptr addrspace(5) %"7", align 4
%"14" = load i32, ptr addrspace(5) %"7", align 4
- %"25" = trunc i32 %"14" to i8
- %"20" = sext i8 %"25" to i16
+ %"24" = trunc i32 %"14" to i8
+ %"20" = sext i8 %"24" to i16
%"13" = sext i16 %"20" to i32
store i32 %"13", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
diff --git a/ptx/src/test/spirv_run/cvt_s32_f32.ll b/ptx/src/test/spirv_run/cvt_s32_f32.ll
index bd1b9e3..e8b8bc1 100644
--- a/ptx/src/test/spirv_run/cvt_s32_f32.ll
+++ b/ptx/src/test/spirv_run/cvt_s32_f32.ll
@@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float) #0
define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 {
-"41":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"28", align 8
@@ -22,8 +24,8 @@ define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"2
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"14" to ptr
- %"46" = getelementptr inbounds i8, ptr %"31", i64 4
- %"32" = load float, ptr %"46", align 4
+ %"45" = getelementptr inbounds i8, ptr %"31", i64 4
+ %"32" = load float, ptr %"45", align 4
%"13" = bitcast float %"32" to i32
store i32 %"13", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
@@ -41,8 +43,8 @@ define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"2
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"39" = inttoptr i64 %"21" to ptr addrspace(1)
- %"48" = getelementptr inbounds i8, ptr addrspace(1) %"39", i64 4
- store i32 %"22", ptr addrspace(1) %"48", align 4
+ %"47" = getelementptr inbounds i8, ptr addrspace(1) %"39", i64 4
+ store i32 %"22", ptr addrspace(1) %"47", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/cvt_s64_s32.ll b/ptx/src/test/spirv_run/cvt_s64_s32.ll
index 4958266..799b90a 100644
--- a/ptx/src/test/spirv_run/cvt_s64_s32.ll
+++ b/ptx/src/test/spirv_run/cvt_s64_s32.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
-"23":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
diff --git a/ptx/src/test/spirv_run/cvt_sat_s_u.ll b/ptx/src/test/spirv_run/cvt_sat_s_u.ll
index 3af6ef5..5e8d015 100644
--- a/ptx/src/test/spirv_run/cvt_sat_s_u.ll
+++ b/ptx/src/test/spirv_run/cvt_sat_s_u.ll
@@ -2,14 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 {
-"34":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
+ %1 = alloca i32, align 4, addrspace(5)
+ %2 = alloca i32, align 4, addrspace(5)
+ %3 = alloca i32, align 4, addrspace(5)
+ br label %4
+
+4: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"26", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"27", align 8
@@ -19,18 +24,15 @@ define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"2
%"12" = load i32, ptr %"28", align 4
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i32, ptr addrspace(5) %"6", align 4
- %0 = call i32 @llvm.smax.i32(i32 %"15", i32 0)
- %1 = alloca i32, align 4, addrspace(5)
- store i32 %0, ptr addrspace(5) %1, align 4
+ %5 = call i32 @llvm.smax.i32(i32 %"15", i32 0)
+ store i32 %5, ptr addrspace(5) %1, align 4
%"14" = load i32, ptr addrspace(5) %1, align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i32, ptr addrspace(5) %"7", align 4
- %2 = alloca i32, align 4, addrspace(5)
store i32 %"17", ptr addrspace(5) %2, align 4
%"29" = load i32, ptr addrspace(5) %2, align 4
store i32 %"29", ptr addrspace(5) %"7", align 4
%"19" = load i32, ptr addrspace(5) %"6", align 4
- %3 = alloca i32, align 4, addrspace(5)
store i32 %"19", ptr addrspace(5) %3, align 4
%"30" = load i32, ptr addrspace(5) %3, align 4
store i32 %"30", ptr addrspace(5) %"8", align 4
@@ -41,8 +43,8 @@ define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"2
%"22" = load i64, ptr addrspace(5) %"5", align 8
%"23" = load i32, ptr addrspace(5) %"8", align 4
%"33" = inttoptr i64 %"22" to ptr
- %"36" = getelementptr inbounds i8, ptr %"33", i64 4
- store i32 %"23", ptr %"36", align 4
+ %"35" = getelementptr inbounds i8, ptr %"33", i64 4
+ store i32 %"23", ptr %"35", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/cvt_u32_s16.ll b/ptx/src/test/spirv_run/cvt_u32_s16.ll
index 141f83f..1b868a5 100644
--- a/ptx/src/test/spirv_run/cvt_u32_s16.ll
+++ b/ptx/src/test/spirv_run/cvt_u32_s16.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_u32_s16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
-"23":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i16, align 2, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
diff --git a/ptx/src/test/spirv_run/cvta.ll b/ptx/src/test/spirv_run/cvta.ll
index d5c0f73..7b73f8c 100644
--- a/ptx/src/test/spirv_run/cvta.ll
+++ b/ptx/src/test/spirv_run/cvta.ll
@@ -2,25 +2,27 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
-"26":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
- %0 = inttoptr i64 %"11" to ptr
- %1 = addrspacecast ptr %0 to ptr addrspace(1)
- %"20" = ptrtoint ptr addrspace(1) %1 to i64
+ %2 = inttoptr i64 %"11" to ptr
+ %3 = addrspacecast ptr %2 to ptr addrspace(1)
+ %"20" = ptrtoint ptr addrspace(1) %3 to i64
store i64 %"20", ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(5) %"5", align 8
- %2 = inttoptr i64 %"13" to ptr
- %3 = addrspacecast ptr %2 to ptr addrspace(1)
- %"22" = ptrtoint ptr addrspace(1) %3 to i64
+ %4 = inttoptr i64 %"13" to ptr
+ %5 = addrspacecast ptr %4 to ptr addrspace(1)
+ %"22" = ptrtoint ptr addrspace(1) %5 to i64
store i64 %"22", ptr addrspace(5) %"5", align 8
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"24" = inttoptr i64 %"15" to ptr addrspace(1)
diff --git a/ptx/src/test/spirv_run/div_approx.ll b/ptx/src/test/spirv_run/div_approx.ll
index 833065e..d4b889f 100644
--- a/ptx/src/test/spirv_run/div_approx.ll
+++ b/ptx/src/test/spirv_run/div_approx.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"27":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"22
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
- %"29" = getelementptr inbounds i8, ptr %"25", i64 4
- %"13" = load float, ptr %"29", align 4
+ %"28" = getelementptr inbounds i8, ptr %"25", i64 4
+ %"13" = load float, ptr %"28", align 4
store float %"13", ptr addrspace(5) %"7", align 4
%"16" = load float, ptr addrspace(5) %"6", align 4
%"17" = load float, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/dp4a.ll b/ptx/src/test/spirv_run/dp4a.ll
index 2ada6cb..97f4098 100644
--- a/ptx/src/test/spirv_run/dp4a.ll
+++ b/ptx/src/test/spirv_run/dp4a.ll
@@ -4,14 +4,16 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__dp4a_s32_s32(i32, i32, i32) #0
define protected amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 {
-"38":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
@@ -22,13 +24,13 @@ define protected amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"28", ptr
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"15" to ptr
- %"45" = getelementptr inbounds i8, ptr %"31", i64 4
- %"14" = load i32, ptr %"45", align 4
+ %"44" = getelementptr inbounds i8, ptr %"31", i64 4
+ %"14" = load i32, ptr %"44", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"32" = inttoptr i64 %"17" to ptr
- %"47" = getelementptr inbounds i8, ptr %"32", i64 8
- %"16" = load i32, ptr %"47", align 4
+ %"46" = getelementptr inbounds i8, ptr %"32", i64 8
+ %"16" = load i32, ptr %"46", align 4
store i32 %"16", ptr addrspace(5) %"8", align 4
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"20" = load i32, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/ex2.ll b/ptx/src/test/spirv_run/ex2.ll
index b5e671e..aa0c1d5 100644
--- a/ptx/src/test/spirv_run/ex2.ll
+++ b/ptx/src/test/spirv_run/ex2.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 {
-"56":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"46", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"47", align 8
@@ -25,8 +27,8 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr
store float %"15", ptr %"49", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"50" = inttoptr i64 %"17" to ptr
- %"58" = getelementptr inbounds i8, ptr %"50", i64 4
- %"16" = load float, ptr %"58", align 4
+ %"57" = getelementptr inbounds i8, ptr %"50", i64 4
+ %"16" = load float, ptr %"57", align 4
store float %"16", ptr addrspace(5) %"6", align 4
%"19" = load float, ptr addrspace(5) %"6", align 4
%"18" = call afn float @llvm.exp2.f32(float %"19")
@@ -34,12 +36,12 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load float, ptr addrspace(5) %"6", align 4
%"51" = inttoptr i64 %"20" to ptr
- %"60" = getelementptr inbounds i8, ptr %"51", i64 4
- store float %"21", ptr %"60", align 4
+ %"59" = getelementptr inbounds i8, ptr %"51", i64 4
+ store float %"21", ptr %"59", align 4
%"23" = load i64, ptr addrspace(5) %"4", align 8
%"52" = inttoptr i64 %"23" to ptr
- %"62" = getelementptr inbounds i8, ptr %"52", i64 8
- %"22" = load float, ptr %"62", align 4
+ %"61" = getelementptr inbounds i8, ptr %"52", i64 8
+ %"22" = load float, ptr %"61", align 4
store float %"22", ptr addrspace(5) %"6", align 4
%"25" = load float, ptr addrspace(5) %"6", align 4
%"24" = call afn float @llvm.exp2.f32(float %"25")
@@ -47,12 +49,12 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr
%"26" = load i64, ptr addrspace(5) %"5", align 8
%"27" = load float, ptr addrspace(5) %"6", align 4
%"53" = inttoptr i64 %"26" to ptr
- %"64" = getelementptr inbounds i8, ptr %"53", i64 8
- store float %"27", ptr %"64", align 4
+ %"63" = getelementptr inbounds i8, ptr %"53", i64 8
+ store float %"27", ptr %"63", align 4
%"29" = load i64, ptr addrspace(5) %"4", align 8
%"54" = inttoptr i64 %"29" to ptr
- %"66" = getelementptr inbounds i8, ptr %"54", i64 12
- %"28" = load float, ptr %"66", align 4
+ %"65" = getelementptr inbounds i8, ptr %"54", i64 12
+ %"28" = load float, ptr %"65", align 4
store float %"28", ptr addrspace(5) %"6", align 4
%"31" = load float, ptr addrspace(5) %"6", align 4
%"30" = call afn float @llvm.exp2.f32(float %"31")
@@ -60,8 +62,8 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr
%"32" = load i64, ptr addrspace(5) %"5", align 8
%"33" = load float, ptr addrspace(5) %"6", align 4
%"55" = inttoptr i64 %"32" to ptr
- %"68" = getelementptr inbounds i8, ptr %"55", i64 12
- store float %"33", ptr %"68", align 4
+ %"67" = getelementptr inbounds i8, ptr %"55", i64 12
+ store float %"33", ptr %"67", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/extern_shared.ll b/ptx/src/test/spirv_run/extern_shared.ll
index eeb0d50..e7d0a21 100644
--- a/ptx/src/test/spirv_run/extern_shared.ll
+++ b/ptx/src/test/spirv_run/extern_shared.ll
@@ -4,12 +4,14 @@ target triple = "amdgcn-amd-amdhsa"
@shared_mem = external hidden addrspace(3) global [0 x i32]
define protected amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
-"23":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"5", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
diff --git a/ptx/src/test/spirv_run/extern_shared_call.ll b/ptx/src/test/spirv_run/extern_shared_call.ll
index cdd37be..a2b6c10 100644
--- a/ptx/src/test/spirv_run/extern_shared_call.ll
+++ b/ptx/src/test/spirv_run/extern_shared_call.ll
@@ -3,28 +3,32 @@ target triple = "amdgcn-amd-amdhsa"
@shared_mem = external hidden addrspace(3) global [0 x i32], align 4
-define private void @"2"(ptr addrspace(3) %"35") #0 {
-"33":
+define private void @"2"(ptr addrspace(3) %"33") #0 {
%"10" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"10", align 1
%"3" = alloca i64, align 8, addrspace(5)
- %"12" = load i64, ptr addrspace(3) %"35", align 8
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"10", align 1
+ %"12" = load i64, ptr addrspace(3) %"33", align 8
store i64 %"12", ptr addrspace(5) %"3", align 8
%"14" = load i64, ptr addrspace(5) %"3", align 8
%"13" = add i64 %"14", 2
store i64 %"13", ptr addrspace(5) %"3", align 8
%"15" = load i64, ptr addrspace(5) %"3", align 8
- store i64 %"15", ptr addrspace(3) %"35", align 8
+ store i64 %"15", ptr addrspace(3) %"33", align 8
ret void
}
define protected amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 {
-"34":
%"11" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"11", align 1
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"11", align 1
%"16" = load i64, ptr addrspace(4) %"25", align 8
store i64 %"16", ptr addrspace(5) %"7", align 8
%"17" = load i64, ptr addrspace(4) %"26", align 8
diff --git a/ptx/src/test/spirv_run/fma.ll b/ptx/src/test/spirv_run/fma.ll
index 1dff2b8..61ef775 100644
--- a/ptx/src/test/spirv_run/fma.ll
+++ b/ptx/src/test/spirv_run/fma.ll
@@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 {
-"34":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"8" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
@@ -20,13 +22,13 @@ define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"28", ptr
store float %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"15" to ptr
- %"36" = getelementptr inbounds i8, ptr %"31", i64 4
- %"14" = load float, ptr %"36", align 4
+ %"35" = getelementptr inbounds i8, ptr %"31", i64 4
+ %"14" = load float, ptr %"35", align 4
store float %"14", ptr addrspace(5) %"7", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"32" = inttoptr i64 %"17" to ptr
- %"38" = getelementptr inbounds i8, ptr %"32", i64 8
- %"16" = load float, ptr %"38", align 4
+ %"37" = getelementptr inbounds i8, ptr %"32", i64 8
+ %"16" = load float, ptr %"37", align 4
store float %"16", ptr addrspace(5) %"8", align 4
%"19" = load float, ptr addrspace(5) %"6", align 4
%"20" = load float, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/func_ptr.ll b/ptx/src/test/spirv_run/func_ptr.ll
index 1160a76..ad4392b 100644
--- a/ptx/src/test/spirv_run/func_ptr.ll
+++ b/ptx/src/test/spirv_run/func_ptr.ll
@@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define private float @"1"(float %"15", float %"16") #0 {
-"38":
%"3" = alloca float, align 4, addrspace(5)
%"4" = alloca float, align 4, addrspace(5)
%"2" = alloca float, align 4, addrspace(5)
%"13" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"13", align 1
+ br label %1
+
+1: ; preds = %0
store float %"15", ptr addrspace(5) %"3", align 4
store float %"16", ptr addrspace(5) %"4", align 4
+ store i1 false, ptr addrspace(5) %"13", align 1
%"18" = load float, ptr addrspace(5) %"3", align 4
%"19" = load float, ptr addrspace(5) %"4", align 4
%"17" = fadd float %"18", %"19"
@@ -19,14 +21,16 @@ define private float @"1"(float %"15", float %"16") #0 {
}
define protected amdgpu_kernel void @func_ptr(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
-"39":
%"14" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"14", align 1
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
%"10" = alloca i64, align 8, addrspace(5)
%"11" = alloca i64, align 8, addrspace(5)
%"12" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"14", align 1
%"21" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"21", ptr addrspace(5) %"8", align 8
%"22" = load i64, ptr addrspace(4) %"35", align 8
diff --git a/ptx/src/test/spirv_run/generic.ll b/ptx/src/test/spirv_run/generic.ll
index 312a7cd..44b4ef9 100644
--- a/ptx/src/test/spirv_run/generic.ll
+++ b/ptx/src/test/spirv_run/generic.ll
@@ -5,18 +5,20 @@ target triple = "amdgcn-amd-amdhsa"
@bar = protected addrspace(1) externally_initialized global [4 x i64] [i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 4), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 8), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 12)]
define protected amdgpu_kernel void @generic(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 {
-"57":
%"10" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"10", align 1
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
+ %1 = alloca i32, align 4, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"10", align 1
%"11" = load i64, ptr addrspace(4) %"47", align 8
store i64 %"11", ptr addrspace(5) %"7", align 8
- %0 = alloca i32, align 4, addrspace(5)
- store i32 1, ptr addrspace(5) %0, align 4
- %"12" = load i32, ptr addrspace(5) %0, align 4
+ store i32 1, ptr addrspace(5) %1, align 4
+ %"12" = load i32, ptr addrspace(5) %1, align 4
store i32 %"12", ptr addrspace(5) %"8", align 4
%"13" = load i64, ptr addrspace(1) @bar, align 8
store i64 %"13", ptr addrspace(5) %"6", align 8
diff --git a/ptx/src/test/spirv_run/global_array.ll b/ptx/src/test/spirv_run/global_array.ll
index e2ad2f2..59a66ea 100644
--- a/ptx/src/test/spirv_run/global_array.ll
+++ b/ptx/src/test/spirv_run/global_array.ll
@@ -5,15 +5,17 @@ target triple = "amdgcn-amd-amdhsa"
@foobar = protected addrspace(1) externally_initialized global [4 x [2 x i64]] [[2 x i64] [i64 -1, i64 2], [2 x i64] [i64 3, i64 0], [2 x i64] [i64 ptrtoint (ptr addrspace(1) @asdas to i64), i64 0], [2 x i64] zeroinitializer]
define protected amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
-"21":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
- %0 = alloca i64, align 8, addrspace(5)
- store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %0, align 8
- %"10" = load i64, ptr addrspace(5) %0, align 8
+ %1 = alloca i64, align 8, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
+ store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %1, align 8
+ %"10" = load i64, ptr addrspace(5) %1, align 8
store i64 %"10", ptr addrspace(5) %"6", align 8
%"11" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"11", ptr addrspace(5) %"7", align 8
diff --git a/ptx/src/test/spirv_run/lanemask_lt.ll b/ptx/src/test/spirv_run/lanemask_lt.ll
index efa1746..cc81383 100644
--- a/ptx/src/test/spirv_run/lanemask_lt.ll
+++ b/ptx/src/test/spirv_run/lanemask_lt.ll
@@ -4,14 +4,17 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__sreg_lanemask_lt() #0
define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 {
-"39":
%"10" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
+ %1 = alloca i32, align 4, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"10", align 1
%"14" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"14", ptr addrspace(5) %"4", align 8
%"15" = load i64, ptr addrspace(4) %"28", align 8
@@ -24,9 +27,8 @@ define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"2
%"31" = add i32 %"19", 1
store i32 %"31", ptr addrspace(5) %"7", align 4
%"11" = call i32 @__zluda_ptx_impl__sreg_lanemask_lt()
- %0 = alloca i32, align 4, addrspace(5)
- store i32 %"11", ptr addrspace(5) %0, align 4
- %"33" = load i32, ptr addrspace(5) %0, align 4
+ store i32 %"11", ptr addrspace(5) %1, align 4
+ %"33" = load i32, ptr addrspace(5) %1, align 4
store i32 %"33", ptr addrspace(5) %"8", align 4
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"23" = load i32, ptr addrspace(5) %"8", align 4
diff --git a/ptx/src/test/spirv_run/ld_st.ll b/ptx/src/test/spirv_run/ld_st.ll
index 0fe06f2..4b23120 100644
--- a/ptx/src/test/spirv_run/ld_st.ll
+++ b/ptx/src/test/spirv_run/ld_st.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 {
-"18":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"14", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"15", align 8
diff --git a/ptx/src/test/spirv_run/ld_st_implicit.ll b/ptx/src/test/spirv_run/ld_st_implicit.ll
index 3ec1474..71baa92 100644
--- a/ptx/src/test/spirv_run/ld_st_implicit.ll
+++ b/ptx/src/test/spirv_run/ld_st_implicit.ll
@@ -2,31 +2,33 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
-"22":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
+ %1 = alloca i64, align 8, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"5", align 8
- %0 = alloca i64, align 8, addrspace(5)
- store i64 81985529216486895, ptr addrspace(5) %0, align 8
- %"10" = load i64, ptr addrspace(5) %0, align 8
+ store i64 81985529216486895, ptr addrspace(5) %1, align 8
+ %"10" = load i64, ptr addrspace(5) %1, align 8
store i64 %"10", ptr addrspace(5) %"6", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr addrspace(1)
%"18" = load float, ptr addrspace(1) %"19", align 4
- %"23" = bitcast float %"18" to i32
- %"11" = zext i32 %"23" to i64
+ %"22" = bitcast float %"18" to i32
+ %"11" = zext i32 %"22" to i64
store i64 %"11", ptr addrspace(5) %"6", align 8
%"13" = load i64, ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
%"20" = inttoptr i64 %"13" to ptr addrspace(1)
- %"25" = trunc i64 %"14" to i32
- %"21" = bitcast i32 %"25" to float
+ %"24" = trunc i64 %"14" to i32
+ %"21" = bitcast i32 %"24" to float
store float %"21", ptr addrspace(1) %"20", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/ld_st_offset.ll b/ptx/src/test/spirv_run/ld_st_offset.ll
index ee8bde6..959aa53 100644
--- a/ptx/src/test/spirv_run/ld_st_offset.ll
+++ b/ptx/src/test/spirv_run/ld_st_offset.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
-"29":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"24", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"14" to ptr
- %"31" = getelementptr inbounds i8, ptr %"26", i64 4
- %"13" = load i32, ptr %"31", align 4
+ %"30" = getelementptr inbounds i8, ptr %"26", i64 4
+ %"13" = load i32, ptr %"30", align 4
store i32 %"13", ptr addrspace(5) %"7", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i32, ptr addrspace(5) %"7", align 4
@@ -29,8 +31,8 @@ define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"
%"17" = load i64, ptr addrspace(5) %"5", align 8
%"18" = load i32, ptr addrspace(5) %"6", align 4
%"28" = inttoptr i64 %"17" to ptr
- %"33" = getelementptr inbounds i8, ptr %"28", i64 4
- store i32 %"18", ptr %"33", align 4
+ %"32" = getelementptr inbounds i8, ptr %"28", i64 4
+ store i32 %"18", ptr %"32", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/lg2.ll b/ptx/src/test/spirv_run/lg2.ll
index 7dd63d6..9e4500e 100644
--- a/ptx/src/test/spirv_run/lg2.ll
+++ b/ptx/src/test/spirv_run/lg2.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
-"20":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
diff --git a/ptx/src/test/spirv_run/local_align.ll b/ptx/src/test/spirv_run/local_align.ll
index 13fbe4b..284a081 100644
--- a/ptx/src/test/spirv_run/local_align.ll
+++ b/ptx/src/test/spirv_run/local_align.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 {
-"19":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca [8 x i8], align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"15", align 8
store i64 %"9", ptr addrspace(5) %"5", align 8
%"10" = load i64, ptr addrspace(4) %"16", align 8
diff --git a/ptx/src/test/spirv_run/mad_hi_cc.ll b/ptx/src/test/spirv_run/mad_hi_cc.ll
index 6c86dbc..f9a27b4 100644
--- a/ptx/src/test/spirv_run/mad_hi_cc.ll
+++ b/ptx/src/test/spirv_run/mad_hi_cc.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60", ptr addrspace(4) byref(i64) %"61") #0 {
-"77":
%"14" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"14", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -15,6 +13,10 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60"
%"11" = alloca i32, align 4, addrspace(5)
%"12" = alloca i32, align 4, addrspace(5)
%"13" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"14", align 1
%"15" = load i64, ptr addrspace(4) %"60", align 8
store i64 %"15", ptr addrspace(5) %"4", align 8
%"16" = load i64, ptr addrspace(4) %"61", align 8
@@ -25,44 +27,44 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60"
store i32 %"62", ptr addrspace(5) %"8", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"64" = inttoptr i64 %"20" to ptr
- %"79" = getelementptr inbounds i8, ptr %"64", i64 4
- %"65" = load i32, ptr %"79", align 4
+ %"78" = getelementptr inbounds i8, ptr %"64", i64 4
+ %"65" = load i32, ptr %"78", align 4
store i32 %"65", ptr addrspace(5) %"9", align 4
%"22" = load i64, ptr addrspace(5) %"4", align 8
%"66" = inttoptr i64 %"22" to ptr
- %"81" = getelementptr inbounds i8, ptr %"66", i64 8
- %"21" = load i32, ptr %"81", align 4
+ %"80" = getelementptr inbounds i8, ptr %"66", i64 8
+ %"21" = load i32, ptr %"80", align 4
store i32 %"21", ptr addrspace(5) %"10", align 4
%"25" = load i32, ptr addrspace(5) %"8", align 4
%"26" = load i32, ptr addrspace(5) %"9", align 4
%"27" = load i32, ptr addrspace(5) %"10", align 4
- %0 = sext i32 %"25" to i64
- %1 = sext i32 %"26" to i64
- %2 = mul nsw i64 %0, %1
- %3 = lshr i64 %2, 32
- %4 = trunc i64 %3 to i32
- %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %4, i32 %"27")
- %"23" = extractvalue { i32, i1 } %5, 0
- %"24" = extractvalue { i32, i1 } %5, 1
+ %2 = sext i32 %"25" to i64
+ %3 = sext i32 %"26" to i64
+ %4 = mul nsw i64 %2, %3
+ %5 = lshr i64 %4, 32
+ %6 = trunc i64 %5 to i32
+ %7 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %6, i32 %"27")
+ %"23" = extractvalue { i32, i1 } %7, 0
+ %"24" = extractvalue { i32, i1 } %7, 1
store i32 %"23", ptr addrspace(5) %"7", align 4
store i1 %"24", ptr addrspace(5) %"14", align 1
- %6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -2)
- %"28" = extractvalue { i32, i1 } %6, 0
- %"29" = extractvalue { i32, i1 } %6, 1
+ %8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -2)
+ %"28" = extractvalue { i32, i1 } %8, 0
+ %"29" = extractvalue { i32, i1 } %8, 1
store i32 %"28", ptr addrspace(5) %"6", align 4
store i1 %"29", ptr addrspace(5) %"14", align 1
%"31" = load i1, ptr addrspace(5) %"14", align 1
- %7 = zext i1 %"31" to i32
- %"70" = add i32 0, %7
+ %9 = zext i1 %"31" to i32
+ %"70" = add i32 0, %9
store i32 %"70", ptr addrspace(5) %"12", align 4
- %8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1)
- %"32" = extractvalue { i32, i1 } %8, 0
- %"33" = extractvalue { i32, i1 } %8, 1
+ %10 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1)
+ %"32" = extractvalue { i32, i1 } %10, 0
+ %"33" = extractvalue { i32, i1 } %10, 1
store i32 %"32", ptr addrspace(5) %"6", align 4
store i1 %"33", ptr addrspace(5) %"14", align 1
%"35" = load i1, ptr addrspace(5) %"14", align 1
- %9 = zext i1 %"35" to i32
- %"71" = add i32 0, %9
+ %11 = zext i1 %"35" to i32
+ %"71" = add i32 0, %11
store i32 %"71", ptr addrspace(5) %"13", align 4
%"36" = load i64, ptr addrspace(5) %"5", align 8
%"37" = load i32, ptr addrspace(5) %"7", align 4
@@ -71,13 +73,13 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60"
%"38" = load i64, ptr addrspace(5) %"5", align 8
%"39" = load i32, ptr addrspace(5) %"12", align 4
%"73" = inttoptr i64 %"38" to ptr
- %"83" = getelementptr inbounds i8, ptr %"73", i64 4
- store i32 %"39", ptr %"83", align 4
+ %"82" = getelementptr inbounds i8, ptr %"73", i64 4
+ store i32 %"39", ptr %"82", align 4
%"40" = load i64, ptr addrspace(5) %"5", align 8
%"41" = load i32, ptr addrspace(5) %"13", align 4
%"75" = inttoptr i64 %"40" to ptr
- %"85" = getelementptr inbounds i8, ptr %"75", i64 8
- store i32 %"41", ptr %"85", align 4
+ %"84" = getelementptr inbounds i8, ptr %"75", i64 8
+ store i32 %"41", ptr %"84", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/mad_s32.ll b/ptx/src/test/spirv_run/mad_s32.ll
index 5ab86ad..f1c15cf 100644
--- a/ptx/src/test/spirv_run/mad_s32.ll
+++ b/ptx/src/test/spirv_run/mad_s32.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52", ptr addrspace(4) byref(i64) %"53") #0 {
-"75":
%"13" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"13", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -14,6 +12,10 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52",
%"10" = alloca i32, align 4, addrspace(5)
%"11" = alloca i32, align 4, addrspace(5)
%"12" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"13", align 1
%"14" = load i64, ptr addrspace(4) %"52", align 8
store i64 %"14", ptr addrspace(5) %"4", align 8
%"15" = load i64, ptr addrspace(4) %"53", align 8
@@ -24,42 +26,42 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52",
store i32 %"54", ptr addrspace(5) %"9", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"56" = inttoptr i64 %"19" to ptr
- %"77" = getelementptr inbounds i8, ptr %"56", i64 4
- %"57" = load i32, ptr %"77", align 4
+ %"76" = getelementptr inbounds i8, ptr %"56", i64 4
+ %"57" = load i32, ptr %"76", align 4
store i32 %"57", ptr addrspace(5) %"10", align 4
%"21" = load i64, ptr addrspace(5) %"4", align 8
%"58" = inttoptr i64 %"21" to ptr
- %"79" = getelementptr inbounds i8, ptr %"58", i64 8
- %"20" = load i64, ptr %"79", align 8
+ %"78" = getelementptr inbounds i8, ptr %"58", i64 8
+ %"20" = load i64, ptr %"78", align 8
store i64 %"20", ptr addrspace(5) %"12", align 8
%"23" = load i64, ptr addrspace(5) %"4", align 8
%"59" = inttoptr i64 %"23" to ptr
- %"81" = getelementptr inbounds i8, ptr %"59", i64 16
- %"60" = load i32, ptr %"81", align 4
+ %"80" = getelementptr inbounds i8, ptr %"59", i64 16
+ %"60" = load i32, ptr %"80", align 4
store i32 %"60", ptr addrspace(5) %"11", align 4
%"25" = load i32, ptr addrspace(5) %"9", align 4
%"26" = load i32, ptr addrspace(5) %"10", align 4
%"27" = load i32, ptr addrspace(5) %"11", align 4
- %0 = mul i32 %"25", %"26"
- %"24" = add i32 %0, %"27"
+ %2 = mul i32 %"25", %"26"
+ %"24" = add i32 %2, %"27"
store i32 %"24", ptr addrspace(5) %"6", align 4
%"29" = load i32, ptr addrspace(5) %"9", align 4
%"30" = load i32, ptr addrspace(5) %"10", align 4
%"31" = load i32, ptr addrspace(5) %"11", align 4
- %1 = sext i32 %"29" to i64
- %2 = sext i32 %"30" to i64
- %3 = mul nsw i64 %1, %2
- %4 = lshr i64 %3, 32
- %5 = trunc i64 %4 to i32
- %"28" = add i32 %5, %"31"
+ %3 = sext i32 %"29" to i64
+ %4 = sext i32 %"30" to i64
+ %5 = mul nsw i64 %3, %4
+ %6 = lshr i64 %5, 32
+ %7 = trunc i64 %6 to i32
+ %"28" = add i32 %7, %"31"
store i32 %"28", ptr addrspace(5) %"7", align 4
%"33" = load i32, ptr addrspace(5) %"9", align 4
%"34" = load i32, ptr addrspace(5) %"10", align 4
%"35" = load i64, ptr addrspace(5) %"12", align 8
- %6 = sext i32 %"33" to i64
- %7 = sext i32 %"34" to i64
- %8 = mul nsw i64 %6, %7
- %"67" = add i64 %8, %"35"
+ %8 = sext i32 %"33" to i64
+ %9 = sext i32 %"34" to i64
+ %10 = mul nsw i64 %8, %9
+ %"67" = add i64 %10, %"35"
store i64 %"67", ptr addrspace(5) %"8", align 8
%"36" = load i64, ptr addrspace(5) %"5", align 8
%"37" = load i32, ptr addrspace(5) %"6", align 4
@@ -68,13 +70,13 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52",
%"38" = load i64, ptr addrspace(5) %"5", align 8
%"39" = load i32, ptr addrspace(5) %"7", align 4
%"72" = inttoptr i64 %"38" to ptr
- %"83" = getelementptr inbounds i8, ptr %"72", i64 8
- store i32 %"39", ptr %"83", align 4
+ %"82" = getelementptr inbounds i8, ptr %"72", i64 8
+ store i32 %"39", ptr %"82", align 4
%"40" = load i64, ptr addrspace(5) %"5", align 8
%"41" = load i64, ptr addrspace(5) %"8", align 8
%"73" = inttoptr i64 %"40" to ptr
- %"85" = getelementptr inbounds i8, ptr %"73", i64 16
- store i64 %"41", ptr %"85", align 8
+ %"84" = getelementptr inbounds i8, ptr %"73", i64 16
+ store i64 %"41", ptr %"84", align 8
ret void
}
diff --git a/ptx/src/test/spirv_run/madc_cc.ll b/ptx/src/test/spirv_run/madc_cc.ll
index 136f320..0c9df2b 100644
--- a/ptx/src/test/spirv_run/madc_cc.ll
+++ b/ptx/src/test/spirv_run/madc_cc.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
-"54":
%"11" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"11", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -12,6 +10,10 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40",
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
%"10" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"11", align 1
%"12" = load i64, ptr addrspace(4) %"40", align 8
store i64 %"12", ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(4) %"41", align 8
@@ -22,34 +24,34 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40",
store i32 %"42", ptr addrspace(5) %"8", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"44" = inttoptr i64 %"17" to ptr
- %"56" = getelementptr inbounds i8, ptr %"44", i64 4
- %"45" = load i32, ptr %"56", align 4
+ %"55" = getelementptr inbounds i8, ptr %"44", i64 4
+ %"45" = load i32, ptr %"55", align 4
store i32 %"45", ptr addrspace(5) %"9", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"46" = inttoptr i64 %"19" to ptr
- %"58" = getelementptr inbounds i8, ptr %"46", i64 8
- %"18" = load i32, ptr %"58", align 4
+ %"57" = getelementptr inbounds i8, ptr %"46", i64 8
+ %"18" = load i32, ptr %"57", align 4
store i32 %"18", ptr addrspace(5) %"10", align 4
%"22" = load i32, ptr addrspace(5) %"8", align 4
%"23" = load i32, ptr addrspace(5) %"9", align 4
%"24" = load i32, ptr addrspace(5) %"10", align 4
- %0 = mul i32 %"22", %"23"
- %1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %0, i32 %"24")
- %"20" = extractvalue { i32, i1 } %1, 0
- %"21" = extractvalue { i32, i1 } %1, 1
+ %2 = mul i32 %"22", %"23"
+ %3 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %2, i32 %"24")
+ %"20" = extractvalue { i32, i1 } %3, 0
+ %"21" = extractvalue { i32, i1 } %3, 1
store i32 %"20", ptr addrspace(5) %"6", align 4
store i1 %"21", ptr addrspace(5) %"11", align 1
%"26" = load i1, ptr addrspace(5) %"11", align 1
%"27" = load i32, ptr addrspace(5) %"8", align 4
%"28" = load i32, ptr addrspace(5) %"9", align 4
- %2 = sext i32 %"27" to i64
- %3 = sext i32 %"28" to i64
- %4 = mul nsw i64 %2, %3
- %5 = lshr i64 %4, 32
- %6 = trunc i64 %5 to i32
- %7 = zext i1 %"26" to i32
- %8 = add i32 %6, 3
- %"25" = add i32 %8, %7
+ %4 = sext i32 %"27" to i64
+ %5 = sext i32 %"28" to i64
+ %6 = mul nsw i64 %4, %5
+ %7 = lshr i64 %6, 32
+ %8 = trunc i64 %7 to i32
+ %9 = zext i1 %"26" to i32
+ %10 = add i32 %8, 3
+ %"25" = add i32 %10, %9
store i32 %"25", ptr addrspace(5) %"7", align 4
%"29" = load i64, ptr addrspace(5) %"5", align 8
%"30" = load i32, ptr addrspace(5) %"6", align 4
@@ -58,8 +60,8 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40",
%"31" = load i64, ptr addrspace(5) %"5", align 8
%"32" = load i32, ptr addrspace(5) %"7", align 4
%"53" = inttoptr i64 %"31" to ptr
- %"60" = getelementptr inbounds i8, ptr %"53", i64 4
- store i32 %"32", ptr %"60", align 4
+ %"59" = getelementptr inbounds i8, ptr %"53", i64 4
+ store i32 %"32", ptr %"59", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/max.ll b/ptx/src/test/spirv_run/max.ll
index 6dcc74d..ef0b39d 100644
--- a/ptx/src/test/spirv_run/max.ll
+++ b/ptx/src/test/spirv_run/max.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"27":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"22", ptr
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
- %"29" = getelementptr inbounds i8, ptr %"25", i64 4
- %"13" = load i32, ptr %"29", align 4
+ %"28" = getelementptr inbounds i8, ptr %"25", i64 4
+ %"13" = load i32, ptr %"28", align 4
store i32 %"13", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"17" = load i32, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/membar.ll b/ptx/src/test/spirv_run/membar.ll
index 78f60c8..f24c0fb 100644
--- a/ptx/src/test/spirv_run/membar.ll
+++ b/ptx/src/test/spirv_run/membar.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 {
-"19":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"14", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"15", align 8
diff --git a/ptx/src/test/spirv_run/min.ll b/ptx/src/test/spirv_run/min.ll
index 58cb36a..b40c4db 100644
--- a/ptx/src/test/spirv_run/min.ll
+++ b/ptx/src/test/spirv_run/min.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"27":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"22", ptr
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
- %"29" = getelementptr inbounds i8, ptr %"25", i64 4
- %"13" = load i32, ptr %"29", align 4
+ %"28" = getelementptr inbounds i8, ptr %"25", i64 4
+ %"13" = load i32, ptr %"28", align 4
store i32 %"13", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"17" = load i32, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/mov.ll b/ptx/src/test/spirv_run/mov.ll
index e24446a..d43fe68 100644
--- a/ptx/src/test/spirv_run/mov.ll
+++ b/ptx/src/test/spirv_run/mov.ll
@@ -2,13 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
-"21":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ %1 = alloca i64, align 8, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
@@ -18,9 +21,8 @@ define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"17", ptr
%"11" = load i64, ptr %"19", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
- %0 = alloca i64, align 8, addrspace(5)
- store i64 %"14", ptr addrspace(5) %0, align 8
- %"13" = load i64, ptr addrspace(5) %0, align 8
+ store i64 %"14", ptr addrspace(5) %1, align 8
+ %"13" = load i64, ptr addrspace(5) %1, align 8
store i64 %"13", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
diff --git a/ptx/src/test/spirv_run/mov_address.ll b/ptx/src/test/spirv_run/mov_address.ll
index 656410c..42d987f 100644
--- a/ptx/src/test/spirv_run/mov_address.ll
+++ b/ptx/src/test/spirv_run/mov_address.ll
@@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"8", ptr addrspace(4) byref(i64) %"9") #0 {
-"11":
%"6" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"6", align 1
%"4" = alloca [8 x i8], align 1, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
+ %1 = alloca i64, align 8, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"6", align 1
%"10" = ptrtoint ptr addrspace(5) %"4" to i64
- %0 = alloca i64, align 8, addrspace(5)
- store i64 %"10", ptr addrspace(5) %0, align 8
- %"7" = load i64, ptr addrspace(5) %0, align 8
+ store i64 %"10", ptr addrspace(5) %1, align 8
+ %"7" = load i64, ptr addrspace(5) %1, align 8
store i64 %"7", ptr addrspace(5) %"5", align 8
ret void
}
diff --git a/ptx/src/test/spirv_run/mov_vector_cast.ll b/ptx/src/test/spirv_run/mov_vector_cast.ll
index e65ad94..eb81724 100644
--- a/ptx/src/test/spirv_run/mov_vector_cast.ll
+++ b/ptx/src/test/spirv_run/mov_vector_cast.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
-"49":
%"15" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"15", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
@@ -14,6 +12,12 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64)
%"10" = alloca half, align 2, addrspace(5)
%"11" = alloca half, align 2, addrspace(5)
%"12" = alloca half, align 2, addrspace(5)
+ %1 = alloca i64, align 8, addrspace(5)
+ %2 = alloca i64, align 8, addrspace(5)
+ br label %3
+
+3: ; preds = %0
+ store i1 false, ptr addrspace(5) %"15", align 1
%"16" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"16", ptr addrspace(5) %"4", align 8
%"17" = load i64, ptr addrspace(4) %"35", align 8
@@ -23,9 +27,8 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64)
%"18" = load i64, ptr %"36", align 8
store i64 %"18", ptr addrspace(5) %"6", align 8
%"20" = load i64, ptr addrspace(5) %"6", align 8
- %0 = alloca i64, align 8, addrspace(5)
- store i64 %"20", ptr addrspace(5) %0, align 8
- %"13" = load i64, ptr addrspace(5) %0, align 8
+ store i64 %"20", ptr addrspace(5) %1, align 8
+ %"13" = load i64, ptr addrspace(5) %1, align 8
%"38" = bitcast i64 %"13" to <2 x i32>
%"39" = extractelement <2 x i32> %"38", i32 0
%"40" = extractelement <2 x i32> %"38", i32 1
@@ -34,9 +37,8 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64)
store float %"21", ptr addrspace(5) %"7", align 4
store float %"22", ptr addrspace(5) %"8", align 4
%"23" = load i64, ptr addrspace(5) %"6", align 8
- %1 = alloca i64, align 8, addrspace(5)
- store i64 %"23", ptr addrspace(5) %1, align 8
- %"14" = load i64, ptr addrspace(5) %1, align 8
+ store i64 %"23", ptr addrspace(5) %2, align 8
+ %"14" = load i64, ptr addrspace(5) %2, align 8
%"42" = bitcast i64 %"14" to <4 x i16>
%"43" = extractelement <4 x i16> %"42", i32 0
%"44" = extractelement <4 x i16> %"42", i32 1
@@ -57,8 +59,8 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64)
%"30" = load i64, ptr addrspace(5) %"5", align 8
%"31" = load float, ptr addrspace(5) %"7", align 4
%"48" = inttoptr i64 %"30" to ptr
- %"51" = getelementptr inbounds i8, ptr %"48", i64 4
- store float %"31", ptr %"51", align 4
+ %"50" = getelementptr inbounds i8, ptr %"48", i64 4
+ store float %"31", ptr %"50", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/mul_ftz.ll b/ptx/src/test/spirv_run/mul_ftz.ll
index 3c32e73..38867fe 100644
--- a/ptx/src/test/spirv_run/mul_ftz.ll
+++ b/ptx/src/test/spirv_run/mul_ftz.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"27":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"22",
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
- %"29" = getelementptr inbounds i8, ptr %"25", i64 4
- %"13" = load float, ptr %"29", align 4
+ %"28" = getelementptr inbounds i8, ptr %"25", i64 4
+ %"13" = load float, ptr %"28", align 4
store float %"13", ptr addrspace(5) %"7", align 4
%"16" = load float, ptr addrspace(5) %"6", align 4
%"17" = load float, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/mul_hi.ll b/ptx/src/test/spirv_run/mul_hi.ll
index 7d8ffa9..8043deb 100644
--- a/ptx/src/test/spirv_run/mul_hi.ll
+++ b/ptx/src/test/spirv_run/mul_hi.ll
@@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa"
declare i64 @__zluda_ptx_impl__mul_hi_u64(i64, i64) #0
define protected amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #1 {
-"22":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
diff --git a/ptx/src/test/spirv_run/mul_lo.ll b/ptx/src/test/spirv_run/mul_lo.ll
index 57a767d..9370500 100644
--- a/ptx/src/test/spirv_run/mul_lo.ll
+++ b/ptx/src/test/spirv_run/mul_lo.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
-"22":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
diff --git a/ptx/src/test/spirv_run/mul_non_ftz.ll b/ptx/src/test/spirv_run/mul_non_ftz.ll
index e6a3cc4..89f5e9f 100644
--- a/ptx/src/test/spirv_run/mul_non_ftz.ll
+++ b/ptx/src/test/spirv_run/mul_non_ftz.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"27":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"2
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
- %"29" = getelementptr inbounds i8, ptr %"25", i64 4
- %"13" = load float, ptr %"29", align 4
+ %"28" = getelementptr inbounds i8, ptr %"25", i64 4
+ %"13" = load float, ptr %"28", align 4
store float %"13", ptr addrspace(5) %"7", align 4
%"16" = load float, ptr addrspace(5) %"6", align 4
%"17" = load float, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/mul_wide.ll b/ptx/src/test/spirv_run/mul_wide.ll
index e25a61d..a0d84f4 100644
--- a/ptx/src/test/spirv_run/mul_wide.ll
+++ b/ptx/src/test/spirv_run/mul_wide.ll
@@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
-"29":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"24", align 8
@@ -20,14 +22,14 @@ define protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"23",
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"15" to ptr addrspace(1)
- %"31" = getelementptr inbounds i8, ptr addrspace(1) %"26", i64 4
- %"14" = load i32, ptr addrspace(1) %"31", align 4
+ %"30" = getelementptr inbounds i8, ptr addrspace(1) %"26", i64 4
+ %"14" = load i32, ptr addrspace(1) %"30", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i32, ptr addrspace(5) %"6", align 4
%"18" = load i32, ptr addrspace(5) %"7", align 4
- %0 = sext i32 %"17" to i64
- %1 = sext i32 %"18" to i64
- %"16" = mul nsw i64 %0, %1
+ %2 = sext i32 %"17" to i64
+ %3 = sext i32 %"18" to i64
+ %"16" = mul nsw i64 %2, %3
store i64 %"16", ptr addrspace(5) %"8", align 8
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i64, ptr addrspace(5) %"8", align 8
diff --git a/ptx/src/test/spirv_run/multireg.ll b/ptx/src/test/spirv_run/multireg.ll
index 657d61f..3eb31cb 100644
--- a/ptx/src/test/spirv_run/multireg.ll
+++ b/ptx/src/test/spirv_run/multireg.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @multireg(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
-"22":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
diff --git a/ptx/src/test/spirv_run/neg.ll b/ptx/src/test/spirv_run/neg.ll
index 1e94ed1..056b0a1 100644
--- a/ptx/src/test/spirv_run/neg.ll
+++ b/ptx/src/test/spirv_run/neg.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
-"20":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
diff --git a/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll b/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll
index 69ea8d2..d0c71eb 100644
--- a/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll
+++ b/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll
@@ -2,21 +2,23 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"26":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"24" = inttoptr i64 %"12" to ptr addrspace(1)
- %"28" = getelementptr inbounds i8, ptr addrspace(1) %"24", i64 8
- %"8" = load <2 x i32>, ptr addrspace(1) %"28", align 8
+ %"27" = getelementptr inbounds i8, ptr addrspace(1) %"24", i64 8
+ %"8" = load <2 x i32>, ptr addrspace(1) %"27", align 8
%"13" = extractelement <2 x i32> %"8", i32 0
%"14" = extractelement <2 x i32> %"8", i32 1
store i32 %"13", ptr addrspace(5) %"6", align 4
diff --git a/ptx/src/test/spirv_run/not.ll b/ptx/src/test/spirv_run/not.ll
index 5e86545..7c9a557 100644
--- a/ptx/src/test/spirv_run/not.ll
+++ b/ptx/src/test/spirv_run/not.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
-"23":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
diff --git a/ptx/src/test/spirv_run/ntid.ll b/ptx/src/test/spirv_run/ntid.ll
index 53216ce..29fccca 100644
--- a/ptx/src/test/spirv_run/ntid.ll
+++ b/ptx/src/test/spirv_run/ntid.ll
@@ -4,13 +4,16 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__sreg_ntid(i8) #0
define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #1 {
-"29":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ %1 = alloca i32, align 4, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"15" = load i64, ptr addrspace(4) %"25", align 8
store i64 %"15", ptr addrspace(5) %"4", align 8
%"16" = load i64, ptr addrspace(4) %"26", align 8
@@ -20,9 +23,8 @@ define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"25", ptr
%"17" = load i32, ptr %"27", align 4
store i32 %"17", ptr addrspace(5) %"6", align 4
%"11" = call i32 @__zluda_ptx_impl__sreg_ntid(i8 0)
- %0 = alloca i32, align 4, addrspace(5)
- store i32 %"11", ptr addrspace(5) %0, align 4
- %"19" = load i32, ptr addrspace(5) %0, align 4
+ store i32 %"11", ptr addrspace(5) %1, align 4
+ %"19" = load i32, ptr addrspace(5) %1, align 4
store i32 %"19", ptr addrspace(5) %"7", align 4
%"21" = load i32, ptr addrspace(5) %"6", align 4
%"22" = load i32, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/or.ll b/ptx/src/test/spirv_run/or.ll
index 7b4bd7f..f929205 100644
--- a/ptx/src/test/spirv_run/or.ll
+++ b/ptx/src/test/spirv_run/or.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"30":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"22", ptr a
store i64 %"11", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
- %"32" = getelementptr inbounds i8, ptr %"25", i64 8
- %"13" = load i64, ptr %"32", align 8
+ %"31" = getelementptr inbounds i8, ptr %"25", i64 8
+ %"13" = load i64, ptr %"31", align 8
store i64 %"13", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"6", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
diff --git a/ptx/src/test/spirv_run/param_ptr.ll b/ptx/src/test/spirv_run/param_ptr.ll
index cea098c..75451de 100644
--- a/ptx/src/test/spirv_run/param_ptr.ll
+++ b/ptx/src/test/spirv_run/param_ptr.ll
@@ -2,18 +2,20 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @param_ptr(ptr addrspace(4) byref(i64) %"21", ptr addrspace(4) byref(i64) %"22") #0 {
-"28":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
+ %1 = alloca i64, align 8, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"24" = ptrtoint ptr addrspace(4) %"21" to i64
- %0 = alloca i64, align 8, addrspace(5)
- store i64 %"24", ptr addrspace(5) %0, align 8
- %"23" = load i64, ptr addrspace(5) %0, align 8
+ store i64 %"24", ptr addrspace(5) %1, align 8
+ %"23" = load i64, ptr addrspace(5) %1, align 8
store i64 %"23", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"12" to ptr addrspace(4)
diff --git a/ptx/src/test/spirv_run/popc.ll b/ptx/src/test/spirv_run/popc.ll
index be9c625..15befc4 100644
--- a/ptx/src/test/spirv_run/popc.ll
+++ b/ptx/src/test/spirv_run/popc.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
-"20":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
diff --git a/ptx/src/test/spirv_run/pred_not.ll b/ptx/src/test/spirv_run/pred_not.ll
index 69f7646..8315512 100644
--- a/ptx/src/test/spirv_run/pred_not.ll
+++ b/ptx/src/test/spirv_run/pred_not.ll
@@ -2,15 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
-"41":
%"14" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"14", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i1, align 1, addrspace(5)
+ %1 = alloca i64, align 8, addrspace(5)
+ %2 = alloca i64, align 8, addrspace(5)
+ br label %3
+
+3: ; preds = %0
+ store i1 false, ptr addrspace(5) %"14", align 1
%"15" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"15", ptr addrspace(5) %"4", align 8
%"16" = load i64, ptr addrspace(4) %"37", align 8
@@ -21,8 +25,8 @@ define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36",
store i64 %"17", ptr addrspace(5) %"6", align 8
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"39" = inttoptr i64 %"20" to ptr
- %"43" = getelementptr inbounds i8, ptr %"39", i64 8
- %"19" = load i64, ptr %"43", align 8
+ %"42" = getelementptr inbounds i8, ptr %"39", i64 8
+ %"19" = load i64, ptr %"42", align 8
store i64 %"19", ptr addrspace(5) %"7", align 8
%"22" = load i64, ptr addrspace(5) %"6", align 8
%"23" = load i64, ptr addrspace(5) %"7", align 8
@@ -34,21 +38,19 @@ define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36",
%"26" = load i1, ptr addrspace(5) %"9", align 1
br i1 %"26", label %"10", label %"11"
-"10": ; preds = %"41"
- %0 = alloca i64, align 8, addrspace(5)
- store i64 1, ptr addrspace(5) %0, align 8
- %"27" = load i64, ptr addrspace(5) %0, align 8
+"10": ; preds = %3
+ store i64 1, ptr addrspace(5) %1, align 8
+ %"27" = load i64, ptr addrspace(5) %1, align 8
store i64 %"27", ptr addrspace(5) %"8", align 8
br label %"11"
-"11": ; preds = %"10", %"41"
+"11": ; preds = %"10", %3
%"28" = load i1, ptr addrspace(5) %"9", align 1
br i1 %"28", label %"13", label %"12"
"12": ; preds = %"11"
- %1 = alloca i64, align 8, addrspace(5)
- store i64 2, ptr addrspace(5) %1, align 8
- %"29" = load i64, ptr addrspace(5) %1, align 8
+ store i64 2, ptr addrspace(5) %2, align 8
+ %"29" = load i64, ptr addrspace(5) %2, align 8
store i64 %"29", ptr addrspace(5) %"8", align 8
br label %"13"
diff --git a/ptx/src/test/spirv_run/prmt.ll b/ptx/src/test/spirv_run/prmt.ll
index bdcb12d..76efedc 100644
--- a/ptx/src/test/spirv_run/prmt.ll
+++ b/ptx/src/test/spirv_run/prmt.ll
@@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
-"43":
%"10" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"10", align 1
%"11" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"32", align 8
@@ -21,28 +23,28 @@ define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr
store i32 %"13", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"34" = inttoptr i64 %"16" to ptr
- %"45" = getelementptr inbounds i8, ptr %"34", i64 4
- %"15" = load i32, ptr %"45", align 4
+ %"44" = getelementptr inbounds i8, ptr %"34", i64 4
+ %"15" = load i32, ptr %"44", align 4
store i32 %"15", ptr addrspace(5) %"7", align 4
%"18" = load i32, ptr addrspace(5) %"6", align 4
%"19" = load i32, ptr addrspace(5) %"7", align 4
- %0 = bitcast i32 %"18" to <4 x i8>
- %1 = bitcast i32 %"19" to <4 x i8>
- %2 = shufflevector <4 x i8> %0, <4 x i8> %1, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
- %"35" = bitcast <4 x i8> %2 to i32
+ %2 = bitcast i32 %"18" to <4 x i8>
+ %3 = bitcast i32 %"19" to <4 x i8>
+ %4 = shufflevector <4 x i8> %2, <4 x i8> %3, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+ %"35" = bitcast <4 x i8> %4 to i32
store i32 %"35", ptr addrspace(5) %"8", align 4
%"21" = load i32, ptr addrspace(5) %"6", align 4
%"22" = load i32, ptr addrspace(5) %"7", align 4
- %3 = bitcast i32 %"21" to <4 x i8>
- %4 = bitcast i32 %"22" to <4 x i8>
- %5 = shufflevector <4 x i8> %3, <4 x i8> %4, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
- %6 = extractelement <4 x i8> %5, i32 0
- %7 = ashr i8 %6, 7
- %8 = insertelement <4 x i8> %5, i8 %7, i32 0
- %9 = extractelement <4 x i8> %8, i32 2
- %10 = ashr i8 %9, 7
- %11 = insertelement <4 x i8> %8, i8 %10, i32 2
- %"38" = bitcast <4 x i8> %11 to i32
+ %5 = bitcast i32 %"21" to <4 x i8>
+ %6 = bitcast i32 %"22" to <4 x i8>
+ %7 = shufflevector <4 x i8> %5, <4 x i8> %6, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+ %8 = extractelement <4 x i8> %7, i32 0
+ %9 = ashr i8 %8, 7
+ %10 = insertelement <4 x i8> %7, i8 %9, i32 0
+ %11 = extractelement <4 x i8> %10, i32 2
+ %12 = ashr i8 %11, 7
+ %13 = insertelement <4 x i8> %10, i8 %12, i32 2
+ %"38" = bitcast <4 x i8> %13 to i32
store i32 %"38", ptr addrspace(5) %"9", align 4
%"23" = load i64, ptr addrspace(5) %"5", align 8
%"24" = load i32, ptr addrspace(5) %"8", align 4
@@ -51,8 +53,8 @@ define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr
%"25" = load i64, ptr addrspace(5) %"5", align 8
%"26" = load i32, ptr addrspace(5) %"9", align 4
%"42" = inttoptr i64 %"25" to ptr
- %"47" = getelementptr inbounds i8, ptr %"42", i64 4
- store i32 %"26", ptr %"47", align 4
+ %"46" = getelementptr inbounds i8, ptr %"42", i64 4
+ store i32 %"26", ptr %"46", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/prmt_non_immediate.ll b/ptx/src/test/spirv_run/prmt_non_immediate.ll
index d503917..104c56d 100644
--- a/ptx/src/test/spirv_run/prmt_non_immediate.ll
+++ b/ptx/src/test/spirv_run/prmt_non_immediate.ll
@@ -2,14 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @prmt_non_immediate(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 {
-"33":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
+ %1 = alloca i32, align 4, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"25", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"26", align 8
@@ -20,19 +23,18 @@ define protected amdgpu_kernel void @prmt_non_immediate(ptr addrspace(4) byref(i
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"28" = inttoptr i64 %"15" to ptr
- %"35" = getelementptr inbounds i8, ptr %"28", i64 4
- %"14" = load i32, ptr %"35", align 4
+ %"34" = getelementptr inbounds i8, ptr %"28", i64 4
+ %"14" = load i32, ptr %"34", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
- %0 = alloca i32, align 4, addrspace(5)
- store i32 64, ptr addrspace(5) %0, align 4
- %"16" = load i32, ptr addrspace(5) %0, align 4
+ store i32 64, ptr addrspace(5) %1, align 4
+ %"16" = load i32, ptr addrspace(5) %1, align 4
store i32 %"16", ptr addrspace(5) %"8", align 4
%"18" = load i32, ptr addrspace(5) %"6", align 4
%"19" = load i32, ptr addrspace(5) %"7", align 4
- %1 = bitcast i32 %"18" to <4 x i8>
- %2 = bitcast i32 %"19" to <4 x i8>
- %3 = shufflevector <4 x i8> %1, <4 x i8> %2, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
- %"29" = bitcast <4 x i8> %3 to i32
+ %3 = bitcast i32 %"18" to <4 x i8>
+ %4 = bitcast i32 %"19" to <4 x i8>
+ %5 = shufflevector <4 x i8> %3, <4 x i8> %4, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
+ %"29" = bitcast <4 x i8> %5 to i32
store i32 %"29", ptr addrspace(5) %"7", align 4
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load i32, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/rcp.ll b/ptx/src/test/spirv_run/rcp.ll
index 116687b..dc03416 100644
--- a/ptx/src/test/spirv_run/rcp.ll
+++ b/ptx/src/test/spirv_run/rcp.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
-"20":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
diff --git a/ptx/src/test/spirv_run/reg_local.ll b/ptx/src/test/spirv_run/reg_local.ll
index 48c881d..52bb3d1 100644
--- a/ptx/src/test/spirv_run/reg_local.ll
+++ b/ptx/src/test/spirv_run/reg_local.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
-"33":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca [8 x i8], align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"9", ptr addrspace(5) %"5", align 8
%"10" = load i64, ptr addrspace(4) %"24", align 8
@@ -22,14 +24,14 @@ define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"23"
%"27" = addrspacecast ptr addrspace(5) %"4" to ptr
store i64 %"18", ptr %"27", align 8
%"29" = addrspacecast ptr addrspace(5) %"4" to ptr
- %"37" = getelementptr inbounds i8, ptr %"29", i64 0
- %"30" = load i64, ptr %"37", align 8
+ %"36" = getelementptr inbounds i8, ptr %"29", i64 0
+ %"30" = load i64, ptr %"36", align 8
store i64 %"30", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"31" = inttoptr i64 %"15" to ptr addrspace(1)
- %"39" = getelementptr inbounds i8, ptr addrspace(1) %"31", i64 0
- store i64 %"16", ptr addrspace(1) %"39", align 8
+ %"38" = getelementptr inbounds i8, ptr addrspace(1) %"31", i64 0
+ store i64 %"16", ptr addrspace(1) %"38", align 8
ret void
}
diff --git a/ptx/src/test/spirv_run/rem.ll b/ptx/src/test/spirv_run/rem.ll
index 4535f49..0fb9cd8 100644
--- a/ptx/src/test/spirv_run/rem.ll
+++ b/ptx/src/test/spirv_run/rem.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"27":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"22", ptr
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
- %"29" = getelementptr inbounds i8, ptr %"25", i64 4
- %"13" = load i32, ptr %"29", align 4
+ %"28" = getelementptr inbounds i8, ptr %"25", i64 4
+ %"13" = load i32, ptr %"28", align 4
store i32 %"13", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"17" = load i32, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/rsqrt.ll b/ptx/src/test/spirv_run/rsqrt.ll
index 7797260..40833ac 100644
--- a/ptx/src/test/spirv_run/rsqrt.ll
+++ b/ptx/src/test/spirv_run/rsqrt.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
-"20":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca double, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
@@ -17,8 +19,8 @@ define protected amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"16", pt
%"10" = load double, ptr %"18", align 8
store double %"10", ptr addrspace(5) %"6", align 8
%"13" = load double, ptr addrspace(5) %"6", align 8
- %0 = call afn double @llvm.sqrt.f64(double %"13")
- %"12" = fdiv arcp afn double 1.000000e+00, %0
+ %2 = call afn double @llvm.sqrt.f64(double %"13")
+ %"12" = fdiv arcp afn double 1.000000e+00, %2
store double %"12", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load double, ptr addrspace(5) %"6", align 8
diff --git a/ptx/src/test/spirv_run/s64_min.ll b/ptx/src/test/spirv_run/s64_min.ll
index 98eee04..a96f0a4 100644
--- a/ptx/src/test/spirv_run/s64_min.ll
+++ b/ptx/src/test/spirv_run/s64_min.ll
@@ -2,16 +2,18 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @s64_min(ptr addrspace(4) byref(i64) %"12", ptr addrspace(4) byref(i64) %"13") #0 {
-"15":
%"6" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"6", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
+ %1 = alloca i64, align 8, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"6", align 1
%"7" = load i64, ptr addrspace(4) %"13", align 8
store i64 %"7", ptr addrspace(5) %"4", align 8
- %0 = alloca i64, align 8, addrspace(5)
- store i64 -9223372036854775808, ptr addrspace(5) %0, align 8
- %"8" = load i64, ptr addrspace(5) %0, align 8
+ store i64 -9223372036854775808, ptr addrspace(5) %1, align 8
+ %"8" = load i64, ptr addrspace(5) %1, align 8
store i64 %"8", ptr addrspace(5) %"5", align 8
%"9" = load i64, ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(5) %"5", align 8
diff --git a/ptx/src/test/spirv_run/sad.ll b/ptx/src/test/spirv_run/sad.ll
index c7a5726..aa65fce 100644
--- a/ptx/src/test/spirv_run/sad.ll
+++ b/ptx/src/test/spirv_run/sad.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
-"56":
%"11" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"11", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -12,6 +10,10 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
%"10" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"11", align 1
%"12" = load i64, ptr addrspace(4) %"38", align 8
store i64 %"12", ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(4) %"39", align 8
@@ -22,31 +24,31 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr
store i32 %"40", ptr addrspace(5) %"6", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"42" = inttoptr i64 %"17" to ptr
- %"58" = getelementptr inbounds i8, ptr %"42", i64 4
- %"43" = load i32, ptr %"58", align 4
+ %"57" = getelementptr inbounds i8, ptr %"42", i64 4
+ %"43" = load i32, ptr %"57", align 4
store i32 %"43", ptr addrspace(5) %"7", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"44" = inttoptr i64 %"19" to ptr
- %"60" = getelementptr inbounds i8, ptr %"44", i64 8
- %"45" = load i32, ptr %"60", align 4
+ %"59" = getelementptr inbounds i8, ptr %"44", i64 8
+ %"45" = load i32, ptr %"59", align 4
store i32 %"45", ptr addrspace(5) %"8", align 4
%"21" = load i32, ptr addrspace(5) %"6", align 4
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"23" = load i32, ptr addrspace(5) %"8", align 4
- %0 = icmp ugt i32 %"21", %"22"
- %1 = sub i32 %"21", %"22"
- %2 = sub i32 %"22", %"21"
- %3 = select i1 %0, i32 %1, i32 %2
- %"46" = add i32 %"23", %3
+ %2 = icmp ugt i32 %"21", %"22"
+ %3 = sub i32 %"21", %"22"
+ %4 = sub i32 %"22", %"21"
+ %5 = select i1 %2, i32 %3, i32 %4
+ %"46" = add i32 %"23", %5
store i32 %"46", ptr addrspace(5) %"9", align 4
%"25" = load i32, ptr addrspace(5) %"6", align 4
%"26" = load i32, ptr addrspace(5) %"7", align 4
%"27" = load i32, ptr addrspace(5) %"8", align 4
- %4 = icmp sgt i32 %"25", %"26"
- %5 = sub i32 %"25", %"26"
- %6 = sub i32 %"26", %"25"
- %7 = select i1 %4, i32 %5, i32 %6
- %"50" = add i32 %"27", %7
+ %6 = icmp sgt i32 %"25", %"26"
+ %7 = sub i32 %"25", %"26"
+ %8 = sub i32 %"26", %"25"
+ %9 = select i1 %6, i32 %7, i32 %8
+ %"50" = add i32 %"27", %9
store i32 %"50", ptr addrspace(5) %"10", align 4
%"28" = load i64, ptr addrspace(5) %"5", align 8
%"29" = load i32, ptr addrspace(5) %"9", align 4
@@ -55,8 +57,8 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr
%"30" = load i64, ptr addrspace(5) %"5", align 8
%"31" = load i32, ptr addrspace(5) %"10", align 4
%"55" = inttoptr i64 %"30" to ptr
- %"62" = getelementptr inbounds i8, ptr %"55", i64 4
- store i32 %"31", ptr %"62", align 4
+ %"61" = getelementptr inbounds i8, ptr %"55", i64 4
+ store i32 %"31", ptr %"61", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/selp.ll b/ptx/src/test/spirv_run/selp.ll
index 073ec38..0e20d6d 100644
--- a/ptx/src/test/spirv_run/selp.ll
+++ b/ptx/src/test/spirv_run/selp.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
-"28":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i16, align 2, addrspace(5)
%"7" = alloca i16, align 2, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"24", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"23", ptr
store i16 %"11", ptr addrspace(5) %"6", align 2
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"14" to ptr
- %"30" = getelementptr inbounds i8, ptr %"26", i64 2
- %"13" = load i16, ptr %"30", align 2
+ %"29" = getelementptr inbounds i8, ptr %"26", i64 2
+ %"13" = load i16, ptr %"29", align 2
store i16 %"13", ptr addrspace(5) %"7", align 2
%"16" = load i16, ptr addrspace(5) %"6", align 2
%"17" = load i16, ptr addrspace(5) %"7", align 2
diff --git a/ptx/src/test/spirv_run/selp_true.ll b/ptx/src/test/spirv_run/selp_true.ll
index 4eda981..9b6b41a 100644
--- a/ptx/src/test/spirv_run/selp_true.ll
+++ b/ptx/src/test/spirv_run/selp_true.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
-"28":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i16, align 2, addrspace(5)
%"7" = alloca i16, align 2, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"24", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"23"
store i16 %"11", ptr addrspace(5) %"6", align 2
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"14" to ptr
- %"30" = getelementptr inbounds i8, ptr %"26", i64 2
- %"13" = load i16, ptr %"30", align 2
+ %"29" = getelementptr inbounds i8, ptr %"26", i64 2
+ %"13" = load i16, ptr %"29", align 2
store i16 %"13", ptr addrspace(5) %"7", align 2
%"16" = load i16, ptr addrspace(5) %"6", align 2
%"17" = load i16, ptr addrspace(5) %"7", align 2
diff --git a/ptx/src/test/spirv_run/set_f16x2.ll b/ptx/src/test/spirv_run/set_f16x2.ll
index 2a8caf3..d6bf7e0 100644
--- a/ptx/src/test/spirv_run/set_f16x2.ll
+++ b/ptx/src/test/spirv_run/set_f16x2.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
-"58":
%"11" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"11", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -12,6 +10,10 @@ define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40"
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
%"10" = alloca <2 x half>, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"11", align 1
%"12" = load i64, ptr addrspace(4) %"40", align 8
store i64 %"12", ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(4) %"41", align 8
@@ -22,33 +24,33 @@ define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40"
store i32 %"42", ptr addrspace(5) %"6", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"44" = inttoptr i64 %"17" to ptr
- %"60" = getelementptr inbounds i8, ptr %"44", i64 4
- %"45" = load i32, ptr %"60", align 4
+ %"59" = getelementptr inbounds i8, ptr %"44", i64 4
+ %"45" = load i32, ptr %"59", align 4
store i32 %"45", ptr addrspace(5) %"7", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"46" = inttoptr i64 %"19" to ptr
- %"62" = getelementptr inbounds i8, ptr %"46", i64 8
- %"47" = load i32, ptr %"62", align 4
+ %"61" = getelementptr inbounds i8, ptr %"46", i64 8
+ %"47" = load i32, ptr %"61", align 4
store i32 %"47", ptr addrspace(5) %"8", align 4
%"21" = load i64, ptr addrspace(5) %"4", align 8
%"48" = inttoptr i64 %"21" to ptr
- %"64" = getelementptr inbounds i8, ptr %"48", i64 12
- %"49" = load i32, ptr %"64", align 4
+ %"63" = getelementptr inbounds i8, ptr %"48", i64 12
+ %"49" = load i32, ptr %"63", align 4
store i32 %"49", ptr addrspace(5) %"9", align 4
%"23" = load i32, ptr addrspace(5) %"6", align 4
%"24" = load i32, ptr addrspace(5) %"7", align 4
%"51" = bitcast i32 %"23" to <2 x half>
%"52" = bitcast i32 %"24" to <2 x half>
- %0 = fcmp ugt <2 x half> %"51", %"52"
- %1 = sext <2 x i1> %0 to <2 x i16>
- %"50" = bitcast <2 x i16> %1 to i32
+ %2 = fcmp ugt <2 x half> %"51", %"52"
+ %3 = sext <2 x i1> %2 to <2 x i16>
+ %"50" = bitcast <2 x i16> %3 to i32
store i32 %"50", ptr addrspace(5) %"6", align 4
%"26" = load i32, ptr addrspace(5) %"8", align 4
%"27" = load i32, ptr addrspace(5) %"9", align 4
%"54" = bitcast i32 %"26" to <2 x half>
%"55" = bitcast i32 %"27" to <2 x half>
- %2 = fcmp oeq <2 x half> %"54", %"55"
- %"53" = uitofp <2 x i1> %2 to <2 x half>
+ %4 = fcmp oeq <2 x half> %"54", %"55"
+ %"53" = uitofp <2 x i1> %4 to <2 x half>
%"25" = bitcast <2 x half> %"53" to i32
store i32 %"25", ptr addrspace(5) %"8", align 4
%"28" = load i64, ptr addrspace(5) %"5", align 8
@@ -58,8 +60,8 @@ define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40"
%"30" = load i64, ptr addrspace(5) %"5", align 8
%"31" = load i32, ptr addrspace(5) %"8", align 4
%"57" = inttoptr i64 %"30" to ptr
- %"66" = getelementptr inbounds i8, ptr %"57", i64 4
- store i32 %"31", ptr %"66", align 4
+ %"65" = getelementptr inbounds i8, ptr %"57", i64 4
+ store i32 %"31", ptr %"65", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/setp.ll b/ptx/src/test/spirv_run/setp.ll
index 2f95556..1e9e1e5 100644
--- a/ptx/src/test/spirv_run/setp.ll
+++ b/ptx/src/test/spirv_run/setp.ll
@@ -2,15 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
-"39":
%"14" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"14", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i1, align 1, addrspace(5)
+ %1 = alloca i64, align 8, addrspace(5)
+ %2 = alloca i64, align 8, addrspace(5)
+ br label %3
+
+3: ; preds = %0
+ store i1 false, ptr addrspace(5) %"14", align 1
%"15" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"15", ptr addrspace(5) %"4", align 8
%"16" = load i64, ptr addrspace(4) %"35", align 8
@@ -21,8 +25,8 @@ define protected amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"34", ptr
store i64 %"17", ptr addrspace(5) %"6", align 8
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"37" = inttoptr i64 %"20" to ptr
- %"41" = getelementptr inbounds i8, ptr %"37", i64 8
- %"19" = load i64, ptr %"41", align 8
+ %"40" = getelementptr inbounds i8, ptr %"37", i64 8
+ %"19" = load i64, ptr %"40", align 8
store i64 %"19", ptr addrspace(5) %"7", align 8
%"22" = load i64, ptr addrspace(5) %"6", align 8
%"23" = load i64, ptr addrspace(5) %"7", align 8
@@ -31,21 +35,19 @@ define protected amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"34", ptr
%"24" = load i1, ptr addrspace(5) %"9", align 1
br i1 %"24", label %"10", label %"11"
-"10": ; preds = %"39"
- %0 = alloca i64, align 8, addrspace(5)
- store i64 1, ptr addrspace(5) %0, align 8
- %"25" = load i64, ptr addrspace(5) %0, align 8
+"10": ; preds = %3
+ store i64 1, ptr addrspace(5) %1, align 8
+ %"25" = load i64, ptr addrspace(5) %1, align 8
store i64 %"25", ptr addrspace(5) %"8", align 8
br label %"11"
-"11": ; preds = %"10", %"39"
+"11": ; preds = %"10", %3
%"26" = load i1, ptr addrspace(5) %"9", align 1
br i1 %"26", label %"13", label %"12"
"12": ; preds = %"11"
- %1 = alloca i64, align 8, addrspace(5)
- store i64 2, ptr addrspace(5) %1, align 8
- %"27" = load i64, ptr addrspace(5) %1, align 8
+ store i64 2, ptr addrspace(5) %2, align 8
+ %"27" = load i64, ptr addrspace(5) %2, align 8
store i64 %"27", ptr addrspace(5) %"8", align 8
br label %"13"
diff --git a/ptx/src/test/spirv_run/setp_bool.ll b/ptx/src/test/spirv_run/setp_bool.ll
index ac1b2bb..f0b659f 100644
--- a/ptx/src/test/spirv_run/setp_bool.ll
+++ b/ptx/src/test/spirv_run/setp_bool.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 {
-"50":
%"16" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"16", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
@@ -13,6 +11,13 @@ define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"44"
%"9" = alloca i1, align 1, addrspace(5)
%"10" = alloca i1, align 1, addrspace(5)
%"11" = alloca i1, align 1, addrspace(5)
+ %1 = alloca i1, align 1, addrspace(5)
+ %2 = alloca float, align 4, addrspace(5)
+ %3 = alloca float, align 4, addrspace(5)
+ br label %4
+
+4: ; preds = %0
+ store i1 false, ptr addrspace(5) %"16", align 1
%"17" = load i64, ptr addrspace(4) %"44", align 8
store i64 %"17", ptr addrspace(5) %"4", align 8
%"18" = load i64, ptr addrspace(4) %"45", align 8
@@ -23,47 +28,44 @@ define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"44"
store float %"19", ptr addrspace(5) %"6", align 4
%"22" = load i64, ptr addrspace(5) %"4", align 8
%"47" = inttoptr i64 %"22" to ptr
- %"52" = getelementptr inbounds i8, ptr %"47", i64 4
- %"21" = load float, ptr %"52", align 4
+ %"51" = getelementptr inbounds i8, ptr %"47", i64 4
+ %"21" = load float, ptr %"51", align 4
store float %"21", ptr addrspace(5) %"7", align 4
%"24" = load i64, ptr addrspace(5) %"4", align 8
%"48" = inttoptr i64 %"24" to ptr
- %"54" = getelementptr inbounds i8, ptr %"48", i64 8
- %"23" = load float, ptr %"54", align 4
+ %"53" = getelementptr inbounds i8, ptr %"48", i64 8
+ %"23" = load float, ptr %"53", align 4
store float %"23", ptr addrspace(5) %"8", align 4
- %0 = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %0, align 1
- %"25" = load i1, ptr addrspace(5) %0, align 1
+ store i1 false, ptr addrspace(5) %1, align 1
+ %"25" = load i1, ptr addrspace(5) %1, align 1
store i1 %"25", ptr addrspace(5) %"9", align 1
%"28" = load float, ptr addrspace(5) %"6", align 4
%"29" = load float, ptr addrspace(5) %"7", align 4
%"30" = load i1, ptr addrspace(5) %"9", align 1
- %1 = fcmp ogt float %"28", %"29"
- %2 = xor i1 %1, true
- %"26" = and i1 %1, %"30"
- %"27" = and i1 %2, %"30"
+ %5 = fcmp ogt float %"28", %"29"
+ %6 = xor i1 %5, true
+ %"26" = and i1 %5, %"30"
+ %"27" = and i1 %6, %"30"
store i1 %"26", ptr addrspace(5) %"10", align 1
store i1 %"27", ptr addrspace(5) %"11", align 1
%"31" = load i1, ptr addrspace(5) %"10", align 1
br i1 %"31", label %"12", label %"13"
-"12": ; preds = %"50"
+"12": ; preds = %4
%"33" = load float, ptr addrspace(5) %"6", align 4
- %3 = alloca float, align 4, addrspace(5)
- store float %"33", ptr addrspace(5) %3, align 4
- %"32" = load float, ptr addrspace(5) %3, align 4
+ store float %"33", ptr addrspace(5) %2, align 4
+ %"32" = load float, ptr addrspace(5) %2, align 4
store float %"32", ptr addrspace(5) %"8", align 4
br label %"13"
-"13": ; preds = %"12", %"50"
+"13": ; preds = %"12", %4
%"34" = load i1, ptr addrspace(5) %"11", align 1
br i1 %"34", label %"14", label %"15"
"14": ; preds = %"13"
%"36" = load float, ptr addrspace(5) %"7", align 4
- %4 = alloca float, align 4, addrspace(5)
- store float %"36", ptr addrspace(5) %4, align 4
- %"35" = load float, ptr addrspace(5) %4, align 4
+ store float %"36", ptr addrspace(5) %3, align 4
+ %"35" = load float, ptr addrspace(5) %3, align 4
store float %"35", ptr addrspace(5) %"8", align 4
br label %"15"
diff --git a/ptx/src/test/spirv_run/setp_gt.ll b/ptx/src/test/spirv_run/setp_gt.ll
index 3a8b965..dbaf20a 100644
--- a/ptx/src/test/spirv_run/setp_gt.ll
+++ b/ptx/src/test/spirv_run/setp_gt.ll
@@ -2,15 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
-"39":
%"14" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"14", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"8" = alloca float, align 4, addrspace(5)
%"9" = alloca i1, align 1, addrspace(5)
+ %1 = alloca float, align 4, addrspace(5)
+ %2 = alloca float, align 4, addrspace(5)
+ br label %3
+
+3: ; preds = %0
+ store i1 false, ptr addrspace(5) %"14", align 1
%"15" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"15", ptr addrspace(5) %"4", align 8
%"16" = load i64, ptr addrspace(4) %"35", align 8
@@ -21,8 +25,8 @@ define protected amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"34",
store float %"17", ptr addrspace(5) %"6", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"37" = inttoptr i64 %"20" to ptr
- %"41" = getelementptr inbounds i8, ptr %"37", i64 4
- %"19" = load float, ptr %"41", align 4
+ %"40" = getelementptr inbounds i8, ptr %"37", i64 4
+ %"19" = load float, ptr %"40", align 4
store float %"19", ptr addrspace(5) %"7", align 4
%"22" = load float, ptr addrspace(5) %"6", align 4
%"23" = load float, ptr addrspace(5) %"7", align 4
@@ -31,23 +35,21 @@ define protected amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"34",
%"24" = load i1, ptr addrspace(5) %"9", align 1
br i1 %"24", label %"10", label %"11"
-"10": ; preds = %"39"
+"10": ; preds = %3
%"26" = load float, ptr addrspace(5) %"6", align 4
- %0 = alloca float, align 4, addrspace(5)
- store float %"26", ptr addrspace(5) %0, align 4
- %"25" = load float, ptr addrspace(5) %0, align 4
+ store float %"26", ptr addrspace(5) %1, align 4
+ %"25" = load float, ptr addrspace(5) %1, align 4
store float %"25", ptr addrspace(5) %"8", align 4
br label %"11"
-"11": ; preds = %"10", %"39"
+"11": ; preds = %"10", %3
%"27" = load i1, ptr addrspace(5) %"9", align 1
br i1 %"27", label %"13", label %"12"
"12": ; preds = %"11"
%"29" = load float, ptr addrspace(5) %"7", align 4
- %1 = alloca float, align 4, addrspace(5)
- store float %"29", ptr addrspace(5) %1, align 4
- %"28" = load float, ptr addrspace(5) %1, align 4
+ store float %"29", ptr addrspace(5) %2, align 4
+ %"28" = load float, ptr addrspace(5) %2, align 4
store float %"28", ptr addrspace(5) %"8", align 4
br label %"13"
diff --git a/ptx/src/test/spirv_run/setp_leu.ll b/ptx/src/test/spirv_run/setp_leu.ll
index 9699fde..d27b96a 100644
--- a/ptx/src/test/spirv_run/setp_leu.ll
+++ b/ptx/src/test/spirv_run/setp_leu.ll
@@ -2,15 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
-"39":
%"14" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"14", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"8" = alloca float, align 4, addrspace(5)
%"9" = alloca i1, align 1, addrspace(5)
+ %1 = alloca float, align 4, addrspace(5)
+ %2 = alloca float, align 4, addrspace(5)
+ br label %3
+
+3: ; preds = %0
+ store i1 false, ptr addrspace(5) %"14", align 1
%"15" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"15", ptr addrspace(5) %"4", align 8
%"16" = load i64, ptr addrspace(4) %"35", align 8
@@ -21,8 +25,8 @@ define protected amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"34",
store float %"17", ptr addrspace(5) %"6", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"37" = inttoptr i64 %"20" to ptr
- %"41" = getelementptr inbounds i8, ptr %"37", i64 4
- %"19" = load float, ptr %"41", align 4
+ %"40" = getelementptr inbounds i8, ptr %"37", i64 4
+ %"19" = load float, ptr %"40", align 4
store float %"19", ptr addrspace(5) %"7", align 4
%"22" = load float, ptr addrspace(5) %"6", align 4
%"23" = load float, ptr addrspace(5) %"7", align 4
@@ -31,23 +35,21 @@ define protected amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"34",
%"24" = load i1, ptr addrspace(5) %"9", align 1
br i1 %"24", label %"10", label %"11"
-"10": ; preds = %"39"
+"10": ; preds = %3
%"26" = load float, ptr addrspace(5) %"6", align 4
- %0 = alloca float, align 4, addrspace(5)
- store float %"26", ptr addrspace(5) %0, align 4
- %"25" = load float, ptr addrspace(5) %0, align 4
+ store float %"26", ptr addrspace(5) %1, align 4
+ %"25" = load float, ptr addrspace(5) %1, align 4
store float %"25", ptr addrspace(5) %"8", align 4
br label %"11"
-"11": ; preds = %"10", %"39"
+"11": ; preds = %"10", %3
%"27" = load i1, ptr addrspace(5) %"9", align 1
br i1 %"27", label %"13", label %"12"
"12": ; preds = %"11"
%"29" = load float, ptr addrspace(5) %"7", align 4
- %1 = alloca float, align 4, addrspace(5)
- store float %"29", ptr addrspace(5) %1, align 4
- %"28" = load float, ptr addrspace(5) %1, align 4
+ store float %"29", ptr addrspace(5) %2, align 4
+ %"28" = load float, ptr addrspace(5) %2, align 4
store float %"28", ptr addrspace(5) %"8", align 4
br label %"13"
diff --git a/ptx/src/test/spirv_run/setp_nan.ll b/ptx/src/test/spirv_run/setp_nan.ll
index 1368386..709ed89 100644
--- a/ptx/src/test/spirv_run/setp_nan.ll
+++ b/ptx/src/test/spirv_run/setp_nan.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115", ptr addrspace(4) byref(i64) %"116") #0 {
-"129":
%"32" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"32", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
@@ -17,6 +15,18 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115"
%"13" = alloca float, align 4, addrspace(5)
%"14" = alloca i32, align 4, addrspace(5)
%"15" = alloca i1, align 1, addrspace(5)
+ %1 = alloca i32, align 4, addrspace(5)
+ %2 = alloca i32, align 4, addrspace(5)
+ %3 = alloca i32, align 4, addrspace(5)
+ %4 = alloca i32, align 4, addrspace(5)
+ %5 = alloca i32, align 4, addrspace(5)
+ %6 = alloca i32, align 4, addrspace(5)
+ %7 = alloca i32, align 4, addrspace(5)
+ %8 = alloca i32, align 4, addrspace(5)
+ br label %9
+
+9: ; preds = %0
+ store i1 false, ptr addrspace(5) %"32", align 1
%"33" = load i64, ptr addrspace(4) %"115", align 8
store i64 %"33", ptr addrspace(5) %"4", align 8
%"34" = load i64, ptr addrspace(4) %"116", align 8
@@ -27,38 +37,38 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115"
store float %"35", ptr addrspace(5) %"6", align 4
%"38" = load i64, ptr addrspace(5) %"4", align 8
%"118" = inttoptr i64 %"38" to ptr
- %"131" = getelementptr inbounds i8, ptr %"118", i64 4
- %"37" = load float, ptr %"131", align 4
+ %"130" = getelementptr inbounds i8, ptr %"118", i64 4
+ %"37" = load float, ptr %"130", align 4
store float %"37", ptr addrspace(5) %"7", align 4
%"40" = load i64, ptr addrspace(5) %"4", align 8
%"119" = inttoptr i64 %"40" to ptr
- %"133" = getelementptr inbounds i8, ptr %"119", i64 8
- %"39" = load float, ptr %"133", align 4
+ %"132" = getelementptr inbounds i8, ptr %"119", i64 8
+ %"39" = load float, ptr %"132", align 4
store float %"39", ptr addrspace(5) %"8", align 4
%"42" = load i64, ptr addrspace(5) %"4", align 8
%"120" = inttoptr i64 %"42" to ptr
- %"135" = getelementptr inbounds i8, ptr %"120", i64 12
- %"41" = load float, ptr %"135", align 4
+ %"134" = getelementptr inbounds i8, ptr %"120", i64 12
+ %"41" = load float, ptr %"134", align 4
store float %"41", ptr addrspace(5) %"9", align 4
%"44" = load i64, ptr addrspace(5) %"4", align 8
%"121" = inttoptr i64 %"44" to ptr
- %"137" = getelementptr inbounds i8, ptr %"121", i64 16
- %"43" = load float, ptr %"137", align 4
+ %"136" = getelementptr inbounds i8, ptr %"121", i64 16
+ %"43" = load float, ptr %"136", align 4
store float %"43", ptr addrspace(5) %"10", align 4
%"46" = load i64, ptr addrspace(5) %"4", align 8
%"122" = inttoptr i64 %"46" to ptr
- %"139" = getelementptr inbounds i8, ptr %"122", i64 20
- %"45" = load float, ptr %"139", align 4
+ %"138" = getelementptr inbounds i8, ptr %"122", i64 20
+ %"45" = load float, ptr %"138", align 4
store float %"45", ptr addrspace(5) %"11", align 4
%"48" = load i64, ptr addrspace(5) %"4", align 8
%"123" = inttoptr i64 %"48" to ptr
- %"141" = getelementptr inbounds i8, ptr %"123", i64 24
- %"47" = load float, ptr %"141", align 4
+ %"140" = getelementptr inbounds i8, ptr %"123", i64 24
+ %"47" = load float, ptr %"140", align 4
store float %"47", ptr addrspace(5) %"12", align 4
%"50" = load i64, ptr addrspace(5) %"4", align 8
%"124" = inttoptr i64 %"50" to ptr
- %"143" = getelementptr inbounds i8, ptr %"124", i64 28
- %"49" = load float, ptr %"143", align 4
+ %"142" = getelementptr inbounds i8, ptr %"124", i64 28
+ %"49" = load float, ptr %"142", align 4
store float %"49", ptr addrspace(5) %"13", align 4
%"52" = load float, ptr addrspace(5) %"6", align 4
%"53" = load float, ptr addrspace(5) %"7", align 4
@@ -67,21 +77,19 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115"
%"54" = load i1, ptr addrspace(5) %"15", align 1
br i1 %"54", label %"16", label %"17"
-"16": ; preds = %"129"
- %0 = alloca i32, align 4, addrspace(5)
- store i32 1, ptr addrspace(5) %0, align 4
- %"55" = load i32, ptr addrspace(5) %0, align 4
+"16": ; preds = %9
+ store i32 1, ptr addrspace(5) %1, align 4
+ %"55" = load i32, ptr addrspace(5) %1, align 4
store i32 %"55", ptr addrspace(5) %"14", align 4
br label %"17"
-"17": ; preds = %"16", %"129"
+"17": ; preds = %"16", %9
%"56" = load i1, ptr addrspace(5) %"15", align 1
br i1 %"56", label %"19", label %"18"
"18": ; preds = %"17"
- %1 = alloca i32, align 4, addrspace(5)
- store i32 0, ptr addrspace(5) %1, align 4
- %"57" = load i32, ptr addrspace(5) %1, align 4
+ store i32 0, ptr addrspace(5) %2, align 4
+ %"57" = load i32, ptr addrspace(5) %2, align 4
store i32 %"57", ptr addrspace(5) %"14", align 4
br label %"19"
@@ -98,9 +106,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115"
br i1 %"63", label %"20", label %"21"
"20": ; preds = %"19"
- %2 = alloca i32, align 4, addrspace(5)
- store i32 1, ptr addrspace(5) %2, align 4
- %"64" = load i32, ptr addrspace(5) %2, align 4
+ store i32 1, ptr addrspace(5) %3, align 4
+ %"64" = load i32, ptr addrspace(5) %3, align 4
store i32 %"64", ptr addrspace(5) %"14", align 4
br label %"21"
@@ -109,9 +116,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115"
br i1 %"65", label %"23", label %"22"
"22": ; preds = %"21"
- %3 = alloca i32, align 4, addrspace(5)
- store i32 0, ptr addrspace(5) %3, align 4
- %"66" = load i32, ptr addrspace(5) %3, align 4
+ store i32 0, ptr addrspace(5) %4, align 4
+ %"66" = load i32, ptr addrspace(5) %4, align 4
store i32 %"66", ptr addrspace(5) %"14", align 4
br label %"23"
@@ -119,8 +125,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115"
%"67" = load i64, ptr addrspace(5) %"5", align 8
%"68" = load i32, ptr addrspace(5) %"14", align 4
%"126" = inttoptr i64 %"67" to ptr
- %"145" = getelementptr inbounds i8, ptr %"126", i64 4
- store i32 %"68", ptr %"145", align 4
+ %"144" = getelementptr inbounds i8, ptr %"126", i64 4
+ store i32 %"68", ptr %"144", align 4
%"70" = load float, ptr addrspace(5) %"10", align 4
%"71" = load float, ptr addrspace(5) %"11", align 4
%"69" = fcmp uno float %"70", %"71"
@@ -129,9 +135,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115"
br i1 %"72", label %"24", label %"25"
"24": ; preds = %"23"
- %4 = alloca i32, align 4, addrspace(5)
- store i32 1, ptr addrspace(5) %4, align 4
- %"73" = load i32, ptr addrspace(5) %4, align 4
+ store i32 1, ptr addrspace(5) %5, align 4
+ %"73" = load i32, ptr addrspace(5) %5, align 4
store i32 %"73", ptr addrspace(5) %"14", align 4
br label %"25"
@@ -140,9 +145,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115"
br i1 %"74", label %"27", label %"26"
"26": ; preds = %"25"
- %5 = alloca i32, align 4, addrspace(5)
- store i32 0, ptr addrspace(5) %5, align 4
- %"75" = load i32, ptr addrspace(5) %5, align 4
+ store i32 0, ptr addrspace(5) %6, align 4
+ %"75" = load i32, ptr addrspace(5) %6, align 4
store i32 %"75", ptr addrspace(5) %"14", align 4
br label %"27"
@@ -150,8 +154,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115"
%"76" = load i64, ptr addrspace(5) %"5", align 8
%"77" = load i32, ptr addrspace(5) %"14", align 4
%"127" = inttoptr i64 %"76" to ptr
- %"147" = getelementptr inbounds i8, ptr %"127", i64 8
- store i32 %"77", ptr %"147", align 4
+ %"146" = getelementptr inbounds i8, ptr %"127", i64 8
+ store i32 %"77", ptr %"146", align 4
%"79" = load float, ptr addrspace(5) %"12", align 4
%"80" = load float, ptr addrspace(5) %"13", align 4
%"78" = fcmp uno float %"79", %"80"
@@ -160,9 +164,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115"
br i1 %"81", label %"28", label %"29"
"28": ; preds = %"27"
- %6 = alloca i32, align 4, addrspace(5)
- store i32 1, ptr addrspace(5) %6, align 4
- %"82" = load i32, ptr addrspace(5) %6, align 4
+ store i32 1, ptr addrspace(5) %7, align 4
+ %"82" = load i32, ptr addrspace(5) %7, align 4
store i32 %"82", ptr addrspace(5) %"14", align 4
br label %"29"
@@ -171,9 +174,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115"
br i1 %"83", label %"31", label %"30"
"30": ; preds = %"29"
- %7 = alloca i32, align 4, addrspace(5)
- store i32 0, ptr addrspace(5) %7, align 4
- %"84" = load i32, ptr addrspace(5) %7, align 4
+ store i32 0, ptr addrspace(5) %8, align 4
+ %"84" = load i32, ptr addrspace(5) %8, align 4
store i32 %"84", ptr addrspace(5) %"14", align 4
br label %"31"
@@ -181,8 +183,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115"
%"85" = load i64, ptr addrspace(5) %"5", align 8
%"86" = load i32, ptr addrspace(5) %"14", align 4
%"128" = inttoptr i64 %"85" to ptr
- %"149" = getelementptr inbounds i8, ptr %"128", i64 12
- store i32 %"86", ptr %"149", align 4
+ %"148" = getelementptr inbounds i8, ptr %"128", i64 12
+ store i32 %"86", ptr %"148", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/setp_num.ll b/ptx/src/test/spirv_run/setp_num.ll
index a6254a2..bebecc4 100644
--- a/ptx/src/test/spirv_run/setp_num.ll
+++ b/ptx/src/test/spirv_run/setp_num.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115", ptr addrspace(4) byref(i64) %"116") #0 {
-"129":
%"32" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"32", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
@@ -17,6 +15,18 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115"
%"13" = alloca float, align 4, addrspace(5)
%"14" = alloca i32, align 4, addrspace(5)
%"15" = alloca i1, align 1, addrspace(5)
+ %1 = alloca i32, align 4, addrspace(5)
+ %2 = alloca i32, align 4, addrspace(5)
+ %3 = alloca i32, align 4, addrspace(5)
+ %4 = alloca i32, align 4, addrspace(5)
+ %5 = alloca i32, align 4, addrspace(5)
+ %6 = alloca i32, align 4, addrspace(5)
+ %7 = alloca i32, align 4, addrspace(5)
+ %8 = alloca i32, align 4, addrspace(5)
+ br label %9
+
+9: ; preds = %0
+ store i1 false, ptr addrspace(5) %"32", align 1
%"33" = load i64, ptr addrspace(4) %"115", align 8
store i64 %"33", ptr addrspace(5) %"4", align 8
%"34" = load i64, ptr addrspace(4) %"116", align 8
@@ -27,38 +37,38 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115"
store float %"35", ptr addrspace(5) %"6", align 4
%"38" = load i64, ptr addrspace(5) %"4", align 8
%"118" = inttoptr i64 %"38" to ptr
- %"131" = getelementptr inbounds i8, ptr %"118", i64 4
- %"37" = load float, ptr %"131", align 4
+ %"130" = getelementptr inbounds i8, ptr %"118", i64 4
+ %"37" = load float, ptr %"130", align 4
store float %"37", ptr addrspace(5) %"7", align 4
%"40" = load i64, ptr addrspace(5) %"4", align 8
%"119" = inttoptr i64 %"40" to ptr
- %"133" = getelementptr inbounds i8, ptr %"119", i64 8
- %"39" = load float, ptr %"133", align 4
+ %"132" = getelementptr inbounds i8, ptr %"119", i64 8
+ %"39" = load float, ptr %"132", align 4
store float %"39", ptr addrspace(5) %"8", align 4
%"42" = load i64, ptr addrspace(5) %"4", align 8
%"120" = inttoptr i64 %"42" to ptr
- %"135" = getelementptr inbounds i8, ptr %"120", i64 12
- %"41" = load float, ptr %"135", align 4
+ %"134" = getelementptr inbounds i8, ptr %"120", i64 12
+ %"41" = load float, ptr %"134", align 4
store float %"41", ptr addrspace(5) %"9", align 4
%"44" = load i64, ptr addrspace(5) %"4", align 8
%"121" = inttoptr i64 %"44" to ptr
- %"137" = getelementptr inbounds i8, ptr %"121", i64 16
- %"43" = load float, ptr %"137", align 4
+ %"136" = getelementptr inbounds i8, ptr %"121", i64 16
+ %"43" = load float, ptr %"136", align 4
store float %"43", ptr addrspace(5) %"10", align 4
%"46" = load i64, ptr addrspace(5) %"4", align 8
%"122" = inttoptr i64 %"46" to ptr
- %"139" = getelementptr inbounds i8, ptr %"122", i64 20
- %"45" = load float, ptr %"139", align 4
+ %"138" = getelementptr inbounds i8, ptr %"122", i64 20
+ %"45" = load float, ptr %"138", align 4
store float %"45", ptr addrspace(5) %"11", align 4
%"48" = load i64, ptr addrspace(5) %"4", align 8
%"123" = inttoptr i64 %"48" to ptr
- %"141" = getelementptr inbounds i8, ptr %"123", i64 24
- %"47" = load float, ptr %"141", align 4
+ %"140" = getelementptr inbounds i8, ptr %"123", i64 24
+ %"47" = load float, ptr %"140", align 4
store float %"47", ptr addrspace(5) %"12", align 4
%"50" = load i64, ptr addrspace(5) %"4", align 8
%"124" = inttoptr i64 %"50" to ptr
- %"143" = getelementptr inbounds i8, ptr %"124", i64 28
- %"49" = load float, ptr %"143", align 4
+ %"142" = getelementptr inbounds i8, ptr %"124", i64 28
+ %"49" = load float, ptr %"142", align 4
store float %"49", ptr addrspace(5) %"13", align 4
%"52" = load float, ptr addrspace(5) %"6", align 4
%"53" = load float, ptr addrspace(5) %"7", align 4
@@ -67,21 +77,19 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115"
%"54" = load i1, ptr addrspace(5) %"15", align 1
br i1 %"54", label %"16", label %"17"
-"16": ; preds = %"129"
- %0 = alloca i32, align 4, addrspace(5)
- store i32 2, ptr addrspace(5) %0, align 4
- %"55" = load i32, ptr addrspace(5) %0, align 4
+"16": ; preds = %9
+ store i32 2, ptr addrspace(5) %1, align 4
+ %"55" = load i32, ptr addrspace(5) %1, align 4
store i32 %"55", ptr addrspace(5) %"14", align 4
br label %"17"
-"17": ; preds = %"16", %"129"
+"17": ; preds = %"16", %9
%"56" = load i1, ptr addrspace(5) %"15", align 1
br i1 %"56", label %"19", label %"18"
"18": ; preds = %"17"
- %1 = alloca i32, align 4, addrspace(5)
- store i32 0, ptr addrspace(5) %1, align 4
- %"57" = load i32, ptr addrspace(5) %1, align 4
+ store i32 0, ptr addrspace(5) %2, align 4
+ %"57" = load i32, ptr addrspace(5) %2, align 4
store i32 %"57", ptr addrspace(5) %"14", align 4
br label %"19"
@@ -98,9 +106,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115"
br i1 %"63", label %"20", label %"21"
"20": ; preds = %"19"
- %2 = alloca i32, align 4, addrspace(5)
- store i32 2, ptr addrspace(5) %2, align 4
- %"64" = load i32, ptr addrspace(5) %2, align 4
+ store i32 2, ptr addrspace(5) %3, align 4
+ %"64" = load i32, ptr addrspace(5) %3, align 4
store i32 %"64", ptr addrspace(5) %"14", align 4
br label %"21"
@@ -109,9 +116,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115"
br i1 %"65", label %"23", label %"22"
"22": ; preds = %"21"
- %3 = alloca i32, align 4, addrspace(5)
- store i32 0, ptr addrspace(5) %3, align 4
- %"66" = load i32, ptr addrspace(5) %3, align 4
+ store i32 0, ptr addrspace(5) %4, align 4
+ %"66" = load i32, ptr addrspace(5) %4, align 4
store i32 %"66", ptr addrspace(5) %"14", align 4
br label %"23"
@@ -119,8 +125,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115"
%"67" = load i64, ptr addrspace(5) %"5", align 8
%"68" = load i32, ptr addrspace(5) %"14", align 4
%"126" = inttoptr i64 %"67" to ptr
- %"145" = getelementptr inbounds i8, ptr %"126", i64 4
- store i32 %"68", ptr %"145", align 4
+ %"144" = getelementptr inbounds i8, ptr %"126", i64 4
+ store i32 %"68", ptr %"144", align 4
%"70" = load float, ptr addrspace(5) %"10", align 4
%"71" = load float, ptr addrspace(5) %"11", align 4
%"69" = fcmp ord float %"70", %"71"
@@ -129,9 +135,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115"
br i1 %"72", label %"24", label %"25"
"24": ; preds = %"23"
- %4 = alloca i32, align 4, addrspace(5)
- store i32 2, ptr addrspace(5) %4, align 4
- %"73" = load i32, ptr addrspace(5) %4, align 4
+ store i32 2, ptr addrspace(5) %5, align 4
+ %"73" = load i32, ptr addrspace(5) %5, align 4
store i32 %"73", ptr addrspace(5) %"14", align 4
br label %"25"
@@ -140,9 +145,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115"
br i1 %"74", label %"27", label %"26"
"26": ; preds = %"25"
- %5 = alloca i32, align 4, addrspace(5)
- store i32 0, ptr addrspace(5) %5, align 4
- %"75" = load i32, ptr addrspace(5) %5, align 4
+ store i32 0, ptr addrspace(5) %6, align 4
+ %"75" = load i32, ptr addrspace(5) %6, align 4
store i32 %"75", ptr addrspace(5) %"14", align 4
br label %"27"
@@ -150,8 +154,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115"
%"76" = load i64, ptr addrspace(5) %"5", align 8
%"77" = load i32, ptr addrspace(5) %"14", align 4
%"127" = inttoptr i64 %"76" to ptr
- %"147" = getelementptr inbounds i8, ptr %"127", i64 8
- store i32 %"77", ptr %"147", align 4
+ %"146" = getelementptr inbounds i8, ptr %"127", i64 8
+ store i32 %"77", ptr %"146", align 4
%"79" = load float, ptr addrspace(5) %"12", align 4
%"80" = load float, ptr addrspace(5) %"13", align 4
%"78" = fcmp ord float %"79", %"80"
@@ -160,9 +164,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115"
br i1 %"81", label %"28", label %"29"
"28": ; preds = %"27"
- %6 = alloca i32, align 4, addrspace(5)
- store i32 2, ptr addrspace(5) %6, align 4
- %"82" = load i32, ptr addrspace(5) %6, align 4
+ store i32 2, ptr addrspace(5) %7, align 4
+ %"82" = load i32, ptr addrspace(5) %7, align 4
store i32 %"82", ptr addrspace(5) %"14", align 4
br label %"29"
@@ -171,9 +174,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115"
br i1 %"83", label %"31", label %"30"
"30": ; preds = %"29"
- %7 = alloca i32, align 4, addrspace(5)
- store i32 0, ptr addrspace(5) %7, align 4
- %"84" = load i32, ptr addrspace(5) %7, align 4
+ store i32 0, ptr addrspace(5) %8, align 4
+ %"84" = load i32, ptr addrspace(5) %8, align 4
store i32 %"84", ptr addrspace(5) %"14", align 4
br label %"31"
@@ -181,8 +183,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115"
%"85" = load i64, ptr addrspace(5) %"5", align 8
%"86" = load i32, ptr addrspace(5) %"14", align 4
%"128" = inttoptr i64 %"85" to ptr
- %"149" = getelementptr inbounds i8, ptr %"128", i64 12
- store i32 %"86", ptr %"149", align 4
+ %"148" = getelementptr inbounds i8, ptr %"128", i64 12
+ store i32 %"86", ptr %"148", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/setp_pred2.ll b/ptx/src/test/spirv_run/setp_pred2.ll
index 8220fc0..01ae23e 100644
--- a/ptx/src/test/spirv_run/setp_pred2.ll
+++ b/ptx/src/test/spirv_run/setp_pred2.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
-"41":
%"15" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"15", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
@@ -12,6 +10,12 @@ define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36
%"8" = alloca float, align 4, addrspace(5)
%"9" = alloca i1, align 1, addrspace(5)
%"10" = alloca i1, align 1, addrspace(5)
+ %1 = alloca float, align 4, addrspace(5)
+ %2 = alloca float, align 4, addrspace(5)
+ br label %3
+
+3: ; preds = %0
+ store i1 false, ptr addrspace(5) %"15", align 1
%"16" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"16", ptr addrspace(5) %"4", align 8
%"17" = load i64, ptr addrspace(4) %"37", align 8
@@ -22,8 +26,8 @@ define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36
store float %"18", ptr addrspace(5) %"6", align 4
%"21" = load i64, ptr addrspace(5) %"4", align 8
%"39" = inttoptr i64 %"21" to ptr
- %"43" = getelementptr inbounds i8, ptr %"39", i64 4
- %"20" = load float, ptr %"43", align 4
+ %"42" = getelementptr inbounds i8, ptr %"39", i64 4
+ %"20" = load float, ptr %"42", align 4
store float %"20", ptr addrspace(5) %"7", align 4
%"24" = load float, ptr addrspace(5) %"6", align 4
%"25" = load float, ptr addrspace(5) %"7", align 4
@@ -34,23 +38,21 @@ define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36
%"26" = load i1, ptr addrspace(5) %"9", align 1
br i1 %"26", label %"11", label %"12"
-"11": ; preds = %"41"
+"11": ; preds = %3
%"28" = load float, ptr addrspace(5) %"6", align 4
- %0 = alloca float, align 4, addrspace(5)
- store float %"28", ptr addrspace(5) %0, align 4
- %"27" = load float, ptr addrspace(5) %0, align 4
+ store float %"28", ptr addrspace(5) %1, align 4
+ %"27" = load float, ptr addrspace(5) %1, align 4
store float %"27", ptr addrspace(5) %"8", align 4
br label %"12"
-"12": ; preds = %"11", %"41"
+"12": ; preds = %"11", %3
%"29" = load i1, ptr addrspace(5) %"10", align 1
br i1 %"29", label %"13", label %"14"
"13": ; preds = %"12"
%"31" = load float, ptr addrspace(5) %"7", align 4
- %1 = alloca float, align 4, addrspace(5)
- store float %"31", ptr addrspace(5) %1, align 4
- %"30" = load float, ptr addrspace(5) %1, align 4
+ store float %"31", ptr addrspace(5) %2, align 4
+ %"30" = load float, ptr addrspace(5) %2, align 4
store float %"30", ptr addrspace(5) %"8", align 4
br label %"14"
diff --git a/ptx/src/test/spirv_run/shared_ptr_32.ll b/ptx/src/test/spirv_run/shared_ptr_32.ll
index 8705967..f3e0269 100644
--- a/ptx/src/test/spirv_run/shared_ptr_32.ll
+++ b/ptx/src/test/spirv_run/shared_ptr_32.ll
@@ -4,21 +4,23 @@ target triple = "amdgcn-amd-amdhsa"
@"4" = private addrspace(3) global [128 x i8] undef, align 4
define protected amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 {
-"31":
%"10" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"10", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
+ %1 = alloca i32, align 4, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"10", align 1
%"11" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(4) %"25", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
- %0 = alloca i32, align 4, addrspace(5)
- store i32 ptrtoint (ptr addrspace(3) @"4" to i32), ptr addrspace(5) %0, align 4
- %"13" = load i32, ptr addrspace(5) %0, align 4
+ store i32 ptrtoint (ptr addrspace(3) @"4" to i32), ptr addrspace(5) %1, align 4
+ %"13" = load i32, ptr addrspace(5) %1, align 4
store i32 %"13", ptr addrspace(5) %"7", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"27" = inttoptr i64 %"15" to ptr addrspace(1)
@@ -30,8 +32,8 @@ define protected amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %
store i64 %"17", ptr addrspace(3) %"28", align 8
%"19" = load i32, ptr addrspace(5) %"7", align 4
%"29" = inttoptr i32 %"19" to ptr addrspace(3)
- %"33" = getelementptr inbounds i8, ptr addrspace(3) %"29", i64 0
- %"18" = load i64, ptr addrspace(3) %"33", align 8
+ %"32" = getelementptr inbounds i8, ptr addrspace(3) %"29", i64 0
+ %"18" = load i64, ptr addrspace(3) %"32", align 8
store i64 %"18", ptr addrspace(5) %"9", align 8
%"20" = load i64, ptr addrspace(5) %"6", align 8
%"21" = load i64, ptr addrspace(5) %"9", align 8
diff --git a/ptx/src/test/spirv_run/shared_ptr_take_address.ll b/ptx/src/test/spirv_run/shared_ptr_take_address.ll
index 6c430a2..fd61d71 100644
--- a/ptx/src/test/spirv_run/shared_ptr_take_address.ll
+++ b/ptx/src/test/spirv_run/shared_ptr_take_address.ll
@@ -4,21 +4,23 @@ target triple = "amdgcn-amd-amdhsa"
@shared_mem = external hidden addrspace(3) global [0 x i8], align 4
define protected amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"29":
%"10" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"10", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
+ %1 = alloca i64, align 8, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"10", align 1
%"11" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
- %0 = alloca i64, align 8, addrspace(5)
- store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %0, align 8
- %"13" = load i64, ptr addrspace(5) %0, align 8
+ store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %1, align 8
+ %"13" = load i64, ptr addrspace(5) %1, align 8
store i64 %"13", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"25" = inttoptr i64 %"15" to ptr addrspace(1)
diff --git a/ptx/src/test/spirv_run/shared_unify_decl.ll b/ptx/src/test/spirv_run/shared_unify_decl.ll
index 4cc24fb..61d62d7 100644
--- a/ptx/src/test/spirv_run/shared_unify_decl.ll
+++ b/ptx/src/test/spirv_run/shared_unify_decl.ll
@@ -4,16 +4,18 @@ target triple = "amdgcn-amd-amdhsa"
@shared_ex = external hidden addrspace(3) global [0 x i32]
@shared_mod = private addrspace(3) global [4 x i32] undef
-define private i64 @"3"(ptr addrspace(3) %"66", ptr addrspace(3) %"67") #0 {
-"59":
+define private i64 @"3"(ptr addrspace(3) %"63", ptr addrspace(3) %"64") #0 {
%"8" = alloca i64, align 8, addrspace(5)
%"20" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"20", align 1
%"9" = alloca i64, align 8, addrspace(5)
%"10" = alloca i64, align 8, addrspace(5)
- %"23" = load i64, ptr addrspace(3) %"67", align 8
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"20", align 1
+ %"23" = load i64, ptr addrspace(3) %"64", align 8
store i64 %"23", ptr addrspace(5) %"9", align 8
- %"24" = load i64, ptr addrspace(3) %"66", align 8
+ %"24" = load i64, ptr addrspace(3) %"63", align 8
store i64 %"24", ptr addrspace(5) %"10", align 8
%"26" = load i64, ptr addrspace(5) %"10", align 8
%"27" = load i64, ptr addrspace(5) %"9", align 8
@@ -23,29 +25,33 @@ define private i64 @"3"(ptr addrspace(3) %"66", ptr addrspace(3) %"67") #0 {
ret i64 %"28"
}
-define private i64 @"5"(i64 %"29", ptr addrspace(3) %"68", ptr addrspace(3) %"69") #0 {
-"60":
+define private i64 @"5"(i64 %"29", ptr addrspace(3) %"65", ptr addrspace(3) %"66") #0 {
%"12" = alloca i64, align 8, addrspace(5)
%"11" = alloca i64, align 8, addrspace(5)
%"21" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"21", align 1
+ br label %1
+
+1: ; preds = %0
store i64 %"29", ptr addrspace(5) %"12", align 8
+ store i1 false, ptr addrspace(5) %"21", align 1
%"30" = load i64, ptr addrspace(5) %"12", align 8
- store i64 %"30", ptr addrspace(3) %"68", align 8
- %"31" = call i64 @"3"(ptr addrspace(3) %"68", ptr addrspace(3) %"69")
+ store i64 %"30", ptr addrspace(3) %"65", align 8
+ %"31" = call i64 @"3"(ptr addrspace(3) %"65", ptr addrspace(3) %"66")
store i64 %"31", ptr addrspace(5) %"11", align 8
%"32" = load i64, ptr addrspace(5) %"11", align 8
ret i64 %"32"
}
define protected amdgpu_kernel void @shared_unify_decl(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 {
-"61":
%"22" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"22", align 1
%"16" = alloca i64, align 8, addrspace(5)
%"17" = alloca i64, align 8, addrspace(5)
%"18" = alloca i64, align 8, addrspace(5)
%"19" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"22", align 1
%"33" = load i64, ptr addrspace(4) %"46", align 8
store i64 %"33", ptr addrspace(5) %"16", align 8
%"34" = load i64, ptr addrspace(4) %"47", align 8
@@ -56,8 +62,8 @@ define protected amdgpu_kernel void @shared_unify_decl(ptr addrspace(4) byref(i6
store i64 %"35", ptr addrspace(5) %"18", align 8
%"38" = load i64, ptr addrspace(5) %"16", align 8
%"54" = inttoptr i64 %"38" to ptr addrspace(1)
- %"71" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 8
- %"37" = load i64, ptr addrspace(1) %"71", align 8
+ %"68" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 8
+ %"37" = load i64, ptr addrspace(1) %"68", align 8
store i64 %"37", ptr addrspace(5) %"19", align 8
%"39" = load i64, ptr addrspace(5) %"19", align 8
store i64 %"39", ptr addrspace(3) @shared_mod, align 8
diff --git a/ptx/src/test/spirv_run/shared_unify_extern.ll b/ptx/src/test/spirv_run/shared_unify_extern.ll
index 819e8a1..769fd9f 100644
--- a/ptx/src/test/spirv_run/shared_unify_extern.ll
+++ b/ptx/src/test/spirv_run/shared_unify_extern.ll
@@ -4,16 +4,18 @@ target triple = "amdgcn-amd-amdhsa"
@shared_ex = external hidden addrspace(3) global [0 x i32]
@shared_mod = private addrspace(3) global [4 x i32] undef
-define private i64 @"3"(ptr addrspace(3) %"59", ptr addrspace(3) %"60") #0 {
-"56":
+define private i64 @"3"(ptr addrspace(3) %"56", ptr addrspace(3) %"57") #0 {
%"4" = alloca i64, align 8, addrspace(5)
%"17" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"17", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
- %"20" = load i64, ptr addrspace(3) %"60", align 8
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"17", align 1
+ %"20" = load i64, ptr addrspace(3) %"57", align 8
store i64 %"20", ptr addrspace(5) %"5", align 8
- %"21" = load i64, ptr addrspace(3) %"59", align 8
+ %"21" = load i64, ptr addrspace(3) %"56", align 8
store i64 %"21", ptr addrspace(5) %"6", align 8
%"23" = load i64, ptr addrspace(5) %"6", align 8
%"24" = load i64, ptr addrspace(5) %"5", align 8
@@ -23,29 +25,33 @@ define private i64 @"3"(ptr addrspace(3) %"59", ptr addrspace(3) %"60") #0 {
ret i64 %"25"
}
-define private i64 @"7"(i64 %"26", ptr addrspace(3) %"61", ptr addrspace(3) %"62") #0 {
-"57":
+define private i64 @"7"(i64 %"26", ptr addrspace(3) %"58", ptr addrspace(3) %"59") #0 {
%"9" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"18" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"18", align 1
+ br label %1
+
+1: ; preds = %0
store i64 %"26", ptr addrspace(5) %"9", align 8
+ store i1 false, ptr addrspace(5) %"18", align 1
%"27" = load i64, ptr addrspace(5) %"9", align 8
- store i64 %"27", ptr addrspace(3) %"61", align 8
- %"28" = call i64 @"3"(ptr addrspace(3) %"61", ptr addrspace(3) %"62")
+ store i64 %"27", ptr addrspace(3) %"58", align 8
+ %"28" = call i64 @"3"(ptr addrspace(3) %"58", ptr addrspace(3) %"59")
store i64 %"28", ptr addrspace(5) %"8", align 8
%"29" = load i64, ptr addrspace(5) %"8", align 8
ret i64 %"29"
}
define protected amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #0 {
-"58":
%"19" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"19", align 1
%"13" = alloca i64, align 8, addrspace(5)
%"14" = alloca i64, align 8, addrspace(5)
%"15" = alloca i64, align 8, addrspace(5)
%"16" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"19", align 1
%"30" = load i64, ptr addrspace(4) %"43", align 8
store i64 %"30", ptr addrspace(5) %"13", align 8
%"31" = load i64, ptr addrspace(4) %"44", align 8
@@ -56,8 +62,8 @@ define protected amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(
store i64 %"32", ptr addrspace(5) %"15", align 8
%"35" = load i64, ptr addrspace(5) %"13", align 8
%"51" = inttoptr i64 %"35" to ptr addrspace(1)
- %"64" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 8
- %"34" = load i64, ptr addrspace(1) %"64", align 8
+ %"61" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 8
+ %"34" = load i64, ptr addrspace(1) %"61", align 8
store i64 %"34", ptr addrspace(5) %"16", align 8
%"36" = load i64, ptr addrspace(5) %"16", align 8
store i64 %"36", ptr addrspace(3) @shared_mod, align 8
diff --git a/ptx/src/test/spirv_run/shared_unify_local.ll b/ptx/src/test/spirv_run/shared_unify_local.ll
index b98b280..522e0f5 100644
--- a/ptx/src/test/spirv_run/shared_unify_local.ll
+++ b/ptx/src/test/spirv_run/shared_unify_local.ll
@@ -4,19 +4,21 @@ target triple = "amdgcn-amd-amdhsa"
@shared_ex = external hidden addrspace(3) global [0 x i32]
@"5" = private addrspace(3) global i64 undef, align 4
-define private i64 @"2"(i64 %"21", ptr addrspace(3) %"62", ptr addrspace(3) %"63") #0 {
-"59":
+define private i64 @"2"(i64 %"21", ptr addrspace(3) %"59", ptr addrspace(3) %"60") #0 {
%"4" = alloca i64, align 8, addrspace(5)
%"3" = alloca i64, align 8, addrspace(5)
%"18" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"18", align 1
%"6" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
store i64 %"21", ptr addrspace(5) %"4", align 8
+ store i1 false, ptr addrspace(5) %"18", align 1
%"22" = load i64, ptr addrspace(5) %"4", align 8
- store i64 %"22", ptr addrspace(3) %"63", align 8
- %"23" = load i64, ptr addrspace(3) %"63", align 8
+ store i64 %"22", ptr addrspace(3) %"60", align 8
+ %"23" = load i64, ptr addrspace(3) %"60", align 8
store i64 %"23", ptr addrspace(5) %"6", align 8
- %"24" = load i64, ptr addrspace(3) %"62", align 8
+ %"24" = load i64, ptr addrspace(3) %"59", align 8
store i64 %"24", ptr addrspace(5) %"4", align 8
%"26" = load i64, ptr addrspace(5) %"4", align 8
%"27" = load i64, ptr addrspace(5) %"6", align 8
@@ -26,32 +28,36 @@ define private i64 @"2"(i64 %"21", ptr addrspace(3) %"62", ptr addrspace(3) %"63
ret i64 %"28"
}
-define private i64 @"7"(i64 %"29", i64 %"30", ptr addrspace(3) %"64", ptr addrspace(3) %"65") #0 {
-"60":
+define private i64 @"7"(i64 %"29", i64 %"30", ptr addrspace(3) %"61", ptr addrspace(3) %"62") #0 {
%"9" = alloca i64, align 8, addrspace(5)
%"10" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"19" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"19", align 1
+ br label %1
+
+1: ; preds = %0
store i64 %"29", ptr addrspace(5) %"9", align 8
store i64 %"30", ptr addrspace(5) %"10", align 8
+ store i1 false, ptr addrspace(5) %"19", align 1
%"31" = load i64, ptr addrspace(5) %"9", align 8
- store i64 %"31", ptr addrspace(3) %"64", align 8
+ store i64 %"31", ptr addrspace(3) %"61", align 8
%"33" = load i64, ptr addrspace(5) %"10", align 8
- %"32" = call i64 @"2"(i64 %"33", ptr addrspace(3) %"64", ptr addrspace(3) %"65")
+ %"32" = call i64 @"2"(i64 %"33", ptr addrspace(3) %"61", ptr addrspace(3) %"62")
store i64 %"32", ptr addrspace(5) %"8", align 8
%"34" = load i64, ptr addrspace(5) %"8", align 8
ret i64 %"34"
}
define protected amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #0 {
-"61":
%"20" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"20", align 1
%"14" = alloca i64, align 8, addrspace(5)
%"15" = alloca i64, align 8, addrspace(5)
%"16" = alloca i64, align 8, addrspace(5)
%"17" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"20", align 1
%"35" = load i64, ptr addrspace(4) %"48", align 8
store i64 %"35", ptr addrspace(5) %"14", align 8
%"36" = load i64, ptr addrspace(4) %"49", align 8
@@ -62,8 +68,8 @@ define protected amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i
store i64 %"37", ptr addrspace(5) %"16", align 8
%"40" = load i64, ptr addrspace(5) %"14", align 8
%"55" = inttoptr i64 %"40" to ptr addrspace(1)
- %"67" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 8
- %"39" = load i64, ptr addrspace(1) %"67", align 8
+ %"64" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 8
+ %"39" = load i64, ptr addrspace(1) %"64", align 8
store i64 %"39", ptr addrspace(5) %"17", align 8
%"42" = load i64, ptr addrspace(5) %"16", align 8
%"43" = load i64, ptr addrspace(5) %"17", align 8
diff --git a/ptx/src/test/spirv_run/shared_variable.ll b/ptx/src/test/spirv_run/shared_variable.ll
index 859a767..ac1e519 100644
--- a/ptx/src/test/spirv_run/shared_variable.ll
+++ b/ptx/src/test/spirv_run/shared_variable.ll
@@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa"
@"4" = private addrspace(3) global [128 x i8] undef, align 4
define protected amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
-"24":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(4) %"19", align 8
diff --git a/ptx/src/test/spirv_run/shf.ll b/ptx/src/test/spirv_run/shf.ll
index 22be32a..317a60f 100644
--- a/ptx/src/test/spirv_run/shf.ll
+++ b/ptx/src/test/spirv_run/shf.ll
@@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @shf(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 {
-"32":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"25", align 8
@@ -20,8 +22,8 @@ define protected amdgpu_kernel void @shf(ptr addrspace(4) byref(i64) %"24", ptr
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"27" = inttoptr i64 %"15" to ptr
- %"34" = getelementptr inbounds i8, ptr %"27", i64 4
- %"14" = load i32, ptr %"34", align 4
+ %"33" = getelementptr inbounds i8, ptr %"27", i64 4
+ %"14" = load i32, ptr %"33", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i32, ptr addrspace(5) %"6", align 4
%"18" = load i32, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/shl.ll b/ptx/src/test/spirv_run/shl.ll
index 40c3365..9f9b609 100644
--- a/ptx/src/test/spirv_run/shl.ll
+++ b/ptx/src/test/spirv_run/shl.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
-"24":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
@@ -18,8 +20,8 @@ define protected amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"18", ptr
%"11" = load i64, ptr %"20", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
- %0 = shl i64 %"14", 2
- %"21" = select i1 false, i64 0, i64 %0
+ %2 = shl i64 %"14", 2
+ %"21" = select i1 false, i64 0, i64 %2
store i64 %"21", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
diff --git a/ptx/src/test/spirv_run/shl_link_hack.ll b/ptx/src/test/spirv_run/shl_link_hack.ll
index 9ac3883..29d1c74 100644
--- a/ptx/src/test/spirv_run/shl_link_hack.ll
+++ b/ptx/src/test/spirv_run/shl_link_hack.ll
@@ -4,14 +4,16 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr, i32) #0
define protected amdgpu_kernel void @shl_link_hack(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #1 {
-"29":
%"9" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"9", align 1
%"10" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"23", align 8
@@ -25,8 +27,8 @@ define protected amdgpu_kernel void @shl_link_hack(ptr addrspace(4) byref(i64) %
%"14" = load i64, ptr %"25", align 8
store i64 %"14", ptr addrspace(5) %"6", align 8
%"17" = load i64, ptr addrspace(5) %"6", align 8
- %0 = shl i64 %"17", 2
- %"26" = select i1 false, i64 0, i64 %0
+ %2 = shl i64 %"17", 2
+ %"26" = select i1 false, i64 0, i64 %2
store i64 %"26", ptr addrspace(5) %"7", align 8
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i64, ptr addrspace(5) %"7", align 8
diff --git a/ptx/src/test/spirv_run/shl_overflow.ll b/ptx/src/test/spirv_run/shl_overflow.ll
index 80d4871..86178d8 100644
--- a/ptx/src/test/spirv_run/shl_overflow.ll
+++ b/ptx/src/test/spirv_run/shl_overflow.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #0 {
-"62":
%"11" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"11", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -12,6 +10,10 @@ define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %"
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
%"10" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"11", align 1
%"12" = load i64, ptr addrspace(4) %"47", align 8
store i64 %"12", ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(4) %"48", align 8
@@ -22,24 +24,24 @@ define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %"
store i32 %"14", ptr addrspace(5) %"6", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"50" = inttoptr i64 %"17" to ptr
- %"64" = getelementptr inbounds i8, ptr %"50", i64 4
- %"16" = load i32, ptr %"64", align 4
+ %"63" = getelementptr inbounds i8, ptr %"50", i64 4
+ %"16" = load i32, ptr %"63", align 4
store i32 %"16", ptr addrspace(5) %"8", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"51" = inttoptr i64 %"19" to ptr
- %"66" = getelementptr inbounds i8, ptr %"51", i64 8
- %"18" = load i32, ptr %"66", align 4
+ %"65" = getelementptr inbounds i8, ptr %"51", i64 8
+ %"18" = load i32, ptr %"65", align 4
store i32 %"18", ptr addrspace(5) %"9", align 4
%"21" = load i64, ptr addrspace(5) %"4", align 8
%"52" = inttoptr i64 %"21" to ptr
- %"68" = getelementptr inbounds i8, ptr %"52", i64 12
- %"20" = load i32, ptr %"68", align 4
+ %"67" = getelementptr inbounds i8, ptr %"52", i64 12
+ %"20" = load i32, ptr %"67", align 4
store i32 %"20", ptr addrspace(5) %"10", align 4
%"23" = load i32, ptr addrspace(5) %"6", align 4
%"24" = load i32, ptr addrspace(5) %"8", align 4
- %0 = icmp ugt i32 %"24", 31
- %1 = shl i32 %"23", %"24"
- %"53" = select i1 %0, i32 0, i32 %1
+ %2 = icmp ugt i32 %"24", 31
+ %3 = shl i32 %"23", %"24"
+ %"53" = select i1 %2, i32 0, i32 %3
store i32 %"53", ptr addrspace(5) %"7", align 4
%"25" = load i64, ptr addrspace(5) %"5", align 8
%"26" = load i32, ptr addrspace(5) %"7", align 4
@@ -47,26 +49,26 @@ define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %"
store i32 %"26", ptr %"55", align 4
%"28" = load i32, ptr addrspace(5) %"6", align 4
%"29" = load i32, ptr addrspace(5) %"9", align 4
- %2 = icmp ugt i32 %"29", 31
- %3 = shl i32 %"28", %"29"
- %"56" = select i1 %2, i32 0, i32 %3
+ %4 = icmp ugt i32 %"29", 31
+ %5 = shl i32 %"28", %"29"
+ %"56" = select i1 %4, i32 0, i32 %5
store i32 %"56", ptr addrspace(5) %"7", align 4
%"30" = load i64, ptr addrspace(5) %"5", align 8
%"31" = load i32, ptr addrspace(5) %"7", align 4
%"58" = inttoptr i64 %"30" to ptr
- %"70" = getelementptr inbounds i8, ptr %"58", i64 4
- store i32 %"31", ptr %"70", align 4
+ %"69" = getelementptr inbounds i8, ptr %"58", i64 4
+ store i32 %"31", ptr %"69", align 4
%"33" = load i32, ptr addrspace(5) %"6", align 4
%"34" = load i32, ptr addrspace(5) %"10", align 4
- %4 = icmp ugt i32 %"34", 31
- %5 = shl i32 %"33", %"34"
- %"59" = select i1 %4, i32 0, i32 %5
+ %6 = icmp ugt i32 %"34", 31
+ %7 = shl i32 %"33", %"34"
+ %"59" = select i1 %6, i32 0, i32 %7
store i32 %"59", ptr addrspace(5) %"7", align 4
%"35" = load i64, ptr addrspace(5) %"5", align 8
%"36" = load i32, ptr addrspace(5) %"7", align 4
%"61" = inttoptr i64 %"35" to ptr
- %"72" = getelementptr inbounds i8, ptr %"61", i64 8
- store i32 %"36", ptr %"72", align 4
+ %"71" = getelementptr inbounds i8, ptr %"61", i64 8
+ store i32 %"36", ptr %"71", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/shr_s32.ll b/ptx/src/test/spirv_run/shr_s32.ll
index 77c71f9..a6a6d98 100644
--- a/ptx/src/test/spirv_run/shr_s32.ll
+++ b/ptx/src/test/spirv_run/shr_s32.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @shr_s32(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"28":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
@@ -19,14 +21,14 @@ define protected amdgpu_kernel void @shr_s32(ptr addrspace(4) byref(i64) %"22",
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
- %"30" = getelementptr inbounds i8, ptr %"25", i64 4
- %"13" = load i32, ptr %"30", align 4
+ %"29" = getelementptr inbounds i8, ptr %"25", i64 4
+ %"13" = load i32, ptr %"29", align 4
store i32 %"13", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"17" = load i32, ptr addrspace(5) %"7", align 4
- %0 = icmp ugt i32 %"17", 31
- %1 = ashr i32 %"16", %"17"
- %"15" = select i1 %0, i32 -1, i32 %1
+ %2 = icmp ugt i32 %"17", 31
+ %3 = ashr i32 %"16", %"17"
+ %"15" = select i1 %2, i32 -1, i32 %3
store i32 %"15", ptr addrspace(5) %"6", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i32, ptr addrspace(5) %"6", align 4
diff --git a/ptx/src/test/spirv_run/shr_u32.ll b/ptx/src/test/spirv_run/shr_u32.ll
index 22c8761..52153d9 100644
--- a/ptx/src/test/spirv_run/shr_u32.ll
+++ b/ptx/src/test/spirv_run/shr_u32.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
-"45":
%"11" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"11", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -12,6 +10,10 @@ define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36",
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
%"10" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"11", align 1
%"12" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"12", ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(4) %"37", align 8
@@ -22,25 +24,25 @@ define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36",
store i32 %"14", ptr addrspace(5) %"6", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"39" = inttoptr i64 %"17" to ptr
- %"47" = getelementptr inbounds i8, ptr %"39", i64 4
- %"16" = load i32, ptr %"47", align 4
+ %"46" = getelementptr inbounds i8, ptr %"39", i64 4
+ %"16" = load i32, ptr %"46", align 4
store i32 %"16", ptr addrspace(5) %"7", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"40" = inttoptr i64 %"19" to ptr
- %"49" = getelementptr inbounds i8, ptr %"40", i64 8
- %"18" = load i32, ptr %"49", align 4
+ %"48" = getelementptr inbounds i8, ptr %"40", i64 8
+ %"18" = load i32, ptr %"48", align 4
store i32 %"18", ptr addrspace(5) %"8", align 4
%"21" = load i32, ptr addrspace(5) %"6", align 4
%"22" = load i32, ptr addrspace(5) %"7", align 4
- %0 = icmp ugt i32 %"22", 31
- %1 = lshr i32 %"21", %"22"
- %"20" = select i1 %0, i32 0, i32 %1
+ %2 = icmp ugt i32 %"22", 31
+ %3 = lshr i32 %"21", %"22"
+ %"20" = select i1 %2, i32 0, i32 %3
store i32 %"20", ptr addrspace(5) %"9", align 4
%"24" = load i32, ptr addrspace(5) %"6", align 4
%"25" = load i32, ptr addrspace(5) %"8", align 4
- %2 = icmp ugt i32 %"25", 31
- %3 = lshr i32 %"24", %"25"
- %"23" = select i1 %2, i32 0, i32 %3
+ %4 = icmp ugt i32 %"25", 31
+ %5 = lshr i32 %"24", %"25"
+ %"23" = select i1 %4, i32 0, i32 %5
store i32 %"23", ptr addrspace(5) %"10", align 4
%"26" = load i64, ptr addrspace(5) %"5", align 8
%"27" = load i32, ptr addrspace(5) %"9", align 4
@@ -49,8 +51,8 @@ define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36",
%"28" = load i64, ptr addrspace(5) %"5", align 8
%"29" = load i32, ptr addrspace(5) %"10", align 4
%"44" = inttoptr i64 %"28" to ptr
- %"51" = getelementptr inbounds i8, ptr %"44", i64 4
- store i32 %"29", ptr %"51", align 4
+ %"50" = getelementptr inbounds i8, ptr %"44", i64 4
+ store i32 %"29", ptr %"50", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/sign_extend.ll b/ptx/src/test/spirv_run/sign_extend.ll
index ef26261..98494e3 100644
--- a/ptx/src/test/spirv_run/sign_extend.ll
+++ b/ptx/src/test/spirv_run/sign_extend.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 {
-"19":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"14", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"15", align 8
diff --git a/ptx/src/test/spirv_run/sin.ll b/ptx/src/test/spirv_run/sin.ll
index f38aedd..33f510c 100644
--- a/ptx/src/test/spirv_run/sin.ll
+++ b/ptx/src/test/spirv_run/sin.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
-"20":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
diff --git a/ptx/src/test/spirv_run/sqrt.ll b/ptx/src/test/spirv_run/sqrt.ll
index c8e4ec0..f86753e 100644
--- a/ptx/src/test/spirv_run/sqrt.ll
+++ b/ptx/src/test/spirv_run/sqrt.ll
@@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
-"20":
%"7" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"7", align 1
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
diff --git a/ptx/src/test/spirv_run/sub.ll b/ptx/src/test/spirv_run/sub.ll
index 83fec5f..24a12bd 100644
--- a/ptx/src/test/spirv_run/sub.ll
+++ b/ptx/src/test/spirv_run/sub.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
-"22":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
diff --git a/ptx/src/test/spirv_run/subc_cc.ll b/ptx/src/test/spirv_run/subc_cc.ll
index 0101b83..cdd5c0b 100644
--- a/ptx/src/test/spirv_run/subc_cc.ll
+++ b/ptx/src/test/spirv_run/subc_cc.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #0 {
-"72":
%"13" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"13", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -14,6 +12,10 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57",
%"10" = alloca i32, align 4, addrspace(5)
%"11" = alloca i32, align 4, addrspace(5)
%"12" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"13", align 1
%"18" = load i64, ptr addrspace(4) %"57", align 8
store i64 %"18", ptr addrspace(5) %"4", align 8
%"19" = load i64, ptr addrspace(4) %"58", align 8
@@ -24,24 +26,24 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57",
store i32 %"59", ptr addrspace(5) %"9", align 4
%"23" = load i64, ptr addrspace(5) %"4", align 8
%"61" = inttoptr i64 %"23" to ptr
- %"74" = getelementptr inbounds i8, ptr %"61", i64 4
- %"62" = load i32, ptr %"74", align 4
+ %"73" = getelementptr inbounds i8, ptr %"61", i64 4
+ %"62" = load i32, ptr %"73", align 4
store i32 %"62", ptr addrspace(5) %"10", align 4
%"25" = load i64, ptr addrspace(5) %"4", align 8
%"63" = inttoptr i64 %"25" to ptr
- %"76" = getelementptr inbounds i8, ptr %"63", i64 8
- %"24" = load i32, ptr %"76", align 4
+ %"75" = getelementptr inbounds i8, ptr %"63", i64 8
+ %"24" = load i32, ptr %"75", align 4
store i32 %"24", ptr addrspace(5) %"11", align 4
%"27" = load i64, ptr addrspace(5) %"4", align 8
%"64" = inttoptr i64 %"27" to ptr
- %"78" = getelementptr inbounds i8, ptr %"64", i64 12
- %"26" = load i32, ptr %"78", align 4
+ %"77" = getelementptr inbounds i8, ptr %"64", i64 12
+ %"26" = load i32, ptr %"77", align 4
store i32 %"26", ptr addrspace(5) %"12", align 4
%"29" = load i32, ptr addrspace(5) %"9", align 4
%"30" = load i32, ptr addrspace(5) %"10", align 4
- %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"29", i32 %"30")
- %"28" = extractvalue { i32, i1 } %0, 0
- %"14" = extractvalue { i32, i1 } %0, 1
+ %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"29", i32 %"30")
+ %"28" = extractvalue { i32, i1 } %2, 0
+ %"14" = extractvalue { i32, i1 } %2, 1
store i32 %"28", ptr addrspace(5) %"6", align 4
%"31" = xor i1 %"14", true
store i1 %"31", ptr addrspace(5) %"13", align 1
@@ -49,14 +51,14 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57",
%"15" = xor i1 %"32", true
%"34" = load i32, ptr addrspace(5) %"6", align 4
%"35" = load i32, ptr addrspace(5) %"11", align 4
- %1 = zext i1 %"15" to i32
- %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"34", i32 %"35")
- %3 = extractvalue { i32, i1 } %2, 0
- %4 = extractvalue { i32, i1 } %2, 1
- %5 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %3, i32 %1)
- %"33" = extractvalue { i32, i1 } %5, 0
- %6 = extractvalue { i32, i1 } %5, 1
- %"16" = xor i1 %4, %6
+ %3 = zext i1 %"15" to i32
+ %4 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"34", i32 %"35")
+ %5 = extractvalue { i32, i1 } %4, 0
+ %6 = extractvalue { i32, i1 } %4, 1
+ %7 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %5, i32 %3)
+ %"33" = extractvalue { i32, i1 } %7, 0
+ %8 = extractvalue { i32, i1 } %7, 1
+ %"16" = xor i1 %6, %8
store i32 %"33", ptr addrspace(5) %"7", align 4
%"36" = xor i1 %"16", true
store i1 %"36", ptr addrspace(5) %"13", align 1
@@ -64,9 +66,9 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57",
%"17" = xor i1 %"37", true
%"39" = load i32, ptr addrspace(5) %"7", align 4
%"40" = load i32, ptr addrspace(5) %"12", align 4
- %7 = zext i1 %"17" to i32
- %8 = sub i32 %"39", %"40"
- %"38" = sub i32 %8, %7
+ %9 = zext i1 %"17" to i32
+ %10 = sub i32 %"39", %"40"
+ %"38" = sub i32 %10, %9
store i32 %"38", ptr addrspace(5) %"8", align 4
%"41" = load i64, ptr addrspace(5) %"5", align 8
%"42" = load i32, ptr addrspace(5) %"6", align 4
@@ -75,13 +77,13 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57",
%"43" = load i64, ptr addrspace(5) %"5", align 8
%"44" = load i32, ptr addrspace(5) %"7", align 4
%"70" = inttoptr i64 %"43" to ptr
- %"80" = getelementptr inbounds i8, ptr %"70", i64 4
- store i32 %"44", ptr %"80", align 4
+ %"79" = getelementptr inbounds i8, ptr %"70", i64 4
+ store i32 %"44", ptr %"79", align 4
%"45" = load i64, ptr addrspace(5) %"5", align 8
%"46" = load i32, ptr addrspace(5) %"8", align 4
%"71" = inttoptr i64 %"45" to ptr
- %"82" = getelementptr inbounds i8, ptr %"71", i64 8
- store i32 %"46", ptr %"82", align 4
+ %"81" = getelementptr inbounds i8, ptr %"71", i64 8
+ store i32 %"46", ptr %"81", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/vector.ll b/ptx/src/test/spirv_run/vector.ll
index b60aaec..f311be7 100644
--- a/ptx/src/test/spirv_run/vector.ll
+++ b/ptx/src/test/spirv_run/vector.ll
@@ -2,69 +2,74 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define private <2 x i32> @"1"(<2 x i32> %"18") #0 {
-"50":
%"3" = alloca <2 x i32>, align 8, addrspace(5)
%"2" = alloca <2 x i32>, align 8, addrspace(5)
%"16" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"16", align 1
%"4" = alloca <2 x i32>, align 8, addrspace(5)
%"5" = alloca i32, align 4, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
- store <2 x i32> %"18", ptr addrspace(5) %"3", align 8
- %0 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 0
- %"20" = load i32, ptr addrspace(5) %0, align 4
%1 = alloca i32, align 4, addrspace(5)
+ %2 = alloca i32, align 4, addrspace(5)
+ %3 = alloca i32, align 4, addrspace(5)
+ %4 = alloca i32, align 4, addrspace(5)
+ %5 = alloca i32, align 4, addrspace(5)
+ %6 = alloca <2 x i32>, align 8, addrspace(5)
+ br label %7
+
+7: ; preds = %0
+ store <2 x i32> %"18", ptr addrspace(5) %"3", align 8
+ store i1 false, ptr addrspace(5) %"16", align 1
+ %8 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 0
+ %"20" = load i32, ptr addrspace(5) %8, align 4
store i32 %"20", ptr addrspace(5) %1, align 4
%"19" = load i32, ptr addrspace(5) %1, align 4
store i32 %"19", ptr addrspace(5) %"5", align 4
- %2 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 1
- %"22" = load i32, ptr addrspace(5) %2, align 4
- %3 = alloca i32, align 4, addrspace(5)
- store i32 %"22", ptr addrspace(5) %3, align 4
- %"21" = load i32, ptr addrspace(5) %3, align 4
+ %9 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 1
+ %"22" = load i32, ptr addrspace(5) %9, align 4
+ store i32 %"22", ptr addrspace(5) %2, align 4
+ %"21" = load i32, ptr addrspace(5) %2, align 4
store i32 %"21", ptr addrspace(5) %"6", align 4
%"24" = load i32, ptr addrspace(5) %"5", align 4
%"25" = load i32, ptr addrspace(5) %"6", align 4
%"23" = add i32 %"24", %"25"
store i32 %"23", ptr addrspace(5) %"6", align 4
%"27" = load i32, ptr addrspace(5) %"6", align 4
- %4 = alloca i32, align 4, addrspace(5)
- store i32 %"27", ptr addrspace(5) %4, align 4
- %"26" = load i32, ptr addrspace(5) %4, align 4
- %5 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 0
- store i32 %"26", ptr addrspace(5) %5, align 4
- %"29" = load i32, ptr addrspace(5) %"6", align 4
- %6 = alloca i32, align 4, addrspace(5)
- store i32 %"29", ptr addrspace(5) %6, align 4
- %"28" = load i32, ptr addrspace(5) %6, align 4
- %7 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1
- store i32 %"28", ptr addrspace(5) %7, align 4
- %8 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1
- %"31" = load i32, ptr addrspace(5) %8, align 4
- %9 = alloca i32, align 4, addrspace(5)
- store i32 %"31", ptr addrspace(5) %9, align 4
- %"30" = load i32, ptr addrspace(5) %9, align 4
+ store i32 %"27", ptr addrspace(5) %3, align 4
+ %"26" = load i32, ptr addrspace(5) %3, align 4
%10 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 0
- store i32 %"30", ptr addrspace(5) %10, align 4
+ store i32 %"26", ptr addrspace(5) %10, align 4
+ %"29" = load i32, ptr addrspace(5) %"6", align 4
+ store i32 %"29", ptr addrspace(5) %4, align 4
+ %"28" = load i32, ptr addrspace(5) %4, align 4
+ %11 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1
+ store i32 %"28", ptr addrspace(5) %11, align 4
+ %12 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1
+ %"31" = load i32, ptr addrspace(5) %12, align 4
+ store i32 %"31", ptr addrspace(5) %5, align 4
+ %"30" = load i32, ptr addrspace(5) %5, align 4
+ %13 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 0
+ store i32 %"30", ptr addrspace(5) %13, align 4
%"33" = load <2 x i32>, ptr addrspace(5) %"4", align 8
- %11 = alloca <2 x i32>, align 8, addrspace(5)
- store <2 x i32> %"33", ptr addrspace(5) %11, align 8
- %"32" = load <2 x i32>, ptr addrspace(5) %11, align 8
+ store <2 x i32> %"33", ptr addrspace(5) %6, align 8
+ %"32" = load <2 x i32>, ptr addrspace(5) %6, align 8
store <2 x i32> %"32", ptr addrspace(5) %"2", align 8
%"34" = load <2 x i32>, ptr addrspace(5) %"2", align 8
ret <2 x i32> %"34"
}
define protected amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 {
-"51":
%"17" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"17", align 1
%"10" = alloca i64, align 8, addrspace(5)
%"11" = alloca i64, align 8, addrspace(5)
%"12" = alloca <2 x i32>, align 8, addrspace(5)
%"13" = alloca i32, align 4, addrspace(5)
%"14" = alloca i32, align 4, addrspace(5)
%"15" = alloca i64, align 8, addrspace(5)
+ %1 = alloca i64, align 8, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"17", align 1
%"35" = load i64, ptr addrspace(4) %"45", align 8
store i64 %"35", ptr addrspace(5) %"10", align 8
%"36" = load i64, ptr addrspace(4) %"46", align 8
@@ -78,9 +83,8 @@ define protected amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"45", p
store <2 x i32> %"39", ptr addrspace(5) %"12", align 8
%"42" = load <2 x i32>, ptr addrspace(5) %"12", align 8
%"48" = bitcast <2 x i32> %"42" to i64
- %0 = alloca i64, align 8, addrspace(5)
- store i64 %"48", ptr addrspace(5) %0, align 8
- %"41" = load i64, ptr addrspace(5) %0, align 8
+ store i64 %"48", ptr addrspace(5) %1, align 8
+ %"41" = load i64, ptr addrspace(5) %1, align 8
store i64 %"41", ptr addrspace(5) %"15", align 8
%"43" = load i64, ptr addrspace(5) %"11", align 8
%"44" = load <2 x i32>, ptr addrspace(5) %"12", align 8
diff --git a/ptx/src/test/spirv_run/vector4.ll b/ptx/src/test/spirv_run/vector4.ll
index 494b1af..7d92885 100644
--- a/ptx/src/test/spirv_run/vector4.ll
+++ b/ptx/src/test/spirv_run/vector4.ll
@@ -2,13 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
-"23":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca <4 x i32>, align 16, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ %1 = alloca i32, align 4, addrspace(5)
+ br label %2
+
+2: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
@@ -17,9 +20,8 @@ define protected amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"17",
%"19" = inttoptr i64 %"12" to ptr
%"11" = load <4 x i32>, ptr %"19", align 16
store <4 x i32> %"11", ptr addrspace(5) %"6", align 16
- %0 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %"6", i32 0, i32 3
- %"14" = load i32, ptr addrspace(5) %0, align 4
- %1 = alloca i32, align 4, addrspace(5)
+ %3 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %"6", i32 0, i32 3
+ %"14" = load i32, ptr addrspace(5) %3, align 4
store i32 %"14", ptr addrspace(5) %1, align 4
%"20" = load i32, ptr addrspace(5) %1, align 4
store i32 %"20", ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/test/spirv_run/vector_extract.ll b/ptx/src/test/spirv_run/vector_extract.ll
index d877dc7..ea2e2db 100644
--- a/ptx/src/test/spirv_run/vector_extract.ll
+++ b/ptx/src/test/spirv_run/vector_extract.ll
@@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #0 {
-"60":
%"17" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"17", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i16, align 2, addrspace(5)
@@ -12,6 +10,13 @@ define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64)
%"8" = alloca i16, align 2, addrspace(5)
%"9" = alloca i16, align 2, addrspace(5)
%"10" = alloca <4 x i16>, align 8, addrspace(5)
+ %1 = alloca <4 x i16>, align 8, addrspace(5)
+ %2 = alloca <4 x i16>, align 8, addrspace(5)
+ %3 = alloca <4 x i16>, align 8, addrspace(5)
+ br label %4
+
+4: ; preds = %0
+ store i1 false, ptr addrspace(5) %"17", align 1
%"18" = load i64, ptr addrspace(4) %"48", align 8
store i64 %"18", ptr addrspace(5) %"4", align 8
%"19" = load i64, ptr addrspace(4) %"49", align 8
@@ -35,18 +40,16 @@ define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64)
%"26" = load i16, ptr addrspace(5) %"8", align 2
%"27" = load i16, ptr addrspace(5) %"9", align 2
%"28" = load i16, ptr addrspace(5) %"6", align 2
- %0 = insertelement <4 x i16> undef, i16 %"25", i32 0
- %1 = insertelement <4 x i16> %0, i16 %"26", i32 1
- %2 = insertelement <4 x i16> %1, i16 %"27", i32 2
- %"12" = insertelement <4 x i16> %2, i16 %"28", i32 3
- %3 = alloca <4 x i16>, align 8, addrspace(5)
- store <4 x i16> %"12", ptr addrspace(5) %3, align 8
- %"29" = load <4 x i16>, ptr addrspace(5) %3, align 8
+ %5 = insertelement <4 x i16> undef, i16 %"25", i32 0
+ %6 = insertelement <4 x i16> %5, i16 %"26", i32 1
+ %7 = insertelement <4 x i16> %6, i16 %"27", i32 2
+ %"12" = insertelement <4 x i16> %7, i16 %"28", i32 3
+ store <4 x i16> %"12", ptr addrspace(5) %1, align 8
+ %"29" = load <4 x i16>, ptr addrspace(5) %1, align 8
store <4 x i16> %"29", ptr addrspace(5) %"10", align 8
%"30" = load <4 x i16>, ptr addrspace(5) %"10", align 8
- %4 = alloca <4 x i16>, align 8, addrspace(5)
- store <4 x i16> %"30", ptr addrspace(5) %4, align 8
- %"13" = load <4 x i16>, ptr addrspace(5) %4, align 8
+ store <4 x i16> %"30", ptr addrspace(5) %2, align 8
+ %"13" = load <4 x i16>, ptr addrspace(5) %2, align 8
%"31" = extractelement <4 x i16> %"13", i32 0
%"32" = extractelement <4 x i16> %"13", i32 1
%"33" = extractelement <4 x i16> %"13", i32 2
@@ -59,13 +62,12 @@ define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64)
%"36" = load i16, ptr addrspace(5) %"9", align 2
%"37" = load i16, ptr addrspace(5) %"6", align 2
%"38" = load i16, ptr addrspace(5) %"7", align 2
- %5 = insertelement <4 x i16> undef, i16 %"35", i32 0
- %6 = insertelement <4 x i16> %5, i16 %"36", i32 1
- %7 = insertelement <4 x i16> %6, i16 %"37", i32 2
- %"15" = insertelement <4 x i16> %7, i16 %"38", i32 3
- %8 = alloca <4 x i16>, align 8, addrspace(5)
- store <4 x i16> %"15", ptr addrspace(5) %8, align 8
- %"14" = load <4 x i16>, ptr addrspace(5) %8, align 8
+ %8 = insertelement <4 x i16> undef, i16 %"35", i32 0
+ %9 = insertelement <4 x i16> %8, i16 %"36", i32 1
+ %10 = insertelement <4 x i16> %9, i16 %"37", i32 2
+ %"15" = insertelement <4 x i16> %10, i16 %"38", i32 3
+ store <4 x i16> %"15", ptr addrspace(5) %3, align 8
+ %"14" = load <4 x i16>, ptr addrspace(5) %3, align 8
%"39" = extractelement <4 x i16> %"14", i32 0
%"40" = extractelement <4 x i16> %"14", i32 1
%"41" = extractelement <4 x i16> %"14", i32 2
@@ -82,10 +84,10 @@ define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64)
%"56" = trunc i16 %"44" to i8
%"57" = trunc i16 %"45" to i8
%"58" = trunc i16 %"46" to i8
- %9 = insertelement <4 x i8> undef, i8 %"55", i32 0
- %10 = insertelement <4 x i8> %9, i8 %"56", i32 1
- %11 = insertelement <4 x i8> %10, i8 %"57", i32 2
- %"16" = insertelement <4 x i8> %11, i8 %"58", i32 3
+ %11 = insertelement <4 x i8> undef, i8 %"55", i32 0
+ %12 = insertelement <4 x i8> %11, i8 %"56", i32 1
+ %13 = insertelement <4 x i8> %12, i8 %"57", i32 2
+ %"16" = insertelement <4 x i8> %13, i8 %"58", i32 3
%"47" = load i64, ptr addrspace(5) %"5", align 8
%"59" = inttoptr i64 %"47" to ptr addrspace(1)
store <4 x i8> %"16", ptr addrspace(1) %"59", align 4
diff --git a/ptx/src/test/spirv_run/vote_ballot.ll b/ptx/src/test/spirv_run/vote_ballot.ll
index fd31f1a..efba70a 100644
--- a/ptx/src/test/spirv_run/vote_ballot.ll
+++ b/ptx/src/test/spirv_run/vote_ballot.ll
@@ -4,15 +4,17 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1, i32) #0
define protected amdgpu_kernel void @vote_ballot(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
-"50":
%"10" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"10", align 1
%"11" = load i64, ptr addrspace(4) %"41", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"42" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 1)
@@ -26,23 +28,23 @@ define protected amdgpu_kernel void @vote_ballot(ptr addrspace(4) byref(i64) %"4
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load i32, ptr addrspace(5) %"6", align 4
%"46" = inttoptr i64 %"16" to ptr
- %"56" = getelementptr inbounds i8, ptr %"46", i64 0
- store i32 %"17", ptr %"56", align 4
+ %"55" = getelementptr inbounds i8, ptr %"46", i64 0
+ store i32 %"17", ptr %"55", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i32, ptr addrspace(5) %"7", align 4
%"47" = inttoptr i64 %"18" to ptr
- %"58" = getelementptr inbounds i8, ptr %"47", i64 4
- store i32 %"19", ptr %"58", align 4
+ %"57" = getelementptr inbounds i8, ptr %"47", i64 4
+ store i32 %"19", ptr %"57", align 4
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load i32, ptr addrspace(5) %"8", align 4
%"48" = inttoptr i64 %"20" to ptr
- %"60" = getelementptr inbounds i8, ptr %"48", i64 8
- store i32 %"21", ptr %"60", align 4
+ %"59" = getelementptr inbounds i8, ptr %"48", i64 8
+ store i32 %"21", ptr %"59", align 4
%"22" = load i64, ptr addrspace(5) %"5", align 8
%"23" = load i32, ptr addrspace(5) %"9", align 4
%"49" = inttoptr i64 %"22" to ptr
- %"62" = getelementptr inbounds i8, ptr %"49", i64 12
- store i32 %"23", ptr %"62", align 4
+ %"61" = getelementptr inbounds i8, ptr %"49", i64 12
+ store i32 %"23", ptr %"61", align 4
ret void
}
diff --git a/ptx/src/test/spirv_run/vshr.ll b/ptx/src/test/spirv_run/vshr.ll
index 4433bf2..3d24770 100644
--- a/ptx/src/test/spirv_run/vshr.ll
+++ b/ptx/src/test/spirv_run/vshr.ll
@@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @vshr(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 {
-"38":
%"10" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"10", align 1
%"11" = load i64, ptr addrspace(4) %"29", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"30", align 8
@@ -21,21 +23,21 @@ define protected amdgpu_kernel void @vshr(ptr addrspace(4) byref(i64) %"29", ptr
store i32 %"31", ptr addrspace(5) %"7", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"33" = inttoptr i64 %"16" to ptr
- %"40" = getelementptr inbounds i8, ptr %"33", i64 4
- %"34" = load i32, ptr %"40", align 4
+ %"39" = getelementptr inbounds i8, ptr %"33", i64 4
+ %"34" = load i32, ptr %"39", align 4
store i32 %"34", ptr addrspace(5) %"8", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"35" = inttoptr i64 %"18" to ptr
- %"42" = getelementptr inbounds i8, ptr %"35", i64 8
- %"36" = load i32, ptr %"42", align 4
+ %"41" = getelementptr inbounds i8, ptr %"35", i64 8
+ %"36" = load i32, ptr %"41", align 4
store i32 %"36", ptr addrspace(5) %"9", align 4
%"20" = load i32, ptr addrspace(5) %"7", align 4
%"21" = load i32, ptr addrspace(5) %"8", align 4
%"22" = load i32, ptr addrspace(5) %"9", align 4
- %0 = icmp ugt i32 %"21", 31
- %1 = lshr i32 %"20", %"21"
- %2 = select i1 %0, i32 0, i32 %1
- %"19" = add i32 %2, %"22"
+ %2 = icmp ugt i32 %"21", 31
+ %3 = lshr i32 %"20", %"21"
+ %4 = select i1 %2, i32 0, i32 %3
+ %"19" = add i32 %4, %"22"
store i32 %"19", ptr addrspace(5) %"6", align 4
%"23" = load i64, ptr addrspace(5) %"5", align 8
%"24" = load i32, ptr addrspace(5) %"6", align 4
diff --git a/ptx/src/test/spirv_run/xor.ll b/ptx/src/test/spirv_run/xor.ll
index 96b2914..bc0ad26 100644
--- a/ptx/src/test/spirv_run/xor.ll
+++ b/ptx/src/test/spirv_run/xor.ll
@@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
-"27":
%"8" = alloca i1, align 1, addrspace(5)
- store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
+ br label %1
+
+1: ; preds = %0
+ store i1 false, ptr addrspace(5) %"8", align 1
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
@@ -19,8 +21,8 @@ define protected amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"22", ptr
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
- %"29" = getelementptr inbounds i8, ptr %"25", i64 4
- %"13" = load i32, ptr %"29", align 4
+ %"28" = getelementptr inbounds i8, ptr %"25", i64 4
+ %"13" = load i32, ptr %"28", align 4
store i32 %"13", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"17" = load i32, ptr addrspace(5) %"7", align 4
diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs
index 1085258..b06fa52 100644
--- a/ptx/src/translate.rs
+++ b/ptx/src/translate.rs
@@ -2526,58 +2526,6 @@ fn insert_implicit_conversions2_impl<'input>(
Ok(result)
}
-fn normalize_labels<'input>(
- module: TranslationModule<'input, ExpandedArgParams>,
-) -> Result<TranslationModule<'input, ExpandedArgParams>, TranslateError> {
- convert_methods_simple(module, normalize_labels2_impl)
-}
-
-fn normalize_labels2_impl<'input>(
- id_defs: &mut IdNameMapBuilder<'input>,
- fn_body: Vec<ExpandedStatement>,
-) -> Result<Vec<ExpandedStatement>, TranslateError> {
- let mut labels_in_use = FxHashSet::default();
- for statement in fn_body.iter() {
- match statement {
- Statement::Instruction(i) => {
- if let Some(target) = i.jump_target() {
- labels_in_use.insert(target);
- }
- }
- Statement::Conditional(cond) => {
- labels_in_use.insert(cond.if_true);
- labels_in_use.insert(cond.if_false);
- }
- Statement::Call(..)
- | Statement::Variable(..)
- | Statement::LoadVar(..)
- | Statement::StoreVar(..)
- | Statement::RetValue(..)
- | Statement::Conversion(..)
- | Statement::Constant(..)
- | Statement::Label(..)
- | Statement::PtrAccess { .. }
- | Statement::RepackVector(..)
- | Statement::MadC(..)
- | Statement::MadCC(..)
- | Statement::AddC(..)
- | Statement::AddCC(..)
- | Statement::SubC(..)
- | Statement::SubCC(..)
- | Statement::AsmVolatile { .. }
- | Statement::FunctionPointer(..) => {}
- }
- }
- Ok(
- iter::once(Statement::Label(id_defs.register_intermediate(None)))
- .chain(fn_body.into_iter().filter(|s| match s {
- Statement::Label(i) => labels_in_use.contains(i),
- _ => true,
- }))
- .collect::<Vec<_>>(),
- )
-}
-
fn hoist_globals<'input, P: ast::ArgParams<Id = Id>>(
module: TranslationModule<'input, P>,
) -> TranslationModule<'input, P> {
@@ -3410,9 +3358,7 @@ fn to_llvm_module_impl2<'a, 'input>(
}
let translation_module = insert_implicit_conversions(translation_module)?;
let translation_module = insert_compilation_mode_prologue(translation_module);
- let translation_module = normalize_labels(translation_module)?;
let translation_module = hoist_globals(translation_module);
- let translation_module = move_variables_to_start(translation_module)?;
let mut translation_module = replace_instructions_with_builtins(translation_module)?;
if raytracing.is_some() {
translation_module = raytracing::replace_tex_builtins_hack(translation_module)?;
@@ -3439,49 +3385,6 @@ fn to_llvm_module_impl2<'a, 'input>(
})
}
-// From "Performance Tips for Frontend Authors" (https://llvm.org/docs/Frontend/PerformanceTips.html):
-// "The SROA (Scalar Replacement Of Aggregates) and Mem2Reg passes only attempt to eliminate alloca
-// instructions that are in the entry basic block. Given SSA is the canonical form expected by much
-// of the optimizer; if allocas can not be eliminated by Mem2Reg or SROA, the optimizer is likely to
-// be less effective than it could be."
-// Empirically, this is true. Moving allocas to the start gives us less spill-happy assembly
-fn move_variables_to_start<'input, P: ast::ArgParams<Id = Id>>(
- module: TranslationModule<'input, P>,
-) -> Result<TranslationModule<'input, P>, TranslateError> {
- convert_methods_simple(module, move_variables_to_start_impl)
-}
-
-fn move_variables_to_start_impl<'input, P: ast::ArgParams>(
- _: &mut IdNameMapBuilder<'input>,
- fn_body: Vec<Statement<ast::Instruction<P>, P>>,
-) -> Result<Vec<Statement<ast::Instruction<P>, P>>, TranslateError> {
- if fn_body.is_empty() {
- return Ok(fn_body);
- }
- let mut result = (0..fn_body.len())
- .into_iter()
- .map(|_| mem::MaybeUninit::<_>::uninit())
- .collect::<Vec<_>>();
- let variables_count = fn_body.iter().fold(0, |acc, statement| {
- acc + matches!(statement, Statement::Variable(..)) as usize
- });
- let mut variable = 1usize;
- let mut non_variable = variables_count + 1;
- // methods always start with an entry label
- let mut statements = fn_body.into_iter();
- let start_label = statements.next().ok_or_else(TranslateError::unreachable)?;
- unsafe { result.get_unchecked_mut(0).write(start_label) };
- for statement in statements {
- let index = match statement {
- Statement::Variable(_) => &mut variable,
- _ => &mut non_variable,
- };
- unsafe { result.get_unchecked_mut(*index).write(statement) };
- *index += 1;
- }
- Ok(unsafe { mem::transmute(result) })
-}
-
// PTX definition of param state space does not translate cleanly into AMDGPU notion of an address space:
//  .param in kernel arguments matches AMDGPU constant address space
// .param in function arguments and variables matches AMDGPU private address space
@@ -6901,15 +6804,6 @@ pub(crate) enum TypeKind {
Struct,
}
-impl<T: ast::ArgParams<Id = Id>> ast::Instruction<T> {
- fn jump_target(&self) -> Option<Id> {
- match self {
- ast::Instruction::Bra(_, a) => Some(a.src),
- _ => None,
- }
- }
-}
-
impl<T: ast::ArgParams> ast::Instruction<T> {
// .wide instructions don't support ftz, so it's enough to just look at the
// type declared by the instruction