diff options
author | Andrzej Janik <[email protected]> | 2024-04-05 23:26:08 +0200 |
---|---|---|
committer | GitHub <[email protected]> | 2024-04-05 23:26:08 +0200 |
commit | 0d9ace247567a07554294dc4653624943334a410 (patch) | |
tree | 0425b01dc5b98be2992d0949a123d8c6159f7a5c | |
parent | 76bae5f91bf81409b8f592e52a2658d787515fa8 (diff) | |
download | ZLUDA-0d9ace247567a07554294dc4653624943334a410.tar.gz ZLUDA-0d9ace247567a07554294dc4653624943334a410.zip |
Fix buggy carry flags when mixing subc/sub.cc with addc/add.cc (#197)
139 files changed, 4204 insertions, 4460 deletions
diff --git a/ptx/src/test/spirv_run/abs.ll b/ptx/src/test/spirv_run/abs.ll index c698e66..e086eda 100644 --- a/ptx/src/test/spirv_run/abs.ll +++ b/ptx/src/test/spirv_run/abs.ll @@ -1,44 +1,42 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { -"38": +define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { +"37": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"27", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"13" to ptr - %"30" = load i32, ptr %"31", align 4 - store i32 %"30", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"32" = inttoptr i64 %"15" to ptr - %"40" = getelementptr inbounds i8, ptr %"32", i64 4 - %"33" = load i32, ptr %"40", align 4 - store i32 %"33", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"16" = call i32 @llvm.abs.i32(i32 %"17", i1 false) - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i32, ptr addrspace(5) %"7", align 4 - %"18" = call i32 @llvm.abs.i32(i32 %"19", i1 false) - store 
i32 %"18", ptr addrspace(5) %"7", align 4 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load i32, ptr addrspace(5) %"6", align 4 - %"34" = inttoptr i64 %"20" to ptr - store i32 %"21", ptr %"34", align 4 - %"22" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = load i32, ptr addrspace(5) %"7", align 4 - %"36" = inttoptr i64 %"22" to ptr - %"42" = getelementptr inbounds i8, ptr %"36", i64 4 - store i32 %"23", ptr %"42", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"12" to ptr + %"29" = load i32, ptr %"30", align 4 + store i32 %"29", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"31" = inttoptr i64 %"14" to ptr + %"39" = getelementptr inbounds i8, ptr %"31", i64 4 + %"32" = load i32, ptr %"39", align 4 + store i32 %"32", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"15" = call i32 @llvm.abs.i32(i32 %"16", i1 false) + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i32, ptr addrspace(5) %"7", align 4 + %"17" = call i32 @llvm.abs.i32(i32 %"18", i1 false) + store i32 %"17", ptr addrspace(5) %"7", align 4 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load i32, ptr addrspace(5) %"6", align 4 + %"33" = inttoptr i64 %"19" to ptr + store i32 %"20", ptr %"33", align 4 + %"21" = load i64, ptr addrspace(5) %"5", align 8 + %"22" = load i32, ptr addrspace(5) %"7", align 4 + %"35" = inttoptr i64 %"21" to ptr + %"41" = getelementptr inbounds i8, ptr %"35", i64 4 + store i32 %"22", ptr %"41", align 4 ret void } diff --git a/ptx/src/test/spirv_run/activemask.ll b/ptx/src/test/spirv_run/activemask.ll index 4e53429..5ca886c 100644 --- a/ptx/src/test/spirv_run/activemask.ll +++ b/ptx/src/test/spirv_run/activemask.ll @@ -3,22 +3,20 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__activemask() #0 -define protected amdgpu_kernel void @activemask(ptr 
addrspace(4) byref(i64) %"12", ptr addrspace(4) byref(i64) %"13") #1 { -"16": +define protected amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"11", ptr addrspace(4) byref(i64) %"12") #1 { +"15": %"6" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"6", align 1 - %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i32, align 4, addrspace(5) - %"8" = load i64, ptr addrspace(4) %"13", align 8 - store i64 %"8", ptr addrspace(5) %"4", align 8 - %"9" = call i32 @__zluda_ptx_impl__activemask() - store i32 %"9", ptr addrspace(5) %"5", align 4 - %"10" = load i64, ptr addrspace(5) %"4", align 8 - %"11" = load i32, ptr addrspace(5) %"5", align 4 - %"14" = inttoptr i64 %"10" to ptr - store i32 %"11", ptr %"14", align 4 + %"7" = load i64, ptr addrspace(4) %"12", align 8 + store i64 %"7", ptr addrspace(5) %"4", align 8 + %"8" = call i32 @__zluda_ptx_impl__activemask() + store i32 %"8", ptr addrspace(5) %"5", align 4 + %"9" = load i64, ptr addrspace(5) %"4", align 8 + %"10" = load i32, ptr addrspace(5) %"5", align 4 + %"13" = inttoptr i64 %"9" to ptr + store i32 %"10", ptr %"13", align 4 ret void } diff --git a/ptx/src/test/spirv_run/add.ll b/ptx/src/test/spirv_run/add.ll index 3b11a73..6a8ed12 100644 --- a/ptx/src/test/spirv_run/add.ll +++ b/ptx/src/test/spirv_run/add.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": +define protected amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - 
%"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = add i64 %"15", 1 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"13" = add i64 %"14", 1 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/add_global.ll b/ptx/src/test/spirv_run/add_global.ll index 14ae1f9..754623c 100644 --- a/ptx/src/test/spirv_run/add_global.ll +++ b/ptx/src/test/spirv_run/add_global.ll @@ -3,34 +3,32 @@ target triple = "amdgcn-amd-amdhsa" @PI = protected addrspace(1) externally_initialized global float 0x400921FB60000000, align 4 -define protected amdgpu_kernel void @add_global(ptr addrspace(4) 
byref(i64) %"21", ptr addrspace(4) byref(i64) %"22") #0 { -"25": +define protected amdgpu_kernel void @add_global(ptr addrspace(4) byref(i64) %"20", ptr addrspace(4) byref(i64) %"21") #0 { +"24": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"20", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(4) %"21", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(4) %"22", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = inttoptr i64 %"14" to ptr - %"13" = load float, ptr %"23", align 4 - store float %"13", ptr addrspace(5) %"7", align 4 - %"15" = load float, ptr addrspace(1) @PI, align 4 - store float %"15", ptr addrspace(5) %"8", align 4 - %"17" = load float, ptr addrspace(5) %"7", align 4 - %"18" = load float, ptr addrspace(5) %"8", align 4 - %"16" = fadd float %"17", %"18" - store float %"16", ptr addrspace(5) %"7", align 4 - %"19" = load i64, ptr addrspace(5) %"6", align 8 - %"20" = load float, ptr addrspace(5) %"7", align 4 - %"24" = inttoptr i64 %"19" to ptr - store float %"20", ptr %"24", align 4 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %"22" = inttoptr i64 %"13" to ptr + %"12" = load float, ptr %"22", align 4 + store float %"12", ptr addrspace(5) %"7", align 4 + %"14" = load float, ptr addrspace(1) @PI, align 4 + store float %"14", ptr addrspace(5) %"8", align 4 + %"16" = load float, ptr addrspace(5) %"7", align 4 + %"17" = load float, ptr addrspace(5) %"8", align 4 + %"15" = fadd float %"16", %"17" + store float %"15", 
ptr addrspace(5) %"7", align 4 + %"18" = load i64, ptr addrspace(5) %"6", align 8 + %"19" = load float, ptr addrspace(5) %"7", align 4 + %"23" = inttoptr i64 %"18" to ptr + store float %"19", ptr %"23", align 4 ret void } diff --git a/ptx/src/test/spirv_run/add_non_coherent.ll b/ptx/src/test/spirv_run/add_non_coherent.ll index 7cf364c..ab8d0bc 100644 --- a/ptx/src/test/spirv_run/add_non_coherent.ll +++ b/ptx/src/test/spirv_run/add_non_coherent.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": +define protected amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr addrspace(1) - %"12" = load i64, ptr addrspace(1) %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = add i64 %"15", 1 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", 
align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr addrspace(1) - store i64 %"17", ptr addrspace(1) %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr addrspace(1) + %"11" = load i64, ptr addrspace(1) %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"13" = add i64 %"14", 1 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr addrspace(1) + store i64 %"16", ptr addrspace(1) %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/add_param_ptr.ll b/ptx/src/test/spirv_run/add_param_ptr.ll index 9d90b23..810e9c8 100644 --- a/ptx/src/test/spirv_run/add_param_ptr.ll +++ b/ptx/src/test/spirv_run/add_param_ptr.ll @@ -1,47 +1,45 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @add_param_ptr(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"39": +define protected amdgpu_kernel void @add_param_ptr(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 { +"38": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) - %"32" = ptrtoint ptr addrspace(4) %"27" to i64 + %"31" = ptrtoint ptr addrspace(4) %"26" to i64 %0 = alloca i64, align 8, addrspace(5) - store i64 %"32", ptr 
addrspace(5) %0, align 8 - %"31" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"31", ptr addrspace(5) %"4", align 8 - %"34" = ptrtoint ptr addrspace(4) %"28" to i64 + store i64 %"31", ptr addrspace(5) %0, align 8 + %"30" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"30", ptr addrspace(5) %"4", align 8 + %"33" = ptrtoint ptr addrspace(4) %"27" to i64 %1 = alloca i64, align 8, addrspace(5) - store i64 %"34", ptr addrspace(5) %1, align 8 - %"33" = load i64, ptr addrspace(5) %1, align 8 - store i64 %"33", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"35" = inttoptr i64 %"13" to ptr addrspace(4) - %"41" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0 - %"12" = load i64, ptr addrspace(4) %"41", align 8 - store i64 %"12", ptr addrspace(5) %"4", align 8 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"36" = inttoptr i64 %"15" to ptr addrspace(4) - %"43" = getelementptr inbounds i8, ptr addrspace(4) %"36", i64 0 - %"14" = load i64, ptr addrspace(4) %"43", align 8 - store i64 %"14", ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"37" = inttoptr i64 %"17" to ptr - %"16" = load i64, ptr %"37", align 8 - store i64 %"16", ptr addrspace(5) %"6", align 8 - %"19" = load i64, ptr addrspace(5) %"6", align 8 - %"18" = add i64 %"19", 1 - store i64 %"18", ptr addrspace(5) %"7", align 8 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load i64, ptr addrspace(5) %"7", align 8 - %"38" = inttoptr i64 %"20" to ptr - store i64 %"21", ptr %"38", align 8 + store i64 %"33", ptr addrspace(5) %1, align 8 + %"32" = load i64, ptr addrspace(5) %1, align 8 + store i64 %"32", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"34" = inttoptr i64 %"12" to ptr addrspace(4) + %"40" = getelementptr inbounds i8, ptr addrspace(4) %"34", i64 0 + %"11" = load i64, ptr addrspace(4) %"40", align 8 + store i64 %"11", ptr addrspace(5) %"4", align 8 + %"14" = 
load i64, ptr addrspace(5) %"5", align 8 + %"35" = inttoptr i64 %"14" to ptr addrspace(4) + %"42" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0 + %"13" = load i64, ptr addrspace(4) %"42", align 8 + store i64 %"13", ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"36" = inttoptr i64 %"16" to ptr + %"15" = load i64, ptr %"36", align 8 + store i64 %"15", ptr addrspace(5) %"6", align 8 + %"18" = load i64, ptr addrspace(5) %"6", align 8 + %"17" = add i64 %"18", 1 + store i64 %"17", ptr addrspace(5) %"7", align 8 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load i64, ptr addrspace(5) %"7", align 8 + %"37" = inttoptr i64 %"19" to ptr + store i64 %"20", ptr %"37", align 8 ret void } diff --git a/ptx/src/test/spirv_run/add_tuning.ll b/ptx/src/test/spirv_run/add_tuning.ll index 1f36397..9ec6795 100644 --- a/ptx/src/test/spirv_run/add_tuning.ll +++ b/ptx/src/test/spirv_run/add_tuning.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": +define protected amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) 
%"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = add i64 %"15", 1 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"13" = add i64 %"14", 1 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/addc_cc.ll b/ptx/src/test/spirv_run/addc_cc.ll index 9015a80..3299982 100644 --- a/ptx/src/test/spirv_run/addc_cc.ll +++ b/ptx/src/test/spirv_run/addc_cc.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"54", ptr addrspace(4) byref(i64) %"55") #0 { -"69": +define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53", ptr addrspace(4) byref(i64) %"54") #0 { +"68": %"13" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"13", align 1 - %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr 
addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -16,70 +14,70 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"54", %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i32, align 4, addrspace(5) + %"14" = load i64, ptr addrspace(4) %"53", align 8 + store i64 %"14", ptr addrspace(5) %"4", align 8 %"15" = load i64, ptr addrspace(4) %"54", align 8 - store i64 %"15", ptr addrspace(5) %"4", align 8 - %"16" = load i64, ptr addrspace(4) %"55", align 8 - store i64 %"16", ptr addrspace(5) %"5", align 8 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"57" = inttoptr i64 %"18" to ptr - %"56" = load i32, ptr %"57", align 4 - store i32 %"56", ptr addrspace(5) %"9", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"58" = inttoptr i64 %"20" to ptr - %"71" = getelementptr inbounds i8, ptr %"58", i64 4 - %"59" = load i32, ptr %"71", align 4 - store i32 %"59", ptr addrspace(5) %"10", align 4 - %"22" = load i64, ptr addrspace(5) %"4", align 8 - %"60" = inttoptr i64 %"22" to ptr - %"73" = getelementptr inbounds i8, ptr %"60", i64 8 - %"21" = load i32, ptr %"73", align 4 - store i32 %"21", ptr addrspace(5) %"11", align 4 - %"24" = load i64, ptr addrspace(5) %"4", align 8 - %"61" = inttoptr i64 %"24" to ptr - %"75" = getelementptr inbounds i8, ptr %"61", i64 12 - %"23" = load i32, ptr %"75", align 4 - store i32 %"23", ptr addrspace(5) %"12", align 4 - %"27" = load i32, ptr addrspace(5) %"9", align 4 - %"28" = load i32, ptr addrspace(5) %"10", align 4 - %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"27", i32 %"28") - %"25" = extractvalue { i32, i1 } %0, 0 - %"26" = extractvalue { i32, i1 } %0, 1 - store i32 %"25", ptr addrspace(5) %"6", align 4 - store i1 %"26", ptr addrspace(5) %"13", align 1 - %"31" = load i1, ptr addrspace(5) %"13", align 1 - %"32" = load i32, ptr addrspace(5) 
%"6", align 4 - %"33" = load i32, ptr addrspace(5) %"11", align 4 - %1 = zext i1 %"31" to i32 - %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"32", i32 %"33") + store i64 %"15", ptr addrspace(5) %"5", align 8 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"56" = inttoptr i64 %"17" to ptr + %"55" = load i32, ptr %"56", align 4 + store i32 %"55", ptr addrspace(5) %"9", align 4 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"57" = inttoptr i64 %"19" to ptr + %"70" = getelementptr inbounds i8, ptr %"57", i64 4 + %"58" = load i32, ptr %"70", align 4 + store i32 %"58", ptr addrspace(5) %"10", align 4 + %"21" = load i64, ptr addrspace(5) %"4", align 8 + %"59" = inttoptr i64 %"21" to ptr + %"72" = getelementptr inbounds i8, ptr %"59", i64 8 + %"20" = load i32, ptr %"72", align 4 + store i32 %"20", ptr addrspace(5) %"11", align 4 + %"23" = load i64, ptr addrspace(5) %"4", align 8 + %"60" = inttoptr i64 %"23" to ptr + %"74" = getelementptr inbounds i8, ptr %"60", i64 12 + %"22" = load i32, ptr %"74", align 4 + store i32 %"22", ptr addrspace(5) %"12", align 4 + %"26" = load i32, ptr addrspace(5) %"9", align 4 + %"27" = load i32, ptr addrspace(5) %"10", align 4 + %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"26", i32 %"27") + %"24" = extractvalue { i32, i1 } %0, 0 + %"25" = extractvalue { i32, i1 } %0, 1 + store i32 %"24", ptr addrspace(5) %"6", align 4 + store i1 %"25", ptr addrspace(5) %"13", align 1 + %"30" = load i1, ptr addrspace(5) %"13", align 1 + %"31" = load i32, ptr addrspace(5) %"6", align 4 + %"32" = load i32, ptr addrspace(5) %"11", align 4 + %1 = zext i1 %"30" to i32 + %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"31", i32 %"32") %3 = extractvalue { i32, i1 } %2, 0 %4 = extractvalue { i32, i1 } %2, 1 %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1) - %"29" = extractvalue { i32, i1 } %5, 0 + %"28" = extractvalue { i32, i1 } %5, 0 %6 = extractvalue { i32, i1 } %5, 1 - %"30" = xor i1 %4, %6 - store i32 
%"29", ptr addrspace(5) %"7", align 4 - store i1 %"30", ptr addrspace(5) %"13", align 1 - %"35" = load i1, ptr addrspace(5) %"13", align 1 - %"36" = load i32, ptr addrspace(5) %"7", align 4 - %"37" = load i32, ptr addrspace(5) %"12", align 4 - %7 = zext i1 %"35" to i32 - %8 = add i32 %"36", %"37" - %"34" = add i32 %8, %7 - store i32 %"34", ptr addrspace(5) %"8", align 4 - %"38" = load i64, ptr addrspace(5) %"5", align 8 - %"39" = load i32, ptr addrspace(5) %"6", align 4 - %"66" = inttoptr i64 %"38" to ptr - store i32 %"39", ptr %"66", align 4 - %"40" = load i64, ptr addrspace(5) %"5", align 8 - %"41" = load i32, ptr addrspace(5) %"7", align 4 - %"67" = inttoptr i64 %"40" to ptr - %"77" = getelementptr inbounds i8, ptr %"67", i64 4 - store i32 %"41", ptr %"77", align 4 - %"42" = load i64, ptr addrspace(5) %"5", align 8 - %"43" = load i32, ptr addrspace(5) %"8", align 4 - %"68" = inttoptr i64 %"42" to ptr - %"79" = getelementptr inbounds i8, ptr %"68", i64 8 - store i32 %"43", ptr %"79", align 4 + %"29" = xor i1 %4, %6 + store i32 %"28", ptr addrspace(5) %"7", align 4 + store i1 %"29", ptr addrspace(5) %"13", align 1 + %"34" = load i1, ptr addrspace(5) %"13", align 1 + %"35" = load i32, ptr addrspace(5) %"7", align 4 + %"36" = load i32, ptr addrspace(5) %"12", align 4 + %7 = zext i1 %"34" to i32 + %8 = add i32 %"35", %"36" + %"33" = add i32 %8, %7 + store i32 %"33", ptr addrspace(5) %"8", align 4 + %"37" = load i64, ptr addrspace(5) %"5", align 8 + %"38" = load i32, ptr addrspace(5) %"6", align 4 + %"65" = inttoptr i64 %"37" to ptr + store i32 %"38", ptr %"65", align 4 + %"39" = load i64, ptr addrspace(5) %"5", align 8 + %"40" = load i32, ptr addrspace(5) %"7", align 4 + %"66" = inttoptr i64 %"39" to ptr + %"76" = getelementptr inbounds i8, ptr %"66", i64 4 + store i32 %"40", ptr %"76", align 4 + %"41" = load i64, ptr addrspace(5) %"5", align 8 + %"42" = load i32, ptr addrspace(5) %"8", align 4 + %"67" = inttoptr i64 %"41" to ptr + %"78" = getelementptr inbounds i8, 
ptr %"67", i64 8 + store i32 %"42", ptr %"78", align 4 ret void } diff --git a/ptx/src/test/spirv_run/addc_cc2.ll b/ptx/src/test/spirv_run/addc_cc2.ll index 982be96..836d8d5 100644 --- a/ptx/src/test/spirv_run/addc_cc2.ll +++ b/ptx/src/test/spirv_run/addc_cc2.ll @@ -1,63 +1,61 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @addc_cc2(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { -"51": +define protected amdgpu_kernel void @addc_cc2(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 { +"50": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) - %"11" = load i64, ptr addrspace(4) %"41", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 + %"10" = load i64, ptr addrspace(4) %"40", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) - %"42" = extractvalue { i32, i1 } %0, 0 - %"13" = extractvalue { i32, i1 } %0, 1 - store i32 %"42", ptr addrspace(5) %"6", align 4 - store i1 %"13", ptr addrspace(5) %"9", align 1 - %"16" = load i1, ptr addrspace(5) %"9", align 1 - %1 = zext i1 %"16" to i32 + %"41" = extractvalue { i32, i1 } %0, 0 + %"12" = extractvalue { i32, i1 } %0, 1 + store i32 %"41", ptr addrspace(5) %"6", align 4 + store i1 %"12", ptr addrspace(5) %"9", align 1 + %"15" = load i1, ptr addrspace(5) %"9", align 1 + %1 = zext i1 %"15" to i32 %2 = call { i32, i1 } 
@llvm.uadd.with.overflow.i32(i32 -4, i32 -4) %3 = extractvalue { i32, i1 } %2, 0 %4 = extractvalue { i32, i1 } %2, 1 %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1) - %"43" = extractvalue { i32, i1 } %5, 0 + %"42" = extractvalue { i32, i1 } %5, 0 %6 = extractvalue { i32, i1 } %5, 1 - %"15" = xor i1 %4, %6 - store i32 %"43", ptr addrspace(5) %"6", align 4 - store i1 %"15", ptr addrspace(5) %"9", align 1 - %"18" = load i1, ptr addrspace(5) %"9", align 1 - %7 = zext i1 %"18" to i32 - %"44" = add i32 0, %7 - store i32 %"44", ptr addrspace(5) %"7", align 4 - %"21" = load i1, ptr addrspace(5) %"9", align 1 - %8 = zext i1 %"21" to i32 + %"14" = xor i1 %4, %6 + store i32 %"42", ptr addrspace(5) %"6", align 4 + store i1 %"14", ptr addrspace(5) %"9", align 1 + %"17" = load i1, ptr addrspace(5) %"9", align 1 + %7 = zext i1 %"17" to i32 + %"43" = add i32 0, %7 + store i32 %"43", ptr addrspace(5) %"7", align 4 + %"20" = load i1, ptr addrspace(5) %"9", align 1 + %8 = zext i1 %"20" to i32 %9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1) %10 = extractvalue { i32, i1 } %9, 0 %11 = extractvalue { i32, i1 } %9, 1 %12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %10, i32 %8) - %"45" = extractvalue { i32, i1 } %12, 0 + %"44" = extractvalue { i32, i1 } %12, 0 %13 = extractvalue { i32, i1 } %12, 1 - %"20" = xor i1 %11, %13 - store i32 %"45", ptr addrspace(5) %"6", align 4 - store i1 %"20", ptr addrspace(5) %"9", align 1 - %"23" = load i1, ptr addrspace(5) %"9", align 1 - %14 = zext i1 %"23" to i32 - %"46" = add i32 0, %14 - store i32 %"46", ptr addrspace(5) %"8", align 4 - %"24" = load i64, ptr addrspace(5) %"5", align 8 - %"25" = load i32, ptr addrspace(5) %"7", align 4 - %"47" = inttoptr i64 %"24" to ptr - store i32 %"25", ptr %"47", align 4 - %"26" = load i64, ptr addrspace(5) %"5", align 8 - %"27" = load i32, ptr addrspace(5) %"8", align 4 - %"49" = inttoptr i64 %"26" to ptr - %"53" = getelementptr inbounds i8, ptr %"49", i64 4 - store i32 
%"27", ptr %"53", align 4 + %"19" = xor i1 %11, %13 + store i32 %"44", ptr addrspace(5) %"6", align 4 + store i1 %"19", ptr addrspace(5) %"9", align 1 + %"22" = load i1, ptr addrspace(5) %"9", align 1 + %14 = zext i1 %"22" to i32 + %"45" = add i32 0, %14 + store i32 %"45", ptr addrspace(5) %"8", align 4 + %"23" = load i64, ptr addrspace(5) %"5", align 8 + %"24" = load i32, ptr addrspace(5) %"7", align 4 + %"46" = inttoptr i64 %"23" to ptr + store i32 %"24", ptr %"46", align 4 + %"25" = load i64, ptr addrspace(5) %"5", align 8 + %"26" = load i32, ptr addrspace(5) %"8", align 4 + %"48" = inttoptr i64 %"25" to ptr + %"52" = getelementptr inbounds i8, ptr %"48", i64 4 + store i32 %"26", ptr %"52", align 4 ret void } diff --git a/ptx/src/test/spirv_run/alloca_call.ll b/ptx/src/test/spirv_run/alloca_call.ll index 1ae760b..e6a9d6f 100644 --- a/ptx/src/test/spirv_run/alloca_call.ll +++ b/ptx/src/test/spirv_run/alloca_call.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 { -"59": +define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 { +"58": %"22" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"22", align 1 - %"23" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"23", align 1 %"7" = alloca i1, align 1, addrspace(5) %"8" = alloca double, align 8, addrspace(5) %"9" = alloca double, align 8, addrspace(5) @@ -14,47 +12,47 @@ define protected 
amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr %"11" = alloca i64, align 8, addrspace(5) %"12" = alloca i64, align 8, addrspace(5) %"13" = alloca i64, align 8, addrspace(5) - %"47" = alloca i64, align 8, addrspace(5) - %"49" = alloca [4 x i32], align 16, addrspace(5) + %"46" = alloca i64, align 8, addrspace(5) + %"48" = alloca [4 x i32], align 16, addrspace(5) + %"50" = load i64, ptr addrspace(4) %"42", align 8 + store i64 %"50", ptr addrspace(5) %"10", align 8 %"51" = load i64, ptr addrspace(4) %"43", align 8 - store i64 %"51", ptr addrspace(5) %"10", align 8 + store i64 %"51", ptr addrspace(5) %"11", align 8 %"52" = load i64, ptr addrspace(4) %"44", align 8 - store i64 %"52", ptr addrspace(5) %"11", align 8 + store i64 %"52", ptr addrspace(5) %"12", align 8 %"53" = load i64, ptr addrspace(4) %"45", align 8 - store i64 %"53", ptr addrspace(5) %"12", align 8 - %"54" = load i64, ptr addrspace(4) %"46", align 8 - store i64 %"54", ptr addrspace(5) %"13", align 8 - %"29" = load i64, ptr addrspace(5) %"12", align 8 - %"30" = load i64, ptr addrspace(5) %"13", align 8 - %"28" = icmp sge i64 %"29", %"30" - store i1 %"28", ptr addrspace(5) %"7", align 1 - %"31" = load i1, ptr addrspace(5) %"7", align 1 - br i1 %"31", label %"6", label %"18" + store i64 %"53", ptr addrspace(5) %"13", align 8 + %"28" = load i64, ptr addrspace(5) %"12", align 8 + %"29" = load i64, ptr addrspace(5) %"13", align 8 + %"27" = icmp sge i64 %"28", %"29" + store i1 %"27", ptr addrspace(5) %"7", align 1 + %"30" = load i1, ptr addrspace(5) %"7", align 1 + br i1 %"30", label %"6", label %"18" -"18": ; preds = %"59" +"18": ; preds = %"58" + %"31" = load i64, ptr addrspace(5) %"11", align 8 + %"60" = getelementptr inbounds i8, ptr addrspace(5) %"46", i64 0 + store i64 %"31", ptr addrspace(5) %"60", align 8 %"32" = load i64, ptr addrspace(5) %"11", align 8 - %"61" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0 - store i64 %"32", ptr addrspace(5) %"61", align 8 - %"33" = load 
i64, ptr addrspace(5) %"11", align 8 - %0 = inttoptr i64 %"33" to ptr + %0 = inttoptr i64 %"32" to ptr %"21" = call [4 x i32] %0() - store [4 x i32] %"21", ptr addrspace(5) %"49", align 4 - %"63" = getelementptr inbounds i8, ptr addrspace(5) %"49", i64 0 - %"19" = load <2 x double>, ptr addrspace(5) %"63", align 16 - %"34" = extractelement <2 x double> %"19", i32 0 - %"35" = extractelement <2 x double> %"19", i32 1 - store double %"34", ptr addrspace(5) %"8", align 8 - store double %"35", ptr addrspace(5) %"9", align 8 - %"36" = load double, ptr addrspace(5) %"8", align 8 - %"37" = load double, ptr addrspace(5) %"9", align 8 - %1 = insertelement <2 x double> undef, double %"36", i32 0 - %"20" = insertelement <2 x double> %1, double %"37", i32 1 - %"38" = load i64, ptr addrspace(5) %"10", align 8 - %"58" = inttoptr i64 %"38" to ptr addrspace(1) - store <2 x double> %"20", ptr addrspace(1) %"58", align 16 + store [4 x i32] %"21", ptr addrspace(5) %"48", align 4 + %"62" = getelementptr inbounds i8, ptr addrspace(5) %"48", i64 0 + %"19" = load <2 x double>, ptr addrspace(5) %"62", align 16 + %"33" = extractelement <2 x double> %"19", i32 0 + %"34" = extractelement <2 x double> %"19", i32 1 + store double %"33", ptr addrspace(5) %"8", align 8 + store double %"34", ptr addrspace(5) %"9", align 8 + %"35" = load double, ptr addrspace(5) %"8", align 8 + %"36" = load double, ptr addrspace(5) %"9", align 8 + %1 = insertelement <2 x double> undef, double %"35", i32 0 + %"20" = insertelement <2 x double> %1, double %"36", i32 1 + %"37" = load i64, ptr addrspace(5) %"10", align 8 + %"57" = inttoptr i64 %"37" to ptr addrspace(1) + store <2 x double> %"20", ptr addrspace(1) %"57", align 16 br label %"6" -"6": ; preds = %"18", %"59" +"6": ; preds = %"18", %"58" ret void } diff --git a/ptx/src/test/spirv_run/amdgpu_unnamed.ll b/ptx/src/test/spirv_run/amdgpu_unnamed.ll index b08350b..61e3de4 100644 --- a/ptx/src/test/spirv_run/amdgpu_unnamed.ll +++ 
b/ptx/src/test/spirv_run/amdgpu_unnamed.ll @@ -7,12 +7,10 @@ target triple = "amdgcn-amd-amdhsa" declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0 -define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"58", ptr addrspace(4) byref(i64) %"59") #1 { -"74": +define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #1 { +"73": %"33" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"33", align 1 - %"34" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"34", align 1 %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) @@ -20,63 +18,63 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"18" = alloca i1, align 1, addrspace(5) %"19" = alloca i64, align 8, addrspace(5) %"20" = alloca i32, align 4, addrspace(5) + %"59" = alloca i64, align 8, addrspace(5) %"60" = alloca i64, align 8, addrspace(5) - %"61" = alloca i64, align 8, addrspace(5) - %"62" = alloca i32, align 4, addrspace(5) + %"61" = alloca i32, align 4, addrspace(5) + %"62" = alloca i64, align 8, addrspace(5) %"63" = alloca i64, align 8, addrspace(5) - %"64" = alloca i64, align 8, addrspace(5) + %"34" = load i64, ptr addrspace(4) %"57", align 8 + store i64 %"34", ptr addrspace(5) %"14", align 8 %"35" = load i64, ptr addrspace(4) %"58", align 8 - store i64 %"35", ptr addrspace(5) %"14", align 8 - %"36" = load i64, ptr addrspace(4) %"59", align 8 - store i64 %"36", ptr addrspace(5) %"15", align 8 - %"38" = load i64, ptr addrspace(5) %"14", align 8 - %"66" = inttoptr i64 %"38" to ptr - %"37" = load i64, ptr %"66", align 8 - store i64 %"37", ptr addrspace(5) %"16", align 8 - %"40" = load i64, ptr addrspace(5) %"16", align 8 - %"39" = icmp uge i64 %"40", 1 - store i1 %"39", ptr addrspace(5) %"18", align 1 - %"41" = load i1, ptr addrspace(5) %"18", align 1 - br i1 
%"41", label %"13", label %"27" + store i64 %"35", ptr addrspace(5) %"15", align 8 + %"37" = load i64, ptr addrspace(5) %"14", align 8 + %"65" = inttoptr i64 %"37" to ptr + %"36" = load i64, ptr %"65", align 8 + store i64 %"36", ptr addrspace(5) %"16", align 8 + %"39" = load i64, ptr addrspace(5) %"16", align 8 + %"38" = icmp uge i64 %"39", 1 + store i1 %"38", ptr addrspace(5) %"18", align 1 + %"40" = load i1, ptr addrspace(5) %"18", align 1 + br i1 %"40", label %"13", label %"27" -"27": ; preds = %"74" +"27": ; preds = %"73" %0 = alloca i64, align 8, addrspace(5) store i64 ptrtoint (ptr addrspace(1) @0 to i64), ptr addrspace(5) %0, align 8 - %"67" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"67", ptr addrspace(5) %"19", align 8 - %"43" = load i64, ptr addrspace(5) %"19", align 8 - store i64 %"43", ptr addrspace(5) %"60", align 8 + %"66" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"66", ptr addrspace(5) %"19", align 8 + %"42" = load i64, ptr addrspace(5) %"19", align 8 + store i64 %"42", ptr addrspace(5) %"59", align 8 %1 = alloca i64, align 8, addrspace(5) store i64 ptrtoint (ptr addrspace(1) @1 to i64), ptr addrspace(5) %1, align 8 - %"69" = load i64, ptr addrspace(5) %1, align 8 - store i64 %"69", ptr addrspace(5) %"19", align 8 - %"45" = load i64, ptr addrspace(5) %"19", align 8 - store i64 %"45", ptr addrspace(5) %"61", align 8 - store i32 1, ptr addrspace(5) %"62", align 4 + %"68" = load i64, ptr addrspace(5) %1, align 8 + store i64 %"68", ptr addrspace(5) %"19", align 8 + %"44" = load i64, ptr addrspace(5) %"19", align 8 + store i64 %"44", ptr addrspace(5) %"60", align 8 + store i32 1, ptr addrspace(5) %"61", align 4 %2 = alloca i64, align 8, addrspace(5) store i64 ptrtoint (ptr addrspace(1) @2 to i64), ptr addrspace(5) %2, align 8 - %"71" = load i64, ptr addrspace(5) %2, align 8 - store i64 %"71", ptr addrspace(5) %"19", align 8 - %"47" = load i64, ptr addrspace(5) %"19", align 8 - store i64 %"47", ptr addrspace(5) %"63", align 8 - %"76" 
= getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0 - store i64 1, ptr addrspace(5) %"76", align 8 - %"28" = load i64, ptr addrspace(5) %"60", align 8 - %"29" = load i64, ptr addrspace(5) %"61", align 8 - %"30" = load i32, ptr addrspace(5) %"62", align 4 - %"31" = load i64, ptr addrspace(5) %"63", align 8 - %"32" = load i64, ptr addrspace(5) %"64", align 8 + %"70" = load i64, ptr addrspace(5) %2, align 8 + store i64 %"70", ptr addrspace(5) %"19", align 8 + %"46" = load i64, ptr addrspace(5) %"19", align 8 + store i64 %"46", ptr addrspace(5) %"62", align 8 + %"75" = getelementptr inbounds i8, ptr addrspace(5) %"63", i64 0 + store i64 1, ptr addrspace(5) %"75", align 8 + %"28" = load i64, ptr addrspace(5) %"59", align 8 + %"29" = load i64, ptr addrspace(5) %"60", align 8 + %"30" = load i32, ptr addrspace(5) %"61", align 4 + %"31" = load i64, ptr addrspace(5) %"62", align 8 + %"32" = load i64, ptr addrspace(5) %"63", align 8 call void @__zluda_ptx_impl____assertfail(i64 %"28", i64 %"29", i32 %"30", i64 %"31", i64 %"32") br label %"13" -"13": ; preds = %"27", %"74" - %"49" = load i64, ptr addrspace(5) %"16", align 8 - %"48" = add i64 %"49", 1 - store i64 %"48", ptr addrspace(5) %"17", align 8 - %"50" = load i64, ptr addrspace(5) %"15", align 8 - %"51" = load i64, ptr addrspace(5) %"17", align 8 - %"73" = inttoptr i64 %"50" to ptr - store i64 %"51", ptr %"73", align 8 +"13": ; preds = %"27", %"73" + %"48" = load i64, ptr addrspace(5) %"16", align 8 + %"47" = add i64 %"48", 1 + store i64 %"47", ptr addrspace(5) %"17", align 8 + %"49" = load i64, ptr addrspace(5) %"15", align 8 + %"50" = load i64, ptr addrspace(5) %"17", align 8 + %"72" = inttoptr i64 %"49" to ptr + store i64 %"50", ptr %"72", align 8 ret void } diff --git a/ptx/src/test/spirv_run/and.ll b/ptx/src/test/spirv_run/and.ll index 2862bcc..c90f390 100644 --- a/ptx/src/test/spirv_run/and.ll +++ b/ptx/src/test/spirv_run/and.ll @@ -1,37 +1,35 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"31": +define protected amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"30": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"33" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load i32, ptr %"33", align 4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %"27" = and i32 %"17", %"18" - store i32 %"27", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"30" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"30", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 
%"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"32" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"32", align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %"26" = and i32 %"16", %"17" + store i32 %"26", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"29" = inttoptr i64 %"18" to ptr + store i32 %"19", ptr %"29", align 4 ret void } diff --git a/ptx/src/test/spirv_run/assertfail.ll b/ptx/src/test/spirv_run/assertfail.ll index 0fb51f7..001dbfe 100644 --- a/ptx/src/test/spirv_run/assertfail.ll +++ b/ptx/src/test/spirv_run/assertfail.ll @@ -3,62 +3,60 @@ target triple = "amdgcn-amd-amdhsa" declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0 -define protected amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"63", ptr addrspace(4) byref(i64) %"64") #1 { -"82": +define protected amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"62", ptr addrspace(4) byref(i64) %"63") #1 { +"81": %"35" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"35", align 1 - %"36" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"36", align 1 %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) %"18" = alloca i64, align 8, addrspace(5) %"19" = alloca i32, align 4, addrspace(5) - %"65" = alloca i64, align 8, addrspace(5) - %"67" = alloca i64, align 8, addrspace(5) - %"69" = alloca i32, align 4, addrspace(5) - %"71" = alloca i64, align 8, addrspace(5) - %"73" = alloca i64, align 8, addrspace(5) + %"64" = alloca i64, align 8, addrspace(5) + %"66" = alloca i64, align 8, addrspace(5) + 
%"68" = alloca i32, align 4, addrspace(5) + %"70" = alloca i64, align 8, addrspace(5) + %"72" = alloca i64, align 8, addrspace(5) + %"36" = load i64, ptr addrspace(4) %"62", align 8 + store i64 %"36", ptr addrspace(5) %"15", align 8 %"37" = load i64, ptr addrspace(4) %"63", align 8 - store i64 %"37", ptr addrspace(5) %"15", align 8 - %"38" = load i64, ptr addrspace(4) %"64", align 8 - store i64 %"38", ptr addrspace(5) %"16", align 8 + store i64 %"37", ptr addrspace(5) %"16", align 8 %0 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %0, align 4 - %"75" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"75", ptr addrspace(5) %"19", align 4 + %"74" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"74", ptr addrspace(5) %"19", align 4 + %"39" = load i64, ptr addrspace(5) %"15", align 8 + %"83" = getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0 + store i64 %"39", ptr addrspace(5) %"83", align 8 %"40" = load i64, ptr addrspace(5) %"15", align 8 - %"84" = getelementptr inbounds i8, ptr addrspace(5) %"65", i64 0 - store i64 %"40", ptr addrspace(5) %"84", align 8 - %"41" = load i64, ptr addrspace(5) %"15", align 8 - %"86" = getelementptr inbounds i8, ptr addrspace(5) %"67", i64 0 - store i64 %"41", ptr addrspace(5) %"86", align 8 - %"42" = load i32, ptr addrspace(5) %"19", align 4 - %"88" = getelementptr inbounds i8, ptr addrspace(5) %"69", i64 0 - store i32 %"42", ptr addrspace(5) %"88", align 4 + %"85" = getelementptr inbounds i8, ptr addrspace(5) %"66", i64 0 + store i64 %"40", ptr addrspace(5) %"85", align 8 + %"41" = load i32, ptr addrspace(5) %"19", align 4 + %"87" = getelementptr inbounds i8, ptr addrspace(5) %"68", i64 0 + store i32 %"41", ptr addrspace(5) %"87", align 4 + %"42" = load i64, ptr addrspace(5) %"15", align 8 + %"89" = getelementptr inbounds i8, ptr addrspace(5) %"70", i64 0 + store i64 %"42", ptr addrspace(5) %"89", align 8 %"43" = load i64, ptr addrspace(5) %"15", align 8 - %"90" = getelementptr inbounds i8, ptr 
addrspace(5) %"71", i64 0 - store i64 %"43", ptr addrspace(5) %"90", align 8 - %"44" = load i64, ptr addrspace(5) %"15", align 8 - %"92" = getelementptr inbounds i8, ptr addrspace(5) %"73", i64 0 - store i64 %"44", ptr addrspace(5) %"92", align 8 - %"30" = load i64, ptr addrspace(5) %"65", align 8 - %"31" = load i64, ptr addrspace(5) %"67", align 8 - %"32" = load i32, ptr addrspace(5) %"69", align 4 - %"33" = load i64, ptr addrspace(5) %"71", align 8 - %"34" = load i64, ptr addrspace(5) %"73", align 8 + %"91" = getelementptr inbounds i8, ptr addrspace(5) %"72", i64 0 + store i64 %"43", ptr addrspace(5) %"91", align 8 + %"30" = load i64, ptr addrspace(5) %"64", align 8 + %"31" = load i64, ptr addrspace(5) %"66", align 8 + %"32" = load i32, ptr addrspace(5) %"68", align 4 + %"33" = load i64, ptr addrspace(5) %"70", align 8 + %"34" = load i64, ptr addrspace(5) %"72", align 8 call void @__zluda_ptx_impl____assertfail(i64 %"30", i64 %"31", i32 %"32", i64 %"33", i64 %"34") - %"46" = load i64, ptr addrspace(5) %"15", align 8 - %"80" = inttoptr i64 %"46" to ptr - %"45" = load i64, ptr %"80", align 8 - store i64 %"45", ptr addrspace(5) %"17", align 8 - %"48" = load i64, ptr addrspace(5) %"17", align 8 - %"47" = add i64 %"48", 1 - store i64 %"47", ptr addrspace(5) %"18", align 8 - %"49" = load i64, ptr addrspace(5) %"16", align 8 - %"50" = load i64, ptr addrspace(5) %"18", align 8 - %"81" = inttoptr i64 %"49" to ptr - store i64 %"50", ptr %"81", align 8 + %"45" = load i64, ptr addrspace(5) %"15", align 8 + %"79" = inttoptr i64 %"45" to ptr + %"44" = load i64, ptr %"79", align 8 + store i64 %"44", ptr addrspace(5) %"17", align 8 + %"47" = load i64, ptr addrspace(5) %"17", align 8 + %"46" = add i64 %"47", 1 + store i64 %"46", ptr addrspace(5) %"18", align 8 + %"48" = load i64, ptr addrspace(5) %"16", align 8 + %"49" = load i64, ptr addrspace(5) %"18", align 8 + %"80" = inttoptr i64 %"48" to ptr + store i64 %"49", ptr %"80", align 8 ret void } diff --git 
a/ptx/src/test/spirv_run/atom_add.ll b/ptx/src/test/spirv_run/atom_add.ll index 88ccc57..dff9e0e 100644 --- a/ptx/src/test/spirv_run/atom_add.ll +++ b/ptx/src/test/spirv_run/atom_add.ll @@ -3,45 +3,43 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [1024 x i8] undef, align 4 -define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { -"38": +define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { +"37": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"28", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"31" = inttoptr i64 %"14" to ptr - %"13" = load i32, ptr %"31", align 4 - store i32 %"13", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = inttoptr i64 %"16" to ptr - %"40" = getelementptr inbounds i8, ptr %"32", i64 4 - %"15" = load i32, ptr %"40", align 4 - store i32 %"15", ptr addrspace(5) %"8", align 4 - %"17" = load i32, ptr addrspace(5) %"7", align 4 - store i32 %"17", ptr addrspace(3) @"4", align 4 - %"19" = load i32, ptr addrspace(5) %"8", align 4 - %"18" = atomicrmw add ptr addrspace(3) @"4", i32 %"19" syncscope("agent-one-as") monotonic, align 4 - store i32 %"18", ptr addrspace(5) %"7", align 4 - %"20" = load i32, ptr addrspace(3) @"4", align 4 - store i32 %"20", ptr 
addrspace(5) %"8", align 4 - %"21" = load i64, ptr addrspace(5) %"6", align 8 - %"22" = load i32, ptr addrspace(5) %"7", align 4 - %"36" = inttoptr i64 %"21" to ptr - store i32 %"22", ptr %"36", align 4 - %"23" = load i64, ptr addrspace(5) %"6", align 8 - %"24" = load i32, ptr addrspace(5) %"8", align 4 - %"37" = inttoptr i64 %"23" to ptr - %"42" = getelementptr inbounds i8, ptr %"37", i64 4 - store i32 %"24", ptr %"42", align 4 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %"30" = inttoptr i64 %"13" to ptr + %"12" = load i32, ptr %"30", align 4 + store i32 %"12", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = inttoptr i64 %"15" to ptr + %"39" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load i32, ptr %"39", align 4 + store i32 %"14", ptr addrspace(5) %"8", align 4 + %"16" = load i32, ptr addrspace(5) %"7", align 4 + store i32 %"16", ptr addrspace(3) @"4", align 4 + %"18" = load i32, ptr addrspace(5) %"8", align 4 + %"17" = atomicrmw add ptr addrspace(3) @"4", i32 %"18" syncscope("agent-one-as") monotonic, align 4 + store i32 %"17", ptr addrspace(5) %"7", align 4 + %"19" = load i32, ptr addrspace(3) @"4", align 4 + store i32 %"19", ptr addrspace(5) %"8", align 4 + %"20" = load i64, ptr addrspace(5) %"6", align 8 + %"21" = load i32, ptr addrspace(5) %"7", align 4 + %"35" = inttoptr i64 %"20" to ptr + store i32 %"21", ptr %"35", align 4 + %"22" = load i64, ptr addrspace(5) %"6", align 8 + %"23" = load i32, ptr addrspace(5) %"8", align 4 + %"36" = inttoptr i64 %"22" to ptr + %"41" = getelementptr inbounds i8, ptr %"36", i64 4 + store i32 %"23", ptr %"41", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_add_f16.ll b/ptx/src/test/spirv_run/atom_add_f16.ll index 10a22a0..e63de90 100644 --- a/ptx/src/test/spirv_run/atom_add_f16.ll +++ b/ptx/src/test/spirv_run/atom_add_f16.ll @@ -3,46 +3,44 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private 
addrspace(3) global [1024 x i8] undef, align 4 -define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"38": +define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 { +"37": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca half, align 2, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"26", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"27", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"11" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"11", ptr addrspace(5) %"6", align 8 - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"29" = inttoptr i64 %"13" to ptr - %"40" = getelementptr inbounds i8, ptr %"29", i64 2 - %"30" = load i16, ptr %"40", align 2 - %"12" = bitcast i16 %"30" to half - store half %"12", ptr addrspace(5) %"7", align 2 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load half, ptr addrspace(5) %"7", align 2 - %"31" = inttoptr i64 %"15" to ptr - %"14" = atomicrmw fadd ptr %"31", half %"16" syncscope("agent-one-as") monotonic, align 2 - store half %"14", ptr addrspace(5) %"7", align 2 - %"17" = load i64, ptr addrspace(5) %"6", align 8 - %"18" = load half, ptr addrspace(5) %"7", align 2 - %"32" = inttoptr i64 %"17" to ptr - %"33" = bitcast half %"18" to i16 - store i16 %"33", ptr %"32", align 2 - %"20" = load i64, ptr addrspace(5) %"5", align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"28" = inttoptr i64 %"12" to ptr + %"39" = getelementptr inbounds i8, ptr %"28", i64 2 + %"29" = load i16, ptr %"39", align 2 + %"11" = bitcast i16 %"29" to half + 
store half %"11", ptr addrspace(5) %"7", align 2 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load half, ptr addrspace(5) %"7", align 2 + %"30" = inttoptr i64 %"14" to ptr + %"13" = atomicrmw fadd ptr %"30", half %"15" syncscope("agent-one-as") monotonic, align 2 + store half %"13", ptr addrspace(5) %"7", align 2 + %"16" = load i64, ptr addrspace(5) %"6", align 8 + %"17" = load half, ptr addrspace(5) %"7", align 2 + %"31" = inttoptr i64 %"16" to ptr + %"32" = bitcast half %"17" to i16 + store i16 %"32", ptr %"31", align 2 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"34" = inttoptr i64 %"19" to ptr + %"33" = load i16, ptr %"34", align 2 + %"18" = bitcast i16 %"33" to half + store half %"18", ptr addrspace(5) %"7", align 2 + %"20" = load i64, ptr addrspace(5) %"6", align 8 + %"21" = load half, ptr addrspace(5) %"7", align 2 %"35" = inttoptr i64 %"20" to ptr - %"34" = load i16, ptr %"35", align 2 - %"19" = bitcast i16 %"34" to half - store half %"19", ptr addrspace(5) %"7", align 2 - %"21" = load i64, ptr addrspace(5) %"6", align 8 - %"22" = load half, ptr addrspace(5) %"7", align 2 - %"36" = inttoptr i64 %"21" to ptr - %"42" = getelementptr inbounds i8, ptr %"36", i64 2 - %"37" = bitcast half %"22" to i16 - store i16 %"37", ptr %"42", align 2 + %"41" = getelementptr inbounds i8, ptr %"35", i64 2 + %"36" = bitcast half %"21" to i16 + store i16 %"36", ptr %"41", align 2 ret void } diff --git a/ptx/src/test/spirv_run/atom_add_float.ll b/ptx/src/test/spirv_run/atom_add_float.ll index efce26c..329d198 100644 --- a/ptx/src/test/spirv_run/atom_add_float.ll +++ b/ptx/src/test/spirv_run/atom_add_float.ll @@ -3,45 +3,43 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [1024 x i8] undef, align 4 -define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { -"38": +define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"28", ptr 
addrspace(4) byref(i64) %"29") #0 { +"37": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"28", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"31" = inttoptr i64 %"14" to ptr - %"13" = load float, ptr %"31", align 4 - store float %"13", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = inttoptr i64 %"16" to ptr - %"40" = getelementptr inbounds i8, ptr %"32", i64 4 - %"15" = load float, ptr %"40", align 4 - store float %"15", ptr addrspace(5) %"8", align 4 - %"17" = load float, ptr addrspace(5) %"7", align 4 - store float %"17", ptr addrspace(3) @"4", align 4 - %"19" = load float, ptr addrspace(5) %"8", align 4 - %"18" = atomicrmw fadd ptr addrspace(3) @"4", float %"19" syncscope("agent-one-as") monotonic, align 4 - store float %"18", ptr addrspace(5) %"7", align 4 - %"20" = load float, ptr addrspace(3) @"4", align 4 - store float %"20", ptr addrspace(5) %"8", align 4 - %"21" = load i64, ptr addrspace(5) %"6", align 8 - %"22" = load float, ptr addrspace(5) %"7", align 4 - %"36" = inttoptr i64 %"21" to ptr - store float %"22", ptr %"36", align 4 - %"23" = load i64, ptr addrspace(5) %"6", align 8 - %"24" = load float, ptr addrspace(5) %"8", align 4 - %"37" = inttoptr i64 %"23" to ptr - %"42" = getelementptr inbounds i8, ptr %"37", i64 4 - store float %"24", ptr %"42", align 4 + store i64 %"11", ptr addrspace(5) %"6", align 8 + 
%"13" = load i64, ptr addrspace(5) %"5", align 8 + %"30" = inttoptr i64 %"13" to ptr + %"12" = load float, ptr %"30", align 4 + store float %"12", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = inttoptr i64 %"15" to ptr + %"39" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load float, ptr %"39", align 4 + store float %"14", ptr addrspace(5) %"8", align 4 + %"16" = load float, ptr addrspace(5) %"7", align 4 + store float %"16", ptr addrspace(3) @"4", align 4 + %"18" = load float, ptr addrspace(5) %"8", align 4 + %"17" = atomicrmw fadd ptr addrspace(3) @"4", float %"18" syncscope("agent-one-as") monotonic, align 4 + store float %"17", ptr addrspace(5) %"7", align 4 + %"19" = load float, ptr addrspace(3) @"4", align 4 + store float %"19", ptr addrspace(5) %"8", align 4 + %"20" = load i64, ptr addrspace(5) %"6", align 8 + %"21" = load float, ptr addrspace(5) %"7", align 4 + %"35" = inttoptr i64 %"20" to ptr + store float %"21", ptr %"35", align 4 + %"22" = load i64, ptr addrspace(5) %"6", align 8 + %"23" = load float, ptr addrspace(5) %"8", align 4 + %"36" = inttoptr i64 %"22" to ptr + %"41" = getelementptr inbounds i8, ptr %"36", i64 4 + store float %"23", ptr %"41", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_cas.ll b/ptx/src/test/spirv_run/atom_cas.ll index fb83ed4..2e0475a 100644 --- a/ptx/src/test/spirv_run/atom_cas.ll +++ b/ptx/src/test/spirv_run/atom_cas.ll @@ -1,45 +1,43 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 { -"39": +define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { +"38": %"8" = alloca i1, align 1, 
addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"29", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"31", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"32" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"32", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %"33" = inttoptr i64 %"15" to ptr - %"41" = getelementptr inbounds i8, ptr %"33", i64 4 - %0 = cmpxchg ptr %"41", i32 %"16", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4 - %"34" = extractvalue { i32, i1 } %0, 0 - store i32 %"34", ptr addrspace(5) %"6", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"31" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"31", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %"32" = inttoptr i64 %"14" to ptr + %"40" = getelementptr inbounds i8, ptr %"32", i64 4 + %0 = cmpxchg ptr %"40", i32 %"15", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4 + %"33" = extractvalue { i32, i1 } %0, 0 + store i32 %"33", ptr addrspace(5) %"6", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"35" = inttoptr i64 %"17" to ptr + %"42" = getelementptr inbounds i8, ptr %"35", i64 4 + %"16" = load i32, 
ptr %"42", align 4 + store i32 %"16", ptr addrspace(5) %"7", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 %"36" = inttoptr i64 %"18" to ptr - %"43" = getelementptr inbounds i8, ptr %"36", i64 4 - %"17" = load i32, ptr %"43", align 4 - store i32 %"17", ptr addrspace(5) %"7", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"37" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"37", align 4 - %"21" = load i64, ptr addrspace(5) %"5", align 8 - %"22" = load i32, ptr addrspace(5) %"7", align 4 - %"38" = inttoptr i64 %"21" to ptr - %"45" = getelementptr inbounds i8, ptr %"38", i64 4 - store i32 %"22", ptr %"45", align 4 + store i32 %"19", ptr %"36", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load i32, ptr addrspace(5) %"7", align 4 + %"37" = inttoptr i64 %"20" to ptr + %"44" = getelementptr inbounds i8, ptr %"37", i64 4 + store i32 %"21", ptr %"44", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_inc.ll b/ptx/src/test/spirv_run/atom_inc.ll index 26b7b70..6fdc3c7 100644 --- a/ptx/src/test/spirv_run/atom_inc.ll +++ b/ptx/src/test/spirv_run/atom_inc.ll @@ -5,47 +5,45 @@ declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr, i32) #0 declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_global_inc(ptr addrspace(1), i32) #0 -define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #1 { -"39": +define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #1 { +"38": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, 
addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"31", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"32", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"33" = inttoptr i64 %"14" to ptr - %"13" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr %"33", i32 101) - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"34" = inttoptr i64 %"16" to ptr addrspace(1) - %"15" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_global_inc(ptr addrspace(1) %"34", i32 101) - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"32" = inttoptr i64 %"13" to ptr + %"12" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr %"32", i32 101) + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"33" = inttoptr i64 %"15" to ptr addrspace(1) + %"14" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_global_inc(ptr addrspace(1) %"33", i32 101) + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"34" = inttoptr i64 %"17" to ptr + %"16" = load i32, ptr %"34", align 4 + store i32 %"16", ptr addrspace(5) %"8", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 %"35" = inttoptr i64 %"18" to ptr - %"17" = load i32, ptr %"35", align 4 - store i32 %"17", ptr addrspace(5) %"8", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"36" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"36", align 4 - %"21" = load 
i64, ptr addrspace(5) %"5", align 8 - %"22" = load i32, ptr addrspace(5) %"7", align 4 - %"37" = inttoptr i64 %"21" to ptr - %"49" = getelementptr inbounds i8, ptr %"37", i64 4 - store i32 %"22", ptr %"49", align 4 - %"23" = load i64, ptr addrspace(5) %"5", align 8 - %"24" = load i32, ptr addrspace(5) %"8", align 4 - %"38" = inttoptr i64 %"23" to ptr - %"51" = getelementptr inbounds i8, ptr %"38", i64 8 - store i32 %"24", ptr %"51", align 4 + store i32 %"19", ptr %"35", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load i32, ptr addrspace(5) %"7", align 4 + %"36" = inttoptr i64 %"20" to ptr + %"48" = getelementptr inbounds i8, ptr %"36", i64 4 + store i32 %"21", ptr %"48", align 4 + %"22" = load i64, ptr addrspace(5) %"5", align 8 + %"23" = load i32, ptr addrspace(5) %"8", align 4 + %"37" = inttoptr i64 %"22" to ptr + %"50" = getelementptr inbounds i8, ptr %"37", i64 8 + store i32 %"23", ptr %"50", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_ld_st.ll b/ptx/src/test/spirv_run/atom_ld_st.ll index 31f39c8..3b6488c 100644 --- a/ptx/src/test/spirv_run/atom_ld_st.ll +++ b/ptx/src/test/spirv_run/atom_ld_st.ll @@ -1,27 +1,25 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @atom_ld_st(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 { -"19": +define protected amdgpu_kernel void @atom_ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { +"18": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr 
addrspace(4) %"14", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"16", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"16" = inttoptr i64 %"11" to ptr + %"10" = load atomic i32, ptr %"16" syncscope("agent-one-as") acquire, align 4 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"13" = load i32, ptr addrspace(5) %"6", align 4 %"17" = inttoptr i64 %"12" to ptr - %"11" = load atomic i32, ptr %"17" syncscope("agent-one-as") acquire, align 4 - store i32 %"11", ptr addrspace(5) %"6", align 4 - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = inttoptr i64 %"13" to ptr - store atomic i32 %"14", ptr %"18" syncscope("agent-one-as") release, align 4 + store atomic i32 %"13", ptr %"17" syncscope("agent-one-as") release, align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_ld_st_vec.ll b/ptx/src/test/spirv_run/atom_ld_st_vec.ll index 95ff710..7ea0fc5 100644 --- a/ptx/src/test/spirv_run/atom_ld_st_vec.ll +++ b/ptx/src/test/spirv_run/atom_ld_st_vec.ll @@ -1,36 +1,34 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @atom_ld_st_vec(ptr addrspace(4) byref(i64) %"20", ptr addrspace(4) byref(i64) %"21") #0 { -"24": +define protected amdgpu_kernel void @atom_ld_st_vec(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { +"23": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 
1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"19", align 8 + store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"12", ptr addrspace(5) %"4", align 8 - %"13" = load i64, ptr addrspace(4) %"21", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"22" = inttoptr i64 %"14" to ptr - %0 = load atomic i128, ptr %"22" syncscope("agent-one-as") acquire, align 16 + store i64 %"12", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"21" = inttoptr i64 %"13" to ptr + %0 = load atomic i128, ptr %"21" syncscope("agent-one-as") acquire, align 16 %"8" = bitcast i128 %0 to <2 x i64> - %"15" = extractelement <2 x i64> %"8", i32 0 - %"16" = extractelement <2 x i64> %"8", i32 1 - store i64 %"15", ptr addrspace(5) %"6", align 8 - store i64 %"16", ptr addrspace(5) %"7", align 8 - %"17" = load i64, ptr addrspace(5) %"6", align 8 - %"18" = load i64, ptr addrspace(5) %"7", align 8 - %1 = insertelement <2 x i64> undef, i64 %"17", i32 0 - %"9" = insertelement <2 x i64> %1, i64 %"18", i32 1 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = inttoptr i64 %"19" to ptr + %"14" = extractelement <2 x i64> %"8", i32 0 + %"15" = extractelement <2 x i64> %"8", i32 1 + store i64 %"14", ptr addrspace(5) %"6", align 8 + store i64 %"15", ptr addrspace(5) %"7", align 8 + %"16" = load i64, ptr addrspace(5) %"6", align 8 + %"17" = load i64, ptr addrspace(5) %"7", align 8 + %1 = insertelement <2 x i64> undef, i64 %"16", i32 0 + %"9" = insertelement <2 x i64> %1, i64 %"17", i32 1 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"22" = inttoptr i64 %"18" to ptr %2 = bitcast <2 x i64> %"9" to 
i128 - store atomic i128 %2, ptr %"23" syncscope("agent-one-as") release, align 16 + store atomic i128 %2, ptr %"22" syncscope("agent-one-as") release, align 16 ret void } diff --git a/ptx/src/test/spirv_run/atom_max_u32.ll b/ptx/src/test/spirv_run/atom_max_u32.ll index 7a89a13..64cb430 100644 --- a/ptx/src/test/spirv_run/atom_max_u32.ll +++ b/ptx/src/test/spirv_run/atom_max_u32.ll @@ -1,38 +1,36 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"31": +define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"30": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %"14" = load i32, ptr addrspace(5) %"6", align 4 %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 
4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"15" = load i32, ptr addrspace(5) %"6", align 4 - %"26" = inttoptr i64 %"14" to ptr - store i32 %"15", ptr %"26", align 4 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"27" = inttoptr i64 %"17" to ptr - %"33" = getelementptr inbounds i8, ptr %"27", i64 4 - %"16" = load i32, ptr %"33", align 4 - store i32 %"16", ptr addrspace(5) %"7", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"7", align 4 - %"29" = inttoptr i64 %"19" to ptr - %"28" = atomicrmw umax ptr %"29", i32 %"20" syncscope("agent-one-as") monotonic, align 4 - store i32 %"28", ptr addrspace(5) %"6", align 4 + store i32 %"14", ptr %"25", align 4 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"26" = inttoptr i64 %"16" to ptr + %"32" = getelementptr inbounds i8, ptr %"26", i64 4 + %"15" = load i32, ptr %"32", align 4 + store i32 %"15", ptr addrspace(5) %"7", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"7", align 4 + %"28" = inttoptr i64 %"18" to ptr + %"27" = atomicrmw umax ptr %"28", i32 %"19" syncscope("agent-one-as") monotonic, align 4 + store i32 %"27", ptr addrspace(5) %"6", align 4 ret void } diff --git a/ptx/src/test/spirv_run/b64tof64.ll b/ptx/src/test/spirv_run/b64tof64.ll index 2c2b674..5cd7a2c 100644 --- a/ptx/src/test/spirv_run/b64tof64.ll +++ b/ptx/src/test/spirv_run/b64tof64.ll @@ -1,34 +1,32 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { 
+"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca double, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) - %"10" = load double, ptr addrspace(4) %"18", align 8 - store double %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"6", align 8 - %"13" = load double, ptr addrspace(5) %"4", align 8 - %"21" = bitcast double %"13" to i64 + %"9" = load double, ptr addrspace(4) %"17", align 8 + store double %"9", ptr addrspace(5) %"4", align 8 + %"10" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load double, ptr addrspace(5) %"4", align 8 + %"20" = bitcast double %"12" to i64 %0 = alloca i64, align 8, addrspace(5) - store i64 %"21", ptr addrspace(5) %0, align 8 - %"12" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"15" = load i64, ptr addrspace(5) %"5", align 8 + store i64 %"20", ptr addrspace(5) %0, align 8 + %"11" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = inttoptr i64 %"14" to ptr + %"13" = load i64, ptr %"21", align 8 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"6", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 %"22" = inttoptr i64 %"15" to ptr - %"14" = load i64, ptr %"22", align 8 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"6", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"23" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"23", align 8 + store i64 %"16", ptr %"22", align 8 ret void } diff --git 
a/ptx/src/test/spirv_run/barrier.ll b/ptx/src/test/spirv_run/barrier.ll index c247e32..e2e65f2 100644 --- a/ptx/src/test/spirv_run/barrier.ll +++ b/ptx/src/test/spirv_run/barrier.ll @@ -4,11 +4,9 @@ target triple = "amdgcn-amd-amdhsa" declare void @__zluda_ptx_impl__barrier_sync(i32) #0 define protected amdgpu_kernel void @barrier() #1 { -"5": +"4": %"2" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"2", align 1 - %"3" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"3", align 1 call void @__zluda_ptx_impl__barrier_sync(i32 0) ret void } diff --git a/ptx/src/test/spirv_run/bfe.ll b/ptx/src/test/spirv_run/bfe.ll index c67513a..99fd766 100644 --- a/ptx/src/test/spirv_run/bfe.ll +++ b/ptx/src/test/spirv_run/bfe.ll @@ -3,44 +3,42 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__bfe_u32(i32, i32, i32) #0 -define protected amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #1 { -"35": +define protected amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 { +"34": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"28", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"14" to ptr - %"13" = load i32, ptr %"31", align 4 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" 
= load i64, ptr addrspace(5) %"4", align 8 - %"32" = inttoptr i64 %"16" to ptr - %"42" = getelementptr inbounds i8, ptr %"32", i64 4 - %"15" = load i32, ptr %"42", align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"33" = inttoptr i64 %"18" to ptr - %"44" = getelementptr inbounds i8, ptr %"33", i64 8 - %"17" = load i32, ptr %"44", align 4 - store i32 %"17", ptr addrspace(5) %"8", align 4 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"21" = load i32, ptr addrspace(5) %"7", align 4 - %"22" = load i32, ptr addrspace(5) %"8", align 4 - %"19" = call i32 @__zluda_ptx_impl__bfe_u32(i32 %"20", i32 %"21", i32 %"22") - store i32 %"19", ptr addrspace(5) %"6", align 4 - %"23" = load i64, ptr addrspace(5) %"5", align 8 - %"24" = load i32, ptr addrspace(5) %"6", align 4 - %"34" = inttoptr i64 %"23" to ptr - store i32 %"24", ptr %"34", align 4 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"13" to ptr + %"12" = load i32, ptr %"30", align 4 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"31" = inttoptr i64 %"15" to ptr + %"41" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load i32, ptr %"41", align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"32" = inttoptr i64 %"17" to ptr + %"43" = getelementptr inbounds i8, ptr %"32", i64 8 + %"16" = load i32, ptr %"43", align 4 + store i32 %"16", ptr addrspace(5) %"8", align 4 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"20" = load i32, ptr addrspace(5) %"7", align 4 + %"21" = load i32, ptr addrspace(5) %"8", align 4 + %"18" = call i32 @__zluda_ptx_impl__bfe_u32(i32 %"19", i32 %"20", i32 %"21") + store i32 %"18", ptr addrspace(5) %"6", align 4 + %"22" = load i64, ptr addrspace(5) %"5", align 8 + %"23" = load i32, ptr addrspace(5) %"6", align 4 + %"33" = inttoptr 
i64 %"22" to ptr + store i32 %"23", ptr %"33", align 4 ret void } diff --git a/ptx/src/test/spirv_run/bfi.ll b/ptx/src/test/spirv_run/bfi.ll index 2fc4191..bea4ac5 100644 --- a/ptx/src/test/spirv_run/bfi.ll +++ b/ptx/src/test/spirv_run/bfi.ll @@ -3,51 +3,49 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__bfi_b32(i32, i32, i32, i32) #0 -define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 { -"45": +define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #1 { +"44": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"35", align 8 - store i64 %"12", ptr addrspace(5) %"4", align 8 - %"13" = load i64, ptr addrspace(4) %"36", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"37" = inttoptr i64 %"15" to ptr - %"14" = load i32, ptr %"37", align 4 - store i32 %"14", ptr addrspace(5) %"6", align 4 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"38" = inttoptr i64 %"17" to ptr - %"53" = getelementptr inbounds i8, ptr %"38", i64 4 - %"16" = load i32, ptr %"53", align 4 - store i32 %"16", ptr addrspace(5) %"7", align 4 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"39" = inttoptr i64 %"19" to ptr - %"55" = getelementptr inbounds i8, ptr %"39", i64 8 - %"18" = load i32, ptr %"55", align 4 - store i32 %"18", ptr addrspace(5) %"8", align 4 - %"21" = load i64, ptr 
addrspace(5) %"4", align 8 - %"40" = inttoptr i64 %"21" to ptr - %"57" = getelementptr inbounds i8, ptr %"40", i64 12 - %"20" = load i32, ptr %"57", align 4 - store i32 %"20", ptr addrspace(5) %"9", align 4 - %"23" = load i32, ptr addrspace(5) %"6", align 4 - %"24" = load i32, ptr addrspace(5) %"7", align 4 - %"25" = load i32, ptr addrspace(5) %"8", align 4 - %"26" = load i32, ptr addrspace(5) %"9", align 4 - %"41" = call i32 @__zluda_ptx_impl__bfi_b32(i32 %"23", i32 %"24", i32 %"25", i32 %"26") - store i32 %"41", ptr addrspace(5) %"6", align 4 - %"27" = load i64, ptr addrspace(5) %"5", align 8 - %"28" = load i32, ptr addrspace(5) %"6", align 4 - %"44" = inttoptr i64 %"27" to ptr - store i32 %"28", ptr %"44", align 4 + store i64 %"12", ptr addrspace(5) %"5", align 8 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"36" = inttoptr i64 %"14" to ptr + %"13" = load i32, ptr %"36", align 4 + store i32 %"13", ptr addrspace(5) %"6", align 4 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"37" = inttoptr i64 %"16" to ptr + %"52" = getelementptr inbounds i8, ptr %"37", i64 4 + %"15" = load i32, ptr %"52", align 4 + store i32 %"15", ptr addrspace(5) %"7", align 4 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"38" = inttoptr i64 %"18" to ptr + %"54" = getelementptr inbounds i8, ptr %"38", i64 8 + %"17" = load i32, ptr %"54", align 4 + store i32 %"17", ptr addrspace(5) %"8", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"39" = inttoptr i64 %"20" to ptr + %"56" = getelementptr inbounds i8, ptr %"39", i64 12 + %"19" = load i32, ptr %"56", align 4 + store i32 %"19", ptr addrspace(5) %"9", align 4 + %"22" = load i32, ptr addrspace(5) %"6", align 4 + %"23" = load i32, ptr addrspace(5) %"7", align 4 + %"24" = load i32, ptr addrspace(5) %"8", align 4 + %"25" = load i32, ptr addrspace(5) %"9", align 4 + %"40" = call i32 @__zluda_ptx_impl__bfi_b32(i32 %"22", i32 %"23", i32 %"24", i32 %"25") + store i32 %"40", ptr addrspace(5) %"6", align 4 + %"26" = 
load i64, ptr addrspace(5) %"5", align 8 + %"27" = load i32, ptr addrspace(5) %"6", align 4 + %"43" = inttoptr i64 %"26" to ptr + store i32 %"27", ptr %"43", align 4 ret void } diff --git a/ptx/src/test/spirv_run/bfind.ll b/ptx/src/test/spirv_run/bfind.ll index 4b7dc1b..ebd9fea 100644 --- a/ptx/src/test/spirv_run/bfind.ll +++ b/ptx/src/test/spirv_run/bfind.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 { -"53": +define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 { +"52": %"12" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"12", align 1 - %"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -15,56 +13,56 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"42", pt %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) + %"13" = load i64, ptr addrspace(4) %"41", align 8 + store i64 %"13", ptr addrspace(5) %"4", align 8 %"14" = load i64, ptr addrspace(4) %"42", align 8 - store i64 %"14", ptr addrspace(5) %"4", align 8 - %"15" = load i64, ptr addrspace(4) %"43", align 8 - store i64 %"15", ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"44" = inttoptr i64 %"17" to ptr - %"16" = load i32, ptr %"44", align 4 - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"45" = inttoptr i64 %"19" to ptr - %"55" = getelementptr inbounds 
i8, ptr %"45", i64 4 - %"18" = load i32, ptr %"55", align 4 - store i32 %"18", ptr addrspace(5) %"7", align 4 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"46" = inttoptr i64 %"21" to ptr - %"57" = getelementptr inbounds i8, ptr %"46", i64 8 - %"20" = load i32, ptr %"57", align 4 - store i32 %"20", ptr addrspace(5) %"8", align 4 - %"23" = load i32, ptr addrspace(5) %"6", align 4 - %0 = icmp eq i32 %"23", 0 - %1 = call i32 @llvm.ctlz.i32(i32 %"23", i1 true) + store i64 %"14", ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"43" = inttoptr i64 %"16" to ptr + %"15" = load i32, ptr %"43", align 4 + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"44" = inttoptr i64 %"18" to ptr + %"54" = getelementptr inbounds i8, ptr %"44", i64 4 + %"17" = load i32, ptr %"54", align 4 + store i32 %"17", ptr addrspace(5) %"7", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"45" = inttoptr i64 %"20" to ptr + %"56" = getelementptr inbounds i8, ptr %"45", i64 8 + %"19" = load i32, ptr %"56", align 4 + store i32 %"19", ptr addrspace(5) %"8", align 4 + %"22" = load i32, ptr addrspace(5) %"6", align 4 + %0 = icmp eq i32 %"22", 0 + %1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true) %2 = sub i32 31, %1 - %"47" = select i1 %0, i32 -1, i32 %2 - store i32 %"47", ptr addrspace(5) %"9", align 4 - %"25" = load i32, ptr addrspace(5) %"7", align 4 - %3 = icmp eq i32 %"25", 0 - %4 = call i32 @llvm.ctlz.i32(i32 %"25", i1 true) + %"46" = select i1 %0, i32 -1, i32 %2 + store i32 %"46", ptr addrspace(5) %"9", align 4 + %"24" = load i32, ptr addrspace(5) %"7", align 4 + %3 = icmp eq i32 %"24", 0 + %4 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true) %5 = sub i32 31, %4 - %"48" = select i1 %3, i32 -1, i32 %5 - store i32 %"48", ptr addrspace(5) %"10", align 4 - %"27" = load i32, ptr addrspace(5) %"8", align 4 - %6 = icmp eq i32 %"27", 0 - %7 = call i32 @llvm.ctlz.i32(i32 %"27", i1 true) + %"47" = 
select i1 %3, i32 -1, i32 %5 + store i32 %"47", ptr addrspace(5) %"10", align 4 + %"26" = load i32, ptr addrspace(5) %"8", align 4 + %6 = icmp eq i32 %"26", 0 + %7 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true) %8 = sub i32 31, %7 - %"49" = select i1 %6, i32 -1, i32 %8 - store i32 %"49", ptr addrspace(5) %"11", align 4 - %"28" = load i64, ptr addrspace(5) %"5", align 8 - %"29" = load i32, ptr addrspace(5) %"9", align 4 - %"50" = inttoptr i64 %"28" to ptr - store i32 %"29", ptr %"50", align 4 - %"30" = load i64, ptr addrspace(5) %"5", align 8 - %"31" = load i32, ptr addrspace(5) %"10", align 4 - %"51" = inttoptr i64 %"30" to ptr - %"59" = getelementptr inbounds i8, ptr %"51", i64 4 - store i32 %"31", ptr %"59", align 4 - %"32" = load i64, ptr addrspace(5) %"5", align 8 - %"33" = load i32, ptr addrspace(5) %"11", align 4 - %"52" = inttoptr i64 %"32" to ptr - %"61" = getelementptr inbounds i8, ptr %"52", i64 8 - store i32 %"33", ptr %"61", align 4 + %"48" = select i1 %6, i32 -1, i32 %8 + store i32 %"48", ptr addrspace(5) %"11", align 4 + %"27" = load i64, ptr addrspace(5) %"5", align 8 + %"28" = load i32, ptr addrspace(5) %"9", align 4 + %"49" = inttoptr i64 %"27" to ptr + store i32 %"28", ptr %"49", align 4 + %"29" = load i64, ptr addrspace(5) %"5", align 8 + %"30" = load i32, ptr addrspace(5) %"10", align 4 + %"50" = inttoptr i64 %"29" to ptr + %"58" = getelementptr inbounds i8, ptr %"50", i64 4 + store i32 %"30", ptr %"58", align 4 + %"31" = load i64, ptr addrspace(5) %"5", align 8 + %"32" = load i32, ptr addrspace(5) %"11", align 4 + %"51" = inttoptr i64 %"31" to ptr + %"60" = getelementptr inbounds i8, ptr %"51", i64 8 + store i32 %"32", ptr %"60", align 4 ret void } diff --git a/ptx/src/test/spirv_run/bfind_shiftamt.ll b/ptx/src/test/spirv_run/bfind_shiftamt.ll index 6a3ca72..fd21514 100644 --- a/ptx/src/test/spirv_run/bfind_shiftamt.ll +++ b/ptx/src/test/spirv_run/bfind_shiftamt.ll @@ -1,12 +1,10 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 { -"53": +define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 { +"52": %"12" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"12", align 1 - %"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -15,53 +13,53 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) + %"13" = load i64, ptr addrspace(4) %"41", align 8 + store i64 %"13", ptr addrspace(5) %"4", align 8 %"14" = load i64, ptr addrspace(4) %"42", align 8 - store i64 %"14", ptr addrspace(5) %"4", align 8 - %"15" = load i64, ptr addrspace(4) %"43", align 8 - store i64 %"15", ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"44" = inttoptr i64 %"17" to ptr - %"16" = load i32, ptr %"44", align 4 - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"45" = inttoptr i64 %"19" to ptr - %"55" = getelementptr inbounds i8, ptr %"45", i64 4 - %"18" = load i32, ptr %"55", align 4 - store i32 %"18", ptr addrspace(5) %"7", align 4 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"46" = inttoptr i64 %"21" to ptr - %"57" = getelementptr inbounds i8, ptr %"46", i64 8 - %"20" = load i32, ptr %"57", align 4 - store i32 %"20", ptr addrspace(5) %"8", align 4 - %"23" = load i32, ptr addrspace(5) %"6", 
align 4 - %0 = icmp eq i32 %"23", 0 - %1 = call i32 @llvm.ctlz.i32(i32 %"23", i1 true) - %"47" = select i1 %0, i32 -1, i32 %1 - store i32 %"47", ptr addrspace(5) %"9", align 4 - %"25" = load i32, ptr addrspace(5) %"7", align 4 - %2 = icmp eq i32 %"25", 0 - %3 = call i32 @llvm.ctlz.i32(i32 %"25", i1 true) - %"48" = select i1 %2, i32 -1, i32 %3 - store i32 %"48", ptr addrspace(5) %"10", align 4 - %"27" = load i32, ptr addrspace(5) %"8", align 4 - %4 = icmp eq i32 %"27", 0 - %5 = call i32 @llvm.ctlz.i32(i32 %"27", i1 true) - %"49" = select i1 %4, i32 -1, i32 %5 - store i32 %"49", ptr addrspace(5) %"11", align 4 - %"28" = load i64, ptr addrspace(5) %"5", align 8 - %"29" = load i32, ptr addrspace(5) %"9", align 4 - %"50" = inttoptr i64 %"28" to ptr - store i32 %"29", ptr %"50", align 4 - %"30" = load i64, ptr addrspace(5) %"5", align 8 - %"31" = load i32, ptr addrspace(5) %"10", align 4 - %"51" = inttoptr i64 %"30" to ptr - %"59" = getelementptr inbounds i8, ptr %"51", i64 4 - store i32 %"31", ptr %"59", align 4 - %"32" = load i64, ptr addrspace(5) %"5", align 8 - %"33" = load i32, ptr addrspace(5) %"11", align 4 - %"52" = inttoptr i64 %"32" to ptr - %"61" = getelementptr inbounds i8, ptr %"52", i64 8 - store i32 %"33", ptr %"61", align 4 + store i64 %"14", ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"43" = inttoptr i64 %"16" to ptr + %"15" = load i32, ptr %"43", align 4 + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"44" = inttoptr i64 %"18" to ptr + %"54" = getelementptr inbounds i8, ptr %"44", i64 4 + %"17" = load i32, ptr %"54", align 4 + store i32 %"17", ptr addrspace(5) %"7", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"45" = inttoptr i64 %"20" to ptr + %"56" = getelementptr inbounds i8, ptr %"45", i64 8 + %"19" = load i32, ptr %"56", align 4 + store i32 %"19", ptr addrspace(5) %"8", align 4 + %"22" = load i32, ptr addrspace(5) %"6", align 4 + %0 = 
icmp eq i32 %"22", 0 + %1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true) + %"46" = select i1 %0, i32 -1, i32 %1 + store i32 %"46", ptr addrspace(5) %"9", align 4 + %"24" = load i32, ptr addrspace(5) %"7", align 4 + %2 = icmp eq i32 %"24", 0 + %3 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true) + %"47" = select i1 %2, i32 -1, i32 %3 + store i32 %"47", ptr addrspace(5) %"10", align 4 + %"26" = load i32, ptr addrspace(5) %"8", align 4 + %4 = icmp eq i32 %"26", 0 + %5 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true) + %"48" = select i1 %4, i32 -1, i32 %5 + store i32 %"48", ptr addrspace(5) %"11", align 4 + %"27" = load i64, ptr addrspace(5) %"5", align 8 + %"28" = load i32, ptr addrspace(5) %"9", align 4 + %"49" = inttoptr i64 %"27" to ptr + store i32 %"28", ptr %"49", align 4 + %"29" = load i64, ptr addrspace(5) %"5", align 8 + %"30" = load i32, ptr addrspace(5) %"10", align 4 + %"50" = inttoptr i64 %"29" to ptr + %"58" = getelementptr inbounds i8, ptr %"50", i64 4 + store i32 %"30", ptr %"58", align 4 + %"31" = load i64, ptr addrspace(5) %"5", align 8 + %"32" = load i32, ptr addrspace(5) %"11", align 4 + %"51" = inttoptr i64 %"31" to ptr + %"60" = getelementptr inbounds i8, ptr %"51", i64 8 + store i32 %"32", ptr %"60", align 4 ret void } diff --git a/ptx/src/test/spirv_run/block.ll b/ptx/src/test/spirv_run/block.ll index 87c9374..87dd227 100644 --- a/ptx/src/test/spirv_run/block.ll +++ b/ptx/src/test/spirv_run/block.ll @@ -1,35 +1,33 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"27": +define protected amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"26": %"9" = alloca i1, align 1, addrspace(5) store i1 false, 
ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"14" to ptr - %"13" = load i64, ptr %"25", align 8 - store i64 %"13", ptr addrspace(5) %"6", align 8 - %"16" = load i64, ptr addrspace(5) %"6", align 8 - %"15" = add i64 %"16", 1 - store i64 %"15", ptr addrspace(5) %"7", align 8 - %"18" = load i64, ptr addrspace(5) %"8", align 8 - %"17" = add i64 %"18", 1 - store i64 %"17", ptr addrspace(5) %"8", align 8 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i64, ptr addrspace(5) %"7", align 8 - %"26" = inttoptr i64 %"19" to ptr - store i64 %"20", ptr %"26", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"13" to ptr + %"12" = load i64, ptr %"24", align 8 + store i64 %"12", ptr addrspace(5) %"6", align 8 + %"15" = load i64, ptr addrspace(5) %"6", align 8 + %"14" = add i64 %"15", 1 + store i64 %"14", ptr addrspace(5) %"7", align 8 + %"17" = load i64, ptr addrspace(5) %"8", align 8 + %"16" = add i64 %"17", 1 + store i64 %"16", ptr addrspace(5) %"8", align 8 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i64, ptr addrspace(5) %"7", align 8 + %"25" = inttoptr i64 %"18" to ptr + store i64 %"19", ptr %"25", align 8 ret void } diff --git a/ptx/src/test/spirv_run/bra.ll b/ptx/src/test/spirv_run/bra.ll index 
6188dc7..6d62cca 100644 --- a/ptx/src/test/spirv_run/bra.ll +++ b/ptx/src/test/spirv_run/bra.ll @@ -1,43 +1,41 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 { -"29": +define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { +"28": %"11" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) + %"12" = load i64, ptr addrspace(4) %"24", align 8 + store i64 %"12", ptr addrspace(5) %"7", align 8 %"13" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"13", ptr addrspace(5) %"7", align 8 - %"14" = load i64, ptr addrspace(4) %"26", align 8 - store i64 %"14", ptr addrspace(5) %"8", align 8 - %"16" = load i64, ptr addrspace(5) %"7", align 8 - %"27" = inttoptr i64 %"16" to ptr - %"15" = load i64, ptr %"27", align 8 - store i64 %"15", ptr addrspace(5) %"9", align 8 + store i64 %"13", ptr addrspace(5) %"8", align 8 + %"15" = load i64, ptr addrspace(5) %"7", align 8 + %"26" = inttoptr i64 %"15" to ptr + %"14" = load i64, ptr %"26", align 8 + store i64 %"14", ptr addrspace(5) %"9", align 8 br label %"4" -"4": ; preds = %"29" - %"18" = load i64, ptr addrspace(5) %"9", align 8 - %"17" = add i64 %"18", 1 - store i64 %"17", ptr addrspace(5) %"10", align 8 +"4": ; preds = %"28" + %"17" = load i64, ptr addrspace(5) %"9", align 8 + %"16" = add i64 %"17", 1 + store i64 %"16", ptr addrspace(5) %"10", align 8 br label %"6" 0: ; No 
predecessors! - %"20" = load i64, ptr addrspace(5) %"9", align 8 - %"19" = add i64 %"20", 2 - store i64 %"19", ptr addrspace(5) %"10", align 8 + %"19" = load i64, ptr addrspace(5) %"9", align 8 + %"18" = add i64 %"19", 2 + store i64 %"18", ptr addrspace(5) %"10", align 8 br label %"6" "6": ; preds = %0, %"4" - %"21" = load i64, ptr addrspace(5) %"8", align 8 - %"22" = load i64, ptr addrspace(5) %"10", align 8 - %"28" = inttoptr i64 %"21" to ptr - store i64 %"22", ptr %"28", align 8 + %"20" = load i64, ptr addrspace(5) %"8", align 8 + %"21" = load i64, ptr addrspace(5) %"10", align 8 + %"27" = inttoptr i64 %"20" to ptr + store i64 %"21", ptr %"27", align 8 ret void } diff --git a/ptx/src/test/spirv_run/brev.ll b/ptx/src/test/spirv_run/brev.ll index e43d1c6..a519c2b 100644 --- a/ptx/src/test/spirv_run/brev.ll +++ b/ptx/src/test/spirv_run/brev.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) 
%"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load i32, ptr %"19", align 4 - store i32 %"11", ptr addrspace(5) %"6", align 4 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"13" = call i32 @llvm.bitreverse.i32(i32 %"14") - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store i32 %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load i32, ptr %"18", align 4 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"13" = load i32, ptr addrspace(5) %"6", align 4 + %"12" = call i32 @llvm.bitreverse.i32(i32 %"13") + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store i32 %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/call.ll b/ptx/src/test/spirv_run/call.ll index af26549..d89322e 100644 --- a/ptx/src/test/spirv_run/call.ll +++ b/ptx/src/test/spirv_run/call.ll @@ -1,63 +1,59 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define private i64 @incr(i64 %"31") #0 { -"51": +define private i64 @incr(i64 %"29") #0 { +"49": %"18" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) - %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 - %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 - %"44" = alloca i64, align 8, addrspace(5) - %"45" = alloca i64, align 8, addrspace(5) + %"20" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) 
%"20", align 1 + %"42" = alloca i64, align 8, addrspace(5) + %"43" = alloca i64, align 8, addrspace(5) %"14" = alloca i64, align 8, addrspace(5) - store i64 %"31", ptr addrspace(5) %"18", align 8 - %"32" = load i64, ptr addrspace(5) %"18", align 8 - store i64 %"32", ptr addrspace(5) %"45", align 8 - %"33" = load i64, ptr addrspace(5) %"45", align 8 - store i64 %"33", ptr addrspace(5) %"14", align 8 - %"35" = load i64, ptr addrspace(5) %"14", align 8 - %"34" = add i64 %"35", 1 - store i64 %"34", ptr addrspace(5) %"14", align 8 - %"36" = load i64, ptr addrspace(5) %"14", align 8 - store i64 %"36", ptr addrspace(5) %"44", align 8 - %"37" = load i64, ptr addrspace(5) %"44", align 8 - store i64 %"37", ptr addrspace(5) %"17", align 8 - %"38" = load i64, ptr addrspace(5) %"17", align 8 - ret i64 %"38" + store i64 %"29", ptr addrspace(5) %"18", align 8 + %"30" = load i64, ptr addrspace(5) %"18", align 8 + store i64 %"30", ptr addrspace(5) %"43", align 8 + %"31" = load i64, ptr addrspace(5) %"43", align 8 + store i64 %"31", ptr addrspace(5) %"14", align 8 + %"33" = load i64, ptr addrspace(5) %"14", align 8 + %"32" = add i64 %"33", 1 + store i64 %"32", ptr addrspace(5) %"14", align 8 + %"34" = load i64, ptr addrspace(5) %"14", align 8 + store i64 %"34", ptr addrspace(5) %"42", align 8 + %"35" = load i64, ptr addrspace(5) %"42", align 8 + store i64 %"35", ptr addrspace(5) %"17", align 8 + %"36" = load i64, ptr addrspace(5) %"17", align 8 + ret i64 %"36" } -define protected amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { -"50": +define protected amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { +"48": %"19" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"19", align 1 - %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca 
i64, align 8, addrspace(5) - %"42" = alloca i64, align 8, addrspace(5) - %"43" = alloca i64, align 8, addrspace(5) - %"23" = load i64, ptr addrspace(4) %"40", align 8 - store i64 %"23", ptr addrspace(5) %"7", align 8 - %"24" = load i64, ptr addrspace(4) %"41", align 8 - store i64 %"24", ptr addrspace(5) %"8", align 8 - %"26" = load i64, ptr addrspace(5) %"7", align 8 - %"46" = inttoptr i64 %"26" to ptr addrspace(1) - %"25" = load i64, ptr addrspace(1) %"46", align 8 - store i64 %"25", ptr addrspace(5) %"9", align 8 - %"27" = load i64, ptr addrspace(5) %"9", align 8 - store i64 %"27", ptr addrspace(5) %"42", align 8 - %"15" = load i64, ptr addrspace(5) %"42", align 8 + %"40" = alloca i64, align 8, addrspace(5) + %"41" = alloca i64, align 8, addrspace(5) + %"21" = load i64, ptr addrspace(4) %"38", align 8 + store i64 %"21", ptr addrspace(5) %"7", align 8 + %"22" = load i64, ptr addrspace(4) %"39", align 8 + store i64 %"22", ptr addrspace(5) %"8", align 8 + %"24" = load i64, ptr addrspace(5) %"7", align 8 + %"44" = inttoptr i64 %"24" to ptr addrspace(1) + %"23" = load i64, ptr addrspace(1) %"44", align 8 + store i64 %"23", ptr addrspace(5) %"9", align 8 + %"25" = load i64, ptr addrspace(5) %"9", align 8 + store i64 %"25", ptr addrspace(5) %"40", align 8 + %"15" = load i64, ptr addrspace(5) %"40", align 8 %"16" = call i64 @incr(i64 %"15") - store i64 %"16", ptr addrspace(5) %"43", align 8 - %"28" = load i64, ptr addrspace(5) %"43", align 8 - store i64 %"28", ptr addrspace(5) %"9", align 8 - %"29" = load i64, ptr addrspace(5) %"8", align 8 - %"30" = load i64, ptr addrspace(5) %"9", align 8 - %"49" = inttoptr i64 %"29" to ptr addrspace(1) - store i64 %"30", ptr addrspace(1) %"49", align 8 + store i64 %"16", ptr addrspace(5) %"41", align 8 + %"26" = load i64, ptr addrspace(5) %"41", align 8 + store i64 %"26", ptr addrspace(5) %"9", align 8 + %"27" = load i64, ptr addrspace(5) %"8", align 8 + %"28" = load i64, ptr addrspace(5) %"9", align 8 + %"47" = inttoptr i64 %"27" to 
ptr addrspace(1) + store i64 %"28", ptr addrspace(1) %"47", align 8 ret void } diff --git a/ptx/src/test/spirv_run/call_bug.ll b/ptx/src/test/spirv_run/call_bug.ll index 749b2b6..3ad9146 100644 --- a/ptx/src/test/spirv_run/call_bug.ll +++ b/ptx/src/test/spirv_run/call_bug.ll @@ -1,68 +1,64 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define private [2 x i32] @incr(i64 %"23") #0 { -"58": +define private [2 x i32] @incr(i64 %"21") #0 { +"56": %"16" = alloca i64, align 8, addrspace(5) %"15" = alloca [2 x i32], align 4, addrspace(5) %"19" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"19", align 1 - %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 - %"44" = alloca [2 x i32], align 4, addrspace(5) - %"45" = alloca i64, align 8, addrspace(5) + %"42" = alloca [2 x i32], align 4, addrspace(5) + %"43" = alloca i64, align 8, addrspace(5) %"4" = alloca i64, align 8, addrspace(5) - store i64 %"23", ptr addrspace(5) %"16", align 8 - %"24" = load i64, ptr addrspace(5) %"16", align 8 - store i64 %"24", ptr addrspace(5) %"45", align 8 - %"25" = load i64, ptr addrspace(5) %"45", align 8 - store i64 %"25", ptr addrspace(5) %"4", align 8 - %"27" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = add i64 %"27", 1 - store i64 %"26", ptr addrspace(5) %"4", align 8 - %"28" = load i64, ptr addrspace(5) %"4", align 8 - store i64 %"28", ptr addrspace(5) %"44", align 8 - %"29" = load [2 x i32], ptr addrspace(5) %"44", align 4 - store [2 x i32] %"29", ptr addrspace(5) %"15", align 4 - %"30" = load [2 x i32], ptr addrspace(5) %"15", align 4 - ret [2 x i32] %"30" + store i64 %"21", ptr addrspace(5) %"16", align 8 + %"22" = load i64, ptr addrspace(5) %"16", align 8 + store i64 %"22", ptr addrspace(5) %"43", align 8 + %"23" = load i64, ptr 
addrspace(5) %"43", align 8 + store i64 %"23", ptr addrspace(5) %"4", align 8 + %"25" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = add i64 %"25", 1 + store i64 %"24", ptr addrspace(5) %"4", align 8 + %"26" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"26", ptr addrspace(5) %"42", align 8 + %"27" = load [2 x i32], ptr addrspace(5) %"42", align 4 + store [2 x i32] %"27", ptr addrspace(5) %"15", align 4 + %"28" = load [2 x i32], ptr addrspace(5) %"15", align 4 + ret [2 x i32] %"28" } -define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { -"59": - %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 - %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 +define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 { +"57": + %"20" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"20", align 1 %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) - %"48" = alloca i64, align 8, addrspace(5) - %"49" = alloca [2 x i32], align 4, addrspace(5) - %"31" = load i64, ptr addrspace(4) %"46", align 8 - store i64 %"31", ptr addrspace(5) %"8", align 8 - %"32" = load i64, ptr addrspace(4) %"47", align 8 - store i64 %"32", ptr addrspace(5) %"9", align 8 - %"34" = load i64, ptr addrspace(5) %"8", align 8 - %"52" = inttoptr i64 %"34" to ptr addrspace(1) - %"33" = load i64, ptr addrspace(1) %"52", align 8 - store i64 %"33", ptr addrspace(5) %"10", align 8 - %"35" = load i64, ptr addrspace(5) %"10", align 8 - store i64 %"35", ptr addrspace(5) %"48", align 8 + %"46" = alloca i64, align 8, addrspace(5) + %"47" = alloca [2 x i32], align 4, addrspace(5) + %"29" = load i64, ptr addrspace(4) %"44", align 8 + store i64 %"29", ptr addrspace(5) %"8", 
align 8 + %"30" = load i64, ptr addrspace(4) %"45", align 8 + store i64 %"30", ptr addrspace(5) %"9", align 8 + %"32" = load i64, ptr addrspace(5) %"8", align 8 + %"50" = inttoptr i64 %"32" to ptr addrspace(1) + %"31" = load i64, ptr addrspace(1) %"50", align 8 + store i64 %"31", ptr addrspace(5) %"10", align 8 + %"33" = load i64, ptr addrspace(5) %"10", align 8 + store i64 %"33", ptr addrspace(5) %"46", align 8 store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"11", align 8 - %"17" = load i64, ptr addrspace(5) %"48", align 8 - %"37" = load i64, ptr addrspace(5) %"11", align 8 - %0 = inttoptr i64 %"37" to ptr + %"17" = load i64, ptr addrspace(5) %"46", align 8 + %"35" = load i64, ptr addrspace(5) %"11", align 8 + %0 = inttoptr i64 %"35" to ptr %"18" = call [2 x i32] %0(i64 %"17") - store [2 x i32] %"18", ptr addrspace(5) %"49", align 4 - %"61" = getelementptr inbounds i8, ptr addrspace(5) %"49", i64 0 - %"38" = load i64, ptr addrspace(5) %"61", align 8 - store i64 %"38", ptr addrspace(5) %"10", align 8 - %"39" = load i64, ptr addrspace(5) %"9", align 8 - %"40" = load i64, ptr addrspace(5) %"10", align 8 - %"57" = inttoptr i64 %"39" to ptr addrspace(1) - store i64 %"40", ptr addrspace(1) %"57", align 8 + store [2 x i32] %"18", ptr addrspace(5) %"47", align 4 + %"59" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0 + %"36" = load i64, ptr addrspace(5) %"59", align 8 + store i64 %"36", ptr addrspace(5) %"10", align 8 + %"37" = load i64, ptr addrspace(5) %"9", align 8 + %"38" = load i64, ptr addrspace(5) %"10", align 8 + %"55" = inttoptr i64 %"37" to ptr addrspace(1) + store i64 %"38", ptr addrspace(1) %"55", align 8 ret void } diff --git a/ptx/src/test/spirv_run/call_multi_return.ll b/ptx/src/test/spirv_run/call_multi_return.ll index a6cb883..35cc5e0 100644 --- a/ptx/src/test/spirv_run/call_multi_return.ll +++ b/ptx/src/test/spirv_run/call_multi_return.ll @@ -3,43 +3,39 @@ target triple = "amdgcn-amd-amdhsa" %struct.i64i32 = type { i64, i32 } -define 
private %struct.i64i32 @"1"(i32 %"41", i32 %"42") #0 { -"64": +define private %struct.i64i32 @"1"(i32 %"39", i32 %"40") #0 { +"62": %"18" = alloca i32, align 4, addrspace(5) %"19" = alloca i32, align 4, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i32, align 4, addrspace(5) - %"23" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"23", align 1 - %"24" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"24", align 1 + %"22" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"22", align 1 %"20" = alloca i32, align 4, addrspace(5) - store i32 %"41", ptr addrspace(5) %"18", align 4 - store i32 %"42", ptr addrspace(5) %"19", align 4 - %"44" = load i32, ptr addrspace(5) %"18", align 4 - %"45" = load i32, ptr addrspace(5) %"19", align 4 - %"43" = add i32 %"44", %"45" - store i32 %"43", ptr addrspace(5) %"20", align 4 - %"47" = load i32, ptr addrspace(5) %"20", align 4 - %"46" = zext i32 %"47" to i64 - store i64 %"46", ptr addrspace(5) %"16", align 8 - %"49" = load i32, ptr addrspace(5) %"18", align 4 - %"50" = load i32, ptr addrspace(5) %"19", align 4 - %"48" = mul i32 %"49", %"50" - store i32 %"48", ptr addrspace(5) %"17", align 4 - %"51" = load i64, ptr addrspace(5) %"16", align 8 - %"52" = load i32, ptr addrspace(5) %"17", align 4 - %0 = insertvalue %struct.i64i32 undef, i64 %"51", 0 - %1 = insertvalue %struct.i64i32 %0, i32 %"52", 1 + store i32 %"39", ptr addrspace(5) %"18", align 4 + store i32 %"40", ptr addrspace(5) %"19", align 4 + %"42" = load i32, ptr addrspace(5) %"18", align 4 + %"43" = load i32, ptr addrspace(5) %"19", align 4 + %"41" = add i32 %"42", %"43" + store i32 %"41", ptr addrspace(5) %"20", align 4 + %"45" = load i32, ptr addrspace(5) %"20", align 4 + %"44" = zext i32 %"45" to i64 + store i64 %"44", ptr addrspace(5) %"16", align 8 + %"47" = load i32, ptr addrspace(5) %"18", align 4 + %"48" = load i32, ptr addrspace(5) %"19", align 4 + %"46" = mul i32 %"47", %"48" + 
store i32 %"46", ptr addrspace(5) %"17", align 4 + %"49" = load i64, ptr addrspace(5) %"16", align 8 + %"50" = load i32, ptr addrspace(5) %"17", align 4 + %0 = insertvalue %struct.i64i32 undef, i64 %"49", 0 + %1 = insertvalue %struct.i64i32 %0, i32 %"50", 1 ret %struct.i64i32 %1 } -define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #0 { -"63": +define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i64) %"55", ptr addrspace(4) byref(i64) %"56") #0 { +"61": %"21" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"21", align 1 - %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) @@ -47,38 +43,38 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6 %"13" = alloca i64, align 8, addrspace(5) %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i32, align 4, addrspace(5) - %"25" = load i64, ptr addrspace(4) %"57", align 8 - store i64 %"25", ptr addrspace(5) %"9", align 8 - %"26" = load i64, ptr addrspace(4) %"58", align 8 - store i64 %"26", ptr addrspace(5) %"10", align 8 + %"23" = load i64, ptr addrspace(4) %"55", align 8 + store i64 %"23", ptr addrspace(5) %"9", align 8 + %"24" = load i64, ptr addrspace(4) %"56", align 8 + store i64 %"24", ptr addrspace(5) %"10", align 8 + %"26" = load i64, ptr addrspace(5) %"9", align 8 + %"57" = inttoptr i64 %"26" to ptr addrspace(1) + %"25" = load i32, ptr addrspace(1) %"57", align 4 + store i32 %"25", ptr addrspace(5) %"11", align 4 %"28" = load i64, ptr addrspace(5) %"9", align 8 - %"59" = inttoptr i64 %"28" to ptr addrspace(1) - %"27" = load i32, ptr addrspace(1) %"59", align 4 - store i32 %"27", ptr addrspace(5) %"11", align 4 - %"30" = load i64, ptr addrspace(5) %"9", align 8 - %"60" = inttoptr i64 %"30" to ptr 
addrspace(1) - %"66" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 4 - %"29" = load i32, ptr addrspace(1) %"66", align 4 - store i32 %"29", ptr addrspace(5) %"12", align 4 - %"33" = load i32, ptr addrspace(5) %"11", align 4 - %"34" = load i32, ptr addrspace(5) %"12", align 4 - %0 = call %struct.i64i32 @"1"(i32 %"33", i32 %"34") - %"31" = extractvalue %struct.i64i32 %0, 0 - %"32" = extractvalue %struct.i64i32 %0, 1 - store i64 %"31", ptr addrspace(5) %"13", align 8 - store i32 %"32", ptr addrspace(5) %"15", align 4 - %"36" = load i32, ptr addrspace(5) %"15", align 4 - %"35" = zext i32 %"36" to i64 - store i64 %"35", ptr addrspace(5) %"14", align 8 + %"58" = inttoptr i64 %"28" to ptr addrspace(1) + %"64" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 4 + %"27" = load i32, ptr addrspace(1) %"64", align 4 + store i32 %"27", ptr addrspace(5) %"12", align 4 + %"31" = load i32, ptr addrspace(5) %"11", align 4 + %"32" = load i32, ptr addrspace(5) %"12", align 4 + %0 = call %struct.i64i32 @"1"(i32 %"31", i32 %"32") + %"29" = extractvalue %struct.i64i32 %0, 0 + %"30" = extractvalue %struct.i64i32 %0, 1 + store i64 %"29", ptr addrspace(5) %"13", align 8 + store i32 %"30", ptr addrspace(5) %"15", align 4 + %"34" = load i32, ptr addrspace(5) %"15", align 4 + %"33" = zext i32 %"34" to i64 + store i64 %"33", ptr addrspace(5) %"14", align 8 + %"35" = load i64, ptr addrspace(5) %"10", align 8 + %"36" = load i64, ptr addrspace(5) %"13", align 8 + %"59" = inttoptr i64 %"35" to ptr addrspace(1) + store i64 %"36", ptr addrspace(1) %"59", align 8 %"37" = load i64, ptr addrspace(5) %"10", align 8 - %"38" = load i64, ptr addrspace(5) %"13", align 8 - %"61" = inttoptr i64 %"37" to ptr addrspace(1) - store i64 %"38", ptr addrspace(1) %"61", align 8 - %"39" = load i64, ptr addrspace(5) %"10", align 8 - %"40" = load i64, ptr addrspace(5) %"14", align 8 - %"62" = inttoptr i64 %"39" to ptr addrspace(1) - %"68" = getelementptr inbounds i8, ptr addrspace(1) %"62", i64 8 - 
store i64 %"40", ptr addrspace(1) %"68", align 8 + %"38" = load i64, ptr addrspace(5) %"14", align 8 + %"60" = inttoptr i64 %"37" to ptr addrspace(1) + %"66" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 8 + store i64 %"38", ptr addrspace(1) %"66", align 8 ret void } diff --git a/ptx/src/test/spirv_run/callprototype.ll b/ptx/src/test/spirv_run/callprototype.ll index 84e5987..be431ea 100644 --- a/ptx/src/test/spirv_run/callprototype.ll +++ b/ptx/src/test/spirv_run/callprototype.ll @@ -1,67 +1,63 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define private i64 @incr(i64 %"35") #0 { -"56": +define private i64 @incr(i64 %"33") #0 { +"54": %"20" = alloca i64, align 8, addrspace(5) %"19" = alloca i64, align 8, addrspace(5) - %"23" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"23", align 1 - %"24" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"24", align 1 - %"48" = alloca i64, align 8, addrspace(5) - %"49" = alloca i64, align 8, addrspace(5) + %"22" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"22", align 1 + %"46" = alloca i64, align 8, addrspace(5) + %"47" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) - store i64 %"35", ptr addrspace(5) %"20", align 8 - %"36" = load i64, ptr addrspace(5) %"20", align 8 - store i64 %"36", ptr addrspace(5) %"49", align 8 - %"37" = load i64, ptr addrspace(5) %"49", align 8 - store i64 %"37", ptr addrspace(5) %"16", align 8 - %"39" = load i64, ptr addrspace(5) %"16", align 8 - %"38" = add i64 %"39", 1 - store i64 %"38", ptr addrspace(5) %"16", align 8 - %"40" = load i64, ptr addrspace(5) %"16", align 8 - store i64 %"40", ptr addrspace(5) %"48", align 8 - %"41" = load i64, ptr addrspace(5) %"48", align 8 - store i64 %"41", ptr 
addrspace(5) %"19", align 8 - %"42" = load i64, ptr addrspace(5) %"19", align 8 - ret i64 %"42" + store i64 %"33", ptr addrspace(5) %"20", align 8 + %"34" = load i64, ptr addrspace(5) %"20", align 8 + store i64 %"34", ptr addrspace(5) %"47", align 8 + %"35" = load i64, ptr addrspace(5) %"47", align 8 + store i64 %"35", ptr addrspace(5) %"16", align 8 + %"37" = load i64, ptr addrspace(5) %"16", align 8 + %"36" = add i64 %"37", 1 + store i64 %"36", ptr addrspace(5) %"16", align 8 + %"38" = load i64, ptr addrspace(5) %"16", align 8 + store i64 %"38", ptr addrspace(5) %"46", align 8 + %"39" = load i64, ptr addrspace(5) %"46", align 8 + store i64 %"39", ptr addrspace(5) %"19", align 8 + %"40" = load i64, ptr addrspace(5) %"19", align 8 + ret i64 %"40" } -define protected amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 { -"55": +define protected amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 { +"53": %"21" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"21", align 1 - %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) - %"46" = alloca i64, align 8, addrspace(5) - %"47" = alloca i64, align 8, addrspace(5) - %"25" = load i64, ptr addrspace(4) %"44", align 8 - store i64 %"25", ptr addrspace(5) %"7", align 8 - %"26" = load i64, ptr addrspace(4) %"45", align 8 - store i64 %"26", ptr addrspace(5) %"8", align 8 - %"28" = load i64, ptr addrspace(5) %"7", align 8 - %"50" = inttoptr i64 %"28" to ptr addrspace(1) - %"27" = load i64, ptr addrspace(1) %"50", align 8 - store i64 %"27", ptr addrspace(5) %"9", align 8 - %"29" = load i64, ptr addrspace(5) %"9", align 8 - store i64 %"29", ptr addrspace(5) %"46", align 8 + %"44" = alloca i64, align 
8, addrspace(5) + %"45" = alloca i64, align 8, addrspace(5) + %"23" = load i64, ptr addrspace(4) %"42", align 8 + store i64 %"23", ptr addrspace(5) %"7", align 8 + %"24" = load i64, ptr addrspace(4) %"43", align 8 + store i64 %"24", ptr addrspace(5) %"8", align 8 + %"26" = load i64, ptr addrspace(5) %"7", align 8 + %"48" = inttoptr i64 %"26" to ptr addrspace(1) + %"25" = load i64, ptr addrspace(1) %"48", align 8 + store i64 %"25", ptr addrspace(5) %"9", align 8 + %"27" = load i64, ptr addrspace(5) %"9", align 8 + store i64 %"27", ptr addrspace(5) %"44", align 8 store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"10", align 8 - %"17" = load i64, ptr addrspace(5) %"46", align 8 - %"31" = load i64, ptr addrspace(5) %"10", align 8 - %0 = inttoptr i64 %"31" to ptr + %"17" = load i64, ptr addrspace(5) %"44", align 8 + %"29" = load i64, ptr addrspace(5) %"10", align 8 + %0 = inttoptr i64 %"29" to ptr %"18" = call i64 %0(i64 %"17") - store i64 %"18", ptr addrspace(5) %"47", align 8 - %"32" = load i64, ptr addrspace(5) %"47", align 8 - store i64 %"32", ptr addrspace(5) %"9", align 8 - %"33" = load i64, ptr addrspace(5) %"8", align 8 - %"34" = load i64, ptr addrspace(5) %"9", align 8 - %"54" = inttoptr i64 %"33" to ptr addrspace(1) - store i64 %"34", ptr addrspace(1) %"54", align 8 + store i64 %"18", ptr addrspace(5) %"45", align 8 + %"30" = load i64, ptr addrspace(5) %"45", align 8 + store i64 %"30", ptr addrspace(5) %"9", align 8 + %"31" = load i64, ptr addrspace(5) %"8", align 8 + %"32" = load i64, ptr addrspace(5) %"9", align 8 + %"52" = inttoptr i64 %"31" to ptr addrspace(1) + store i64 %"32", ptr addrspace(1) %"52", align 8 ret void } diff --git a/ptx/src/test/spirv_run/carry_mixed.ll b/ptx/src/test/spirv_run/carry_mixed.ll deleted file mode 100644 index c33cc5e..0000000 --- a/ptx/src/test/spirv_run/carry_mixed.ll +++ /dev/null @@ -1,51 +0,0 @@ -target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" -target triple = "amdgcn-amd-amdhsa" - -define protected amdgpu_kernel void @carry_mixed(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { -"44": - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 - %"4" = alloca i64, align 8, addrspace(5) - %"5" = alloca i64, align 8, addrspace(5) - %"6" = alloca i32, align 4, addrspace(5) - %"7" = alloca i32, align 4, addrspace(5) - %"8" = alloca i32, align 4, addrspace(5) - %"11" = load i64, ptr addrspace(4) %"35", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %"36" = extractvalue { i32, i1 } %0, 0 - %"13" = extractvalue { i32, i1 } %0, 1 - store i32 %"36", ptr addrspace(5) %"6", align 4 - store i1 %"13", ptr addrspace(5) %"10", align 1 - %"15" = load i1, ptr addrspace(5) %"10", align 1 - %1 = zext i1 %"15" to i32 - %"37" = sub i32 2, %1 - store i32 %"37", ptr addrspace(5) %"7", align 4 - %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %"38" = extractvalue { i32, i1 } %2, 0 - %"17" = extractvalue { i32, i1 } %2, 1 - store i32 %"38", ptr addrspace(5) %"6", align 4 - store i1 %"17", ptr addrspace(5) %"10", align 1 - %"19" = load i1, ptr addrspace(5) %"9", align 1 - %3 = zext i1 %"19" to i32 - %"39" = add i32 1, %3 - store i32 %"39", ptr addrspace(5) %"8", align 4 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load i32, ptr addrspace(5) %"7", align 4 - %"40" = inttoptr i64 %"20" to ptr - store i32 %"21", ptr %"40", align 4 - %"22" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = load i32, ptr addrspace(5) %"8", align 4 - %"42" = inttoptr i64 %"22" to ptr - %"46" = getelementptr 
inbounds i8, ptr %"42", i64 4 - store i32 %"23", ptr %"46", align 4 - ret void -} - -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1 - -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } diff --git a/ptx/src/test/spirv_run/carry_mixed.ptx b/ptx/src/test/spirv_run/carry_mixed.ptx deleted file mode 100644 index b4f2caa..0000000 --- a/ptx/src/test/spirv_run/carry_mixed.ptx +++ /dev/null @@ -1,32 +0,0 @@ -.version 6.5 -.target sm_30 -.address_size 64 - -.visible .entry carry_mixed( - .param .u64 input, - .param .u64 output -) -{ - .reg .u64 in_addr; - .reg .u64 out_addr; - .reg .b32 unused; - - .reg .b32 carry_out_1; - .reg .b32 carry_out_2; - - ld.param.u64 out_addr, [output]; - - // set carry with sub - sub.cc.s32 unused, 0, 1; - // write carry with sub - subc.s32 carry_out_1, 2, 0; - - // set carry with sub - sub.cc.s32 unused, 0, 1; - // fail writing carry with add - addc.s32 carry_out_2, 1, 0; - - st.s32 [out_addr], carry_out_1; - st.s32 [out_addr+4], carry_out_2; - ret; -} diff --git a/ptx/src/test/spirv_run/carry_set_all.ll b/ptx/src/test/spirv_run/carry_set_all.ll new file mode 100644 index 0000000..8b412c1 --- /dev/null +++ b/ptx/src/test/spirv_run/carry_set_all.ll @@ -0,0 +1,257 @@ +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" +target triple = "amdgcn-amd-amdhsa" + +define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) %"208", ptr addrspace(4) byref(i64) %"209") #0 { +"268": + %"22" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"22", align 1 + %"4" 
= alloca i64, align 8, addrspace(5) + %"5" = alloca i64, align 8, addrspace(5) + %"6" = alloca i32, align 4, addrspace(5) + %"7" = alloca i32, align 4, addrspace(5) + %"8" = alloca i32, align 4, addrspace(5) + %"9" = alloca i32, align 4, addrspace(5) + %"10" = alloca i32, align 4, addrspace(5) + %"11" = alloca i32, align 4, addrspace(5) + %"12" = alloca i32, align 4, addrspace(5) + %"13" = alloca i32, align 4, addrspace(5) + %"14" = alloca i32, align 4, addrspace(5) + %"15" = alloca i32, align 4, addrspace(5) + %"16" = alloca i32, align 4, addrspace(5) + %"17" = alloca i32, align 4, addrspace(5) + %"18" = alloca i32, align 4, addrspace(5) + %"19" = alloca i32, align 4, addrspace(5) + %"20" = alloca i32, align 4, addrspace(5) + %"21" = alloca i32, align 4, addrspace(5) + %"37" = load i64, ptr addrspace(4) %"209", align 8 + store i64 %"37", ptr addrspace(5) %"5", align 8 + %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) + %"210" = extractvalue { i32, i1 } %0, 0 + %"23" = extractvalue { i32, i1 } %0, 1 + store i32 %"210", ptr addrspace(5) %"6", align 4 + %"39" = xor i1 %"23", true + store i1 %"39", ptr addrspace(5) %"22", align 1 + %"41" = load i1, ptr addrspace(5) %"22", align 1 + %1 = zext i1 %"41" to i32 + %"211" = add i32 0, %1 + store i32 %"211", ptr addrspace(5) %"6", align 4 + %"42" = load i1, ptr addrspace(5) %"22", align 1 + %"24" = xor i1 %"42", true + %2 = zext i1 %"24" to i32 + %"212" = sub i32 0, %2 + store i32 %"212", ptr addrspace(5) %"7", align 4 + %3 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) + %"213" = extractvalue { i32, i1 } %3, 0 + %"25" = extractvalue { i32, i1 } %3, 1 + store i32 %"213", ptr addrspace(5) %"8", align 4 + %"45" = xor i1 %"25", true + store i1 %"45", ptr addrspace(5) %"22", align 1 + %"47" = load i1, ptr addrspace(5) %"22", align 1 + %4 = zext i1 %"47" to i32 + %"214" = add i32 0, %4 + store i32 %"214", ptr addrspace(5) %"8", align 4 + %"48" = load i1, ptr addrspace(5) %"22", align 1 + %"26" = 
xor i1 %"48", true + %5 = zext i1 %"26" to i32 + %"215" = sub i32 0, %5 + store i32 %"215", ptr addrspace(5) %"9", align 4 + %6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"216" = extractvalue { i32, i1 } %6, 0 + %"51" = extractvalue { i32, i1 } %6, 1 + store i32 %"216", ptr addrspace(5) %"10", align 4 + store i1 %"51", ptr addrspace(5) %"22", align 1 + %"53" = load i1, ptr addrspace(5) %"22", align 1 + %7 = zext i1 %"53" to i32 + %"217" = add i32 0, %7 + store i32 %"217", ptr addrspace(5) %"10", align 4 + %"54" = load i1, ptr addrspace(5) %"22", align 1 + %"27" = xor i1 %"54", true + %8 = zext i1 %"27" to i32 + %"218" = sub i32 0, %8 + store i32 %"218", ptr addrspace(5) %"11", align 4 + %9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) + %"219" = extractvalue { i32, i1 } %9, 0 + %"57" = extractvalue { i32, i1 } %9, 1 + store i32 %"219", ptr addrspace(5) %"12", align 4 + store i1 %"57", ptr addrspace(5) %"22", align 1 + %"59" = load i1, ptr addrspace(5) %"22", align 1 + %10 = zext i1 %"59" to i32 + %"220" = add i32 0, %10 + store i32 %"220", ptr addrspace(5) %"12", align 4 + %"60" = load i1, ptr addrspace(5) %"22", align 1 + %"28" = xor i1 %"60", true + %11 = zext i1 %"28" to i32 + %"221" = sub i32 0, %11 + store i32 %"221", ptr addrspace(5) %"13", align 4 + %12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"222" = extractvalue { i32, i1 } %12, 0 + %"63" = extractvalue { i32, i1 } %12, 1 + store i32 %"222", ptr addrspace(5) %"14", align 4 + store i1 %"63", ptr addrspace(5) %"22", align 1 + %"65" = load i1, ptr addrspace(5) %"22", align 1 + %13 = zext i1 %"65" to i32 + %"223" = add i32 0, %13 + store i32 %"223", ptr addrspace(5) %"14", align 4 + %"66" = load i1, ptr addrspace(5) %"22", align 1 + %"29" = xor i1 %"66", true + %14 = zext i1 %"29" to i32 + %"224" = sub i32 0, %14 + store i32 %"224", ptr addrspace(5) %"15", align 4 + %15 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) + %"225" = 
extractvalue { i32, i1 } %15, 0 + %"69" = extractvalue { i32, i1 } %15, 1 + store i32 %"225", ptr addrspace(5) %"16", align 4 + store i1 %"69", ptr addrspace(5) %"22", align 1 + %"71" = load i1, ptr addrspace(5) %"22", align 1 + %16 = zext i1 %"71" to i32 + %"226" = add i32 0, %16 + store i32 %"226", ptr addrspace(5) %"16", align 4 + %"72" = load i1, ptr addrspace(5) %"22", align 1 + %"30" = xor i1 %"72", true + %17 = zext i1 %"30" to i32 + %"227" = sub i32 0, %17 + store i32 %"227", ptr addrspace(5) %"17", align 4 + %18 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"228" = extractvalue { i32, i1 } %18, 0 + %"75" = extractvalue { i32, i1 } %18, 1 + store i32 %"228", ptr addrspace(5) %"18", align 4 + store i1 %"75", ptr addrspace(5) %"22", align 1 + %"76" = load i1, ptr addrspace(5) %"22", align 1 + %"31" = xor i1 %"76", true + %19 = zext i1 %"31" to i32 + %20 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) + %21 = extractvalue { i32, i1 } %20, 0 + %22 = extractvalue { i32, i1 } %20, 1 + %23 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %21, i32 %19) + %"229" = extractvalue { i32, i1 } %23, 0 + %24 = extractvalue { i32, i1 } %23, 1 + %"32" = xor i1 %22, %24 + store i32 %"229", ptr addrspace(5) %"18", align 4 + %"78" = xor i1 %"32", true + store i1 %"78", ptr addrspace(5) %"22", align 1 + %"80" = load i1, ptr addrspace(5) %"22", align 1 + %25 = zext i1 %"80" to i32 + %"230" = add i32 0, %25 + store i32 %"230", ptr addrspace(5) %"18", align 4 + %"81" = load i1, ptr addrspace(5) %"22", align 1 + %"33" = xor i1 %"81", true + %26 = zext i1 %"33" to i32 + %"231" = sub i32 0, %26 + store i32 %"231", ptr addrspace(5) %"19", align 4 + %27 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"232" = extractvalue { i32, i1 } %27, 0 + %"84" = extractvalue { i32, i1 } %27, 1 + store i32 %"232", ptr addrspace(5) %"20", align 4 + store i1 %"84", ptr addrspace(5) %"22", align 1 + %"85" = load i1, ptr addrspace(5) %"22", align 1 + 
%"34" = xor i1 %"85", true + %28 = zext i1 %"34" to i32 + %29 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) + %30 = extractvalue { i32, i1 } %29, 0 + %31 = extractvalue { i32, i1 } %29, 1 + %32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %30, i32 %28) + %"233" = extractvalue { i32, i1 } %32, 0 + %33 = extractvalue { i32, i1 } %32, 1 + %"35" = xor i1 %31, %33 + store i32 %"233", ptr addrspace(5) %"20", align 4 + %"87" = xor i1 %"35", true + store i1 %"87", ptr addrspace(5) %"22", align 1 + %"89" = load i1, ptr addrspace(5) %"22", align 1 + %34 = zext i1 %"89" to i32 + %"234" = add i32 0, %34 + store i32 %"234", ptr addrspace(5) %"20", align 4 + %"90" = load i1, ptr addrspace(5) %"22", align 1 + %"36" = xor i1 %"90", true + %35 = zext i1 %"36" to i32 + %"235" = sub i32 0, %35 + store i32 %"235", ptr addrspace(5) %"21", align 4 + %"92" = load i64, ptr addrspace(5) %"5", align 8 + %"93" = load i32, ptr addrspace(5) %"6", align 4 + %"236" = inttoptr i64 %"92" to ptr + store i32 %"93", ptr %"236", align 4 + %"94" = load i64, ptr addrspace(5) %"5", align 8 + %"95" = load i32, ptr addrspace(5) %"8", align 4 + %"238" = inttoptr i64 %"94" to ptr + %"270" = getelementptr inbounds i8, ptr %"238", i64 4 + store i32 %"95", ptr %"270", align 4 + %"96" = load i64, ptr addrspace(5) %"5", align 8 + %"97" = load i32, ptr addrspace(5) %"10", align 4 + %"240" = inttoptr i64 %"96" to ptr + %"272" = getelementptr inbounds i8, ptr %"240", i64 8 + store i32 %"97", ptr %"272", align 4 + %"98" = load i64, ptr addrspace(5) %"5", align 8 + %"99" = load i32, ptr addrspace(5) %"12", align 4 + %"242" = inttoptr i64 %"98" to ptr + %"274" = getelementptr inbounds i8, ptr %"242", i64 12 + store i32 %"99", ptr %"274", align 4 + %"100" = load i64, ptr addrspace(5) %"5", align 8 + %"101" = load i32, ptr addrspace(5) %"14", align 4 + %"244" = inttoptr i64 %"100" to ptr + %"276" = getelementptr inbounds i8, ptr %"244", i64 16 + store i32 %"101", ptr %"276", align 4 + %"102" = load 
i64, ptr addrspace(5) %"5", align 8 + %"103" = load i32, ptr addrspace(5) %"16", align 4 + %"246" = inttoptr i64 %"102" to ptr + %"278" = getelementptr inbounds i8, ptr %"246", i64 20 + store i32 %"103", ptr %"278", align 4 + %"104" = load i64, ptr addrspace(5) %"5", align 8 + %"105" = load i32, ptr addrspace(5) %"18", align 4 + %"248" = inttoptr i64 %"104" to ptr + %"280" = getelementptr inbounds i8, ptr %"248", i64 24 + store i32 %"105", ptr %"280", align 4 + %"106" = load i64, ptr addrspace(5) %"5", align 8 + %"107" = load i32, ptr addrspace(5) %"20", align 4 + %"250" = inttoptr i64 %"106" to ptr + %"282" = getelementptr inbounds i8, ptr %"250", i64 28 + store i32 %"107", ptr %"282", align 4 + %"108" = load i64, ptr addrspace(5) %"5", align 8 + %"109" = load i32, ptr addrspace(5) %"7", align 4 + %"252" = inttoptr i64 %"108" to ptr + %"284" = getelementptr inbounds i8, ptr %"252", i64 32 + store i32 %"109", ptr %"284", align 4 + %"110" = load i64, ptr addrspace(5) %"5", align 8 + %"111" = load i32, ptr addrspace(5) %"9", align 4 + %"254" = inttoptr i64 %"110" to ptr + %"286" = getelementptr inbounds i8, ptr %"254", i64 36 + store i32 %"111", ptr %"286", align 4 + %"112" = load i64, ptr addrspace(5) %"5", align 8 + %"113" = load i32, ptr addrspace(5) %"11", align 4 + %"256" = inttoptr i64 %"112" to ptr + %"288" = getelementptr inbounds i8, ptr %"256", i64 40 + store i32 %"113", ptr %"288", align 4 + %"114" = load i64, ptr addrspace(5) %"5", align 8 + %"115" = load i32, ptr addrspace(5) %"13", align 4 + %"258" = inttoptr i64 %"114" to ptr + %"290" = getelementptr inbounds i8, ptr %"258", i64 44 + store i32 %"115", ptr %"290", align 4 + %"116" = load i64, ptr addrspace(5) %"5", align 8 + %"117" = load i32, ptr addrspace(5) %"15", align 4 + %"260" = inttoptr i64 %"116" to ptr + %"292" = getelementptr inbounds i8, ptr %"260", i64 48 + store i32 %"117", ptr %"292", align 4 + %"118" = load i64, ptr addrspace(5) %"5", align 8 + %"119" = load i32, ptr addrspace(5) %"17", 
align 4 + %"262" = inttoptr i64 %"118" to ptr + %"294" = getelementptr inbounds i8, ptr %"262", i64 52 + store i32 %"119", ptr %"294", align 4 + %"120" = load i64, ptr addrspace(5) %"5", align 8 + %"121" = load i32, ptr addrspace(5) %"19", align 4 + %"264" = inttoptr i64 %"120" to ptr + %"296" = getelementptr inbounds i8, ptr %"264", i64 56 + store i32 %"121", ptr %"296", align 4 + %"122" = load i64, ptr addrspace(5) %"5", align 8 + %"123" = load i32, ptr addrspace(5) %"21", align 4 + %"266" = inttoptr i64 %"122" to ptr + %"298" = getelementptr inbounds i8, ptr %"266", i64 60 + store i32 %"123", ptr %"298", align 4 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1 + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 + +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } diff --git a/ptx/src/test/spirv_run/carry_set_all.ptx b/ptx/src/test/spirv_run/carry_set_all.ptx new file mode 100644 index 0000000..ace6e33 --- /dev/null +++ b/ptx/src/test/spirv_run/carry_set_all.ptx @@ -0,0 +1,84 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry carry_set_all( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + + .reg .b32 carry1_add; + .reg .b32 carry1_sub; + .reg .b32 carry2_add; + .reg .b32 carry2_sub; + .reg .b32 carry3_add; + .reg .b32 carry3_sub; + .reg .b32 carry4_add; + .reg .b32 carry4_sub; + .reg .b32 carry5_add; + .reg .b32 carry5_sub; + .reg .b32 carry6_add; + .reg .b32 carry6_sub; + .reg .b32 carry7_add; + .reg .b32 carry7_sub; + .reg .b32 carry8_add; + .reg .b32 carry8_sub; + + 
ld.param.u64 out_addr, [output]; + + sub.cc.u32 carry1_add, 0, 0; + addc.u32 carry1_add, 0, 0; + subc.u32 carry1_sub, 0, 0; + + sub.cc.u32 carry2_add, 0, 1; + addc.u32 carry2_add, 0, 0; + subc.u32 carry2_sub, 0, 0; + + add.cc.u32 carry3_add, 0, 0; + addc.u32 carry3_add, 0, 0; + subc.u32 carry3_sub, 0, 0; + + add.cc.u32 carry4_add, 4294967295, 4294967295; + addc.u32 carry4_add, 0, 0; + subc.u32 carry4_sub, 0, 0; + + mad.lo.cc.u32 carry5_add, 0, 0, 0; + addc.u32 carry5_add, 0, 0; + subc.u32 carry5_sub, 0, 0; + + mad.lo.cc.u32 carry6_add, 1, 4294967295, 4294967295; + addc.u32 carry6_add, 0, 0; + subc.u32 carry6_sub, 0, 0; + + add.cc.u32 carry7_add, 0, 0; + subc.cc.u32 carry7_add, 0, 0; + addc.u32 carry7_add, 0, 0; + subc.u32 carry7_sub, 0, 0; + + add.cc.u32 carry8_add, 0, 0; + subc.cc.u32 carry8_add, 0, 1; + addc.u32 carry8_add, 0, 0; + subc.u32 carry8_sub, 0, 0; + + st.u32 [out_addr], carry1_add; + st.u32 [out_addr+4], carry2_add; + st.u32 [out_addr+8], carry3_add; + st.u32 [out_addr+12], carry4_add; + st.u32 [out_addr+16], carry5_add; + st.u32 [out_addr+20], carry6_add; + st.u32 [out_addr+24], carry7_add; + st.u32 [out_addr+28], carry8_add; + + st.u32 [out_addr+32], carry1_sub; + st.u32 [out_addr+36], carry2_sub; + st.u32 [out_addr+40], carry3_sub; + st.u32 [out_addr+44], carry4_sub; + st.u32 [out_addr+48], carry5_sub; + st.u32 [out_addr+52], carry6_sub; + st.u32 [out_addr+56], carry7_sub; + st.u32 [out_addr+60], carry8_sub; + ret; +} diff --git a/ptx/src/test/spirv_run/clz.ll b/ptx/src/test/spirv_run/clz.ll index 356ee7d..31f408d 100644 --- a/ptx/src/test/spirv_run/clz.ll +++ b/ptx/src/test/spirv_run/clz.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) 
%"18") #0 { -"21": +define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load i32, ptr %"19", align 4 - store i32 %"11", ptr addrspace(5) %"6", align 4 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %0 = call i32 @llvm.ctlz.i32(i32 %"14", i1 false) + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load i32, ptr %"18", align 4 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"13" = load i32, ptr addrspace(5) %"6", align 4 + %0 = call i32 @llvm.ctlz.i32(i32 %"13", i1 false) store i32 %0, ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store i32 %"16", ptr %"20", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store i32 %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/const.ll b/ptx/src/test/spirv_run/const.ll index 472421d..80fcc07 100644 --- a/ptx/src/test/spirv_run/const.ll +++ b/ptx/src/test/spirv_run/const.ll @@ -3,49 +3,47 @@ target triple = "amdgcn-amd-amdhsa" 
@constparams = protected addrspace(4) externally_initialized global [4 x i16] [i16 10, i16 20, i16 30, i16 40], align 8 -define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 { -"53": +define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { +"52": %"11" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i16, align 2, addrspace(5) %"8" = alloca i16, align 2, addrspace(5) %"9" = alloca i16, align 2, addrspace(5) %"10" = alloca i16, align 2, addrspace(5) + %"12" = load i64, ptr addrspace(4) %"38", align 8 + store i64 %"12", ptr addrspace(5) %"5", align 8 %"13" = load i64, ptr addrspace(4) %"39", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(4) %"40", align 8 - store i64 %"14", ptr addrspace(5) %"6", align 8 - %"15" = load i16, ptr addrspace(4) @constparams, align 2 - store i16 %"15", ptr addrspace(5) %"7", align 2 - %"16" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2 - store i16 %"16", ptr addrspace(5) %"8", align 2 - %"17" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 4), align 2 - store i16 %"17", ptr addrspace(5) %"9", align 2 - %"18" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2 - store i16 %"18", ptr addrspace(5) %"10", align 2 - %"19" = load i64, ptr addrspace(5) %"6", align 8 - %"20" = load i16, ptr addrspace(5) %"7", align 2 - %"45" = inttoptr i64 %"19" to ptr - store i16 %"20", ptr %"45", align 2 - %"21" = load i64, ptr addrspace(5) %"6", align 8 - %"22" = load i16, ptr addrspace(5) %"8", align 2 - %"47" = inttoptr 
i64 %"21" to ptr - %"61" = getelementptr inbounds i8, ptr %"47", i64 2 - store i16 %"22", ptr %"61", align 2 - %"23" = load i64, ptr addrspace(5) %"6", align 8 - %"24" = load i16, ptr addrspace(5) %"9", align 2 - %"49" = inttoptr i64 %"23" to ptr - %"63" = getelementptr inbounds i8, ptr %"49", i64 4 - store i16 %"24", ptr %"63", align 2 - %"25" = load i64, ptr addrspace(5) %"6", align 8 - %"26" = load i16, ptr addrspace(5) %"10", align 2 - %"51" = inttoptr i64 %"25" to ptr - %"65" = getelementptr inbounds i8, ptr %"51", i64 6 - store i16 %"26", ptr %"65", align 2 + store i64 %"13", ptr addrspace(5) %"6", align 8 + %"14" = load i16, ptr addrspace(4) @constparams, align 2 + store i16 %"14", ptr addrspace(5) %"7", align 2 + %"15" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2 + store i16 %"15", ptr addrspace(5) %"8", align 2 + %"16" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 4), align 2 + store i16 %"16", ptr addrspace(5) %"9", align 2 + %"17" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2 + store i16 %"17", ptr addrspace(5) %"10", align 2 + %"18" = load i64, ptr addrspace(5) %"6", align 8 + %"19" = load i16, ptr addrspace(5) %"7", align 2 + %"44" = inttoptr i64 %"18" to ptr + store i16 %"19", ptr %"44", align 2 + %"20" = load i64, ptr addrspace(5) %"6", align 8 + %"21" = load i16, ptr addrspace(5) %"8", align 2 + %"46" = inttoptr i64 %"20" to ptr + %"60" = getelementptr inbounds i8, ptr %"46", i64 2 + store i16 %"21", ptr %"60", align 2 + %"22" = load i64, ptr addrspace(5) %"6", align 8 + %"23" = load i16, ptr addrspace(5) %"9", align 2 + %"48" = inttoptr i64 %"22" to ptr + %"62" = getelementptr inbounds i8, ptr %"48", i64 4 + store i16 %"23", ptr %"62", align 2 + %"24" = load i64, ptr addrspace(5) %"6", align 8 + %"25" = load i16, ptr addrspace(5) %"10", align 2 + %"50" = inttoptr i64 %"24" to ptr + %"64" 
= getelementptr inbounds i8, ptr %"50", i64 6 + store i16 %"25", ptr %"64", align 2 ret void } diff --git a/ptx/src/test/spirv_run/constant_f32.ll b/ptx/src/test/spirv_run/constant_f32.ll index e918c89..e0309ea 100644 --- a/ptx/src/test/spirv_run/constant_f32.ll +++ b/ptx/src/test/spirv_run/constant_f32.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": +define protected amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"21": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"20", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = fmul float %"14", 5.000000e-01 - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"21" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"21", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = 
load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"19", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = fmul float %"13", 5.000000e-01 + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"20" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"20", align 4 ret void } diff --git a/ptx/src/test/spirv_run/constant_negative.ll b/ptx/src/test/spirv_run/constant_negative.ll index 09478b6..337689f 100644 --- a/ptx/src/test/spirv_run/constant_negative.ll +++ b/ptx/src/test/spirv_run/constant_negative.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": +define protected amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"21": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"12" to ptr - %"11" = load i32, ptr %"20", align 4 - 
store i32 %"11", ptr addrspace(5) %"6", align 4 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"13" = mul i32 %"14", -1 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %"21" = inttoptr i64 %"15" to ptr - store i32 %"16", ptr %"21", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"11" to ptr + %"10" = load i32, ptr %"19", align 4 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"13" = load i32, ptr addrspace(5) %"6", align 4 + %"12" = mul i32 %"13", -1 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %"20" = inttoptr i64 %"14" to ptr + store i32 %"15", ptr %"20", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cos.ll b/ptx/src/test/spirv_run/cos.ll index 0cf9c30..d385e1f 100644 --- a/ptx/src/test/spirv_run/cos.ll +++ b/ptx/src/test/spirv_run/cos.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr 
addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"19", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = call afn float @llvm.cos.f32(float %"14") - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"18", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = call afn float @llvm.cos.f32(float %"13") + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_clamp.ll b/ptx/src/test/spirv_run/cvt_clamp.ll index 29de682..f2be477 100644 --- a/ptx/src/test/spirv_run/cvt_clamp.ll +++ b/ptx/src/test/spirv_run/cvt_clamp.ll @@ -3,69 +3,67 @@ target triple = "amdgcn-amd-amdhsa" declare float @__zluda_ptx_impl__cvt_sat_f32_f32(float) #0 -define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #1 { -"57": +define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 { +"56": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr 
addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"46", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"47", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"48", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"49" = inttoptr i64 %"12" to ptr addrspace(1) - %"11" = load float, ptr addrspace(1) %"49", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"14") - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"50" = inttoptr i64 %"15" to ptr addrspace(1) - store float %"16", ptr addrspace(1) %"50", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"51" = inttoptr i64 %"18" to ptr addrspace(1) - %"62" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4 - %"17" = load float, ptr addrspace(1) %"62", align 4 - store float %"17", ptr addrspace(5) %"6", align 4 - %"20" = load float, ptr addrspace(5) %"6", align 4 - %"19" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"20") - store float %"19", ptr addrspace(5) %"6", align 4 - %"21" = load i64, ptr addrspace(5) %"5", align 8 - %"22" = load float, ptr addrspace(5) %"6", align 4 - %"52" = inttoptr i64 %"21" to ptr addrspace(1) - %"64" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 4 - store float %"22", ptr addrspace(1) %"64", align 4 - %"24" = load i64, ptr addrspace(5) %"4", align 8 - %"53" = inttoptr i64 %"24" to ptr addrspace(1) - %"66" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8 - %"23" = load float, ptr addrspace(1) %"66", align 4 - store float %"23", 
ptr addrspace(5) %"6", align 4 - %"26" = load float, ptr addrspace(5) %"6", align 4 - %"25" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"26") - store float %"25", ptr addrspace(5) %"6", align 4 - %"27" = load i64, ptr addrspace(5) %"5", align 8 - %"28" = load float, ptr addrspace(5) %"6", align 4 - %"54" = inttoptr i64 %"27" to ptr addrspace(1) - %"68" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 8 - store float %"28", ptr addrspace(1) %"68", align 4 - %"30" = load i64, ptr addrspace(5) %"4", align 8 - %"55" = inttoptr i64 %"30" to ptr addrspace(1) - %"70" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12 - %"29" = load float, ptr addrspace(1) %"70", align 4 - store float %"29", ptr addrspace(5) %"6", align 4 - %"32" = load float, ptr addrspace(5) %"6", align 4 - %"31" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"32") - store float %"31", ptr addrspace(5) %"6", align 4 - %"33" = load i64, ptr addrspace(5) %"5", align 8 - %"34" = load float, ptr addrspace(5) %"6", align 4 - %"56" = inttoptr i64 %"33" to ptr addrspace(1) - %"72" = getelementptr inbounds i8, ptr addrspace(1) %"56", i64 12 - store float %"34", ptr addrspace(1) %"72", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"48" = inttoptr i64 %"11" to ptr addrspace(1) + %"10" = load float, ptr addrspace(1) %"48", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"13") + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"49" = inttoptr i64 %"14" to ptr addrspace(1) + store float %"15", ptr addrspace(1) %"49", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"50" = inttoptr i64 %"17" to ptr addrspace(1) + %"61" = getelementptr inbounds i8, ptr addrspace(1) %"50", i64 
4 + %"16" = load float, ptr addrspace(1) %"61", align 4 + store float %"16", ptr addrspace(5) %"6", align 4 + %"19" = load float, ptr addrspace(5) %"6", align 4 + %"18" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"19") + store float %"18", ptr addrspace(5) %"6", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load float, ptr addrspace(5) %"6", align 4 + %"51" = inttoptr i64 %"20" to ptr addrspace(1) + %"63" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4 + store float %"21", ptr addrspace(1) %"63", align 4 + %"23" = load i64, ptr addrspace(5) %"4", align 8 + %"52" = inttoptr i64 %"23" to ptr addrspace(1) + %"65" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 8 + %"22" = load float, ptr addrspace(1) %"65", align 4 + store float %"22", ptr addrspace(5) %"6", align 4 + %"25" = load float, ptr addrspace(5) %"6", align 4 + %"24" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"25") + store float %"24", ptr addrspace(5) %"6", align 4 + %"26" = load i64, ptr addrspace(5) %"5", align 8 + %"27" = load float, ptr addrspace(5) %"6", align 4 + %"53" = inttoptr i64 %"26" to ptr addrspace(1) + %"67" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8 + store float %"27", ptr addrspace(1) %"67", align 4 + %"29" = load i64, ptr addrspace(5) %"4", align 8 + %"54" = inttoptr i64 %"29" to ptr addrspace(1) + %"69" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 12 + %"28" = load float, ptr addrspace(1) %"69", align 4 + store float %"28", ptr addrspace(5) %"6", align 4 + %"31" = load float, ptr addrspace(5) %"6", align 4 + %"30" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"31") + store float %"30", ptr addrspace(5) %"6", align 4 + %"32" = load i64, ptr addrspace(5) %"5", align 8 + %"33" = load float, ptr addrspace(5) %"6", align 4 + %"55" = inttoptr i64 %"32" to ptr addrspace(1) + %"71" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12 + store float %"33", ptr addrspace(1) %"71", align 4 
ret void } diff --git a/ptx/src/test/spirv_run/cvt_f32_f16.ll b/ptx/src/test/spirv_run/cvt_f32_f16.ll index 169eb59..e3acdb6 100644 --- a/ptx/src/test/spirv_run/cvt_f32_f16.ll +++ b/ptx/src/test/spirv_run/cvt_f32_f16.ll @@ -1,32 +1,30 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_f32_f16(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"23": +define protected amdgpu_kernel void @cvt_f32_f16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca half, align 2, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr addrspace(1) - %"20" = load i16, ptr addrspace(1) %"21", align 2 - %"12" = bitcast i16 %"20" to half - store half %"12", ptr addrspace(5) %"6", align 2 - %"15" = load half, ptr addrspace(5) %"6", align 2 - %"14" = fpext half %"15" to float - store float %"14", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load float, ptr addrspace(5) %"7", align 4 - %"22" = inttoptr i64 %"16" to ptr - store float %"17", ptr %"22", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + 
%"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr addrspace(1) + %"19" = load i16, ptr addrspace(1) %"20", align 2 + %"11" = bitcast i16 %"19" to half + store half %"11", ptr addrspace(5) %"6", align 2 + %"14" = load half, ptr addrspace(5) %"6", align 2 + %"13" = fpext half %"14" to float + store float %"13", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load float, ptr addrspace(5) %"7", align 4 + %"21" = inttoptr i64 %"15" to ptr + store float %"16", ptr %"21", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_f32_s32.ll b/ptx/src/test/spirv_run/cvt_f32_s32.ll index 119d052..65b00ce 100644 --- a/ptx/src/test/spirv_run/cvt_f32_s32.ll +++ b/ptx/src/test/spirv_run/cvt_f32_s32.ll @@ -9,80 +9,78 @@ declare float @__zluda_ptx_impl__cvt_rp_f32_s32(i32) #0 declare float @__zluda_ptx_impl__cvt_rz_f32_s32(i32) #0 -define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"50", ptr addrspace(4) byref(i64) %"51") #1 { -"76": +define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #1 { +"75": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"49", align 8 + store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"50", align 8 - store i64 %"12", ptr addrspace(5) %"4", align 8 - %"13" = load i64, ptr addrspace(4) %"51", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"53" = inttoptr i64 %"15" to ptr - %"52" = 
load i32, ptr %"53", align 4 - store i32 %"52", ptr addrspace(5) %"6", align 4 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"54" = inttoptr i64 %"17" to ptr - %"90" = getelementptr inbounds i8, ptr %"54", i64 4 - %"55" = load i32, ptr %"90", align 4 - store i32 %"55", ptr addrspace(5) %"7", align 4 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"56" = inttoptr i64 %"19" to ptr - %"92" = getelementptr inbounds i8, ptr %"56", i64 8 - %"57" = load i32, ptr %"92", align 4 - store i32 %"57", ptr addrspace(5) %"8", align 4 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"58" = inttoptr i64 %"21" to ptr - %"94" = getelementptr inbounds i8, ptr %"58", i64 12 - %"59" = load i32, ptr %"94", align 4 - store i32 %"59", ptr addrspace(5) %"9", align 4 - %"23" = load i32, ptr addrspace(5) %"6", align 4 - %"60" = call float @__zluda_ptx_impl__cvt_rn_f32_s32(i32 %"23") - %"22" = bitcast float %"60" to i32 - store i32 %"22", ptr addrspace(5) %"6", align 4 - %"25" = load i32, ptr addrspace(5) %"7", align 4 - %"62" = call float @__zluda_ptx_impl__cvt_rz_f32_s32(i32 %"25") - %"24" = bitcast float %"62" to i32 - store i32 %"24", ptr addrspace(5) %"7", align 4 - %"27" = load i32, ptr addrspace(5) %"8", align 4 - %"64" = call float @__zluda_ptx_impl__cvt_rm_f32_s32(i32 %"27") - %"26" = bitcast float %"64" to i32 - store i32 %"26", ptr addrspace(5) %"8", align 4 - %"29" = load i32, ptr addrspace(5) %"9", align 4 - %"66" = call float @__zluda_ptx_impl__cvt_rp_f32_s32(i32 %"29") - %"28" = bitcast float %"66" to i32 - store i32 %"28", ptr addrspace(5) %"9", align 4 - %"30" = load i64, ptr addrspace(5) %"5", align 8 - %"31" = load i32, ptr addrspace(5) %"6", align 4 - %"68" = inttoptr i64 %"30" to ptr addrspace(1) - %"69" = bitcast i32 %"31" to float - store float %"69", ptr addrspace(1) %"68", align 4 - %"32" = load i64, ptr addrspace(5) %"5", align 8 - %"33" = load i32, ptr addrspace(5) %"7", align 4 - %"70" = inttoptr i64 %"32" to ptr addrspace(1) - %"96" = 
getelementptr inbounds i8, ptr addrspace(1) %"70", i64 4 - %"71" = bitcast i32 %"33" to float - store float %"71", ptr addrspace(1) %"96", align 4 - %"34" = load i64, ptr addrspace(5) %"5", align 8 - %"35" = load i32, ptr addrspace(5) %"8", align 4 - %"72" = inttoptr i64 %"34" to ptr addrspace(1) - %"98" = getelementptr inbounds i8, ptr addrspace(1) %"72", i64 8 - %"73" = bitcast i32 %"35" to float - store float %"73", ptr addrspace(1) %"98", align 4 - %"36" = load i64, ptr addrspace(5) %"5", align 8 - %"37" = load i32, ptr addrspace(5) %"9", align 4 - %"74" = inttoptr i64 %"36" to ptr addrspace(1) - %"100" = getelementptr inbounds i8, ptr addrspace(1) %"74", i64 12 - %"75" = bitcast i32 %"37" to float - store float %"75", ptr addrspace(1) %"100", align 4 + store i64 %"12", ptr addrspace(5) %"5", align 8 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"52" = inttoptr i64 %"14" to ptr + %"51" = load i32, ptr %"52", align 4 + store i32 %"51", ptr addrspace(5) %"6", align 4 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"53" = inttoptr i64 %"16" to ptr + %"89" = getelementptr inbounds i8, ptr %"53", i64 4 + %"54" = load i32, ptr %"89", align 4 + store i32 %"54", ptr addrspace(5) %"7", align 4 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"55" = inttoptr i64 %"18" to ptr + %"91" = getelementptr inbounds i8, ptr %"55", i64 8 + %"56" = load i32, ptr %"91", align 4 + store i32 %"56", ptr addrspace(5) %"8", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"57" = inttoptr i64 %"20" to ptr + %"93" = getelementptr inbounds i8, ptr %"57", i64 12 + %"58" = load i32, ptr %"93", align 4 + store i32 %"58", ptr addrspace(5) %"9", align 4 + %"22" = load i32, ptr addrspace(5) %"6", align 4 + %"59" = call float @__zluda_ptx_impl__cvt_rn_f32_s32(i32 %"22") + %"21" = bitcast float %"59" to i32 + store i32 %"21", ptr addrspace(5) %"6", align 4 + %"24" = load i32, ptr addrspace(5) %"7", align 4 + %"61" = call float @__zluda_ptx_impl__cvt_rz_f32_s32(i32 
%"24") + %"23" = bitcast float %"61" to i32 + store i32 %"23", ptr addrspace(5) %"7", align 4 + %"26" = load i32, ptr addrspace(5) %"8", align 4 + %"63" = call float @__zluda_ptx_impl__cvt_rm_f32_s32(i32 %"26") + %"25" = bitcast float %"63" to i32 + store i32 %"25", ptr addrspace(5) %"8", align 4 + %"28" = load i32, ptr addrspace(5) %"9", align 4 + %"65" = call float @__zluda_ptx_impl__cvt_rp_f32_s32(i32 %"28") + %"27" = bitcast float %"65" to i32 + store i32 %"27", ptr addrspace(5) %"9", align 4 + %"29" = load i64, ptr addrspace(5) %"5", align 8 + %"30" = load i32, ptr addrspace(5) %"6", align 4 + %"67" = inttoptr i64 %"29" to ptr addrspace(1) + %"68" = bitcast i32 %"30" to float + store float %"68", ptr addrspace(1) %"67", align 4 + %"31" = load i64, ptr addrspace(5) %"5", align 8 + %"32" = load i32, ptr addrspace(5) %"7", align 4 + %"69" = inttoptr i64 %"31" to ptr addrspace(1) + %"95" = getelementptr inbounds i8, ptr addrspace(1) %"69", i64 4 + %"70" = bitcast i32 %"32" to float + store float %"70", ptr addrspace(1) %"95", align 4 + %"33" = load i64, ptr addrspace(5) %"5", align 8 + %"34" = load i32, ptr addrspace(5) %"8", align 4 + %"71" = inttoptr i64 %"33" to ptr addrspace(1) + %"97" = getelementptr inbounds i8, ptr addrspace(1) %"71", i64 8 + %"72" = bitcast i32 %"34" to float + store float %"72", ptr addrspace(1) %"97", align 4 + %"35" = load i64, ptr addrspace(5) %"5", align 8 + %"36" = load i32, ptr addrspace(5) %"9", align 4 + %"73" = inttoptr i64 %"35" to ptr addrspace(1) + %"99" = getelementptr inbounds i8, ptr addrspace(1) %"73", i64 12 + %"74" = bitcast i32 %"36" to float + store float %"74", ptr addrspace(1) %"99", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_f64_f32.ll b/ptx/src/test/spirv_run/cvt_f64_f32.ll index f608ed1..96267f4 100644 --- a/ptx/src/test/spirv_run/cvt_f64_f32.ll +++ b/ptx/src/test/spirv_run/cvt_f64_f32.ll @@ -1,31 +1,29 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": +define protected amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"21": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca double, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"13" to ptr addrspace(1) - %"12" = load float, ptr addrspace(1) %"20", align 4 - store float %"12", ptr addrspace(5) %"6", align 4 - %"15" = load float, ptr addrspace(5) %"6", align 4 - %"14" = fpext float %"15" to double - store double %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load double, ptr addrspace(5) %"7", align 8 - %"21" = inttoptr i64 %"16" to ptr - store double %"17", ptr %"21", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr addrspace(1) + %"11" = load float, ptr addrspace(1) %"19", align 4 + store float %"11", ptr addrspace(5) %"6", align 4 + %"14" = load float, ptr addrspace(5) %"6", align 4 + %"13" = fpext float %"14" 
to double + store double %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load double, ptr addrspace(5) %"7", align 8 + %"20" = inttoptr i64 %"15" to ptr + store double %"16", ptr %"20", align 8 ret void } diff --git a/ptx/src/test/spirv_run/cvt_rni.ll b/ptx/src/test/spirv_run/cvt_rni.ll index fa56dfa..5eb6eaa 100644 --- a/ptx/src/test/spirv_run/cvt_rni.ll +++ b/ptx/src/test/spirv_run/cvt_rni.ll @@ -1,44 +1,42 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { -"34": +define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { +"33": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"27", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"30" = inttoptr i64 %"13" to ptr - %"12" = load float, ptr %"30", align 4 - store float %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"15" to ptr - %"36" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load float, ptr %"36", align 4 - store float %"14", ptr 
addrspace(5) %"7", align 4 - %"17" = load float, ptr addrspace(5) %"6", align 4 - %"16" = call float @llvm.rint.f32(float %"17") - store float %"16", ptr addrspace(5) %"6", align 4 - %"19" = load float, ptr addrspace(5) %"7", align 4 - %"18" = call float @llvm.rint.f32(float %"19") - store float %"18", ptr addrspace(5) %"7", align 4 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load float, ptr addrspace(5) %"6", align 4 - %"32" = inttoptr i64 %"20" to ptr - store float %"21", ptr %"32", align 4 - %"22" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = load float, ptr addrspace(5) %"7", align 4 - %"33" = inttoptr i64 %"22" to ptr - %"38" = getelementptr inbounds i8, ptr %"33", i64 4 - store float %"23", ptr %"38", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"29" = inttoptr i64 %"12" to ptr + %"11" = load float, ptr %"29", align 4 + store float %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"14" to ptr + %"35" = getelementptr inbounds i8, ptr %"30", i64 4 + %"13" = load float, ptr %"35", align 4 + store float %"13", ptr addrspace(5) %"7", align 4 + %"16" = load float, ptr addrspace(5) %"6", align 4 + %"15" = call float @llvm.rint.f32(float %"16") + store float %"15", ptr addrspace(5) %"6", align 4 + %"18" = load float, ptr addrspace(5) %"7", align 4 + %"17" = call float @llvm.rint.f32(float %"18") + store float %"17", ptr addrspace(5) %"7", align 4 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load float, ptr addrspace(5) %"6", align 4 + %"31" = inttoptr i64 %"19" to ptr + store float %"20", ptr %"31", align 4 + %"21" = load i64, ptr addrspace(5) %"5", align 8 + %"22" = load float, ptr addrspace(5) %"7", align 4 + %"32" = inttoptr i64 %"21" to ptr + %"37" = getelementptr inbounds i8, ptr %"32", i64 4 + store float %"22", ptr %"37", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_rzi.ll 
b/ptx/src/test/spirv_run/cvt_rzi.ll index ad4a305..83783d8 100644 --- a/ptx/src/test/spirv_run/cvt_rzi.ll +++ b/ptx/src/test/spirv_run/cvt_rzi.ll @@ -1,44 +1,42 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { -"34": +define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { +"33": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"27", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"30" = inttoptr i64 %"13" to ptr - %"12" = load float, ptr %"30", align 4 - store float %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"15" to ptr - %"36" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load float, ptr %"36", align 4 - store float %"14", ptr addrspace(5) %"7", align 4 - %"17" = load float, ptr addrspace(5) %"6", align 4 - %"16" = call float @llvm.trunc.f32(float %"17") - store float %"16", ptr addrspace(5) %"6", align 4 - %"19" = load float, ptr addrspace(5) %"7", align 4 - %"18" = call float @llvm.trunc.f32(float %"19") - store float %"18", 
ptr addrspace(5) %"7", align 4 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load float, ptr addrspace(5) %"6", align 4 - %"32" = inttoptr i64 %"20" to ptr - store float %"21", ptr %"32", align 4 - %"22" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = load float, ptr addrspace(5) %"7", align 4 - %"33" = inttoptr i64 %"22" to ptr - %"38" = getelementptr inbounds i8, ptr %"33", i64 4 - store float %"23", ptr %"38", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"29" = inttoptr i64 %"12" to ptr + %"11" = load float, ptr %"29", align 4 + store float %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"14" to ptr + %"35" = getelementptr inbounds i8, ptr %"30", i64 4 + %"13" = load float, ptr %"35", align 4 + store float %"13", ptr addrspace(5) %"7", align 4 + %"16" = load float, ptr addrspace(5) %"6", align 4 + %"15" = call float @llvm.trunc.f32(float %"16") + store float %"15", ptr addrspace(5) %"6", align 4 + %"18" = load float, ptr addrspace(5) %"7", align 4 + %"17" = call float @llvm.trunc.f32(float %"18") + store float %"17", ptr addrspace(5) %"7", align 4 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load float, ptr addrspace(5) %"6", align 4 + %"31" = inttoptr i64 %"19" to ptr + store float %"20", ptr %"31", align 4 + %"21" = load i64, ptr addrspace(5) %"5", align 8 + %"22" = load float, ptr addrspace(5) %"7", align 4 + %"32" = inttoptr i64 %"21" to ptr + %"37" = getelementptr inbounds i8, ptr %"32", i64 4 + store float %"22", ptr %"37", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_s16_s8.ll b/ptx/src/test/spirv_run/cvt_s16_s8.ll index dcf4555..841178e 100644 --- a/ptx/src/test/spirv_run/cvt_s16_s8.ll +++ b/ptx/src/test/spirv_run/cvt_s16_s8.ll @@ -1,33 +1,31 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"13" to ptr addrspace(1) - %"12" = load i32, ptr addrspace(1) %"20", align 4 - store i32 %"12", ptr addrspace(5) %"7", align 4 - %"15" = load i32, ptr addrspace(5) %"7", align 4 - %"26" = trunc i32 %"15" to i8 - %"21" = sext i8 %"26" to i16 - %"14" = sext i16 %"21" to i32 - store i32 %"14", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"23" = inttoptr i64 %"16" to ptr - store i32 %"17", ptr %"23", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr addrspace(1) + %"11" = load i32, ptr addrspace(1) %"19", align 4 + store i32 %"11", ptr addrspace(5) %"7", align 4 + %"14" = load i32, ptr addrspace(5) %"7", align 4 
+ %"25" = trunc i32 %"14" to i8 + %"20" = sext i8 %"25" to i16 + %"13" = sext i16 %"20" to i32 + store i32 %"13", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"22" = inttoptr i64 %"15" to ptr + store i32 %"16", ptr %"22", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_s32_f32.ll b/ptx/src/test/spirv_run/cvt_s32_f32.ll index b8f8b2b..bd1b9e3 100644 --- a/ptx/src/test/spirv_run/cvt_s32_f32.ll +++ b/ptx/src/test/spirv_run/cvt_s32_f32.ll @@ -3,48 +3,46 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float) #0 -define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 { -"42": +define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 { +"41": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"27", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"13" to ptr - %"30" = load float, ptr %"31", align 4 - %"12" = bitcast float %"30" to i32 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"32" = inttoptr i64 %"15" to ptr - %"47" = getelementptr inbounds i8, ptr %"32", i64 4 - %"33" = load float, ptr %"47", align 4 - %"14" = bitcast float %"33" to i32 - 
store i32 %"14", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"35" = bitcast i32 %"17" to float - %"34" = call i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float %"35") - store i32 %"34", ptr addrspace(5) %"6", align 4 - %"19" = load i32, ptr addrspace(5) %"7", align 4 - %"37" = bitcast i32 %"19" to float - %"36" = call i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float %"37") - store i32 %"36", ptr addrspace(5) %"7", align 4 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load i32, ptr addrspace(5) %"6", align 4 - %"38" = inttoptr i64 %"20" to ptr addrspace(1) - store i32 %"21", ptr addrspace(1) %"38", align 4 - %"22" = load i64, ptr addrspace(5) %"5", align 8 - %"23" = load i32, ptr addrspace(5) %"7", align 4 - %"40" = inttoptr i64 %"22" to ptr addrspace(1) - %"49" = getelementptr inbounds i8, ptr addrspace(1) %"40", i64 4 - store i32 %"23", ptr addrspace(1) %"49", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"12" to ptr + %"29" = load float, ptr %"30", align 4 + %"11" = bitcast float %"29" to i32 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"31" = inttoptr i64 %"14" to ptr + %"46" = getelementptr inbounds i8, ptr %"31", i64 4 + %"32" = load float, ptr %"46", align 4 + %"13" = bitcast float %"32" to i32 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"34" = bitcast i32 %"16" to float + %"33" = call i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float %"34") + store i32 %"33", ptr addrspace(5) %"6", align 4 + %"18" = load i32, ptr addrspace(5) %"7", align 4 + %"36" = bitcast i32 %"18" to float + %"35" = call i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float %"36") + store i32 %"35", ptr addrspace(5) %"7", align 4 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load i32, ptr addrspace(5) %"6", align 4 + %"37" = inttoptr i64 %"19" 
to ptr addrspace(1) + store i32 %"20", ptr addrspace(1) %"37", align 4 + %"21" = load i64, ptr addrspace(5) %"5", align 8 + %"22" = load i32, ptr addrspace(5) %"7", align 4 + %"39" = inttoptr i64 %"21" to ptr addrspace(1) + %"48" = getelementptr inbounds i8, ptr addrspace(1) %"39", i64 4 + store i32 %"22", ptr addrspace(1) %"48", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_s64_s32.ll b/ptx/src/test/spirv_run/cvt_s64_s32.ll index a272a4c..4958266 100644 --- a/ptx/src/test/spirv_run/cvt_s64_s32.ll +++ b/ptx/src/test/spirv_run/cvt_s64_s32.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"20" = load i32, ptr %"21", align 4 - store i32 %"20", ptr addrspace(5) %"6", align 4 - %"15" = load i32, ptr addrspace(5) %"6", align 4 - %"14" = sext i32 %"15" to i64 - store i64 %"14", 
ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"19" = load i32, ptr %"20", align 4 + store i32 %"19", ptr addrspace(5) %"6", align 4 + %"14" = load i32, ptr addrspace(5) %"6", align 4 + %"13" = sext i32 %"14" to i64 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/cvt_sat_s_u.ll b/ptx/src/test/spirv_run/cvt_sat_s_u.ll index 946ece1..3af6ef5 100644 --- a/ptx/src/test/spirv_run/cvt_sat_s_u.ll +++ b/ptx/src/test/spirv_run/cvt_sat_s_u.ll @@ -1,50 +1,48 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"35": +define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 { +"34": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"26", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr 
addrspace(4) %"27", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"29" = inttoptr i64 %"14" to ptr - %"13" = load i32, ptr %"29", align 4 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %0 = call i32 @llvm.smax.i32(i32 %"16", i32 0) + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"28" = inttoptr i64 %"13" to ptr + %"12" = load i32, ptr %"28", align 4 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %0 = call i32 @llvm.smax.i32(i32 %"15", i32 0) %1 = alloca i32, align 4, addrspace(5) store i32 %0, ptr addrspace(5) %1, align 4 - %"15" = load i32, ptr addrspace(5) %1, align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 + %"14" = load i32, ptr addrspace(5) %1, align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 %2 = alloca i32, align 4, addrspace(5) - store i32 %"18", ptr addrspace(5) %2, align 4 - %"30" = load i32, ptr addrspace(5) %2, align 4 - store i32 %"30", ptr addrspace(5) %"7", align 4 - %"20" = load i32, ptr addrspace(5) %"6", align 4 + store i32 %"17", ptr addrspace(5) %2, align 4 + %"29" = load i32, ptr addrspace(5) %2, align 4 + store i32 %"29", ptr addrspace(5) %"7", align 4 + %"19" = load i32, ptr addrspace(5) %"6", align 4 %3 = alloca i32, align 4, addrspace(5) - store i32 %"20", ptr addrspace(5) %3, align 4 - %"31" = load i32, ptr addrspace(5) %3, align 4 - store i32 %"31", ptr addrspace(5) %"8", align 4 - %"21" = load i64, ptr addrspace(5) %"5", align 8 - %"22" = load i32, ptr addrspace(5) %"7", align 4 - %"32" = inttoptr i64 %"21" to ptr - store i32 %"22", ptr %"32", align 4 - %"23" = load i64, ptr addrspace(5) 
%"5", align 8 - %"24" = load i32, ptr addrspace(5) %"8", align 4 - %"34" = inttoptr i64 %"23" to ptr - %"37" = getelementptr inbounds i8, ptr %"34", i64 4 - store i32 %"24", ptr %"37", align 4 + store i32 %"19", ptr addrspace(5) %3, align 4 + %"30" = load i32, ptr addrspace(5) %3, align 4 + store i32 %"30", ptr addrspace(5) %"8", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load i32, ptr addrspace(5) %"7", align 4 + %"31" = inttoptr i64 %"20" to ptr + store i32 %"21", ptr %"31", align 4 + %"22" = load i64, ptr addrspace(5) %"5", align 8 + %"23" = load i32, ptr addrspace(5) %"8", align 4 + %"33" = inttoptr i64 %"22" to ptr + %"36" = getelementptr inbounds i8, ptr %"33", i64 4 + store i32 %"23", ptr %"36", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_u32_s16.ll b/ptx/src/test/spirv_run/cvt_u32_s16.ll index 7ab8366..141f83f 100644 --- a/ptx/src/test/spirv_run/cvt_u32_s16.ll +++ b/ptx/src/test/spirv_run/cvt_u32_s16.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvt_u32_s16(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @cvt_u32_s16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr 
addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"13" to ptr addrspace(1) - %"12" = load i16, ptr addrspace(1) %"20", align 2 - store i16 %"12", ptr addrspace(5) %"6", align 2 - %"15" = load i16, ptr addrspace(5) %"6", align 2 - %"21" = sext i16 %"15" to i32 - store i32 %"21", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i32, ptr addrspace(5) %"7", align 4 - %"23" = inttoptr i64 %"16" to ptr - store i32 %"17", ptr %"23", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr addrspace(1) + %"11" = load i16, ptr addrspace(1) %"19", align 2 + store i16 %"11", ptr addrspace(5) %"6", align 2 + %"14" = load i16, ptr addrspace(5) %"6", align 2 + %"20" = sext i16 %"14" to i32 + store i32 %"20", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i32, ptr addrspace(5) %"7", align 4 + %"22" = inttoptr i64 %"15" to ptr + store i32 %"16", ptr %"22", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvta.ll b/ptx/src/test/spirv_run/cvta.ll index 8cba990..d5c0f73 100644 --- a/ptx/src/test/spirv_run/cvta.ll +++ b/ptx/src/test/spirv_run/cvta.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"27": +define protected amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"26": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca 
i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %0 = inttoptr i64 %"12" to ptr + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %0 = inttoptr i64 %"11" to ptr %1 = addrspacecast ptr %0 to ptr addrspace(1) - %"21" = ptrtoint ptr addrspace(1) %1 to i64 - store i64 %"21", ptr addrspace(5) %"4", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %2 = inttoptr i64 %"14" to ptr + %"20" = ptrtoint ptr addrspace(1) %1 to i64 + store i64 %"20", ptr addrspace(5) %"4", align 8 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %2 = inttoptr i64 %"13" to ptr %3 = addrspacecast ptr %2 to ptr addrspace(1) - %"23" = ptrtoint ptr addrspace(1) %3 to i64 - store i64 %"23", ptr addrspace(5) %"5", align 8 - %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"22" = ptrtoint ptr addrspace(1) %3 to i64 + store i64 %"22", ptr addrspace(5) %"5", align 8 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"15" to ptr addrspace(1) + %"14" = load float, ptr addrspace(1) %"24", align 4 + store float %"14", ptr addrspace(5) %"6", align 4 + %"16" = load i64, ptr addrspace(5) %"5", align 8 + %"17" = load float, ptr addrspace(5) %"6", align 4 %"25" = inttoptr i64 %"16" to ptr addrspace(1) - %"15" = load float, ptr addrspace(1) %"25", align 4 - store float %"15", ptr addrspace(5) %"6", align 4 - %"17" = load i64, ptr addrspace(5) %"5", align 8 - %"18" = load float, ptr addrspace(5) %"6", align 4 - %"26" = inttoptr 
i64 %"17" to ptr addrspace(1) - store float %"18", ptr addrspace(1) %"26", align 4 + store float %"17", ptr addrspace(1) %"25", align 4 ret void } diff --git a/ptx/src/test/spirv_run/div_approx.ll b/ptx/src/test/spirv_run/div_approx.ll index 91b3fb7..833065e 100644 --- a/ptx/src/test/spirv_run/div_approx.ll +++ b/ptx/src/test/spirv_run/div_approx.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load float, ptr %"25", align 4 - store float %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load float, ptr %"30", align 4 - store float %"14", ptr addrspace(5) %"7", align 4 - %"17" = load float, ptr addrspace(5) %"6", align 4 - %"18" = load 
float, ptr addrspace(5) %"7", align 4 - %"16" = fdiv arcp afn float %"17", %"18" - store float %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load float, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store float %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load float, ptr %"24", align 4 + store float %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load float, ptr %"29", align 4 + store float %"13", ptr addrspace(5) %"7", align 4 + %"16" = load float, ptr addrspace(5) %"6", align 4 + %"17" = load float, ptr addrspace(5) %"7", align 4 + %"15" = fdiv arcp afn float %"16", %"17" + store float %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load float, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store float %"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/test/spirv_run/dp4a.ll b/ptx/src/test/spirv_run/dp4a.ll index f55aa62..2ada6cb 100644 --- a/ptx/src/test/spirv_run/dp4a.ll +++ b/ptx/src/test/spirv_run/dp4a.ll @@ -3,44 +3,42 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__dp4a_s32_s32(i32, i32, i32) #0 -define protected amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #1 { -"39": +define protected amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 { +"38": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, 
align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"28", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"14" to ptr - %"13" = load i32, ptr %"31", align 4 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"32" = inttoptr i64 %"16" to ptr - %"46" = getelementptr inbounds i8, ptr %"32", i64 4 - %"15" = load i32, ptr %"46", align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"33" = inttoptr i64 %"18" to ptr - %"48" = getelementptr inbounds i8, ptr %"33", i64 8 - %"17" = load i32, ptr %"48", align 4 - store i32 %"17", ptr addrspace(5) %"8", align 4 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"21" = load i32, ptr addrspace(5) %"7", align 4 - %"22" = load i32, ptr addrspace(5) %"8", align 4 - %"34" = call i32 @__zluda_ptx_impl__dp4a_s32_s32(i32 %"20", i32 %"21", i32 %"22") - store i32 %"34", ptr addrspace(5) %"6", align 4 - %"23" = load i64, ptr addrspace(5) %"5", align 8 - %"24" = load i32, ptr addrspace(5) %"6", align 4 - %"38" = inttoptr i64 %"23" to ptr - store i32 %"24", ptr %"38", align 4 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"13" to ptr + %"12" = load i32, ptr %"30", align 4 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"31" = inttoptr i64 %"15" to ptr + %"45" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load i32, ptr %"45", align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"17" = load i64, ptr addrspace(5) 
%"4", align 8 + %"32" = inttoptr i64 %"17" to ptr + %"47" = getelementptr inbounds i8, ptr %"32", i64 8 + %"16" = load i32, ptr %"47", align 4 + store i32 %"16", ptr addrspace(5) %"8", align 4 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"20" = load i32, ptr addrspace(5) %"7", align 4 + %"21" = load i32, ptr addrspace(5) %"8", align 4 + %"33" = call i32 @__zluda_ptx_impl__dp4a_s32_s32(i32 %"19", i32 %"20", i32 %"21") + store i32 %"33", ptr addrspace(5) %"6", align 4 + %"22" = load i64, ptr addrspace(5) %"5", align 8 + %"23" = load i32, ptr addrspace(5) %"6", align 4 + %"37" = inttoptr i64 %"22" to ptr + store i32 %"23", ptr %"37", align 4 ret void } diff --git a/ptx/src/test/spirv_run/ex2.ll b/ptx/src/test/spirv_run/ex2.ll index 8e13d43..b5e671e 100644 --- a/ptx/src/test/spirv_run/ex2.ll +++ b/ptx/src/test/spirv_run/ex2.ll @@ -1,69 +1,67 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #0 { -"57": +define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { +"56": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"46", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"47", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"48", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 
- %"49" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"49", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = call afn float @llvm.exp2.f32(float %"14") - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"50" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"50", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"51" = inttoptr i64 %"18" to ptr - %"59" = getelementptr inbounds i8, ptr %"51", i64 4 - %"17" = load float, ptr %"59", align 4 - store float %"17", ptr addrspace(5) %"6", align 4 - %"20" = load float, ptr addrspace(5) %"6", align 4 - %"19" = call afn float @llvm.exp2.f32(float %"20") - store float %"19", ptr addrspace(5) %"6", align 4 - %"21" = load i64, ptr addrspace(5) %"5", align 8 - %"22" = load float, ptr addrspace(5) %"6", align 4 - %"52" = inttoptr i64 %"21" to ptr - %"61" = getelementptr inbounds i8, ptr %"52", i64 4 - store float %"22", ptr %"61", align 4 - %"24" = load i64, ptr addrspace(5) %"4", align 8 - %"53" = inttoptr i64 %"24" to ptr - %"63" = getelementptr inbounds i8, ptr %"53", i64 8 - %"23" = load float, ptr %"63", align 4 - store float %"23", ptr addrspace(5) %"6", align 4 - %"26" = load float, ptr addrspace(5) %"6", align 4 - %"25" = call afn float @llvm.exp2.f32(float %"26") - store float %"25", ptr addrspace(5) %"6", align 4 - %"27" = load i64, ptr addrspace(5) %"5", align 8 - %"28" = load float, ptr addrspace(5) %"6", align 4 - %"54" = inttoptr i64 %"27" to ptr - %"65" = getelementptr inbounds i8, ptr %"54", i64 8 - store float %"28", ptr %"65", align 4 - %"30" = load i64, ptr addrspace(5) %"4", align 8 - %"55" = inttoptr i64 %"30" to ptr - %"67" = getelementptr inbounds i8, ptr %"55", i64 12 - %"29" = load float, ptr %"67", align 4 - store float %"29", ptr addrspace(5) %"6", align 4 - %"32" = load float, ptr addrspace(5) %"6", 
align 4 - %"31" = call afn float @llvm.exp2.f32(float %"32") - store float %"31", ptr addrspace(5) %"6", align 4 - %"33" = load i64, ptr addrspace(5) %"5", align 8 - %"34" = load float, ptr addrspace(5) %"6", align 4 - %"56" = inttoptr i64 %"33" to ptr - %"69" = getelementptr inbounds i8, ptr %"56", i64 12 - store float %"34", ptr %"69", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"48" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"48", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = call afn float @llvm.exp2.f32(float %"13") + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"49" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"49", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"50" = inttoptr i64 %"17" to ptr + %"58" = getelementptr inbounds i8, ptr %"50", i64 4 + %"16" = load float, ptr %"58", align 4 + store float %"16", ptr addrspace(5) %"6", align 4 + %"19" = load float, ptr addrspace(5) %"6", align 4 + %"18" = call afn float @llvm.exp2.f32(float %"19") + store float %"18", ptr addrspace(5) %"6", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load float, ptr addrspace(5) %"6", align 4 + %"51" = inttoptr i64 %"20" to ptr + %"60" = getelementptr inbounds i8, ptr %"51", i64 4 + store float %"21", ptr %"60", align 4 + %"23" = load i64, ptr addrspace(5) %"4", align 8 + %"52" = inttoptr i64 %"23" to ptr + %"62" = getelementptr inbounds i8, ptr %"52", i64 8 + %"22" = load float, ptr %"62", align 4 + store float %"22", ptr addrspace(5) %"6", align 4 + %"25" = load float, ptr addrspace(5) %"6", align 4 + %"24" = call afn float @llvm.exp2.f32(float %"25") + store float %"24", ptr addrspace(5) %"6", align 4 + %"26" = load i64, ptr addrspace(5) %"5", align 8 + %"27" = load 
float, ptr addrspace(5) %"6", align 4 + %"53" = inttoptr i64 %"26" to ptr + %"64" = getelementptr inbounds i8, ptr %"53", i64 8 + store float %"27", ptr %"64", align 4 + %"29" = load i64, ptr addrspace(5) %"4", align 8 + %"54" = inttoptr i64 %"29" to ptr + %"66" = getelementptr inbounds i8, ptr %"54", i64 12 + %"28" = load float, ptr %"66", align 4 + store float %"28", ptr addrspace(5) %"6", align 4 + %"31" = load float, ptr addrspace(5) %"6", align 4 + %"30" = call afn float @llvm.exp2.f32(float %"31") + store float %"30", ptr addrspace(5) %"6", align 4 + %"32" = load i64, ptr addrspace(5) %"5", align 8 + %"33" = load float, ptr addrspace(5) %"6", align 4 + %"55" = inttoptr i64 %"32" to ptr + %"68" = getelementptr inbounds i8, ptr %"55", i64 12 + store float %"33", ptr %"68", align 4 ret void } diff --git a/ptx/src/test/spirv_run/extern_shared.ll b/ptx/src/test/spirv_run/extern_shared.ll index 34f1d33..eeb0d50 100644 --- a/ptx/src/test/spirv_run/extern_shared.ll +++ b/ptx/src/test/spirv_run/extern_shared.ll @@ -3,31 +3,29 @@ target triple = "amdgcn-amd-amdhsa" @shared_mem = external hidden addrspace(3) global [0 x i32] -define protected amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) 
%"6", align 8 - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = inttoptr i64 %"13" to ptr addrspace(1) - %"12" = load i64, ptr addrspace(1) %"20", align 8 - store i64 %"12", ptr addrspace(5) %"7", align 8 - %"14" = load i64, ptr addrspace(5) %"7", align 8 - store i64 %"14", ptr addrspace(3) @shared_mem, align 8 - %"15" = load i64, ptr addrspace(3) @shared_mem, align 8 - store i64 %"15", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"6", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"23" = inttoptr i64 %"16" to ptr addrspace(1) - store i64 %"17", ptr addrspace(1) %"23", align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = inttoptr i64 %"12" to ptr addrspace(1) + %"11" = load i64, ptr addrspace(1) %"19", align 8 + store i64 %"11", ptr addrspace(5) %"7", align 8 + %"13" = load i64, ptr addrspace(5) %"7", align 8 + store i64 %"13", ptr addrspace(3) @shared_mem, align 8 + %"14" = load i64, ptr addrspace(3) @shared_mem, align 8 + store i64 %"14", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"6", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"22" = inttoptr i64 %"15" to ptr addrspace(1) + store i64 %"16", ptr addrspace(1) %"22", align 8 ret void } diff --git a/ptx/src/test/spirv_run/extern_shared_call.ll b/ptx/src/test/spirv_run/extern_shared_call.ll index 241053f..cdd37be 100644 --- a/ptx/src/test/spirv_run/extern_shared_call.ll +++ b/ptx/src/test/spirv_run/extern_shared_call.ll @@ -3,49 +3,45 @@ target triple = "amdgcn-amd-amdhsa" @shared_mem = external hidden addrspace(3) global [0 x i32], align 4 -define private void @"2"(ptr addrspace(3) %"37") #0 { -"35": +define private void @"2"(ptr addrspace(3) %"35") #0 { +"33": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"3" = alloca i64, 
align 8, addrspace(5) - %"14" = load i64, ptr addrspace(3) %"37", align 8 - store i64 %"14", ptr addrspace(5) %"3", align 8 - %"16" = load i64, ptr addrspace(5) %"3", align 8 - %"15" = add i64 %"16", 2 - store i64 %"15", ptr addrspace(5) %"3", align 8 - %"17" = load i64, ptr addrspace(5) %"3", align 8 - store i64 %"17", ptr addrspace(3) %"37", align 8 + %"12" = load i64, ptr addrspace(3) %"35", align 8 + store i64 %"12", ptr addrspace(5) %"3", align 8 + %"14" = load i64, ptr addrspace(5) %"3", align 8 + %"13" = add i64 %"14", 2 + store i64 %"13", ptr addrspace(5) %"3", align 8 + %"15" = load i64, ptr addrspace(5) %"3", align 8 + store i64 %"15", ptr addrspace(3) %"35", align 8 ret void } -define protected amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"36": - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 - %"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 +define protected amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 { +"34": + %"11" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"11", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) - %"18" = load i64, ptr addrspace(4) %"27", align 8 - store i64 %"18", ptr addrspace(5) %"7", align 8 - %"19" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"19", ptr addrspace(5) %"8", align 8 - %"21" = load i64, ptr addrspace(5) %"7", align 8 - %"31" = inttoptr i64 %"21" to ptr addrspace(1) - %"20" = load i64, ptr addrspace(1) %"31", align 8 - store i64 %"20", ptr addrspace(5) %"9", align 8 - %"22" = load i64, ptr addrspace(5) %"9", align 8 - store i64 %"22", ptr addrspace(3) @shared_mem, align 8 + %"16" = load i64, ptr addrspace(4) %"25", align 8 + store i64 %"16", ptr addrspace(5) %"7", align 8 + 
%"17" = load i64, ptr addrspace(4) %"26", align 8 + store i64 %"17", ptr addrspace(5) %"8", align 8 + %"19" = load i64, ptr addrspace(5) %"7", align 8 + %"29" = inttoptr i64 %"19" to ptr addrspace(1) + %"18" = load i64, ptr addrspace(1) %"29", align 8 + store i64 %"18", ptr addrspace(5) %"9", align 8 + %"20" = load i64, ptr addrspace(5) %"9", align 8 + store i64 %"20", ptr addrspace(3) @shared_mem, align 8 call void @"2"(ptr addrspace(3) @shared_mem) - %"23" = load i64, ptr addrspace(3) @shared_mem, align 8 - store i64 %"23", ptr addrspace(5) %"9", align 8 - %"24" = load i64, ptr addrspace(5) %"8", align 8 - %"25" = load i64, ptr addrspace(5) %"9", align 8 - %"34" = inttoptr i64 %"24" to ptr addrspace(1) - store i64 %"25", ptr addrspace(1) %"34", align 8 + %"21" = load i64, ptr addrspace(3) @shared_mem, align 8 + store i64 %"21", ptr addrspace(5) %"9", align 8 + %"22" = load i64, ptr addrspace(5) %"8", align 8 + %"23" = load i64, ptr addrspace(5) %"9", align 8 + %"32" = inttoptr i64 %"22" to ptr addrspace(1) + store i64 %"23", ptr addrspace(1) %"32", align 8 ret void } diff --git a/ptx/src/test/spirv_run/fma.ll b/ptx/src/test/spirv_run/fma.ll index d518432..1dff2b8 100644 --- a/ptx/src/test/spirv_run/fma.ll +++ b/ptx/src/test/spirv_run/fma.ll @@ -1,44 +1,42 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { -"35": +define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { +"34": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca 
i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"28", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"14" to ptr - %"13" = load float, ptr %"31", align 4 - store float %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"32" = inttoptr i64 %"16" to ptr - %"37" = getelementptr inbounds i8, ptr %"32", i64 4 - %"15" = load float, ptr %"37", align 4 - store float %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"33" = inttoptr i64 %"18" to ptr - %"39" = getelementptr inbounds i8, ptr %"33", i64 8 - %"17" = load float, ptr %"39", align 4 - store float %"17", ptr addrspace(5) %"8", align 4 - %"20" = load float, ptr addrspace(5) %"6", align 4 - %"21" = load float, ptr addrspace(5) %"7", align 4 - %"22" = load float, ptr addrspace(5) %"8", align 4 - %"19" = call float @llvm.fma.f32(float %"20", float %"21", float %"22") - store float %"19", ptr addrspace(5) %"6", align 4 - %"23" = load i64, ptr addrspace(5) %"5", align 8 - %"24" = load float, ptr addrspace(5) %"6", align 4 - %"34" = inttoptr i64 %"23" to ptr - store float %"24", ptr %"34", align 4 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"13" to ptr + %"12" = load float, ptr %"30", align 4 + store float %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"31" = inttoptr i64 %"15" to ptr + %"36" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load float, ptr %"36", align 4 + store float 
%"14", ptr addrspace(5) %"7", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"32" = inttoptr i64 %"17" to ptr + %"38" = getelementptr inbounds i8, ptr %"32", i64 8 + %"16" = load float, ptr %"38", align 4 + store float %"16", ptr addrspace(5) %"8", align 4 + %"19" = load float, ptr addrspace(5) %"6", align 4 + %"20" = load float, ptr addrspace(5) %"7", align 4 + %"21" = load float, ptr addrspace(5) %"8", align 4 + %"18" = call float @llvm.fma.f32(float %"19", float %"20", float %"21") + store float %"18", ptr addrspace(5) %"6", align 4 + %"22" = load i64, ptr addrspace(5) %"5", align 8 + %"23" = load float, ptr addrspace(5) %"6", align 4 + %"33" = inttoptr i64 %"22" to ptr + store float %"23", ptr %"33", align 4 ret void } diff --git a/ptx/src/test/spirv_run/func_ptr.ll b/ptx/src/test/spirv_run/func_ptr.ll index b7c0603..1160a76 100644 --- a/ptx/src/test/spirv_run/func_ptr.ll +++ b/ptx/src/test/spirv_run/func_ptr.ll @@ -1,56 +1,52 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define private float @"1"(float %"17", float %"18") #0 { -"40": +define private float @"1"(float %"15", float %"16") #0 { +"38": %"3" = alloca float, align 4, addrspace(5) %"4" = alloca float, align 4, addrspace(5) %"2" = alloca float, align 4, addrspace(5) %"13" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"13", align 1 - %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 - store float %"17", ptr addrspace(5) %"3", align 4 - store float %"18", ptr addrspace(5) %"4", align 4 - %"20" = load float, ptr addrspace(5) %"3", align 4 - %"21" = load float, ptr addrspace(5) %"4", align 4 - %"19" = fadd float %"20", %"21" - store float %"19", ptr addrspace(5) %"2", align 4 - %"22" = load float, ptr addrspace(5) %"2", align 4 - ret 
float %"22" + store float %"15", ptr addrspace(5) %"3", align 4 + store float %"16", ptr addrspace(5) %"4", align 4 + %"18" = load float, ptr addrspace(5) %"3", align 4 + %"19" = load float, ptr addrspace(5) %"4", align 4 + %"17" = fadd float %"18", %"19" + store float %"17", ptr addrspace(5) %"2", align 4 + %"20" = load float, ptr addrspace(5) %"2", align 4 + ret float %"20" } -define protected amdgpu_kernel void @func_ptr(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { -"41": - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 - %"16" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"16", align 1 +define protected amdgpu_kernel void @func_ptr(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { +"39": + %"14" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"14", align 1 %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) %"12" = alloca i64, align 8, addrspace(5) - %"23" = load i64, ptr addrspace(4) %"36", align 8 - store i64 %"23", ptr addrspace(5) %"8", align 8 - %"24" = load i64, ptr addrspace(4) %"37", align 8 - store i64 %"24", ptr addrspace(5) %"9", align 8 - %"26" = load i64, ptr addrspace(5) %"8", align 8 - %"38" = inttoptr i64 %"26" to ptr - %"25" = load i64, ptr %"38", align 8 - store i64 %"25", ptr addrspace(5) %"10", align 8 - %"28" = load i64, ptr addrspace(5) %"10", align 8 - %"27" = add i64 %"28", 1 - store i64 %"27", ptr addrspace(5) %"11", align 8 + %"21" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"21", ptr addrspace(5) %"8", align 8 + %"22" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"22", ptr addrspace(5) %"9", align 8 + %"24" = load i64, ptr addrspace(5) %"8", align 8 + %"36" = inttoptr i64 %"24" to ptr + %"23" = load i64, ptr %"36", align 8 + store i64 %"23", ptr 
addrspace(5) %"10", align 8 + %"26" = load i64, ptr addrspace(5) %"10", align 8 + %"25" = add i64 %"26", 1 + store i64 %"25", ptr addrspace(5) %"11", align 8 store i64 ptrtoint (ptr @"1" to i64), ptr addrspace(5) %"12", align 8 - %"31" = load i64, ptr addrspace(5) %"11", align 8 - %"32" = load i64, ptr addrspace(5) %"12", align 8 - %"30" = add i64 %"31", %"32" - store i64 %"30", ptr addrspace(5) %"11", align 8 - %"33" = load i64, ptr addrspace(5) %"9", align 8 - %"34" = load i64, ptr addrspace(5) %"11", align 8 - %"39" = inttoptr i64 %"33" to ptr - store i64 %"34", ptr %"39", align 8 + %"29" = load i64, ptr addrspace(5) %"11", align 8 + %"30" = load i64, ptr addrspace(5) %"12", align 8 + %"28" = add i64 %"29", %"30" + store i64 %"28", ptr addrspace(5) %"11", align 8 + %"31" = load i64, ptr addrspace(5) %"9", align 8 + %"32" = load i64, ptr addrspace(5) %"11", align 8 + %"37" = inttoptr i64 %"31" to ptr + store i64 %"32", ptr %"37", align 8 ret void } diff --git a/ptx/src/test/spirv_run/generic.ll b/ptx/src/test/spirv_run/generic.ll index d746a22..312a7cd 100644 --- a/ptx/src/test/spirv_run/generic.ll +++ b/ptx/src/test/spirv_run/generic.ll @@ -4,66 +4,64 @@ target triple = "amdgcn-amd-amdhsa" @foo = protected addrspace(1) externally_initialized global [4 x i32] [i32 2, i32 3, i32 5, i32 7] @bar = protected addrspace(1) externally_initialized global [4 x i64] [i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 4), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 8), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 12)] -define protected amdgpu_kernel void @generic(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #0 { -"58": +define protected amdgpu_kernel void @generic(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { +"57": %"10" = alloca i1, 
align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) - %"12" = load i64, ptr addrspace(4) %"48", align 8 - store i64 %"12", ptr addrspace(5) %"7", align 8 + %"11" = load i64, ptr addrspace(4) %"47", align 8 + store i64 %"11", ptr addrspace(5) %"7", align 8 %0 = alloca i32, align 4, addrspace(5) store i32 1, ptr addrspace(5) %0, align 4 - %"13" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"13", ptr addrspace(5) %"8", align 4 - %"14" = load i64, ptr addrspace(1) @bar, align 8 - store i64 %"14", ptr addrspace(5) %"6", align 8 - %"16" = load i64, ptr addrspace(5) %"6", align 8 - %"50" = inttoptr i64 %"16" to ptr - %"15" = load i32, ptr %"50", align 4 - store i32 %"15", ptr addrspace(5) %"9", align 4 - %"18" = load i32, ptr addrspace(5) %"8", align 4 - %"19" = load i32, ptr addrspace(5) %"9", align 4 - %"17" = mul i32 %"18", %"19" - store i32 %"17", ptr addrspace(5) %"8", align 4 - %"20" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 8), align 8 - store i64 %"20", ptr addrspace(5) %"6", align 8 - %"22" = load i64, ptr addrspace(5) %"6", align 8 - %"52" = inttoptr i64 %"22" to ptr - %"21" = load i32, ptr %"52", align 4 - store i32 %"21", ptr addrspace(5) %"9", align 4 - %"24" = load i32, ptr addrspace(5) %"8", align 4 - %"25" = load i32, ptr addrspace(5) %"9", align 4 - %"23" = mul i32 %"24", %"25" - store i32 %"23", ptr addrspace(5) %"8", align 4 - %"26" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 16), align 8 - store i64 %"26", ptr addrspace(5) %"6", align 8 - %"28" = load i64, ptr addrspace(5) %"6", align 8 - %"54" = inttoptr i64 %"28" to ptr - %"27" = load i32, ptr %"54", align 4 - store i32 %"27", ptr 
addrspace(5) %"9", align 4 - %"30" = load i32, ptr addrspace(5) %"8", align 4 - %"31" = load i32, ptr addrspace(5) %"9", align 4 - %"29" = mul i32 %"30", %"31" - store i32 %"29", ptr addrspace(5) %"8", align 4 - %"32" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 24), align 8 - store i64 %"32", ptr addrspace(5) %"6", align 8 - %"34" = load i64, ptr addrspace(5) %"6", align 8 - %"56" = inttoptr i64 %"34" to ptr - %"33" = load i32, ptr %"56", align 4 - store i32 %"33", ptr addrspace(5) %"9", align 4 - %"36" = load i32, ptr addrspace(5) %"8", align 4 - %"37" = load i32, ptr addrspace(5) %"9", align 4 - %"35" = mul i32 %"36", %"37" - store i32 %"35", ptr addrspace(5) %"8", align 4 - %"38" = load i64, ptr addrspace(5) %"7", align 8 - %"39" = load i32, ptr addrspace(5) %"8", align 4 - %"57" = inttoptr i64 %"38" to ptr - store i32 %"39", ptr %"57", align 4 + %"12" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"12", ptr addrspace(5) %"8", align 4 + %"13" = load i64, ptr addrspace(1) @bar, align 8 + store i64 %"13", ptr addrspace(5) %"6", align 8 + %"15" = load i64, ptr addrspace(5) %"6", align 8 + %"49" = inttoptr i64 %"15" to ptr + %"14" = load i32, ptr %"49", align 4 + store i32 %"14", ptr addrspace(5) %"9", align 4 + %"17" = load i32, ptr addrspace(5) %"8", align 4 + %"18" = load i32, ptr addrspace(5) %"9", align 4 + %"16" = mul i32 %"17", %"18" + store i32 %"16", ptr addrspace(5) %"8", align 4 + %"19" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 8), align 8 + store i64 %"19", ptr addrspace(5) %"6", align 8 + %"21" = load i64, ptr addrspace(5) %"6", align 8 + %"51" = inttoptr i64 %"21" to ptr + %"20" = load i32, ptr %"51", align 4 + store i32 %"20", ptr addrspace(5) %"9", align 4 + %"23" = load i32, ptr addrspace(5) %"8", align 4 + %"24" = load i32, ptr addrspace(5) %"9", align 4 + %"22" = mul i32 %"23", %"24" + store i32 %"22", ptr addrspace(5) %"8", align 4 + %"25" = load i64, ptr 
addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 16), align 8 + store i64 %"25", ptr addrspace(5) %"6", align 8 + %"27" = load i64, ptr addrspace(5) %"6", align 8 + %"53" = inttoptr i64 %"27" to ptr + %"26" = load i32, ptr %"53", align 4 + store i32 %"26", ptr addrspace(5) %"9", align 4 + %"29" = load i32, ptr addrspace(5) %"8", align 4 + %"30" = load i32, ptr addrspace(5) %"9", align 4 + %"28" = mul i32 %"29", %"30" + store i32 %"28", ptr addrspace(5) %"8", align 4 + %"31" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 24), align 8 + store i64 %"31", ptr addrspace(5) %"6", align 8 + %"33" = load i64, ptr addrspace(5) %"6", align 8 + %"55" = inttoptr i64 %"33" to ptr + %"32" = load i32, ptr %"55", align 4 + store i32 %"32", ptr addrspace(5) %"9", align 4 + %"35" = load i32, ptr addrspace(5) %"8", align 4 + %"36" = load i32, ptr addrspace(5) %"9", align 4 + %"34" = mul i32 %"35", %"36" + store i32 %"34", ptr addrspace(5) %"8", align 4 + %"37" = load i64, ptr addrspace(5) %"7", align 8 + %"38" = load i32, ptr addrspace(5) %"8", align 4 + %"56" = inttoptr i64 %"37" to ptr + store i32 %"38", ptr %"56", align 4 ret void } diff --git a/ptx/src/test/spirv_run/global_array.ll b/ptx/src/test/spirv_run/global_array.ll index 3a8da01..e2ad2f2 100644 --- a/ptx/src/test/spirv_run/global_array.ll +++ b/ptx/src/test/spirv_run/global_array.ll @@ -4,29 +4,27 @@ target triple = "amdgcn-amd-amdhsa" @asdas = protected addrspace(1) externally_initialized global [4 x [2 x i32]] [[2 x i32] [i32 -1, i32 2], [2 x i32] [i32 3, i32 0], [2 x i32] zeroinitializer, [2 x i32] zeroinitializer] @foobar = protected addrspace(1) externally_initialized global [4 x [2 x i64]] [[2 x i64] [i64 -1, i64 2], [2 x i64] [i64 3, i64 0], [2 x i64] [i64 ptrtoint (ptr addrspace(1) @asdas to i64), i64 0], [2 x i64] zeroinitializer] -define protected amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { 
-"22": +define protected amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"21": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %0 = alloca i64, align 8, addrspace(5) store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %0, align 8 - %"11" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"11", ptr addrspace(5) %"6", align 8 - %"12" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"12", ptr addrspace(5) %"7", align 8 - %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"10" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"11" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"11", ptr addrspace(5) %"7", align 8 + %"13" = load i64, ptr addrspace(5) %"6", align 8 + %"19" = inttoptr i64 %"13" to ptr addrspace(1) + %"12" = load i32, ptr addrspace(1) %"19", align 4 + store i32 %"12", ptr addrspace(5) %"8", align 4 + %"14" = load i64, ptr addrspace(5) %"7", align 8 + %"15" = load i32, ptr addrspace(5) %"8", align 4 %"20" = inttoptr i64 %"14" to ptr addrspace(1) - %"13" = load i32, ptr addrspace(1) %"20", align 4 - store i32 %"13", ptr addrspace(5) %"8", align 4 - %"15" = load i64, ptr addrspace(5) %"7", align 8 - %"16" = load i32, ptr addrspace(5) %"8", align 4 - %"21" = inttoptr i64 %"15" to ptr addrspace(1) - store i32 %"16", ptr addrspace(1) %"21", align 4 + store i32 %"15", ptr addrspace(1) %"20", align 4 ret void } diff --git a/ptx/src/test/spirv_run/lanemask_lt.ll b/ptx/src/test/spirv_run/lanemask_lt.ll index d36d4a2..efa1746 100644 --- a/ptx/src/test/spirv_run/lanemask_lt.ll +++ b/ptx/src/test/spirv_run/lanemask_lt.ll @@ -3,41 +3,39 @@ target triple = "amdgcn-amd-amdhsa" 
declare i32 @__zluda_ptx_impl__sreg_lanemask_lt() #0 -define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 { -"40": +define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 { +"39": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"14" = load i64, ptr addrspace(4) %"27", align 8 + store i64 %"14", ptr addrspace(5) %"4", align 8 %"15" = load i64, ptr addrspace(4) %"28", align 8 - store i64 %"15", ptr addrspace(5) %"4", align 8 - %"16" = load i64, ptr addrspace(4) %"29", align 8 - store i64 %"16", ptr addrspace(5) %"5", align 8 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"31" = inttoptr i64 %"18" to ptr - %"30" = load i32, ptr %"31", align 4 - store i32 %"30", ptr addrspace(5) %"6", align 4 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"32" = add i32 %"20", 1 - store i32 %"32", ptr addrspace(5) %"7", align 4 - %"12" = call i32 @__zluda_ptx_impl__sreg_lanemask_lt() + store i64 %"15", ptr addrspace(5) %"5", align 8 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"30" = inttoptr i64 %"17" to ptr + %"29" = load i32, ptr %"30", align 4 + store i32 %"29", ptr addrspace(5) %"6", align 4 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"31" = add i32 %"19", 1 + store i32 %"31", ptr addrspace(5) %"7", align 4 + %"11" = call i32 @__zluda_ptx_impl__sreg_lanemask_lt() %0 = alloca i32, align 4, addrspace(5) - store i32 %"12", ptr addrspace(5) %0, align 4 - %"34" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"34", ptr addrspace(5) %"8", align 4 - %"23" = load i32, ptr 
addrspace(5) %"7", align 4 - %"24" = load i32, ptr addrspace(5) %"8", align 4 - %"35" = add i32 %"23", %"24" - store i32 %"35", ptr addrspace(5) %"7", align 4 - %"25" = load i64, ptr addrspace(5) %"5", align 8 - %"26" = load i32, ptr addrspace(5) %"7", align 4 - %"38" = inttoptr i64 %"25" to ptr - store i32 %"26", ptr %"38", align 4 + store i32 %"11", ptr addrspace(5) %0, align 4 + %"33" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"33", ptr addrspace(5) %"8", align 4 + %"22" = load i32, ptr addrspace(5) %"7", align 4 + %"23" = load i32, ptr addrspace(5) %"8", align 4 + %"34" = add i32 %"22", %"23" + store i32 %"34", ptr addrspace(5) %"7", align 4 + %"24" = load i64, ptr addrspace(5) %"5", align 8 + %"25" = load i32, ptr addrspace(5) %"7", align 4 + %"37" = inttoptr i64 %"24" to ptr + store i32 %"25", ptr %"37", align 4 ret void } diff --git a/ptx/src/test/spirv_run/ld_st.ll b/ptx/src/test/spirv_run/ld_st.ll index c8d6eb1..0fe06f2 100644 --- a/ptx/src/test/spirv_run/ld_st.ll +++ b/ptx/src/test/spirv_run/ld_st.ll @@ -1,27 +1,25 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 { -"19": +define protected amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { +"18": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"14", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 - 
store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"16", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"16" = inttoptr i64 %"11" to ptr + %"10" = load i64, ptr %"16", align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"6", align 8 %"17" = inttoptr i64 %"12" to ptr - %"11" = load i64, ptr %"17", align 8 - store i64 %"11", ptr addrspace(5) %"6", align 8 - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"6", align 8 - %"18" = inttoptr i64 %"13" to ptr - store i64 %"14", ptr %"18", align 8 + store i64 %"13", ptr %"17", align 8 ret void } diff --git a/ptx/src/test/spirv_run/ld_st_implicit.ll b/ptx/src/test/spirv_run/ld_st_implicit.ll index da47ad8..3ec1474 100644 --- a/ptx/src/test/spirv_run/ld_st_implicit.ll +++ b/ptx/src/test/spirv_run/ld_st_implicit.ll @@ -1,35 +1,33 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": +define protected amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"22": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr 
addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 %0 = alloca i64, align 8, addrspace(5) store i64 81985529216486895, ptr addrspace(5) %0, align 8 - %"11" = load i64, ptr addrspace(5) %0, align 8 + %"10" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr addrspace(1) + %"18" = load float, ptr addrspace(1) %"19", align 4 + %"23" = bitcast float %"18" to i32 + %"11" = zext i32 %"23" to i64 store i64 %"11", ptr addrspace(5) %"6", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 %"20" = inttoptr i64 %"13" to ptr addrspace(1) - %"19" = load float, ptr addrspace(1) %"20", align 4 - %"24" = bitcast float %"19" to i32 - %"12" = zext i32 %"24" to i64 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"21" = inttoptr i64 %"14" to ptr addrspace(1) - %"26" = trunc i64 %"15" to i32 - %"22" = bitcast i32 %"26" to float - store float %"22", ptr addrspace(1) %"21", align 4 + %"25" = trunc i64 %"14" to i32 + %"21" = bitcast i32 %"25" to float + store float %"21", ptr addrspace(1) %"20", align 4 ret void } diff --git a/ptx/src/test/spirv_run/ld_st_offset.ll b/ptx/src/test/spirv_run/ld_st_offset.ll index 1b020cb..ee8bde6 100644 --- a/ptx/src/test/spirv_run/ld_st_offset.ll +++ b/ptx/src/test/spirv_run/ld_st_offset.ll @@ -1,38 +1,36 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"30": +define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { +"29": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"23", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"26", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"25", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"26" = inttoptr i64 %"14" to ptr + %"31" = getelementptr inbounds i8, ptr %"26", i64 4 + %"13" = load i32, ptr %"31", align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i32, ptr addrspace(5) %"7", align 4 %"27" = inttoptr i64 %"15" to ptr - %"32" = getelementptr inbounds i8, ptr %"27", i64 4 
- %"14" = load i32, ptr %"32", align 4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i32, ptr addrspace(5) %"7", align 4 - %"28" = inttoptr i64 %"16" to ptr - store i32 %"17", ptr %"28", align 4 - %"18" = load i64, ptr addrspace(5) %"5", align 8 - %"19" = load i32, ptr addrspace(5) %"6", align 4 - %"29" = inttoptr i64 %"18" to ptr - %"34" = getelementptr inbounds i8, ptr %"29", i64 4 - store i32 %"19", ptr %"34", align 4 + store i32 %"16", ptr %"27", align 4 + %"17" = load i64, ptr addrspace(5) %"5", align 8 + %"18" = load i32, ptr addrspace(5) %"6", align 4 + %"28" = inttoptr i64 %"17" to ptr + %"33" = getelementptr inbounds i8, ptr %"28", i64 4 + store i32 %"18", ptr %"33", align 4 ret void } diff --git a/ptx/src/test/spirv_run/lg2.ll b/ptx/src/test/spirv_run/lg2.ll index 5e29fe2..7dd63d6 100644 --- a/ptx/src/test/spirv_run/lg2.ll +++ b/ptx/src/test/spirv_run/lg2.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", 
align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"19", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = call afn float @llvm.log2.f32(float %"14") - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"18", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = call afn float @llvm.log2.f32(float %"13") + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/local_align.ll b/ptx/src/test/spirv_run/local_align.ll index 035d1f7..13fbe4b 100644 --- a/ptx/src/test/spirv_run/local_align.ll +++ b/ptx/src/test/spirv_run/local_align.ll @@ -1,28 +1,26 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": +define protected amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 { +"19": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, 
addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca [8 x i8], align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"15", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"16", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"11" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"11", ptr addrspace(5) %"6", align 8 - %"13" = load i64, ptr addrspace(5) %"5", align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"17" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"17", align 8 + store i64 %"11", ptr addrspace(5) %"7", align 8 + %"13" = load i64, ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"7", align 8 %"18" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"18", align 8 - store i64 %"12", ptr addrspace(5) %"7", align 8 - %"14" = load i64, ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"7", align 8 - %"19" = inttoptr i64 %"14" to ptr - store i64 %"15", ptr %"19", align 8 + store i64 %"14", ptr %"18", align 8 ret void } diff --git a/ptx/src/test/spirv_run/mad_hi_cc.ll b/ptx/src/test/spirv_run/mad_hi_cc.ll index a5b1595..6c86dbc 100644 --- a/ptx/src/test/spirv_run/mad_hi_cc.ll +++ b/ptx/src/test/spirv_run/mad_hi_cc.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"61", ptr addrspace(4) byref(i64) %"62") #0 { -"78": +define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60", ptr addrspace(4) byref(i64) %"61") #0 { +"77": %"14" = alloca i1, align 
1, addrspace(5) store i1 false, ptr addrspace(5) %"14", align 1 - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -17,69 +15,69 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"61" %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i32, align 4, addrspace(5) %"13" = alloca i32, align 4, addrspace(5) + %"15" = load i64, ptr addrspace(4) %"60", align 8 + store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"61", align 8 - store i64 %"16", ptr addrspace(5) %"4", align 8 - %"17" = load i64, ptr addrspace(4) %"62", align 8 - store i64 %"17", ptr addrspace(5) %"5", align 8 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"64" = inttoptr i64 %"19" to ptr - %"63" = load i32, ptr %"64", align 4 - store i32 %"63", ptr addrspace(5) %"8", align 4 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"65" = inttoptr i64 %"21" to ptr - %"80" = getelementptr inbounds i8, ptr %"65", i64 4 - %"66" = load i32, ptr %"80", align 4 - store i32 %"66", ptr addrspace(5) %"9", align 4 - %"23" = load i64, ptr addrspace(5) %"4", align 8 - %"67" = inttoptr i64 %"23" to ptr - %"82" = getelementptr inbounds i8, ptr %"67", i64 8 - %"22" = load i32, ptr %"82", align 4 - store i32 %"22", ptr addrspace(5) %"10", align 4 - %"26" = load i32, ptr addrspace(5) %"8", align 4 - %"27" = load i32, ptr addrspace(5) %"9", align 4 - %"28" = load i32, ptr addrspace(5) %"10", align 4 - %0 = sext i32 %"26" to i64 - %1 = sext i32 %"27" to i64 + store i64 %"16", ptr addrspace(5) %"5", align 8 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"63" = inttoptr i64 %"18" to ptr + %"62" = load i32, ptr %"63", align 4 + store i32 %"62", ptr addrspace(5) %"8", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"64" = inttoptr i64 %"20" to ptr + %"79" = getelementptr 
inbounds i8, ptr %"64", i64 4 + %"65" = load i32, ptr %"79", align 4 + store i32 %"65", ptr addrspace(5) %"9", align 4 + %"22" = load i64, ptr addrspace(5) %"4", align 8 + %"66" = inttoptr i64 %"22" to ptr + %"81" = getelementptr inbounds i8, ptr %"66", i64 8 + %"21" = load i32, ptr %"81", align 4 + store i32 %"21", ptr addrspace(5) %"10", align 4 + %"25" = load i32, ptr addrspace(5) %"8", align 4 + %"26" = load i32, ptr addrspace(5) %"9", align 4 + %"27" = load i32, ptr addrspace(5) %"10", align 4 + %0 = sext i32 %"25" to i64 + %1 = sext i32 %"26" to i64 %2 = mul nsw i64 %0, %1 %3 = lshr i64 %2, 32 %4 = trunc i64 %3 to i32 - %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %4, i32 %"28") - %"24" = extractvalue { i32, i1 } %5, 0 - %"25" = extractvalue { i32, i1 } %5, 1 - store i32 %"24", ptr addrspace(5) %"7", align 4 - store i1 %"25", ptr addrspace(5) %"14", align 1 + %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %4, i32 %"27") + %"23" = extractvalue { i32, i1 } %5, 0 + %"24" = extractvalue { i32, i1 } %5, 1 + store i32 %"23", ptr addrspace(5) %"7", align 4 + store i1 %"24", ptr addrspace(5) %"14", align 1 %6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -2) - %"29" = extractvalue { i32, i1 } %6, 0 - %"30" = extractvalue { i32, i1 } %6, 1 - store i32 %"29", ptr addrspace(5) %"6", align 4 - store i1 %"30", ptr addrspace(5) %"14", align 1 - %"32" = load i1, ptr addrspace(5) %"14", align 1 - %7 = zext i1 %"32" to i32 - %"71" = add i32 0, %7 - store i32 %"71", ptr addrspace(5) %"12", align 4 + %"28" = extractvalue { i32, i1 } %6, 0 + %"29" = extractvalue { i32, i1 } %6, 1 + store i32 %"28", ptr addrspace(5) %"6", align 4 + store i1 %"29", ptr addrspace(5) %"14", align 1 + %"31" = load i1, ptr addrspace(5) %"14", align 1 + %7 = zext i1 %"31" to i32 + %"70" = add i32 0, %7 + store i32 %"70", ptr addrspace(5) %"12", align 4 %8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1) - %"33" = extractvalue { i32, i1 } %8, 0 - %"34" = 
extractvalue { i32, i1 } %8, 1 - store i32 %"33", ptr addrspace(5) %"6", align 4 - store i1 %"34", ptr addrspace(5) %"14", align 1 - %"36" = load i1, ptr addrspace(5) %"14", align 1 - %9 = zext i1 %"36" to i32 - %"72" = add i32 0, %9 - store i32 %"72", ptr addrspace(5) %"13", align 4 - %"37" = load i64, ptr addrspace(5) %"5", align 8 - %"38" = load i32, ptr addrspace(5) %"7", align 4 - %"73" = inttoptr i64 %"37" to ptr - store i32 %"38", ptr %"73", align 4 - %"39" = load i64, ptr addrspace(5) %"5", align 8 - %"40" = load i32, ptr addrspace(5) %"12", align 4 - %"74" = inttoptr i64 %"39" to ptr - %"84" = getelementptr inbounds i8, ptr %"74", i64 4 - store i32 %"40", ptr %"84", align 4 - %"41" = load i64, ptr addrspace(5) %"5", align 8 - %"42" = load i32, ptr addrspace(5) %"13", align 4 - %"76" = inttoptr i64 %"41" to ptr - %"86" = getelementptr inbounds i8, ptr %"76", i64 8 - store i32 %"42", ptr %"86", align 4 + %"32" = extractvalue { i32, i1 } %8, 0 + %"33" = extractvalue { i32, i1 } %8, 1 + store i32 %"32", ptr addrspace(5) %"6", align 4 + store i1 %"33", ptr addrspace(5) %"14", align 1 + %"35" = load i1, ptr addrspace(5) %"14", align 1 + %9 = zext i1 %"35" to i32 + %"71" = add i32 0, %9 + store i32 %"71", ptr addrspace(5) %"13", align 4 + %"36" = load i64, ptr addrspace(5) %"5", align 8 + %"37" = load i32, ptr addrspace(5) %"7", align 4 + %"72" = inttoptr i64 %"36" to ptr + store i32 %"37", ptr %"72", align 4 + %"38" = load i64, ptr addrspace(5) %"5", align 8 + %"39" = load i32, ptr addrspace(5) %"12", align 4 + %"73" = inttoptr i64 %"38" to ptr + %"83" = getelementptr inbounds i8, ptr %"73", i64 4 + store i32 %"39", ptr %"83", align 4 + %"40" = load i64, ptr addrspace(5) %"5", align 8 + %"41" = load i32, ptr addrspace(5) %"13", align 4 + %"75" = inttoptr i64 %"40" to ptr + %"85" = getelementptr inbounds i8, ptr %"75", i64 8 + store i32 %"41", ptr %"85", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mad_s32.ll b/ptx/src/test/spirv_run/mad_s32.ll index 
75a204a..5ab86ad 100644 --- a/ptx/src/test/spirv_run/mad_s32.ll +++ b/ptx/src/test/spirv_run/mad_s32.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"53", ptr addrspace(4) byref(i64) %"54") #0 { -"76": +define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52", ptr addrspace(4) byref(i64) %"53") #0 { +"75": %"13" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"13", align 1 - %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -16,67 +14,67 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"53", %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i64, align 8, addrspace(5) + %"14" = load i64, ptr addrspace(4) %"52", align 8 + store i64 %"14", ptr addrspace(5) %"4", align 8 %"15" = load i64, ptr addrspace(4) %"53", align 8 - store i64 %"15", ptr addrspace(5) %"4", align 8 - %"16" = load i64, ptr addrspace(4) %"54", align 8 - store i64 %"16", ptr addrspace(5) %"5", align 8 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"56" = inttoptr i64 %"18" to ptr - %"55" = load i32, ptr %"56", align 4 - store i32 %"55", ptr addrspace(5) %"9", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"57" = inttoptr i64 %"20" to ptr - %"78" = getelementptr inbounds i8, ptr %"57", i64 4 - %"58" = load i32, ptr %"78", align 4 - store i32 %"58", ptr addrspace(5) %"10", align 4 - %"22" = load i64, ptr addrspace(5) %"4", align 8 - %"59" = inttoptr i64 %"22" to ptr - %"80" = getelementptr inbounds i8, ptr %"59", i64 8 - 
%"21" = load i64, ptr %"80", align 8 - store i64 %"21", ptr addrspace(5) %"12", align 8 - %"24" = load i64, ptr addrspace(5) %"4", align 8 - %"60" = inttoptr i64 %"24" to ptr - %"82" = getelementptr inbounds i8, ptr %"60", i64 16 - %"61" = load i32, ptr %"82", align 4 - store i32 %"61", ptr addrspace(5) %"11", align 4 - %"26" = load i32, ptr addrspace(5) %"9", align 4 - %"27" = load i32, ptr addrspace(5) %"10", align 4 - %"28" = load i32, ptr addrspace(5) %"11", align 4 - %0 = mul i32 %"26", %"27" - %"25" = add i32 %0, %"28" - store i32 %"25", ptr addrspace(5) %"6", align 4 - %"30" = load i32, ptr addrspace(5) %"9", align 4 - %"31" = load i32, ptr addrspace(5) %"10", align 4 - %"32" = load i32, ptr addrspace(5) %"11", align 4 - %1 = sext i32 %"30" to i64 - %2 = sext i32 %"31" to i64 + store i64 %"15", ptr addrspace(5) %"5", align 8 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"55" = inttoptr i64 %"17" to ptr + %"54" = load i32, ptr %"55", align 4 + store i32 %"54", ptr addrspace(5) %"9", align 4 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"56" = inttoptr i64 %"19" to ptr + %"77" = getelementptr inbounds i8, ptr %"56", i64 4 + %"57" = load i32, ptr %"77", align 4 + store i32 %"57", ptr addrspace(5) %"10", align 4 + %"21" = load i64, ptr addrspace(5) %"4", align 8 + %"58" = inttoptr i64 %"21" to ptr + %"79" = getelementptr inbounds i8, ptr %"58", i64 8 + %"20" = load i64, ptr %"79", align 8 + store i64 %"20", ptr addrspace(5) %"12", align 8 + %"23" = load i64, ptr addrspace(5) %"4", align 8 + %"59" = inttoptr i64 %"23" to ptr + %"81" = getelementptr inbounds i8, ptr %"59", i64 16 + %"60" = load i32, ptr %"81", align 4 + store i32 %"60", ptr addrspace(5) %"11", align 4 + %"25" = load i32, ptr addrspace(5) %"9", align 4 + %"26" = load i32, ptr addrspace(5) %"10", align 4 + %"27" = load i32, ptr addrspace(5) %"11", align 4 + %0 = mul i32 %"25", %"26" + %"24" = add i32 %0, %"27" + store i32 %"24", ptr addrspace(5) %"6", align 4 + %"29" = load i32, ptr 
addrspace(5) %"9", align 4 + %"30" = load i32, ptr addrspace(5) %"10", align 4 + %"31" = load i32, ptr addrspace(5) %"11", align 4 + %1 = sext i32 %"29" to i64 + %2 = sext i32 %"30" to i64 %3 = mul nsw i64 %1, %2 %4 = lshr i64 %3, 32 %5 = trunc i64 %4 to i32 - %"29" = add i32 %5, %"32" - store i32 %"29", ptr addrspace(5) %"7", align 4 - %"34" = load i32, ptr addrspace(5) %"9", align 4 - %"35" = load i32, ptr addrspace(5) %"10", align 4 - %"36" = load i64, ptr addrspace(5) %"12", align 8 - %6 = sext i32 %"34" to i64 - %7 = sext i32 %"35" to i64 + %"28" = add i32 %5, %"31" + store i32 %"28", ptr addrspace(5) %"7", align 4 + %"33" = load i32, ptr addrspace(5) %"9", align 4 + %"34" = load i32, ptr addrspace(5) %"10", align 4 + %"35" = load i64, ptr addrspace(5) %"12", align 8 + %6 = sext i32 %"33" to i64 + %7 = sext i32 %"34" to i64 %8 = mul nsw i64 %6, %7 - %"68" = add i64 %8, %"36" - store i64 %"68", ptr addrspace(5) %"8", align 8 - %"37" = load i64, ptr addrspace(5) %"5", align 8 - %"38" = load i32, ptr addrspace(5) %"6", align 4 - %"72" = inttoptr i64 %"37" to ptr - store i32 %"38", ptr %"72", align 4 - %"39" = load i64, ptr addrspace(5) %"5", align 8 - %"40" = load i32, ptr addrspace(5) %"7", align 4 - %"73" = inttoptr i64 %"39" to ptr - %"84" = getelementptr inbounds i8, ptr %"73", i64 8 - store i32 %"40", ptr %"84", align 4 - %"41" = load i64, ptr addrspace(5) %"5", align 8 - %"42" = load i64, ptr addrspace(5) %"8", align 8 - %"74" = inttoptr i64 %"41" to ptr - %"86" = getelementptr inbounds i8, ptr %"74", i64 16 - store i64 %"42", ptr %"86", align 8 + %"67" = add i64 %8, %"35" + store i64 %"67", ptr addrspace(5) %"8", align 8 + %"36" = load i64, ptr addrspace(5) %"5", align 8 + %"37" = load i32, ptr addrspace(5) %"6", align 4 + %"71" = inttoptr i64 %"36" to ptr + store i32 %"37", ptr %"71", align 4 + %"38" = load i64, ptr addrspace(5) %"5", align 8 + %"39" = load i32, ptr addrspace(5) %"7", align 4 + %"72" = inttoptr i64 %"38" to ptr + %"83" = getelementptr 
inbounds i8, ptr %"72", i64 8 + store i32 %"39", ptr %"83", align 4 + %"40" = load i64, ptr addrspace(5) %"5", align 8 + %"41" = load i64, ptr addrspace(5) %"8", align 8 + %"73" = inttoptr i64 %"40" to ptr + %"85" = getelementptr inbounds i8, ptr %"73", i64 16 + store i64 %"41", ptr %"85", align 8 ret void } diff --git a/ptx/src/test/spirv_run/madc_cc.ll b/ptx/src/test/spirv_run/madc_cc.ll index 626149c..136f320 100644 --- a/ptx/src/test/spirv_run/madc_cc.ll +++ b/ptx/src/test/spirv_run/madc_cc.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 { -"55": +define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { +"54": %"11" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -14,54 +12,54 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"41", %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + %"12" = load i64, ptr addrspace(4) %"40", align 8 + store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"41", align 8 - store i64 %"13", ptr addrspace(5) %"4", align 8 - %"14" = load i64, ptr addrspace(4) %"42", align 8 - store i64 %"14", ptr addrspace(5) %"5", align 8 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"44" = inttoptr i64 %"16" to ptr - %"43" = load i32, ptr %"44", align 4 - store i32 %"43", ptr 
addrspace(5) %"8", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"45" = inttoptr i64 %"18" to ptr - %"57" = getelementptr inbounds i8, ptr %"45", i64 4 - %"46" = load i32, ptr %"57", align 4 - store i32 %"46", ptr addrspace(5) %"9", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"47" = inttoptr i64 %"20" to ptr - %"59" = getelementptr inbounds i8, ptr %"47", i64 8 - %"19" = load i32, ptr %"59", align 4 - store i32 %"19", ptr addrspace(5) %"10", align 4 - %"23" = load i32, ptr addrspace(5) %"8", align 4 - %"24" = load i32, ptr addrspace(5) %"9", align 4 - %"25" = load i32, ptr addrspace(5) %"10", align 4 - %0 = mul i32 %"23", %"24" - %1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %0, i32 %"25") - %"21" = extractvalue { i32, i1 } %1, 0 - %"22" = extractvalue { i32, i1 } %1, 1 - store i32 %"21", ptr addrspace(5) %"6", align 4 - store i1 %"22", ptr addrspace(5) %"11", align 1 - %"27" = load i1, ptr addrspace(5) %"11", align 1 - %"28" = load i32, ptr addrspace(5) %"8", align 4 - %"29" = load i32, ptr addrspace(5) %"9", align 4 - %2 = sext i32 %"28" to i64 - %3 = sext i32 %"29" to i64 + store i64 %"13", ptr addrspace(5) %"5", align 8 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"43" = inttoptr i64 %"15" to ptr + %"42" = load i32, ptr %"43", align 4 + store i32 %"42", ptr addrspace(5) %"8", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"44" = inttoptr i64 %"17" to ptr + %"56" = getelementptr inbounds i8, ptr %"44", i64 4 + %"45" = load i32, ptr %"56", align 4 + store i32 %"45", ptr addrspace(5) %"9", align 4 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"46" = inttoptr i64 %"19" to ptr + %"58" = getelementptr inbounds i8, ptr %"46", i64 8 + %"18" = load i32, ptr %"58", align 4 + store i32 %"18", ptr addrspace(5) %"10", align 4 + %"22" = load i32, ptr addrspace(5) %"8", align 4 + %"23" = load i32, ptr addrspace(5) %"9", align 4 + %"24" = load i32, ptr addrspace(5) %"10", align 4 + %0 = mul i32 %"22", 
%"23" + %1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %0, i32 %"24") + %"20" = extractvalue { i32, i1 } %1, 0 + %"21" = extractvalue { i32, i1 } %1, 1 + store i32 %"20", ptr addrspace(5) %"6", align 4 + store i1 %"21", ptr addrspace(5) %"11", align 1 + %"26" = load i1, ptr addrspace(5) %"11", align 1 + %"27" = load i32, ptr addrspace(5) %"8", align 4 + %"28" = load i32, ptr addrspace(5) %"9", align 4 + %2 = sext i32 %"27" to i64 + %3 = sext i32 %"28" to i64 %4 = mul nsw i64 %2, %3 %5 = lshr i64 %4, 32 %6 = trunc i64 %5 to i32 - %7 = zext i1 %"27" to i32 + %7 = zext i1 %"26" to i32 %8 = add i32 %6, 3 - %"26" = add i32 %8, %7 - store i32 %"26", ptr addrspace(5) %"7", align 4 - %"30" = load i64, ptr addrspace(5) %"5", align 8 - %"31" = load i32, ptr addrspace(5) %"6", align 4 - %"53" = inttoptr i64 %"30" to ptr - store i32 %"31", ptr %"53", align 4 - %"32" = load i64, ptr addrspace(5) %"5", align 8 - %"33" = load i32, ptr addrspace(5) %"7", align 4 - %"54" = inttoptr i64 %"32" to ptr - %"61" = getelementptr inbounds i8, ptr %"54", i64 4 - store i32 %"33", ptr %"61", align 4 + %"25" = add i32 %8, %7 + store i32 %"25", ptr addrspace(5) %"7", align 4 + %"29" = load i64, ptr addrspace(5) %"5", align 8 + %"30" = load i32, ptr addrspace(5) %"6", align 4 + %"52" = inttoptr i64 %"29" to ptr + store i32 %"30", ptr %"52", align 4 + %"31" = load i64, ptr addrspace(5) %"5", align 8 + %"32" = load i32, ptr addrspace(5) %"7", align 4 + %"53" = inttoptr i64 %"31" to ptr + %"60" = getelementptr inbounds i8, ptr %"53", i64 4 + store i32 %"32", ptr %"60", align 4 ret void } diff --git a/ptx/src/test/spirv_run/madc_cc2.ll b/ptx/src/test/spirv_run/madc_cc2.ll deleted file mode 100644 index bea7193..0000000 --- a/ptx/src/test/spirv_run/madc_cc2.ll +++ /dev/null @@ -1,73 +0,0 @@ -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" 
-target triple = "amdgcn-amd-amdhsa" - -define protected amdgpu_kernel void @madc_cc2(ptr addrspace(4) byref(i64) %"52", ptr addrspace(4) byref(i64) %"53") #0 { -"66": - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 - %"4" = alloca i64, align 8, addrspace(5) - %"5" = alloca i64, align 8, addrspace(5) - %"6" = alloca i32, align 4, addrspace(5) - %"7" = alloca i32, align 4, addrspace(5) - %"8" = alloca i32, align 4, addrspace(5) - %"9" = alloca i32, align 4, addrspace(5) - %"10" = alloca i32, align 4, addrspace(5) - %"13" = load i64, ptr addrspace(4) %"53", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1) - %"14" = extractvalue { i32, i1 } %0, 0 - %"15" = extractvalue { i32, i1 } %0, 1 - store i32 %"14", ptr addrspace(5) %"6", align 4 - store i1 %"15", ptr addrspace(5) %"11", align 1 - %"18" = load i1, ptr addrspace(5) %"11", align 1 - %1 = zext i1 %"18" to i32 - %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1) - %3 = extractvalue { i32, i1 } %2, 0 - %4 = extractvalue { i32, i1 } %2, 1 - %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1) - %"54" = extractvalue { i32, i1 } %5, 0 - %6 = extractvalue { i32, i1 } %5, 1 - %"17" = xor i1 %4, %6 - store i32 %"54", ptr addrspace(5) %"7", align 4 - store i1 %"17", ptr addrspace(5) %"11", align 1 - %"20" = load i1, ptr addrspace(5) %"11", align 1 - %7 = zext i1 %"20" to i32 - %"55" = add i32 0, %7 - store i32 %"55", ptr addrspace(5) %"8", align 4 - %"22" = load i1, ptr addrspace(5) %"11", align 1 - %8 = zext i1 %"22" to i32 - %"56" = add i32 0, %8 - store i32 %"56", ptr addrspace(5) %"9", align 4 - %"24" = load i1, ptr addrspace(5) %"12", align 1 - %9 = zext i1 %"24" to i32 - %"57" = sub i32 2, %9 - store i32 %"57", ptr addrspace(5) %"10", align 4 - %"25" = load i64, ptr addrspace(5) 
%"5", align 8 - %"26" = load i32, ptr addrspace(5) %"7", align 4 - %"58" = inttoptr i64 %"25" to ptr - store i32 %"26", ptr %"58", align 4 - %"27" = load i64, ptr addrspace(5) %"5", align 8 - %"28" = load i32, ptr addrspace(5) %"8", align 4 - %"60" = inttoptr i64 %"27" to ptr - %"68" = getelementptr inbounds i8, ptr %"60", i64 4 - store i32 %"28", ptr %"68", align 4 - %"29" = load i64, ptr addrspace(5) %"5", align 8 - %"30" = load i32, ptr addrspace(5) %"9", align 4 - %"62" = inttoptr i64 %"29" to ptr - %"70" = getelementptr inbounds i8, ptr %"62", i64 8 - store i32 %"30", ptr %"70", align 4 - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load i32, ptr addrspace(5) %"10", align 4 - %"64" = inttoptr i64 %"31" to ptr - %"72" = getelementptr inbounds i8, ptr %"64", i64 12 - store i32 %"32", ptr %"72", align 4 - ret void -} - -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 - -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } diff --git a/ptx/src/test/spirv_run/madc_cc2.ptx b/ptx/src/test/spirv_run/madc_cc2.ptx deleted file mode 100644 index 163c39b..0000000 --- a/ptx/src/test/spirv_run/madc_cc2.ptx +++ /dev/null @@ -1,38 +0,0 @@ -.version 6.5 -.target sm_30 -.address_size 64 - -.visible .entry madc_cc2( - .param .u64 input, - .param .u64 output -) -{ - .reg .u64 in_addr; - .reg .u64 out_addr; - .reg .u32 unused; - - .reg .b32 result_1; - .reg .b32 carry_out_1_1; - .reg .b32 carry_out_1_2; - .reg .b32 carry_out_1_3; - - ld.param.u64 out_addr, [output]; - - // set carry=1 - mad.lo.cc.u32 unused, 0, 0, 4294967295; - // overflow addition - madc.lo.cc.u32 result_1, 1, 1, 4294967295; - // write carry - madc.lo.u32 carry_out_1_1, 0, 0, 0; - // 
overflow is also detected by addc - addc.u32 carry_out_1_2, 0, 0; - // but not subc - subc.u32 carry_out_1_3, 2, 0; - - st.s32 [out_addr], result_1; - st.s32 [out_addr+4], carry_out_1_1; - st.s32 [out_addr+8], carry_out_1_2; - st.s32 [out_addr+12], carry_out_1_3; - - ret; -} diff --git a/ptx/src/test/spirv_run/max.ll b/ptx/src/test/spirv_run/max.ll index 79b6f48..6dcc74d 100644 --- a/ptx/src/test/spirv_run/max.ll +++ b/ptx/src/test/spirv_run/max.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load i32, ptr %"30", align 4 - store i32 %"14", ptr addrspace(5) %"7", 
align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %"16" = call i32 @llvm.smax.i32(i32 %"17", i32 %"18") - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"29", align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %"15" = call i32 @llvm.smax.i32(i32 %"16", i32 %"17") + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store i32 %"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/test/spirv_run/membar.ll b/ptx/src/test/spirv_run/membar.ll index c9ec8b9..78f60c8 100644 --- a/ptx/src/test/spirv_run/membar.ll +++ b/ptx/src/test/spirv_run/membar.ll @@ -1,28 +1,26 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 { -"20": +define protected amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { +"19": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr 
addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"14", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"16", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"18" = inttoptr i64 %"12" to ptr - %"17" = load i32, ptr %"18", align 4 - store i32 %"17", ptr addrspace(5) %"6", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"17" = inttoptr i64 %"11" to ptr + %"16" = load i32, ptr %"17", align 4 + store i32 %"16", ptr addrspace(5) %"6", align 4 fence seq_cst - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"19" = inttoptr i64 %"13" to ptr - store i32 %"14", ptr %"19", align 4 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"13" = load i32, ptr addrspace(5) %"6", align 4 + %"18" = inttoptr i64 %"12" to ptr + store i32 %"13", ptr %"18", align 4 ret void } diff --git a/ptx/src/test/spirv_run/min.ll b/ptx/src/test/spirv_run/min.ll index 0828070..58cb36a 100644 --- a/ptx/src/test/spirv_run/min.ll +++ b/ptx/src/test/spirv_run/min.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": 
%"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load i32, ptr %"30", align 4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %"16" = call i32 @llvm.smin.i32(i32 %"17", i32 %"18") - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"29", align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %"15" 
= call i32 @llvm.smin.i32(i32 %"16", i32 %"17") + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store i32 %"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index 8f229c9..1ec030b 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -271,7 +271,11 @@ test_ptx!(const, [0u16], [10u16, 20, 30, 40]); test_ptx!(cvt_s16_s8, [0x139231C2u32], [0xFFFFFFC2u32]);
test_ptx!(cvt_f64_f32, [0.125f32], [0.125f64]);
test_ptx!(cvt_f32_f16, [0xa1u16], [0x37210000u32]);
-test_ptx!(prmt, [0x70c507d6u32, 0x6fbd4b5cu32], [0x6fbdd65cu32, 0x6FFFD600]);
+test_ptx!(
+ prmt,
+ [0x70c507d6u32, 0x6fbd4b5cu32],
+ [0x6fbdd65cu32, 0x6FFFD600]
+);
test_ptx!(
prmt_non_immediate,
[0x70c507d6u32, 0x6fbd4b5cu32],
@@ -289,8 +293,11 @@ test_ptx!( [65521u32, 2147549199, 0x1000],
[2147487519u32, 4294934539]
);
-test_ptx!(madc_cc2, [0xDEADu32], [0u32, 1, 1, 2]);
-test_ptx!(mad_hi_cc, [0x26223377u32, 0x70777766u32, 0x60666633u32], [0x71272866u32, 0u32, 1u32]); // Multi-tap :)
+test_ptx!(
+ mad_hi_cc,
+ [0x26223377u32, 0x70777766u32, 0x60666633u32],
+ [0x71272866u32, 0u32, 1u32]
+); // Multi-tap :)
test_ptx!(mov_vector_cast, [0x200000001u64], [2u32, 1u32]);
test_ptx!(
cvt_clamp,
@@ -323,11 +330,13 @@ test_ptx!( ],
[4294967295u32, 0, 2]
);
-test_ptx!(carry_mixed, [0xDEADu32], [1u32, 1u32]);
test_ptx!(
- subc_cc2,
+ carry_set_all,
[0xDEADu32],
- [0u32, 1, 0, 4294967295, 1, 4294967295, 1]
+ [
+ 1u32, 0, 0, 1, 0, 1, 0, 0, 0u32, 4294967295, 4294967295, 0, 4294967295, 0, 4294967295,
+ 4294967295
+ ]
);
test_ptx!(vshr, [0x6f3650f4u32, 22, 0xc62d4586], [0xC62D4742u32]);
test_ptx!(bfind, [0u32, 1u32, 0x64eb0414], [u32::MAX, 0, 30]);
@@ -337,7 +346,11 @@ test_ptx!( [f16::from_f32(2.0), f16::from_f32(3.0)],
[f16::from_f32(2.0), f16::from_f32(5.0)]
);
-test_ptx!(set_f16x2, [0xc1690e6eu32, 0x13739444u32, 0x424834CC, 0x4248B4CC], [0xffffu32, 0x3C000000]);
+test_ptx!(
+ set_f16x2,
+ [0xc1690e6eu32, 0x13739444u32, 0x424834CC, 0x4248B4CC],
+ [0xffffu32, 0x3C000000]
+);
test_ptx!(
dp4a,
[0xde3032f5u32, 0x2474fe15, 0xf51d8d6c],
diff --git a/ptx/src/test/spirv_run/mov.ll b/ptx/src/test/spirv_run/mov.ll index e876ced..e24446a 100644 --- a/ptx/src/test/spirv_run/mov.ll +++ b/ptx/src/test/spirv_run/mov.ll @@ -1,33 +1,31 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": +define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"21": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"20", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"19", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 %0 = alloca i64, align 8, addrspace(5) - store i64 %"15", ptr addrspace(5) %0, align 8 - %"14" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"14", ptr addrspace(5) 
%"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"21" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"21", align 8 + store i64 %"14", ptr addrspace(5) %0, align 8 + %"13" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"20" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"20", align 8 ret void } diff --git a/ptx/src/test/spirv_run/mov_address.ll b/ptx/src/test/spirv_run/mov_address.ll index b9f3a8a..656410c 100644 --- a/ptx/src/test/spirv_run/mov_address.ll +++ b/ptx/src/test/spirv_run/mov_address.ll @@ -1,19 +1,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"9", ptr addrspace(4) byref(i64) %"10") #0 { -"12": +define protected amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"8", ptr addrspace(4) byref(i64) %"9") #0 { +"11": %"6" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"6", align 1 - %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca [8 x i8], align 1, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) - %"11" = ptrtoint ptr addrspace(5) %"4" to i64 + %"10" = ptrtoint ptr addrspace(5) %"4" to i64 %0 = alloca i64, align 8, addrspace(5) - store i64 %"11", ptr addrspace(5) %0, align 8 - %"8" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"8", ptr addrspace(5) %"5", align 8 + store i64 %"10", ptr addrspace(5) %0, align 8 + %"7" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"7", ptr addrspace(5) %"5", align 8 ret void } diff --git 
a/ptx/src/test/spirv_run/mov_vector_cast.ll b/ptx/src/test/spirv_run/mov_vector_cast.ll index 1f52a3b..e65ad94 100644 --- a/ptx/src/test/spirv_run/mov_vector_cast.ll +++ b/ptx/src/test/spirv_run/mov_vector_cast.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { -"50": +define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { +"49": %"15" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"15", align 1 - %"16" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"16", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) @@ -16,51 +14,51 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"10" = alloca half, align 2, addrspace(5) %"11" = alloca half, align 2, addrspace(5) %"12" = alloca half, align 2, addrspace(5) + %"16" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"16", ptr addrspace(5) %"4", align 8 %"17" = load i64, ptr addrspace(4) %"35", align 8 - store i64 %"17", ptr addrspace(5) %"4", align 8 - %"18" = load i64, ptr addrspace(4) %"36", align 8 - store i64 %"18", ptr addrspace(5) %"5", align 8 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"37" = inttoptr i64 %"20" to ptr - %"19" = load i64, ptr %"37", align 8 - store i64 %"19", ptr addrspace(5) %"6", align 8 - %"21" = load i64, ptr addrspace(5) %"6", align 8 + store i64 %"17", ptr addrspace(5) %"5", align 8 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"36" = inttoptr i64 %"19" to ptr + %"18" = load i64, ptr %"36", align 8 + store i64 
%"18", ptr addrspace(5) %"6", align 8 + %"20" = load i64, ptr addrspace(5) %"6", align 8 %0 = alloca i64, align 8, addrspace(5) - store i64 %"21", ptr addrspace(5) %0, align 8 + store i64 %"20", ptr addrspace(5) %0, align 8 %"13" = load i64, ptr addrspace(5) %0, align 8 - %"39" = bitcast i64 %"13" to <2 x i32> - %"40" = extractelement <2 x i32> %"39", i32 0 - %"41" = extractelement <2 x i32> %"39", i32 1 + %"38" = bitcast i64 %"13" to <2 x i32> + %"39" = extractelement <2 x i32> %"38", i32 0 + %"40" = extractelement <2 x i32> %"38", i32 1 + %"21" = bitcast i32 %"39" to float %"22" = bitcast i32 %"40" to float - %"23" = bitcast i32 %"41" to float - store float %"22", ptr addrspace(5) %"7", align 4 - store float %"23", ptr addrspace(5) %"8", align 4 - %"24" = load i64, ptr addrspace(5) %"6", align 8 + store float %"21", ptr addrspace(5) %"7", align 4 + store float %"22", ptr addrspace(5) %"8", align 4 + %"23" = load i64, ptr addrspace(5) %"6", align 8 %1 = alloca i64, align 8, addrspace(5) - store i64 %"24", ptr addrspace(5) %1, align 8 + store i64 %"23", ptr addrspace(5) %1, align 8 %"14" = load i64, ptr addrspace(5) %1, align 8 - %"43" = bitcast i64 %"14" to <4 x i16> - %"44" = extractelement <4 x i16> %"43", i32 0 - %"45" = extractelement <4 x i16> %"43", i32 1 - %"46" = extractelement <4 x i16> %"43", i32 2 - %"47" = extractelement <4 x i16> %"43", i32 3 + %"42" = bitcast i64 %"14" to <4 x i16> + %"43" = extractelement <4 x i16> %"42", i32 0 + %"44" = extractelement <4 x i16> %"42", i32 1 + %"45" = extractelement <4 x i16> %"42", i32 2 + %"46" = extractelement <4 x i16> %"42", i32 3 + %"24" = bitcast i16 %"43" to half %"25" = bitcast i16 %"44" to half %"26" = bitcast i16 %"45" to half %"27" = bitcast i16 %"46" to half - %"28" = bitcast i16 %"47" to half - store half %"25", ptr addrspace(5) %"9", align 2 - store half %"26", ptr addrspace(5) %"10", align 2 - store half %"27", ptr addrspace(5) %"11", align 2 - store half %"28", ptr addrspace(5) %"12", align 2 - 
%"29" = load i64, ptr addrspace(5) %"5", align 8 - %"30" = load float, ptr addrspace(5) %"8", align 4 - %"48" = inttoptr i64 %"29" to ptr - store float %"30", ptr %"48", align 4 - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load float, ptr addrspace(5) %"7", align 4 - %"49" = inttoptr i64 %"31" to ptr - %"52" = getelementptr inbounds i8, ptr %"49", i64 4 - store float %"32", ptr %"52", align 4 + store half %"24", ptr addrspace(5) %"9", align 2 + store half %"25", ptr addrspace(5) %"10", align 2 + store half %"26", ptr addrspace(5) %"11", align 2 + store half %"27", ptr addrspace(5) %"12", align 2 + %"28" = load i64, ptr addrspace(5) %"5", align 8 + %"29" = load float, ptr addrspace(5) %"8", align 4 + %"47" = inttoptr i64 %"28" to ptr + store float %"29", ptr %"47", align 4 + %"30" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = load float, ptr addrspace(5) %"7", align 4 + %"48" = inttoptr i64 %"30" to ptr + %"51" = getelementptr inbounds i8, ptr %"48", i64 4 + store float %"31", ptr %"51", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mul_ftz.ll b/ptx/src/test/spirv_run/mul_ftz.ll index 04de6f2..3c32e73 100644 --- a/ptx/src/test/spirv_run/mul_ftz.ll +++ b/ptx/src/test/spirv_run/mul_ftz.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, 
addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load float, ptr %"25", align 4 - store float %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load float, ptr %"30", align 4 - store float %"14", ptr addrspace(5) %"7", align 4 - %"17" = load float, ptr addrspace(5) %"6", align 4 - %"18" = load float, ptr addrspace(5) %"7", align 4 - %"16" = fmul float %"17", %"18" - store float %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load float, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store float %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load float, ptr %"24", align 4 + store float %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load float, ptr %"29", align 4 + store float %"13", ptr addrspace(5) %"7", align 4 + %"16" = load float, ptr addrspace(5) %"6", align 4 + %"17" = load float, ptr addrspace(5) %"7", align 4 + %"15" = fmul float %"16", %"17" + store float %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load float, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store float 
%"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mul_hi.ll b/ptx/src/test/spirv_run/mul_hi.ll index e57141b..7d8ffa9 100644 --- a/ptx/src/test/spirv_run/mul_hi.ll +++ b/ptx/src/test/spirv_run/mul_hi.ll @@ -3,31 +3,29 @@ target triple = "amdgcn-amd-amdhsa" declare i64 @__zluda_ptx_impl__mul_hi_u64(i64, i64) #0 -define protected amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #1 { -"23": +define protected amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #1 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = call i64 @__zluda_ptx_impl__mul_hi_u64(i64 %"15", i64 2) - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, 
ptr addrspace(5) %"6", align 8 + %"13" = call i64 @__zluda_ptx_impl__mul_hi_u64(i64 %"14", i64 2) + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/mul_lo.ll b/ptx/src/test/spirv_run/mul_lo.ll index 1a915fa..57a767d 100644 --- a/ptx/src/test/spirv_run/mul_lo.ll +++ b/ptx/src/test/spirv_run/mul_lo.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": +define protected amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = mul i64 %"15", 2 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, 
ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"13" = mul i64 %"14", 2 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/mul_non_ftz.ll b/ptx/src/test/spirv_run/mul_non_ftz.ll index d0d2bcd..e6a3cc4 100644 --- a/ptx/src/test/spirv_run/mul_non_ftz.ll +++ b/ptx/src/test/spirv_run/mul_non_ftz.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr 
addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load float, ptr %"25", align 4 - store float %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load float, ptr %"30", align 4 - store float %"14", ptr addrspace(5) %"7", align 4 - %"17" = load float, ptr addrspace(5) %"6", align 4 - %"18" = load float, ptr addrspace(5) %"7", align 4 - %"16" = fmul float %"17", %"18" - store float %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load float, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store float %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load float, ptr %"24", align 4 + store float %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load float, ptr %"29", align 4 + store float %"13", ptr addrspace(5) %"7", align 4 + %"16" = load float, ptr addrspace(5) %"6", align 4 + %"17" = load float, ptr addrspace(5) %"7", align 4 + %"15" = fmul float %"16", %"17" + store float %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load float, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store float %"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mul_wide.ll b/ptx/src/test/spirv_run/mul_wide.ll index b1dec22..e25a61d 100644 --- a/ptx/src/test/spirv_run/mul_wide.ll +++ b/ptx/src/test/spirv_run/mul_wide.ll @@ -1,40 +1,38 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"30": +define protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { +"29": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"23", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"14" to ptr addrspace(1) - %"13" = load i32, ptr addrspace(1) %"26", align 4 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"27" = inttoptr i64 %"16" to ptr addrspace(1) - %"32" = getelementptr inbounds i8, ptr addrspace(1) %"27", i64 4 - %"15" = load i32, ptr addrspace(1) %"32", align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i32, ptr addrspace(5) %"6", align 4 - %"19" = load i32, ptr addrspace(5) %"7", align 4 - %0 = sext i32 %"18" to i64 - %1 = sext i32 %"19" to i64 - %"17" = mul nsw i64 %0, %1 - store i64 %"17", ptr addrspace(5) %"8", align 8 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load i64, ptr addrspace(5) %"8", align 8 - %"28" = inttoptr i64 
%"20" to ptr - store i64 %"21", ptr %"28", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"13" to ptr addrspace(1) + %"12" = load i32, ptr addrspace(1) %"25", align 4 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"26" = inttoptr i64 %"15" to ptr addrspace(1) + %"31" = getelementptr inbounds i8, ptr addrspace(1) %"26", i64 4 + %"14" = load i32, ptr addrspace(1) %"31", align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"17" = load i32, ptr addrspace(5) %"6", align 4 + %"18" = load i32, ptr addrspace(5) %"7", align 4 + %0 = sext i32 %"17" to i64 + %1 = sext i32 %"18" to i64 + %"16" = mul nsw i64 %0, %1 + store i64 %"16", ptr addrspace(5) %"8", align 8 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load i64, ptr addrspace(5) %"8", align 8 + %"27" = inttoptr i64 %"19" to ptr + store i64 %"20", ptr %"27", align 8 ret void } diff --git a/ptx/src/test/spirv_run/multireg.ll b/ptx/src/test/spirv_run/multireg.ll index 3826c19..657d61f 100644 --- a/ptx/src/test/spirv_run/multireg.ll +++ b/ptx/src/test/spirv_run/multireg.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @multireg(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": +define protected amdgpu_kernel void @multireg(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, 
addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = add i64 %"15", 1 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"13" = add i64 %"14", 1 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/neg.ll b/ptx/src/test/spirv_run/neg.ll index c1087b4..1e94ed1 100644 --- a/ptx/src/test/spirv_run/neg.ll +++ b/ptx/src/test/spirv_run/neg.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @neg(ptr addrspace(4) 
byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load i32, ptr %"19", align 4 - store i32 %"11", ptr addrspace(5) %"6", align 4 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"13" = sub i32 0, %"14" - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store i32 %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load i32, ptr %"18", align 4 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"13" = load i32, ptr addrspace(5) %"6", align 4 + %"12" = sub i32 0, %"13" + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store i32 %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll b/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll index 718a512..69ea8d2 100644 --- a/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll +++ b/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll @@ -1,36 +1,34 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"27": +define protected amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"26": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr addrspace(1) - %"29" = getelementptr inbounds i8, ptr addrspace(1) %"25", i64 8 - %"8" = load <2 x i32>, ptr addrspace(1) %"29", align 8 - %"14" = extractelement <2 x i32> %"8", i32 0 - %"15" = extractelement <2 x i32> %"8", i32 1 - store i32 %"14", ptr addrspace(5) %"6", align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %"16" = add i32 %"17", %"18" - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"26" = inttoptr i64 %"19" to ptr addrspace(1) - store i32 %"20", ptr addrspace(1) %"26", align 4 + store i64 %"11", ptr addrspace(5) %"5", align 
8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr addrspace(1) + %"28" = getelementptr inbounds i8, ptr addrspace(1) %"24", i64 8 + %"8" = load <2 x i32>, ptr addrspace(1) %"28", align 8 + %"13" = extractelement <2 x i32> %"8", i32 0 + %"14" = extractelement <2 x i32> %"8", i32 1 + store i32 %"13", ptr addrspace(5) %"6", align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %"15" = add i32 %"16", %"17" + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"25" = inttoptr i64 %"18" to ptr addrspace(1) + store i32 %"19", ptr addrspace(1) %"25", align 4 ret void } diff --git a/ptx/src/test/spirv_run/not.ll b/ptx/src/test/spirv_run/not.ll index 10dd56c..5e86545 100644 --- a/ptx/src/test/spirv_run/not.ll +++ b/ptx/src/test/spirv_run/not.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 
%"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"20", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"21" = xor i64 %"15", -1 - store i64 %"21", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"23" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"23", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"19", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"20" = xor i64 %"14", -1 + store i64 %"20", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"22" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"22", align 8 ret void } diff --git a/ptx/src/test/spirv_run/ntid.ll b/ptx/src/test/spirv_run/ntid.ll index 93c95bf..53216ce 100644 --- a/ptx/src/test/spirv_run/ntid.ll +++ b/ptx/src/test/spirv_run/ntid.ll @@ -3,37 +3,35 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__sreg_ntid(i8) #0 -define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #1 { -"30": +define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #1 { +"29": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) 
%"7" = alloca i32, align 4, addrspace(5) + %"15" = load i64, ptr addrspace(4) %"25", align 8 + store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"26", align 8 - store i64 %"16", ptr addrspace(5) %"4", align 8 - %"17" = load i64, ptr addrspace(4) %"27", align 8 - store i64 %"17", ptr addrspace(5) %"5", align 8 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"28" = inttoptr i64 %"19" to ptr - %"18" = load i32, ptr %"28", align 4 - store i32 %"18", ptr addrspace(5) %"6", align 4 - %"12" = call i32 @__zluda_ptx_impl__sreg_ntid(i8 0) + store i64 %"16", ptr addrspace(5) %"5", align 8 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"27" = inttoptr i64 %"18" to ptr + %"17" = load i32, ptr %"27", align 4 + store i32 %"17", ptr addrspace(5) %"6", align 4 + %"11" = call i32 @__zluda_ptx_impl__sreg_ntid(i8 0) %0 = alloca i32, align 4, addrspace(5) - store i32 %"12", ptr addrspace(5) %0, align 4 - %"20" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"20", ptr addrspace(5) %"7", align 4 - %"22" = load i32, ptr addrspace(5) %"6", align 4 - %"23" = load i32, ptr addrspace(5) %"7", align 4 - %"21" = add i32 %"22", %"23" - store i32 %"21", ptr addrspace(5) %"6", align 4 - %"24" = load i64, ptr addrspace(5) %"5", align 8 - %"25" = load i32, ptr addrspace(5) %"6", align 4 - %"29" = inttoptr i64 %"24" to ptr - store i32 %"25", ptr %"29", align 4 + store i32 %"11", ptr addrspace(5) %0, align 4 + %"19" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"19", ptr addrspace(5) %"7", align 4 + %"21" = load i32, ptr addrspace(5) %"6", align 4 + %"22" = load i32, ptr addrspace(5) %"7", align 4 + %"20" = add i32 %"21", %"22" + store i32 %"20", ptr addrspace(5) %"6", align 4 + %"23" = load i64, ptr addrspace(5) %"5", align 8 + %"24" = load i32, ptr addrspace(5) %"6", align 4 + %"28" = inttoptr i64 %"23" to ptr + store i32 %"24", ptr %"28", align 4 ret void } diff --git a/ptx/src/test/spirv_run/or.ll b/ptx/src/test/spirv_run/or.ll index 
13e844b..7b4bd7f 100644 --- a/ptx/src/test/spirv_run/or.ll +++ b/ptx/src/test/spirv_run/or.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"31": +define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"30": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"25", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"33" = getelementptr inbounds i8, ptr %"26", i64 8 - %"14" = load i64, ptr %"33", align 8 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"17" = load i64, ptr addrspace(5) %"6", align 8 - %"18" = load i64, ptr addrspace(5) %"7", align 8 - %"27" = or i64 %"17", %"18" - store i64 %"27", ptr addrspace(5) %"6", align 8 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i64, ptr addrspace(5) %"6", align 8 - %"30" = inttoptr i64 %"19" to ptr - store i64 %"20", ptr %"30", align 
8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"24", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"32" = getelementptr inbounds i8, ptr %"25", i64 8 + %"13" = load i64, ptr %"32", align 8 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"16" = load i64, ptr addrspace(5) %"6", align 8 + %"17" = load i64, ptr addrspace(5) %"7", align 8 + %"26" = or i64 %"16", %"17" + store i64 %"26", ptr addrspace(5) %"6", align 8 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i64, ptr addrspace(5) %"6", align 8 + %"29" = inttoptr i64 %"18" to ptr + store i64 %"19", ptr %"29", align 8 ret void } diff --git a/ptx/src/test/spirv_run/param_ptr.ll b/ptx/src/test/spirv_run/param_ptr.ll index 3634669..cea098c 100644 --- a/ptx/src/test/spirv_run/param_ptr.ll +++ b/ptx/src/test/spirv_run/param_ptr.ll @@ -1,39 +1,37 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @param_ptr(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"29": +define protected amdgpu_kernel void @param_ptr(ptr addrspace(4) byref(i64) %"21", ptr addrspace(4) byref(i64) %"22") #0 { +"28": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) - %"25" = ptrtoint ptr addrspace(4) %"22" to i64 + %"24" = ptrtoint ptr 
addrspace(4) %"21" to i64 %0 = alloca i64, align 8, addrspace(5) - store i64 %"25", ptr addrspace(5) %0, align 8 - %"24" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"24", ptr addrspace(5) %"4", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"13" to ptr addrspace(4) - %"12" = load i64, ptr addrspace(4) %"26", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"14", ptr addrspace(5) %"6", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"27" = inttoptr i64 %"16" to ptr - %"15" = load i64, ptr %"27", align 8 - store i64 %"15", ptr addrspace(5) %"7", align 8 - %"18" = load i64, ptr addrspace(5) %"7", align 8 - %"17" = add i64 %"18", 1 - store i64 %"17", ptr addrspace(5) %"8", align 8 - %"19" = load i64, ptr addrspace(5) %"6", align 8 - %"20" = load i64, ptr addrspace(5) %"8", align 8 - %"28" = inttoptr i64 %"19" to ptr - store i64 %"20", ptr %"28", align 8 + store i64 %"24", ptr addrspace(5) %0, align 8 + %"23" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"23", ptr addrspace(5) %"4", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"12" to ptr addrspace(4) + %"11" = load i64, ptr addrspace(4) %"25", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"13", ptr addrspace(5) %"6", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"26" = inttoptr i64 %"15" to ptr + %"14" = load i64, ptr %"26", align 8 + store i64 %"14", ptr addrspace(5) %"7", align 8 + %"17" = load i64, ptr addrspace(5) %"7", align 8 + %"16" = add i64 %"17", 1 + store i64 %"16", ptr addrspace(5) %"8", align 8 + %"18" = load i64, ptr addrspace(5) %"6", align 8 + %"19" = load i64, ptr addrspace(5) %"8", align 8 + %"27" = inttoptr i64 %"18" to ptr + store i64 %"19", ptr %"27", align 8 ret void } diff --git a/ptx/src/test/spirv_run/popc.ll 
b/ptx/src/test/spirv_run/popc.ll index e93f8ad..be9c625 100644 --- a/ptx/src/test/spirv_run/popc.ll +++ b/ptx/src/test/spirv_run/popc.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load i32, ptr %"19", align 4 - store i32 %"11", ptr addrspace(5) %"6", align 4 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"13" = call i32 @llvm.ctpop.i32(i32 %"14") - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load i32, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store i32 %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load i32, ptr %"18", align 4 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"13" = load i32, ptr 
addrspace(5) %"6", align 4 + %"12" = call i32 @llvm.ctpop.i32(i32 %"13") + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load i32, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store i32 %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/pred_not.ll b/ptx/src/test/spirv_run/pred_not.ll index 047f94a..69f7646 100644 --- a/ptx/src/test/spirv_run/pred_not.ll +++ b/ptx/src/test/spirv_run/pred_not.ll @@ -1,64 +1,62 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { -"42": +define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { +"41": %"14" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"14", align 1 - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %"15" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"37", align 8 - store i64 %"16", ptr addrspace(5) %"4", align 8 - %"17" = load i64, ptr addrspace(4) %"38", align 8 - store i64 %"17", ptr addrspace(5) %"5", align 8 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"39" = inttoptr i64 %"19" to ptr - %"18" = load i64, ptr %"39", align 8 - store i64 %"18", ptr addrspace(5) %"6", align 8 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"40" = inttoptr i64 
%"21" to ptr - %"44" = getelementptr inbounds i8, ptr %"40", i64 8 - %"20" = load i64, ptr %"44", align 8 - store i64 %"20", ptr addrspace(5) %"7", align 8 - %"23" = load i64, ptr addrspace(5) %"6", align 8 - %"24" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = icmp ult i64 %"23", %"24" - store i1 %"22", ptr addrspace(5) %"9", align 1 + store i64 %"16", ptr addrspace(5) %"5", align 8 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"38" = inttoptr i64 %"18" to ptr + %"17" = load i64, ptr %"38", align 8 + store i64 %"17", ptr addrspace(5) %"6", align 8 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"39" = inttoptr i64 %"20" to ptr + %"43" = getelementptr inbounds i8, ptr %"39", i64 8 + %"19" = load i64, ptr %"43", align 8 + store i64 %"19", ptr addrspace(5) %"7", align 8 + %"22" = load i64, ptr addrspace(5) %"6", align 8 + %"23" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = icmp ult i64 %"22", %"23" + store i1 %"21", ptr addrspace(5) %"9", align 1 + %"25" = load i1, ptr addrspace(5) %"9", align 1 + %"24" = xor i1 %"25", true + store i1 %"24", ptr addrspace(5) %"9", align 1 %"26" = load i1, ptr addrspace(5) %"9", align 1 - %"25" = xor i1 %"26", true - store i1 %"25", ptr addrspace(5) %"9", align 1 - %"27" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"27", label %"10", label %"11" + br i1 %"26", label %"10", label %"11" -"10": ; preds = %"42" +"10": ; preds = %"41" %0 = alloca i64, align 8, addrspace(5) store i64 1, ptr addrspace(5) %0, align 8 - %"28" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"28", ptr addrspace(5) %"8", align 8 + %"27" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"27", ptr addrspace(5) %"8", align 8 br label %"11" -"11": ; preds = %"10", %"42" - %"29" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"29", label %"13", label %"12" +"11": ; preds = %"10", %"41" + %"28" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"28", label %"13", label %"12" "12": ; preds = %"11" %1 = alloca i64, align 8, 
addrspace(5) store i64 2, ptr addrspace(5) %1, align 8 - %"30" = load i64, ptr addrspace(5) %1, align 8 - store i64 %"30", ptr addrspace(5) %"8", align 8 + %"29" = load i64, ptr addrspace(5) %1, align 8 + store i64 %"29", ptr addrspace(5) %"8", align 8 br label %"13" "13": ; preds = %"12", %"11" - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load i64, ptr addrspace(5) %"8", align 8 - %"41" = inttoptr i64 %"31" to ptr - store i64 %"32", ptr %"41", align 8 + %"30" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = load i64, ptr addrspace(5) %"8", align 8 + %"40" = inttoptr i64 %"30" to ptr + store i64 %"31", ptr %"40", align 8 ret void } diff --git a/ptx/src/test/spirv_run/prmt.ll b/ptx/src/test/spirv_run/prmt.ll index 87313c6..bdcb12d 100644 --- a/ptx/src/test/spirv_run/prmt.ll +++ b/ptx/src/test/spirv_run/prmt.ll @@ -1,42 +1,40 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 { -"44": +define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 { +"43": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"32", align 8 - store i64 %"12", ptr addrspace(5) %"4", align 8 - %"13" = load i64, ptr 
addrspace(4) %"33", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"34" = inttoptr i64 %"15" to ptr - %"14" = load i32, ptr %"34", align 4 - store i32 %"14", ptr addrspace(5) %"6", align 4 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"35" = inttoptr i64 %"17" to ptr - %"46" = getelementptr inbounds i8, ptr %"35", i64 4 - %"16" = load i32, ptr %"46", align 4 - store i32 %"16", ptr addrspace(5) %"7", align 4 - %"19" = load i32, ptr addrspace(5) %"6", align 4 - %"20" = load i32, ptr addrspace(5) %"7", align 4 - %0 = bitcast i32 %"19" to <4 x i8> - %1 = bitcast i32 %"20" to <4 x i8> + store i64 %"12", ptr addrspace(5) %"5", align 8 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"33" = inttoptr i64 %"14" to ptr + %"13" = load i32, ptr %"33", align 4 + store i32 %"13", ptr addrspace(5) %"6", align 4 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"34" = inttoptr i64 %"16" to ptr + %"45" = getelementptr inbounds i8, ptr %"34", i64 4 + %"15" = load i32, ptr %"45", align 4 + store i32 %"15", ptr addrspace(5) %"7", align 4 + %"18" = load i32, ptr addrspace(5) %"6", align 4 + %"19" = load i32, ptr addrspace(5) %"7", align 4 + %0 = bitcast i32 %"18" to <4 x i8> + %1 = bitcast i32 %"19" to <4 x i8> %2 = shufflevector <4 x i8> %0, <4 x i8> %1, <4 x i32> <i32 4, i32 0, i32 6, i32 7> - %"36" = bitcast <4 x i8> %2 to i32 - store i32 %"36", ptr addrspace(5) %"8", align 4 - %"22" = load i32, ptr addrspace(5) %"6", align 4 - %"23" = load i32, ptr addrspace(5) %"7", align 4 - %3 = bitcast i32 %"22" to <4 x i8> - %4 = bitcast i32 %"23" to <4 x i8> + %"35" = bitcast <4 x i8> %2 to i32 + store i32 %"35", ptr addrspace(5) %"8", align 4 + %"21" = load i32, ptr addrspace(5) %"6", align 4 + %"22" = load i32, ptr addrspace(5) %"7", align 4 + %3 = bitcast i32 %"21" to <4 x i8> + %4 = bitcast i32 %"22" to <4 x i8> %5 = shufflevector <4 x i8> %3, <4 x i8> %4, <4 x i32> <i32 4, i32 0, i32 6, i32 7> %6 = 
extractelement <4 x i8> %5, i32 0 %7 = ashr i8 %6, 7 @@ -44,17 +42,17 @@ define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"32", ptr %9 = extractelement <4 x i8> %8, i32 2 %10 = ashr i8 %9, 7 %11 = insertelement <4 x i8> %8, i8 %10, i32 2 - %"39" = bitcast <4 x i8> %11 to i32 - store i32 %"39", ptr addrspace(5) %"9", align 4 - %"24" = load i64, ptr addrspace(5) %"5", align 8 - %"25" = load i32, ptr addrspace(5) %"8", align 4 - %"42" = inttoptr i64 %"24" to ptr - store i32 %"25", ptr %"42", align 4 - %"26" = load i64, ptr addrspace(5) %"5", align 8 - %"27" = load i32, ptr addrspace(5) %"9", align 4 - %"43" = inttoptr i64 %"26" to ptr - %"48" = getelementptr inbounds i8, ptr %"43", i64 4 - store i32 %"27", ptr %"48", align 4 + %"38" = bitcast <4 x i8> %11 to i32 + store i32 %"38", ptr addrspace(5) %"9", align 4 + %"23" = load i64, ptr addrspace(5) %"5", align 8 + %"24" = load i32, ptr addrspace(5) %"8", align 4 + %"41" = inttoptr i64 %"23" to ptr + store i32 %"24", ptr %"41", align 4 + %"25" = load i64, ptr addrspace(5) %"5", align 8 + %"26" = load i32, ptr addrspace(5) %"9", align 4 + %"42" = inttoptr i64 %"25" to ptr + %"47" = getelementptr inbounds i8, ptr %"42", i64 4 + store i32 %"26", ptr %"47", align 4 ret void } diff --git a/ptx/src/test/spirv_run/prmt_non_immediate.ll b/ptx/src/test/spirv_run/prmt_non_immediate.ll index c1a1b9d..d503917 100644 --- a/ptx/src/test/spirv_run/prmt_non_immediate.ll +++ b/ptx/src/test/spirv_run/prmt_non_immediate.ll @@ -1,45 +1,43 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @prmt_non_immediate(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 { -"34": +define protected amdgpu_kernel void @prmt_non_immediate(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) 
byref(i64) %"26") #0 { +"33": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"25", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"26", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"27", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"28" = inttoptr i64 %"14" to ptr - %"13" = load i32, ptr %"28", align 4 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"29" = inttoptr i64 %"16" to ptr - %"36" = getelementptr inbounds i8, ptr %"29", i64 4 - %"15" = load i32, ptr %"36", align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"27" = inttoptr i64 %"13" to ptr + %"12" = load i32, ptr %"27", align 4 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"28" = inttoptr i64 %"15" to ptr + %"35" = getelementptr inbounds i8, ptr %"28", i64 4 + %"14" = load i32, ptr %"35", align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 %0 = alloca i32, align 4, addrspace(5) store i32 64, ptr addrspace(5) %0, align 4 - %"17" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"17", ptr addrspace(5) %"8", align 4 - %"19" = load i32, ptr addrspace(5) %"6", align 4 - %"20" = load i32, ptr addrspace(5) %"7", align 4 - %1 = bitcast i32 %"19" to <4 x i8> - %2 = bitcast i32 %"20" to <4 x i8> + %"16" = load i32, ptr addrspace(5) %0, align 4 + store i32 
%"16", ptr addrspace(5) %"8", align 4 + %"18" = load i32, ptr addrspace(5) %"6", align 4 + %"19" = load i32, ptr addrspace(5) %"7", align 4 + %1 = bitcast i32 %"18" to <4 x i8> + %2 = bitcast i32 %"19" to <4 x i8> %3 = shufflevector <4 x i8> %1, <4 x i8> %2, <4 x i32> <i32 0, i32 4, i32 0, i32 0> - %"30" = bitcast <4 x i8> %3 to i32 - store i32 %"30", ptr addrspace(5) %"7", align 4 - %"21" = load i64, ptr addrspace(5) %"5", align 8 - %"22" = load i32, ptr addrspace(5) %"7", align 4 - %"33" = inttoptr i64 %"21" to ptr - store i32 %"22", ptr %"33", align 4 + %"29" = bitcast <4 x i8> %3 to i32 + store i32 %"29", ptr addrspace(5) %"7", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load i32, ptr addrspace(5) %"7", align 4 + %"32" = inttoptr i64 %"20" to ptr + store i32 %"21", ptr %"32", align 4 ret void } diff --git a/ptx/src/test/spirv_run/rcp.ll b/ptx/src/test/spirv_run/rcp.ll index cb55c6a..116687b 100644 --- a/ptx/src/test/spirv_run/rcp.ll +++ b/ptx/src/test/spirv_run/rcp.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr 
addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"19", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = fdiv arcp afn float 1.000000e+00, %"14" - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"18", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = fdiv arcp afn float 1.000000e+00, %"13" + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/reg_local.ll b/ptx/src/test/spirv_run/reg_local.ll index c01a5e0..48c881d 100644 --- a/ptx/src/test/spirv_run/reg_local.ll +++ b/ptx/src/test/spirv_run/reg_local.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"34": +define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { +"33": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr 
addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca [8 x i8], align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"23", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"11" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"11", ptr addrspace(5) %"6", align 8 - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"27" = inttoptr i64 %"13" to ptr addrspace(1) - %"26" = load i64, ptr addrspace(1) %"27", align 8 - store i64 %"26", ptr addrspace(5) %"7", align 8 - %"14" = load i64, ptr addrspace(5) %"7", align 8 - %"19" = add i64 %"14", 1 - %"28" = addrspacecast ptr addrspace(5) %"4" to ptr - store i64 %"19", ptr %"28", align 8 - %"30" = addrspacecast ptr addrspace(5) %"4" to ptr - %"38" = getelementptr inbounds i8, ptr %"30", i64 0 - %"31" = load i64, ptr %"38", align 8 - store i64 %"31", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"6", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"32" = inttoptr i64 %"16" to ptr addrspace(1) - %"40" = getelementptr inbounds i8, ptr addrspace(1) %"32", i64 0 - store i64 %"17", ptr addrspace(1) %"40", align 8 + store i64 %"10", ptr addrspace(5) %"6", align 8 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"26" = inttoptr i64 %"12" to ptr addrspace(1) + %"25" = load i64, ptr addrspace(1) %"26", align 8 + store i64 %"25", ptr addrspace(5) %"7", align 8 + %"13" = load i64, ptr addrspace(5) %"7", align 8 + %"18" = add i64 %"13", 1 + %"27" = addrspacecast ptr addrspace(5) %"4" to ptr + store i64 %"18", ptr %"27", align 8 + %"29" = addrspacecast ptr addrspace(5) %"4" to ptr + %"37" = getelementptr inbounds i8, ptr %"29", i64 0 + %"30" = load i64, ptr %"37", align 8 + store 
i64 %"30", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"6", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"31" = inttoptr i64 %"15" to ptr addrspace(1) + %"39" = getelementptr inbounds i8, ptr addrspace(1) %"31", i64 0 + store i64 %"16", ptr addrspace(1) %"39", align 8 ret void } diff --git a/ptx/src/test/spirv_run/rem.ll b/ptx/src/test/spirv_run/rem.ll index 3a1e26c..4535f49 100644 --- a/ptx/src/test/spirv_run/rem.ll +++ b/ptx/src/test/spirv_run/rem.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load i32, ptr %"30", align 
4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %"16" = srem i32 %"17", %"18" - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"29", align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %"15" = srem i32 %"16", %"17" + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store i32 %"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/test/spirv_run/rsqrt.ll b/ptx/src/test/spirv_run/rsqrt.ll index ffdd662..7797260 100644 --- a/ptx/src/test/spirv_run/rsqrt.ll +++ b/ptx/src/test/spirv_run/rsqrt.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr 
addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca double, align 8, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load double, ptr %"19", align 8 - store double %"11", ptr addrspace(5) %"6", align 8 - %"14" = load double, ptr addrspace(5) %"6", align 8 - %0 = call afn double @llvm.sqrt.f64(double %"14") - %"13" = fdiv arcp afn double 1.000000e+00, %0 - store double %"13", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load double, ptr addrspace(5) %"6", align 8 - %"20" = inttoptr i64 %"15" to ptr - store double %"16", ptr %"20", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load double, ptr %"18", align 8 + store double %"10", ptr addrspace(5) %"6", align 8 + %"13" = load double, ptr addrspace(5) %"6", align 8 + %0 = call afn double @llvm.sqrt.f64(double %"13") + %"12" = fdiv arcp afn double 1.000000e+00, %0 + store double %"12", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load double, ptr addrspace(5) %"6", align 8 + %"19" = inttoptr i64 %"14" to ptr + store double %"15", ptr %"19", align 8 ret void } diff --git a/ptx/src/test/spirv_run/s64_min.ll b/ptx/src/test/spirv_run/s64_min.ll index 3f741e7..98eee04 100644 --- a/ptx/src/test/spirv_run/s64_min.ll +++ b/ptx/src/test/spirv_run/s64_min.ll @@ -1,24 +1,22 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @s64_min(ptr addrspace(4) byref(i64) %"13", ptr addrspace(4) byref(i64) %"14") #0 { -"16": +define protected amdgpu_kernel void @s64_min(ptr addrspace(4) byref(i64) %"12", ptr addrspace(4) byref(i64) %"13") #0 { +"15": %"6" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"6", align 1 - %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) - %"8" = load i64, ptr addrspace(4) %"14", align 8 - store i64 %"8", ptr addrspace(5) %"4", align 8 + %"7" = load i64, ptr addrspace(4) %"13", align 8 + store i64 %"7", ptr addrspace(5) %"4", align 8 %0 = alloca i64, align 8, addrspace(5) store i64 -9223372036854775808, ptr addrspace(5) %0, align 8 - %"9" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"9", ptr addrspace(5) %"5", align 8 - %"10" = load i64, ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(5) %"5", align 8 - %"15" = inttoptr i64 %"10" to ptr - store i64 %"11", ptr %"15", align 8 + %"8" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"8", ptr addrspace(5) %"5", align 8 + %"9" = load i64, ptr addrspace(5) %"4", align 8 + %"10" = load i64, ptr addrspace(5) %"5", align 8 + %"14" = inttoptr i64 %"9" to ptr + store i64 %"10", ptr %"14", align 8 ret void } diff --git a/ptx/src/test/spirv_run/selp.ll b/ptx/src/test/spirv_run/selp.ll index 6124887..073ec38 100644 --- a/ptx/src/test/spirv_run/selp.ll +++ b/ptx/src/test/spirv_run/selp.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple 
= "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"29": +define protected amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { +"28": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) %"7" = alloca i16, align 2, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"23", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"13" to ptr - %"12" = load i16, ptr %"26", align 2 - store i16 %"12", ptr addrspace(5) %"6", align 2 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"27" = inttoptr i64 %"15" to ptr - %"31" = getelementptr inbounds i8, ptr %"27", i64 2 - %"14" = load i16, ptr %"31", align 2 - store i16 %"14", ptr addrspace(5) %"7", align 2 - %"17" = load i16, ptr addrspace(5) %"6", align 2 - %"18" = load i16, ptr addrspace(5) %"7", align 2 - %"16" = select i1 false, i16 %"17", i16 %"18" - store i16 %"16", ptr addrspace(5) %"6", align 2 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i16, ptr addrspace(5) %"6", align 2 - %"28" = inttoptr i64 %"19" to ptr - store i16 %"20", ptr %"28", align 2 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"12" to ptr + %"11" = load i16, ptr %"25", align 2 + store i16 %"11", ptr addrspace(5) %"6", align 2 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"26" = inttoptr i64 
%"14" to ptr + %"30" = getelementptr inbounds i8, ptr %"26", i64 2 + %"13" = load i16, ptr %"30", align 2 + store i16 %"13", ptr addrspace(5) %"7", align 2 + %"16" = load i16, ptr addrspace(5) %"6", align 2 + %"17" = load i16, ptr addrspace(5) %"7", align 2 + %"15" = select i1 false, i16 %"16", i16 %"17" + store i16 %"15", ptr addrspace(5) %"6", align 2 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i16, ptr addrspace(5) %"6", align 2 + %"27" = inttoptr i64 %"18" to ptr + store i16 %"19", ptr %"27", align 2 ret void } diff --git a/ptx/src/test/spirv_run/selp_true.ll b/ptx/src/test/spirv_run/selp_true.ll index 283eb81..4eda981 100644 --- a/ptx/src/test/spirv_run/selp_true.ll +++ b/ptx/src/test/spirv_run/selp_true.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"29": +define protected amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { +"28": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) %"7" = alloca i16, align 2, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"23", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"13" to ptr - %"12" = 
load i16, ptr %"26", align 2 - store i16 %"12", ptr addrspace(5) %"6", align 2 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"27" = inttoptr i64 %"15" to ptr - %"31" = getelementptr inbounds i8, ptr %"27", i64 2 - %"14" = load i16, ptr %"31", align 2 - store i16 %"14", ptr addrspace(5) %"7", align 2 - %"17" = load i16, ptr addrspace(5) %"6", align 2 - %"18" = load i16, ptr addrspace(5) %"7", align 2 - %"16" = select i1 true, i16 %"17", i16 %"18" - store i16 %"16", ptr addrspace(5) %"6", align 2 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i16, ptr addrspace(5) %"6", align 2 - %"28" = inttoptr i64 %"19" to ptr - store i16 %"20", ptr %"28", align 2 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"12" to ptr + %"11" = load i16, ptr %"25", align 2 + store i16 %"11", ptr addrspace(5) %"6", align 2 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"26" = inttoptr i64 %"14" to ptr + %"30" = getelementptr inbounds i8, ptr %"26", i64 2 + %"13" = load i16, ptr %"30", align 2 + store i16 %"13", ptr addrspace(5) %"7", align 2 + %"16" = load i16, ptr addrspace(5) %"6", align 2 + %"17" = load i16, ptr addrspace(5) %"7", align 2 + %"15" = select i1 true, i16 %"16", i16 %"17" + store i16 %"15", ptr addrspace(5) %"6", align 2 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i16, ptr addrspace(5) %"6", align 2 + %"27" = inttoptr i64 %"18" to ptr + store i16 %"19", ptr %"27", align 2 ret void } diff --git a/ptx/src/test/spirv_run/set_f16x2.ll b/ptx/src/test/spirv_run/set_f16x2.ll index 4a2c8ea..2a8caf3 100644 --- a/ptx/src/test/spirv_run/set_f16x2.ll +++ b/ptx/src/test/spirv_run/set_f16x2.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected 
amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 { -"59": +define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { +"58": %"11" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -14,54 +12,54 @@ define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"41" %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca <2 x half>, align 4, addrspace(5) + %"12" = load i64, ptr addrspace(4) %"40", align 8 + store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"41", align 8 - store i64 %"13", ptr addrspace(5) %"4", align 8 - %"14" = load i64, ptr addrspace(4) %"42", align 8 - store i64 %"14", ptr addrspace(5) %"5", align 8 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"44" = inttoptr i64 %"16" to ptr - %"43" = load i32, ptr %"44", align 4 - store i32 %"43", ptr addrspace(5) %"6", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"45" = inttoptr i64 %"18" to ptr - %"61" = getelementptr inbounds i8, ptr %"45", i64 4 - %"46" = load i32, ptr %"61", align 4 - store i32 %"46", ptr addrspace(5) %"7", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"47" = inttoptr i64 %"20" to ptr - %"63" = getelementptr inbounds i8, ptr %"47", i64 8 - %"48" = load i32, ptr %"63", align 4 - store i32 %"48", ptr addrspace(5) %"8", align 4 - %"22" = load i64, ptr addrspace(5) %"4", align 8 - %"49" = inttoptr i64 %"22" to ptr - %"65" = getelementptr inbounds i8, ptr %"49", i64 12 - %"50" = load i32, ptr %"65", align 4 - store i32 %"50", ptr addrspace(5) %"9", align 4 - %"24" = load i32, ptr addrspace(5) %"6", align 
4 - %"25" = load i32, ptr addrspace(5) %"7", align 4 + store i64 %"13", ptr addrspace(5) %"5", align 8 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"43" = inttoptr i64 %"15" to ptr + %"42" = load i32, ptr %"43", align 4 + store i32 %"42", ptr addrspace(5) %"6", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"44" = inttoptr i64 %"17" to ptr + %"60" = getelementptr inbounds i8, ptr %"44", i64 4 + %"45" = load i32, ptr %"60", align 4 + store i32 %"45", ptr addrspace(5) %"7", align 4 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"46" = inttoptr i64 %"19" to ptr + %"62" = getelementptr inbounds i8, ptr %"46", i64 8 + %"47" = load i32, ptr %"62", align 4 + store i32 %"47", ptr addrspace(5) %"8", align 4 + %"21" = load i64, ptr addrspace(5) %"4", align 8 + %"48" = inttoptr i64 %"21" to ptr + %"64" = getelementptr inbounds i8, ptr %"48", i64 12 + %"49" = load i32, ptr %"64", align 4 + store i32 %"49", ptr addrspace(5) %"9", align 4 + %"23" = load i32, ptr addrspace(5) %"6", align 4 + %"24" = load i32, ptr addrspace(5) %"7", align 4 + %"51" = bitcast i32 %"23" to <2 x half> %"52" = bitcast i32 %"24" to <2 x half> - %"53" = bitcast i32 %"25" to <2 x half> - %0 = fcmp ugt <2 x half> %"52", %"53" + %0 = fcmp ugt <2 x half> %"51", %"52" %1 = sext <2 x i1> %0 to <2 x i16> - %"51" = bitcast <2 x i16> %1 to i32 - store i32 %"51", ptr addrspace(5) %"6", align 4 - %"27" = load i32, ptr addrspace(5) %"8", align 4 - %"28" = load i32, ptr addrspace(5) %"9", align 4 + %"50" = bitcast <2 x i16> %1 to i32 + store i32 %"50", ptr addrspace(5) %"6", align 4 + %"26" = load i32, ptr addrspace(5) %"8", align 4 + %"27" = load i32, ptr addrspace(5) %"9", align 4 + %"54" = bitcast i32 %"26" to <2 x half> %"55" = bitcast i32 %"27" to <2 x half> - %"56" = bitcast i32 %"28" to <2 x half> - %2 = fcmp oeq <2 x half> %"55", %"56" - %"54" = uitofp <2 x i1> %2 to <2 x half> - %"26" = bitcast <2 x half> %"54" to i32 - store i32 %"26", ptr addrspace(5) %"8", align 4 - %"29" = 
load i64, ptr addrspace(5) %"5", align 8 - %"30" = load i32, ptr addrspace(5) %"6", align 4 - %"57" = inttoptr i64 %"29" to ptr - store i32 %"30", ptr %"57", align 4 - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load i32, ptr addrspace(5) %"8", align 4 - %"58" = inttoptr i64 %"31" to ptr - %"67" = getelementptr inbounds i8, ptr %"58", i64 4 - store i32 %"32", ptr %"67", align 4 + %2 = fcmp oeq <2 x half> %"54", %"55" + %"53" = uitofp <2 x i1> %2 to <2 x half> + %"25" = bitcast <2 x half> %"53" to i32 + store i32 %"25", ptr addrspace(5) %"8", align 4 + %"28" = load i64, ptr addrspace(5) %"5", align 8 + %"29" = load i32, ptr addrspace(5) %"6", align 4 + %"56" = inttoptr i64 %"28" to ptr + store i32 %"29", ptr %"56", align 4 + %"30" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = load i32, ptr addrspace(5) %"8", align 4 + %"57" = inttoptr i64 %"30" to ptr + %"66" = getelementptr inbounds i8, ptr %"57", i64 4 + store i32 %"31", ptr %"66", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp.ll b/ptx/src/test/spirv_run/setp.ll index a54f8f6..2f95556 100644 --- a/ptx/src/test/spirv_run/setp.ll +++ b/ptx/src/test/spirv_run/setp.ll @@ -1,61 +1,59 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { -"40": +define protected amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { +"39": %"14" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"14", align 1 - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, 
align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %"15" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"35", align 8 - store i64 %"16", ptr addrspace(5) %"4", align 8 - %"17" = load i64, ptr addrspace(4) %"36", align 8 - store i64 %"17", ptr addrspace(5) %"5", align 8 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"37" = inttoptr i64 %"19" to ptr - %"18" = load i64, ptr %"37", align 8 - store i64 %"18", ptr addrspace(5) %"6", align 8 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"38" = inttoptr i64 %"21" to ptr - %"42" = getelementptr inbounds i8, ptr %"38", i64 8 - %"20" = load i64, ptr %"42", align 8 - store i64 %"20", ptr addrspace(5) %"7", align 8 - %"23" = load i64, ptr addrspace(5) %"6", align 8 - %"24" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = icmp ult i64 %"23", %"24" - store i1 %"22", ptr addrspace(5) %"9", align 1 - %"25" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"25", label %"10", label %"11" + store i64 %"16", ptr addrspace(5) %"5", align 8 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"36" = inttoptr i64 %"18" to ptr + %"17" = load i64, ptr %"36", align 8 + store i64 %"17", ptr addrspace(5) %"6", align 8 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"37" = inttoptr i64 %"20" to ptr + %"41" = getelementptr inbounds i8, ptr %"37", i64 8 + %"19" = load i64, ptr %"41", align 8 + store i64 %"19", ptr addrspace(5) %"7", align 8 + %"22" = load i64, ptr addrspace(5) %"6", align 8 + %"23" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = icmp ult i64 %"22", %"23" + store i1 %"21", ptr addrspace(5) %"9", align 1 + %"24" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"24", label %"10", label %"11" -"10": ; preds = %"40" +"10": ; preds = %"39" %0 = alloca i64, align 8, addrspace(5) store i64 1, ptr addrspace(5) %0, align 8 - %"26" = load i64, ptr addrspace(5) %0, align 8 
- store i64 %"26", ptr addrspace(5) %"8", align 8 + %"25" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"25", ptr addrspace(5) %"8", align 8 br label %"11" -"11": ; preds = %"10", %"40" - %"27" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"27", label %"13", label %"12" +"11": ; preds = %"10", %"39" + %"26" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"26", label %"13", label %"12" "12": ; preds = %"11" %1 = alloca i64, align 8, addrspace(5) store i64 2, ptr addrspace(5) %1, align 8 - %"28" = load i64, ptr addrspace(5) %1, align 8 - store i64 %"28", ptr addrspace(5) %"8", align 8 + %"27" = load i64, ptr addrspace(5) %1, align 8 + store i64 %"27", ptr addrspace(5) %"8", align 8 br label %"13" "13": ; preds = %"12", %"11" - %"29" = load i64, ptr addrspace(5) %"5", align 8 - %"30" = load i64, ptr addrspace(5) %"8", align 8 - %"39" = inttoptr i64 %"29" to ptr - store i64 %"30", ptr %"39", align 8 + %"28" = load i64, ptr addrspace(5) %"5", align 8 + %"29" = load i64, ptr addrspace(5) %"8", align 8 + %"38" = inttoptr i64 %"28" to ptr + store i64 %"29", ptr %"38", align 8 ret void } diff --git a/ptx/src/test/spirv_run/setp_bool.ll b/ptx/src/test/spirv_run/setp_bool.ll index 1707a3d..ac1b2bb 100644 --- a/ptx/src/test/spirv_run/setp_bool.ll +++ b/ptx/src/test/spirv_run/setp_bool.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 { -"51": +define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 { +"50": %"16" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"16", align 1 - %"17" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) 
%"17", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -15,65 +13,65 @@ define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"45" %"9" = alloca i1, align 1, addrspace(5) %"10" = alloca i1, align 1, addrspace(5) %"11" = alloca i1, align 1, addrspace(5) + %"17" = load i64, ptr addrspace(4) %"44", align 8 + store i64 %"17", ptr addrspace(5) %"4", align 8 %"18" = load i64, ptr addrspace(4) %"45", align 8 - store i64 %"18", ptr addrspace(5) %"4", align 8 - %"19" = load i64, ptr addrspace(4) %"46", align 8 - store i64 %"19", ptr addrspace(5) %"5", align 8 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"47" = inttoptr i64 %"21" to ptr - %"20" = load float, ptr %"47", align 4 - store float %"20", ptr addrspace(5) %"6", align 4 - %"23" = load i64, ptr addrspace(5) %"4", align 8 - %"48" = inttoptr i64 %"23" to ptr - %"53" = getelementptr inbounds i8, ptr %"48", i64 4 - %"22" = load float, ptr %"53", align 4 - store float %"22", ptr addrspace(5) %"7", align 4 - %"25" = load i64, ptr addrspace(5) %"4", align 8 - %"49" = inttoptr i64 %"25" to ptr - %"55" = getelementptr inbounds i8, ptr %"49", i64 8 - %"24" = load float, ptr %"55", align 4 - store float %"24", ptr addrspace(5) %"8", align 4 + store i64 %"18", ptr addrspace(5) %"5", align 8 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"46" = inttoptr i64 %"20" to ptr + %"19" = load float, ptr %"46", align 4 + store float %"19", ptr addrspace(5) %"6", align 4 + %"22" = load i64, ptr addrspace(5) %"4", align 8 + %"47" = inttoptr i64 %"22" to ptr + %"52" = getelementptr inbounds i8, ptr %"47", i64 4 + %"21" = load float, ptr %"52", align 4 + store float %"21", ptr addrspace(5) %"7", align 4 + %"24" = load i64, ptr addrspace(5) %"4", align 8 + %"48" = inttoptr i64 %"24" to ptr + %"54" = getelementptr inbounds i8, ptr %"48", i64 8 + %"23" = load float, ptr %"54", align 4 + store float %"23", ptr addrspace(5) 
%"8", align 4 %0 = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %0, align 1 - %"26" = load i1, ptr addrspace(5) %0, align 1 - store i1 %"26", ptr addrspace(5) %"9", align 1 - %"29" = load float, ptr addrspace(5) %"6", align 4 - %"30" = load float, ptr addrspace(5) %"7", align 4 - %"31" = load i1, ptr addrspace(5) %"9", align 1 - %1 = fcmp ogt float %"29", %"30" + %"25" = load i1, ptr addrspace(5) %0, align 1 + store i1 %"25", ptr addrspace(5) %"9", align 1 + %"28" = load float, ptr addrspace(5) %"6", align 4 + %"29" = load float, ptr addrspace(5) %"7", align 4 + %"30" = load i1, ptr addrspace(5) %"9", align 1 + %1 = fcmp ogt float %"28", %"29" %2 = xor i1 %1, true - %"27" = and i1 %1, %"31" - %"28" = and i1 %2, %"31" - store i1 %"27", ptr addrspace(5) %"10", align 1 - store i1 %"28", ptr addrspace(5) %"11", align 1 - %"32" = load i1, ptr addrspace(5) %"10", align 1 - br i1 %"32", label %"12", label %"13" + %"26" = and i1 %1, %"30" + %"27" = and i1 %2, %"30" + store i1 %"26", ptr addrspace(5) %"10", align 1 + store i1 %"27", ptr addrspace(5) %"11", align 1 + %"31" = load i1, ptr addrspace(5) %"10", align 1 + br i1 %"31", label %"12", label %"13" -"12": ; preds = %"51" - %"34" = load float, ptr addrspace(5) %"6", align 4 +"12": ; preds = %"50" + %"33" = load float, ptr addrspace(5) %"6", align 4 %3 = alloca float, align 4, addrspace(5) - store float %"34", ptr addrspace(5) %3, align 4 - %"33" = load float, ptr addrspace(5) %3, align 4 - store float %"33", ptr addrspace(5) %"8", align 4 + store float %"33", ptr addrspace(5) %3, align 4 + %"32" = load float, ptr addrspace(5) %3, align 4 + store float %"32", ptr addrspace(5) %"8", align 4 br label %"13" -"13": ; preds = %"12", %"51" - %"35" = load i1, ptr addrspace(5) %"11", align 1 - br i1 %"35", label %"14", label %"15" +"13": ; preds = %"12", %"50" + %"34" = load i1, ptr addrspace(5) %"11", align 1 + br i1 %"34", label %"14", label %"15" "14": ; preds = %"13" - %"37" = load float, ptr 
addrspace(5) %"7", align 4 + %"36" = load float, ptr addrspace(5) %"7", align 4 %4 = alloca float, align 4, addrspace(5) - store float %"37", ptr addrspace(5) %4, align 4 - %"36" = load float, ptr addrspace(5) %4, align 4 - store float %"36", ptr addrspace(5) %"8", align 4 + store float %"36", ptr addrspace(5) %4, align 4 + %"35" = load float, ptr addrspace(5) %4, align 4 + store float %"35", ptr addrspace(5) %"8", align 4 br label %"15" "15": ; preds = %"14", %"13" - %"38" = load i64, ptr addrspace(5) %"5", align 8 - %"39" = load float, ptr addrspace(5) %"8", align 4 - %"50" = inttoptr i64 %"38" to ptr - store float %"39", ptr %"50", align 4 + %"37" = load i64, ptr addrspace(5) %"5", align 8 + %"38" = load float, ptr addrspace(5) %"8", align 4 + %"49" = inttoptr i64 %"37" to ptr + store float %"38", ptr %"49", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp_gt.ll b/ptx/src/test/spirv_run/setp_gt.ll index 0aa4831..3a8b965 100644 --- a/ptx/src/test/spirv_run/setp_gt.ll +++ b/ptx/src/test/spirv_run/setp_gt.ll @@ -1,63 +1,61 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { -"40": +define protected amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { +"39": %"14" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"14", align 1 - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %"15" = load i64, 
ptr addrspace(4) %"34", align 8 + store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"35", align 8 - store i64 %"16", ptr addrspace(5) %"4", align 8 - %"17" = load i64, ptr addrspace(4) %"36", align 8 - store i64 %"17", ptr addrspace(5) %"5", align 8 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"37" = inttoptr i64 %"19" to ptr - %"18" = load float, ptr %"37", align 4 - store float %"18", ptr addrspace(5) %"6", align 4 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"38" = inttoptr i64 %"21" to ptr - %"42" = getelementptr inbounds i8, ptr %"38", i64 4 - %"20" = load float, ptr %"42", align 4 - store float %"20", ptr addrspace(5) %"7", align 4 - %"23" = load float, ptr addrspace(5) %"6", align 4 - %"24" = load float, ptr addrspace(5) %"7", align 4 - %"22" = fcmp ogt float %"23", %"24" - store i1 %"22", ptr addrspace(5) %"9", align 1 - %"25" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"25", label %"10", label %"11" + store i64 %"16", ptr addrspace(5) %"5", align 8 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"36" = inttoptr i64 %"18" to ptr + %"17" = load float, ptr %"36", align 4 + store float %"17", ptr addrspace(5) %"6", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"37" = inttoptr i64 %"20" to ptr + %"41" = getelementptr inbounds i8, ptr %"37", i64 4 + %"19" = load float, ptr %"41", align 4 + store float %"19", ptr addrspace(5) %"7", align 4 + %"22" = load float, ptr addrspace(5) %"6", align 4 + %"23" = load float, ptr addrspace(5) %"7", align 4 + %"21" = fcmp ogt float %"22", %"23" + store i1 %"21", ptr addrspace(5) %"9", align 1 + %"24" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"24", label %"10", label %"11" -"10": ; preds = %"40" - %"27" = load float, ptr addrspace(5) %"6", align 4 +"10": ; preds = %"39" + %"26" = load float, ptr addrspace(5) %"6", align 4 %0 = alloca float, align 4, addrspace(5) - store float %"27", ptr addrspace(5) %0, align 4 - %"26" = load float, ptr 
addrspace(5) %0, align 4 - store float %"26", ptr addrspace(5) %"8", align 4 + store float %"26", ptr addrspace(5) %0, align 4 + %"25" = load float, ptr addrspace(5) %0, align 4 + store float %"25", ptr addrspace(5) %"8", align 4 br label %"11" -"11": ; preds = %"10", %"40" - %"28" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"28", label %"13", label %"12" +"11": ; preds = %"10", %"39" + %"27" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"27", label %"13", label %"12" "12": ; preds = %"11" - %"30" = load float, ptr addrspace(5) %"7", align 4 + %"29" = load float, ptr addrspace(5) %"7", align 4 %1 = alloca float, align 4, addrspace(5) - store float %"30", ptr addrspace(5) %1, align 4 - %"29" = load float, ptr addrspace(5) %1, align 4 - store float %"29", ptr addrspace(5) %"8", align 4 + store float %"29", ptr addrspace(5) %1, align 4 + %"28" = load float, ptr addrspace(5) %1, align 4 + store float %"28", ptr addrspace(5) %"8", align 4 br label %"13" "13": ; preds = %"12", %"11" - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load float, ptr addrspace(5) %"8", align 4 - %"39" = inttoptr i64 %"31" to ptr - store float %"32", ptr %"39", align 4 + %"30" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = load float, ptr addrspace(5) %"8", align 4 + %"38" = inttoptr i64 %"30" to ptr + store float %"31", ptr %"38", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp_leu.ll b/ptx/src/test/spirv_run/setp_leu.ll index 4105d59..9699fde 100644 --- a/ptx/src/test/spirv_run/setp_leu.ll +++ b/ptx/src/test/spirv_run/setp_leu.ll @@ -1,63 +1,61 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { -"40": +define protected amdgpu_kernel void 
@setp_leu(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { +"39": %"14" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"14", align 1 - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %"15" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"35", align 8 - store i64 %"16", ptr addrspace(5) %"4", align 8 - %"17" = load i64, ptr addrspace(4) %"36", align 8 - store i64 %"17", ptr addrspace(5) %"5", align 8 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"37" = inttoptr i64 %"19" to ptr - %"18" = load float, ptr %"37", align 4 - store float %"18", ptr addrspace(5) %"6", align 4 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"38" = inttoptr i64 %"21" to ptr - %"42" = getelementptr inbounds i8, ptr %"38", i64 4 - %"20" = load float, ptr %"42", align 4 - store float %"20", ptr addrspace(5) %"7", align 4 - %"23" = load float, ptr addrspace(5) %"6", align 4 - %"24" = load float, ptr addrspace(5) %"7", align 4 - %"22" = fcmp ule float %"23", %"24" - store i1 %"22", ptr addrspace(5) %"9", align 1 - %"25" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"25", label %"10", label %"11" + store i64 %"16", ptr addrspace(5) %"5", align 8 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"36" = inttoptr i64 %"18" to ptr + %"17" = load float, ptr %"36", align 4 + store float %"17", ptr addrspace(5) %"6", align 4 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"37" = inttoptr i64 %"20" to ptr + %"41" = getelementptr inbounds i8, ptr %"37", i64 4 + %"19" = load float, ptr %"41", align 4 + store float %"19", ptr addrspace(5) %"7", align 4 + %"22" = 
load float, ptr addrspace(5) %"6", align 4 + %"23" = load float, ptr addrspace(5) %"7", align 4 + %"21" = fcmp ule float %"22", %"23" + store i1 %"21", ptr addrspace(5) %"9", align 1 + %"24" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"24", label %"10", label %"11" -"10": ; preds = %"40" - %"27" = load float, ptr addrspace(5) %"6", align 4 +"10": ; preds = %"39" + %"26" = load float, ptr addrspace(5) %"6", align 4 %0 = alloca float, align 4, addrspace(5) - store float %"27", ptr addrspace(5) %0, align 4 - %"26" = load float, ptr addrspace(5) %0, align 4 - store float %"26", ptr addrspace(5) %"8", align 4 + store float %"26", ptr addrspace(5) %0, align 4 + %"25" = load float, ptr addrspace(5) %0, align 4 + store float %"25", ptr addrspace(5) %"8", align 4 br label %"11" -"11": ; preds = %"10", %"40" - %"28" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"28", label %"13", label %"12" +"11": ; preds = %"10", %"39" + %"27" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"27", label %"13", label %"12" "12": ; preds = %"11" - %"30" = load float, ptr addrspace(5) %"7", align 4 + %"29" = load float, ptr addrspace(5) %"7", align 4 %1 = alloca float, align 4, addrspace(5) - store float %"30", ptr addrspace(5) %1, align 4 - %"29" = load float, ptr addrspace(5) %1, align 4 - store float %"29", ptr addrspace(5) %"8", align 4 + store float %"29", ptr addrspace(5) %1, align 4 + %"28" = load float, ptr addrspace(5) %1, align 4 + store float %"28", ptr addrspace(5) %"8", align 4 br label %"13" "13": ; preds = %"12", %"11" - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load float, ptr addrspace(5) %"8", align 4 - %"39" = inttoptr i64 %"31" to ptr - store float %"32", ptr %"39", align 4 + %"30" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = load float, ptr addrspace(5) %"8", align 4 + %"38" = inttoptr i64 %"30" to ptr + store float %"31", ptr %"38", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp_nan.ll 
b/ptx/src/test/spirv_run/setp_nan.ll index da9c62a..1368386 100644 --- a/ptx/src/test/spirv_run/setp_nan.ll +++ b/ptx/src/test/spirv_run/setp_nan.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"116", ptr addrspace(4) byref(i64) %"117") #0 { -"130": +define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115", ptr addrspace(4) byref(i64) %"116") #0 { +"129": %"32" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"32", align 1 - %"33" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"33", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -19,172 +17,172 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"116" %"13" = alloca float, align 4, addrspace(5) %"14" = alloca i32, align 4, addrspace(5) %"15" = alloca i1, align 1, addrspace(5) + %"33" = load i64, ptr addrspace(4) %"115", align 8 + store i64 %"33", ptr addrspace(5) %"4", align 8 %"34" = load i64, ptr addrspace(4) %"116", align 8 - store i64 %"34", ptr addrspace(5) %"4", align 8 - %"35" = load i64, ptr addrspace(4) %"117", align 8 - store i64 %"35", ptr addrspace(5) %"5", align 8 - %"37" = load i64, ptr addrspace(5) %"4", align 8 - %"118" = inttoptr i64 %"37" to ptr - %"36" = load float, ptr %"118", align 4 - store float %"36", ptr addrspace(5) %"6", align 4 - %"39" = load i64, ptr addrspace(5) %"4", align 8 - %"119" = inttoptr i64 %"39" to ptr - %"132" = getelementptr inbounds i8, ptr %"119", i64 4 - %"38" = load float, ptr %"132", align 4 - store float %"38", ptr addrspace(5) %"7", align 4 - %"41" = load i64, ptr addrspace(5) %"4", align 8 - %"120" = 
inttoptr i64 %"41" to ptr - %"134" = getelementptr inbounds i8, ptr %"120", i64 8 - %"40" = load float, ptr %"134", align 4 - store float %"40", ptr addrspace(5) %"8", align 4 - %"43" = load i64, ptr addrspace(5) %"4", align 8 - %"121" = inttoptr i64 %"43" to ptr - %"136" = getelementptr inbounds i8, ptr %"121", i64 12 - %"42" = load float, ptr %"136", align 4 - store float %"42", ptr addrspace(5) %"9", align 4 - %"45" = load i64, ptr addrspace(5) %"4", align 8 - %"122" = inttoptr i64 %"45" to ptr - %"138" = getelementptr inbounds i8, ptr %"122", i64 16 - %"44" = load float, ptr %"138", align 4 - store float %"44", ptr addrspace(5) %"10", align 4 - %"47" = load i64, ptr addrspace(5) %"4", align 8 - %"123" = inttoptr i64 %"47" to ptr - %"140" = getelementptr inbounds i8, ptr %"123", i64 20 - %"46" = load float, ptr %"140", align 4 - store float %"46", ptr addrspace(5) %"11", align 4 - %"49" = load i64, ptr addrspace(5) %"4", align 8 - %"124" = inttoptr i64 %"49" to ptr - %"142" = getelementptr inbounds i8, ptr %"124", i64 24 - %"48" = load float, ptr %"142", align 4 - store float %"48", ptr addrspace(5) %"12", align 4 - %"51" = load i64, ptr addrspace(5) %"4", align 8 - %"125" = inttoptr i64 %"51" to ptr - %"144" = getelementptr inbounds i8, ptr %"125", i64 28 - %"50" = load float, ptr %"144", align 4 - store float %"50", ptr addrspace(5) %"13", align 4 - %"53" = load float, ptr addrspace(5) %"6", align 4 - %"54" = load float, ptr addrspace(5) %"7", align 4 - %"52" = fcmp uno float %"53", %"54" - store i1 %"52", ptr addrspace(5) %"15", align 1 - %"55" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"55", label %"16", label %"17" - -"16": ; preds = %"130" + store i64 %"34", ptr addrspace(5) %"5", align 8 + %"36" = load i64, ptr addrspace(5) %"4", align 8 + %"117" = inttoptr i64 %"36" to ptr + %"35" = load float, ptr %"117", align 4 + store float %"35", ptr addrspace(5) %"6", align 4 + %"38" = load i64, ptr addrspace(5) %"4", align 8 + %"118" = inttoptr i64 %"38" 
to ptr + %"131" = getelementptr inbounds i8, ptr %"118", i64 4 + %"37" = load float, ptr %"131", align 4 + store float %"37", ptr addrspace(5) %"7", align 4 + %"40" = load i64, ptr addrspace(5) %"4", align 8 + %"119" = inttoptr i64 %"40" to ptr + %"133" = getelementptr inbounds i8, ptr %"119", i64 8 + %"39" = load float, ptr %"133", align 4 + store float %"39", ptr addrspace(5) %"8", align 4 + %"42" = load i64, ptr addrspace(5) %"4", align 8 + %"120" = inttoptr i64 %"42" to ptr + %"135" = getelementptr inbounds i8, ptr %"120", i64 12 + %"41" = load float, ptr %"135", align 4 + store float %"41", ptr addrspace(5) %"9", align 4 + %"44" = load i64, ptr addrspace(5) %"4", align 8 + %"121" = inttoptr i64 %"44" to ptr + %"137" = getelementptr inbounds i8, ptr %"121", i64 16 + %"43" = load float, ptr %"137", align 4 + store float %"43", ptr addrspace(5) %"10", align 4 + %"46" = load i64, ptr addrspace(5) %"4", align 8 + %"122" = inttoptr i64 %"46" to ptr + %"139" = getelementptr inbounds i8, ptr %"122", i64 20 + %"45" = load float, ptr %"139", align 4 + store float %"45", ptr addrspace(5) %"11", align 4 + %"48" = load i64, ptr addrspace(5) %"4", align 8 + %"123" = inttoptr i64 %"48" to ptr + %"141" = getelementptr inbounds i8, ptr %"123", i64 24 + %"47" = load float, ptr %"141", align 4 + store float %"47", ptr addrspace(5) %"12", align 4 + %"50" = load i64, ptr addrspace(5) %"4", align 8 + %"124" = inttoptr i64 %"50" to ptr + %"143" = getelementptr inbounds i8, ptr %"124", i64 28 + %"49" = load float, ptr %"143", align 4 + store float %"49", ptr addrspace(5) %"13", align 4 + %"52" = load float, ptr addrspace(5) %"6", align 4 + %"53" = load float, ptr addrspace(5) %"7", align 4 + %"51" = fcmp uno float %"52", %"53" + store i1 %"51", ptr addrspace(5) %"15", align 1 + %"54" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"54", label %"16", label %"17" + +"16": ; preds = %"129" %0 = alloca i32, align 4, addrspace(5) store i32 1, ptr addrspace(5) %0, align 4 - %"56" = 
load i32, ptr addrspace(5) %0, align 4 - store i32 %"56", ptr addrspace(5) %"14", align 4 + %"55" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"55", ptr addrspace(5) %"14", align 4 br label %"17" -"17": ; preds = %"16", %"130" - %"57" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"57", label %"19", label %"18" +"17": ; preds = %"16", %"129" + %"56" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"56", label %"19", label %"18" "18": ; preds = %"17" %1 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %1, align 4 - %"58" = load i32, ptr addrspace(5) %1, align 4 - store i32 %"58", ptr addrspace(5) %"14", align 4 + %"57" = load i32, ptr addrspace(5) %1, align 4 + store i32 %"57", ptr addrspace(5) %"14", align 4 br label %"19" "19": ; preds = %"18", %"17" - %"59" = load i64, ptr addrspace(5) %"5", align 8 - %"60" = load i32, ptr addrspace(5) %"14", align 4 - %"126" = inttoptr i64 %"59" to ptr - store i32 %"60", ptr %"126", align 4 - %"62" = load float, ptr addrspace(5) %"8", align 4 - %"63" = load float, ptr addrspace(5) %"9", align 4 - %"61" = fcmp uno float %"62", %"63" - store i1 %"61", ptr addrspace(5) %"15", align 1 - %"64" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"64", label %"20", label %"21" + %"58" = load i64, ptr addrspace(5) %"5", align 8 + %"59" = load i32, ptr addrspace(5) %"14", align 4 + %"125" = inttoptr i64 %"58" to ptr + store i32 %"59", ptr %"125", align 4 + %"61" = load float, ptr addrspace(5) %"8", align 4 + %"62" = load float, ptr addrspace(5) %"9", align 4 + %"60" = fcmp uno float %"61", %"62" + store i1 %"60", ptr addrspace(5) %"15", align 1 + %"63" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"63", label %"20", label %"21" "20": ; preds = %"19" %2 = alloca i32, align 4, addrspace(5) store i32 1, ptr addrspace(5) %2, align 4 - %"65" = load i32, ptr addrspace(5) %2, align 4 - store i32 %"65", ptr addrspace(5) %"14", align 4 + %"64" = load i32, ptr addrspace(5) %2, align 4 + store i32 %"64", ptr 
addrspace(5) %"14", align 4 br label %"21" "21": ; preds = %"20", %"19" - %"66" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"66", label %"23", label %"22" + %"65" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"65", label %"23", label %"22" "22": ; preds = %"21" %3 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %3, align 4 - %"67" = load i32, ptr addrspace(5) %3, align 4 - store i32 %"67", ptr addrspace(5) %"14", align 4 + %"66" = load i32, ptr addrspace(5) %3, align 4 + store i32 %"66", ptr addrspace(5) %"14", align 4 br label %"23" "23": ; preds = %"22", %"21" - %"68" = load i64, ptr addrspace(5) %"5", align 8 - %"69" = load i32, ptr addrspace(5) %"14", align 4 - %"127" = inttoptr i64 %"68" to ptr - %"146" = getelementptr inbounds i8, ptr %"127", i64 4 - store i32 %"69", ptr %"146", align 4 - %"71" = load float, ptr addrspace(5) %"10", align 4 - %"72" = load float, ptr addrspace(5) %"11", align 4 - %"70" = fcmp uno float %"71", %"72" - store i1 %"70", ptr addrspace(5) %"15", align 1 - %"73" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"73", label %"24", label %"25" + %"67" = load i64, ptr addrspace(5) %"5", align 8 + %"68" = load i32, ptr addrspace(5) %"14", align 4 + %"126" = inttoptr i64 %"67" to ptr + %"145" = getelementptr inbounds i8, ptr %"126", i64 4 + store i32 %"68", ptr %"145", align 4 + %"70" = load float, ptr addrspace(5) %"10", align 4 + %"71" = load float, ptr addrspace(5) %"11", align 4 + %"69" = fcmp uno float %"70", %"71" + store i1 %"69", ptr addrspace(5) %"15", align 1 + %"72" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"72", label %"24", label %"25" "24": ; preds = %"23" %4 = alloca i32, align 4, addrspace(5) store i32 1, ptr addrspace(5) %4, align 4 - %"74" = load i32, ptr addrspace(5) %4, align 4 - store i32 %"74", ptr addrspace(5) %"14", align 4 + %"73" = load i32, ptr addrspace(5) %4, align 4 + store i32 %"73", ptr addrspace(5) %"14", align 4 br label %"25" "25": ; preds = %"24", %"23" - %"75" 
= load i1, ptr addrspace(5) %"15", align 1 - br i1 %"75", label %"27", label %"26" + %"74" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"74", label %"27", label %"26" "26": ; preds = %"25" %5 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %5, align 4 - %"76" = load i32, ptr addrspace(5) %5, align 4 - store i32 %"76", ptr addrspace(5) %"14", align 4 + %"75" = load i32, ptr addrspace(5) %5, align 4 + store i32 %"75", ptr addrspace(5) %"14", align 4 br label %"27" "27": ; preds = %"26", %"25" - %"77" = load i64, ptr addrspace(5) %"5", align 8 - %"78" = load i32, ptr addrspace(5) %"14", align 4 - %"128" = inttoptr i64 %"77" to ptr - %"148" = getelementptr inbounds i8, ptr %"128", i64 8 - store i32 %"78", ptr %"148", align 4 - %"80" = load float, ptr addrspace(5) %"12", align 4 - %"81" = load float, ptr addrspace(5) %"13", align 4 - %"79" = fcmp uno float %"80", %"81" - store i1 %"79", ptr addrspace(5) %"15", align 1 - %"82" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"82", label %"28", label %"29" + %"76" = load i64, ptr addrspace(5) %"5", align 8 + %"77" = load i32, ptr addrspace(5) %"14", align 4 + %"127" = inttoptr i64 %"76" to ptr + %"147" = getelementptr inbounds i8, ptr %"127", i64 8 + store i32 %"77", ptr %"147", align 4 + %"79" = load float, ptr addrspace(5) %"12", align 4 + %"80" = load float, ptr addrspace(5) %"13", align 4 + %"78" = fcmp uno float %"79", %"80" + store i1 %"78", ptr addrspace(5) %"15", align 1 + %"81" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"81", label %"28", label %"29" "28": ; preds = %"27" %6 = alloca i32, align 4, addrspace(5) store i32 1, ptr addrspace(5) %6, align 4 - %"83" = load i32, ptr addrspace(5) %6, align 4 - store i32 %"83", ptr addrspace(5) %"14", align 4 + %"82" = load i32, ptr addrspace(5) %6, align 4 + store i32 %"82", ptr addrspace(5) %"14", align 4 br label %"29" "29": ; preds = %"28", %"27" - %"84" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"84", label %"31", label 
%"30" + %"83" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"83", label %"31", label %"30" "30": ; preds = %"29" %7 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %7, align 4 - %"85" = load i32, ptr addrspace(5) %7, align 4 - store i32 %"85", ptr addrspace(5) %"14", align 4 + %"84" = load i32, ptr addrspace(5) %7, align 4 + store i32 %"84", ptr addrspace(5) %"14", align 4 br label %"31" "31": ; preds = %"30", %"29" - %"86" = load i64, ptr addrspace(5) %"5", align 8 - %"87" = load i32, ptr addrspace(5) %"14", align 4 - %"129" = inttoptr i64 %"86" to ptr - %"150" = getelementptr inbounds i8, ptr %"129", i64 12 - store i32 %"87", ptr %"150", align 4 + %"85" = load i64, ptr addrspace(5) %"5", align 8 + %"86" = load i32, ptr addrspace(5) %"14", align 4 + %"128" = inttoptr i64 %"85" to ptr + %"149" = getelementptr inbounds i8, ptr %"128", i64 12 + store i32 %"86", ptr %"149", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp_num.ll b/ptx/src/test/spirv_run/setp_num.ll index 07cf161..a6254a2 100644 --- a/ptx/src/test/spirv_run/setp_num.ll +++ b/ptx/src/test/spirv_run/setp_num.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"116", ptr addrspace(4) byref(i64) %"117") #0 { -"130": +define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115", ptr addrspace(4) byref(i64) %"116") #0 { +"129": %"32" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"32", align 1 - %"33" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"33", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -19,172 +17,172 @@ define protected 
amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"116" %"13" = alloca float, align 4, addrspace(5) %"14" = alloca i32, align 4, addrspace(5) %"15" = alloca i1, align 1, addrspace(5) + %"33" = load i64, ptr addrspace(4) %"115", align 8 + store i64 %"33", ptr addrspace(5) %"4", align 8 %"34" = load i64, ptr addrspace(4) %"116", align 8 - store i64 %"34", ptr addrspace(5) %"4", align 8 - %"35" = load i64, ptr addrspace(4) %"117", align 8 - store i64 %"35", ptr addrspace(5) %"5", align 8 - %"37" = load i64, ptr addrspace(5) %"4", align 8 - %"118" = inttoptr i64 %"37" to ptr - %"36" = load float, ptr %"118", align 4 - store float %"36", ptr addrspace(5) %"6", align 4 - %"39" = load i64, ptr addrspace(5) %"4", align 8 - %"119" = inttoptr i64 %"39" to ptr - %"132" = getelementptr inbounds i8, ptr %"119", i64 4 - %"38" = load float, ptr %"132", align 4 - store float %"38", ptr addrspace(5) %"7", align 4 - %"41" = load i64, ptr addrspace(5) %"4", align 8 - %"120" = inttoptr i64 %"41" to ptr - %"134" = getelementptr inbounds i8, ptr %"120", i64 8 - %"40" = load float, ptr %"134", align 4 - store float %"40", ptr addrspace(5) %"8", align 4 - %"43" = load i64, ptr addrspace(5) %"4", align 8 - %"121" = inttoptr i64 %"43" to ptr - %"136" = getelementptr inbounds i8, ptr %"121", i64 12 - %"42" = load float, ptr %"136", align 4 - store float %"42", ptr addrspace(5) %"9", align 4 - %"45" = load i64, ptr addrspace(5) %"4", align 8 - %"122" = inttoptr i64 %"45" to ptr - %"138" = getelementptr inbounds i8, ptr %"122", i64 16 - %"44" = load float, ptr %"138", align 4 - store float %"44", ptr addrspace(5) %"10", align 4 - %"47" = load i64, ptr addrspace(5) %"4", align 8 - %"123" = inttoptr i64 %"47" to ptr - %"140" = getelementptr inbounds i8, ptr %"123", i64 20 - %"46" = load float, ptr %"140", align 4 - store float %"46", ptr addrspace(5) %"11", align 4 - %"49" = load i64, ptr addrspace(5) %"4", align 8 - %"124" = inttoptr i64 %"49" to ptr - %"142" = getelementptr inbounds 
i8, ptr %"124", i64 24 - %"48" = load float, ptr %"142", align 4 - store float %"48", ptr addrspace(5) %"12", align 4 - %"51" = load i64, ptr addrspace(5) %"4", align 8 - %"125" = inttoptr i64 %"51" to ptr - %"144" = getelementptr inbounds i8, ptr %"125", i64 28 - %"50" = load float, ptr %"144", align 4 - store float %"50", ptr addrspace(5) %"13", align 4 - %"53" = load float, ptr addrspace(5) %"6", align 4 - %"54" = load float, ptr addrspace(5) %"7", align 4 - %"52" = fcmp ord float %"53", %"54" - store i1 %"52", ptr addrspace(5) %"15", align 1 - %"55" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"55", label %"16", label %"17" - -"16": ; preds = %"130" + store i64 %"34", ptr addrspace(5) %"5", align 8 + %"36" = load i64, ptr addrspace(5) %"4", align 8 + %"117" = inttoptr i64 %"36" to ptr + %"35" = load float, ptr %"117", align 4 + store float %"35", ptr addrspace(5) %"6", align 4 + %"38" = load i64, ptr addrspace(5) %"4", align 8 + %"118" = inttoptr i64 %"38" to ptr + %"131" = getelementptr inbounds i8, ptr %"118", i64 4 + %"37" = load float, ptr %"131", align 4 + store float %"37", ptr addrspace(5) %"7", align 4 + %"40" = load i64, ptr addrspace(5) %"4", align 8 + %"119" = inttoptr i64 %"40" to ptr + %"133" = getelementptr inbounds i8, ptr %"119", i64 8 + %"39" = load float, ptr %"133", align 4 + store float %"39", ptr addrspace(5) %"8", align 4 + %"42" = load i64, ptr addrspace(5) %"4", align 8 + %"120" = inttoptr i64 %"42" to ptr + %"135" = getelementptr inbounds i8, ptr %"120", i64 12 + %"41" = load float, ptr %"135", align 4 + store float %"41", ptr addrspace(5) %"9", align 4 + %"44" = load i64, ptr addrspace(5) %"4", align 8 + %"121" = inttoptr i64 %"44" to ptr + %"137" = getelementptr inbounds i8, ptr %"121", i64 16 + %"43" = load float, ptr %"137", align 4 + store float %"43", ptr addrspace(5) %"10", align 4 + %"46" = load i64, ptr addrspace(5) %"4", align 8 + %"122" = inttoptr i64 %"46" to ptr + %"139" = getelementptr inbounds i8, ptr %"122", i64 
20 + %"45" = load float, ptr %"139", align 4 + store float %"45", ptr addrspace(5) %"11", align 4 + %"48" = load i64, ptr addrspace(5) %"4", align 8 + %"123" = inttoptr i64 %"48" to ptr + %"141" = getelementptr inbounds i8, ptr %"123", i64 24 + %"47" = load float, ptr %"141", align 4 + store float %"47", ptr addrspace(5) %"12", align 4 + %"50" = load i64, ptr addrspace(5) %"4", align 8 + %"124" = inttoptr i64 %"50" to ptr + %"143" = getelementptr inbounds i8, ptr %"124", i64 28 + %"49" = load float, ptr %"143", align 4 + store float %"49", ptr addrspace(5) %"13", align 4 + %"52" = load float, ptr addrspace(5) %"6", align 4 + %"53" = load float, ptr addrspace(5) %"7", align 4 + %"51" = fcmp ord float %"52", %"53" + store i1 %"51", ptr addrspace(5) %"15", align 1 + %"54" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"54", label %"16", label %"17" + +"16": ; preds = %"129" %0 = alloca i32, align 4, addrspace(5) store i32 2, ptr addrspace(5) %0, align 4 - %"56" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"56", ptr addrspace(5) %"14", align 4 + %"55" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"55", ptr addrspace(5) %"14", align 4 br label %"17" -"17": ; preds = %"16", %"130" - %"57" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"57", label %"19", label %"18" +"17": ; preds = %"16", %"129" + %"56" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"56", label %"19", label %"18" "18": ; preds = %"17" %1 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %1, align 4 - %"58" = load i32, ptr addrspace(5) %1, align 4 - store i32 %"58", ptr addrspace(5) %"14", align 4 + %"57" = load i32, ptr addrspace(5) %1, align 4 + store i32 %"57", ptr addrspace(5) %"14", align 4 br label %"19" "19": ; preds = %"18", %"17" - %"59" = load i64, ptr addrspace(5) %"5", align 8 - %"60" = load i32, ptr addrspace(5) %"14", align 4 - %"126" = inttoptr i64 %"59" to ptr - store i32 %"60", ptr %"126", align 4 - %"62" = load float, ptr addrspace(5) %"8", 
align 4 - %"63" = load float, ptr addrspace(5) %"9", align 4 - %"61" = fcmp ord float %"62", %"63" - store i1 %"61", ptr addrspace(5) %"15", align 1 - %"64" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"64", label %"20", label %"21" + %"58" = load i64, ptr addrspace(5) %"5", align 8 + %"59" = load i32, ptr addrspace(5) %"14", align 4 + %"125" = inttoptr i64 %"58" to ptr + store i32 %"59", ptr %"125", align 4 + %"61" = load float, ptr addrspace(5) %"8", align 4 + %"62" = load float, ptr addrspace(5) %"9", align 4 + %"60" = fcmp ord float %"61", %"62" + store i1 %"60", ptr addrspace(5) %"15", align 1 + %"63" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"63", label %"20", label %"21" "20": ; preds = %"19" %2 = alloca i32, align 4, addrspace(5) store i32 2, ptr addrspace(5) %2, align 4 - %"65" = load i32, ptr addrspace(5) %2, align 4 - store i32 %"65", ptr addrspace(5) %"14", align 4 + %"64" = load i32, ptr addrspace(5) %2, align 4 + store i32 %"64", ptr addrspace(5) %"14", align 4 br label %"21" "21": ; preds = %"20", %"19" - %"66" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"66", label %"23", label %"22" + %"65" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"65", label %"23", label %"22" "22": ; preds = %"21" %3 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %3, align 4 - %"67" = load i32, ptr addrspace(5) %3, align 4 - store i32 %"67", ptr addrspace(5) %"14", align 4 + %"66" = load i32, ptr addrspace(5) %3, align 4 + store i32 %"66", ptr addrspace(5) %"14", align 4 br label %"23" "23": ; preds = %"22", %"21" - %"68" = load i64, ptr addrspace(5) %"5", align 8 - %"69" = load i32, ptr addrspace(5) %"14", align 4 - %"127" = inttoptr i64 %"68" to ptr - %"146" = getelementptr inbounds i8, ptr %"127", i64 4 - store i32 %"69", ptr %"146", align 4 - %"71" = load float, ptr addrspace(5) %"10", align 4 - %"72" = load float, ptr addrspace(5) %"11", align 4 - %"70" = fcmp ord float %"71", %"72" - store i1 %"70", ptr addrspace(5) 
%"15", align 1 - %"73" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"73", label %"24", label %"25" + %"67" = load i64, ptr addrspace(5) %"5", align 8 + %"68" = load i32, ptr addrspace(5) %"14", align 4 + %"126" = inttoptr i64 %"67" to ptr + %"145" = getelementptr inbounds i8, ptr %"126", i64 4 + store i32 %"68", ptr %"145", align 4 + %"70" = load float, ptr addrspace(5) %"10", align 4 + %"71" = load float, ptr addrspace(5) %"11", align 4 + %"69" = fcmp ord float %"70", %"71" + store i1 %"69", ptr addrspace(5) %"15", align 1 + %"72" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"72", label %"24", label %"25" "24": ; preds = %"23" %4 = alloca i32, align 4, addrspace(5) store i32 2, ptr addrspace(5) %4, align 4 - %"74" = load i32, ptr addrspace(5) %4, align 4 - store i32 %"74", ptr addrspace(5) %"14", align 4 + %"73" = load i32, ptr addrspace(5) %4, align 4 + store i32 %"73", ptr addrspace(5) %"14", align 4 br label %"25" "25": ; preds = %"24", %"23" - %"75" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"75", label %"27", label %"26" + %"74" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"74", label %"27", label %"26" "26": ; preds = %"25" %5 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %5, align 4 - %"76" = load i32, ptr addrspace(5) %5, align 4 - store i32 %"76", ptr addrspace(5) %"14", align 4 + %"75" = load i32, ptr addrspace(5) %5, align 4 + store i32 %"75", ptr addrspace(5) %"14", align 4 br label %"27" "27": ; preds = %"26", %"25" - %"77" = load i64, ptr addrspace(5) %"5", align 8 - %"78" = load i32, ptr addrspace(5) %"14", align 4 - %"128" = inttoptr i64 %"77" to ptr - %"148" = getelementptr inbounds i8, ptr %"128", i64 8 - store i32 %"78", ptr %"148", align 4 - %"80" = load float, ptr addrspace(5) %"12", align 4 - %"81" = load float, ptr addrspace(5) %"13", align 4 - %"79" = fcmp ord float %"80", %"81" - store i1 %"79", ptr addrspace(5) %"15", align 1 - %"82" = load i1, ptr addrspace(5) %"15", align 1 - br i1 
%"82", label %"28", label %"29" + %"76" = load i64, ptr addrspace(5) %"5", align 8 + %"77" = load i32, ptr addrspace(5) %"14", align 4 + %"127" = inttoptr i64 %"76" to ptr + %"147" = getelementptr inbounds i8, ptr %"127", i64 8 + store i32 %"77", ptr %"147", align 4 + %"79" = load float, ptr addrspace(5) %"12", align 4 + %"80" = load float, ptr addrspace(5) %"13", align 4 + %"78" = fcmp ord float %"79", %"80" + store i1 %"78", ptr addrspace(5) %"15", align 1 + %"81" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"81", label %"28", label %"29" "28": ; preds = %"27" %6 = alloca i32, align 4, addrspace(5) store i32 2, ptr addrspace(5) %6, align 4 - %"83" = load i32, ptr addrspace(5) %6, align 4 - store i32 %"83", ptr addrspace(5) %"14", align 4 + %"82" = load i32, ptr addrspace(5) %6, align 4 + store i32 %"82", ptr addrspace(5) %"14", align 4 br label %"29" "29": ; preds = %"28", %"27" - %"84" = load i1, ptr addrspace(5) %"15", align 1 - br i1 %"84", label %"31", label %"30" + %"83" = load i1, ptr addrspace(5) %"15", align 1 + br i1 %"83", label %"31", label %"30" "30": ; preds = %"29" %7 = alloca i32, align 4, addrspace(5) store i32 0, ptr addrspace(5) %7, align 4 - %"85" = load i32, ptr addrspace(5) %7, align 4 - store i32 %"85", ptr addrspace(5) %"14", align 4 + %"84" = load i32, ptr addrspace(5) %7, align 4 + store i32 %"84", ptr addrspace(5) %"14", align 4 br label %"31" "31": ; preds = %"30", %"29" - %"86" = load i64, ptr addrspace(5) %"5", align 8 - %"87" = load i32, ptr addrspace(5) %"14", align 4 - %"129" = inttoptr i64 %"86" to ptr - %"150" = getelementptr inbounds i8, ptr %"129", i64 12 - store i32 %"87", ptr %"150", align 4 + %"85" = load i64, ptr addrspace(5) %"5", align 8 + %"86" = load i32, ptr addrspace(5) %"14", align 4 + %"128" = inttoptr i64 %"85" to ptr + %"149" = getelementptr inbounds i8, ptr %"128", i64 12 + store i32 %"86", ptr %"149", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp_pred2.ll 
b/ptx/src/test/spirv_run/setp_pred2.ll index 9ce8135..8220fc0 100644 --- a/ptx/src/test/spirv_run/setp_pred2.ll +++ b/ptx/src/test/spirv_run/setp_pred2.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { -"42": +define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { +"41": %"15" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"15", align 1 - %"16" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"16", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -14,53 +12,53 @@ define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"37 %"8" = alloca float, align 4, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) %"10" = alloca i1, align 1, addrspace(5) + %"16" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"16", ptr addrspace(5) %"4", align 8 %"17" = load i64, ptr addrspace(4) %"37", align 8 - store i64 %"17", ptr addrspace(5) %"4", align 8 - %"18" = load i64, ptr addrspace(4) %"38", align 8 - store i64 %"18", ptr addrspace(5) %"5", align 8 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"39" = inttoptr i64 %"20" to ptr - %"19" = load float, ptr %"39", align 4 - store float %"19", ptr addrspace(5) %"6", align 4 - %"22" = load i64, ptr addrspace(5) %"4", align 8 - %"40" = inttoptr i64 %"22" to ptr - %"44" = getelementptr inbounds i8, ptr %"40", i64 4 - %"21" = load float, ptr %"44", align 4 - store float %"21", ptr addrspace(5) %"7", align 4 - %"25" = load float, ptr addrspace(5) %"6", align 4 - %"26" = load float, ptr 
addrspace(5) %"7", align 4 - %"23" = fcmp ogt float %"25", %"26" - %"24" = xor i1 %"23", true - store i1 %"23", ptr addrspace(5) %"9", align 1 - store i1 %"24", ptr addrspace(5) %"10", align 1 - %"27" = load i1, ptr addrspace(5) %"9", align 1 - br i1 %"27", label %"11", label %"12" + store i64 %"17", ptr addrspace(5) %"5", align 8 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"38" = inttoptr i64 %"19" to ptr + %"18" = load float, ptr %"38", align 4 + store float %"18", ptr addrspace(5) %"6", align 4 + %"21" = load i64, ptr addrspace(5) %"4", align 8 + %"39" = inttoptr i64 %"21" to ptr + %"43" = getelementptr inbounds i8, ptr %"39", i64 4 + %"20" = load float, ptr %"43", align 4 + store float %"20", ptr addrspace(5) %"7", align 4 + %"24" = load float, ptr addrspace(5) %"6", align 4 + %"25" = load float, ptr addrspace(5) %"7", align 4 + %"22" = fcmp ogt float %"24", %"25" + %"23" = xor i1 %"22", true + store i1 %"22", ptr addrspace(5) %"9", align 1 + store i1 %"23", ptr addrspace(5) %"10", align 1 + %"26" = load i1, ptr addrspace(5) %"9", align 1 + br i1 %"26", label %"11", label %"12" -"11": ; preds = %"42" - %"29" = load float, ptr addrspace(5) %"6", align 4 +"11": ; preds = %"41" + %"28" = load float, ptr addrspace(5) %"6", align 4 %0 = alloca float, align 4, addrspace(5) - store float %"29", ptr addrspace(5) %0, align 4 - %"28" = load float, ptr addrspace(5) %0, align 4 - store float %"28", ptr addrspace(5) %"8", align 4 + store float %"28", ptr addrspace(5) %0, align 4 + %"27" = load float, ptr addrspace(5) %0, align 4 + store float %"27", ptr addrspace(5) %"8", align 4 br label %"12" -"12": ; preds = %"11", %"42" - %"30" = load i1, ptr addrspace(5) %"10", align 1 - br i1 %"30", label %"13", label %"14" +"12": ; preds = %"11", %"41" + %"29" = load i1, ptr addrspace(5) %"10", align 1 + br i1 %"29", label %"13", label %"14" "13": ; preds = %"12" - %"32" = load float, ptr addrspace(5) %"7", align 4 + %"31" = load float, ptr addrspace(5) %"7", align 4 %1 = 
alloca float, align 4, addrspace(5) - store float %"32", ptr addrspace(5) %1, align 4 - %"31" = load float, ptr addrspace(5) %1, align 4 - store float %"31", ptr addrspace(5) %"8", align 4 + store float %"31", ptr addrspace(5) %1, align 4 + %"30" = load float, ptr addrspace(5) %1, align 4 + store float %"30", ptr addrspace(5) %"8", align 4 br label %"14" "14": ; preds = %"13", %"12" - %"33" = load i64, ptr addrspace(5) %"5", align 8 - %"34" = load float, ptr addrspace(5) %"8", align 4 - %"41" = inttoptr i64 %"33" to ptr - store float %"34", ptr %"41", align 4 + %"32" = load i64, ptr addrspace(5) %"5", align 8 + %"33" = load float, ptr addrspace(5) %"8", align 4 + %"40" = inttoptr i64 %"32" to ptr + store float %"33", ptr %"40", align 4 ret void } diff --git a/ptx/src/test/spirv_run/shared_ptr_32.ll b/ptx/src/test/spirv_run/shared_ptr_32.ll index a132a58..8705967 100644 --- a/ptx/src/test/spirv_run/shared_ptr_32.ll +++ b/ptx/src/test/spirv_run/shared_ptr_32.ll @@ -3,42 +3,40 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [128 x i8] undef, align 4 -define protected amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 { -"32": +define protected amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { +"31": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"24", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 %"12" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(4) %"26", align 8 - 
store i64 %"13", ptr addrspace(5) %"6", align 8 + store i64 %"12", ptr addrspace(5) %"6", align 8 %0 = alloca i32, align 4, addrspace(5) store i32 ptrtoint (ptr addrspace(3) @"4" to i32), ptr addrspace(5) %0, align 4 - %"14" = load i32, ptr addrspace(5) %0, align 4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"28" = inttoptr i64 %"16" to ptr addrspace(1) - %"15" = load i64, ptr addrspace(1) %"28", align 8 - store i64 %"15", ptr addrspace(5) %"8", align 8 - %"17" = load i32, ptr addrspace(5) %"7", align 4 - %"18" = load i64, ptr addrspace(5) %"8", align 8 - %"29" = inttoptr i32 %"17" to ptr addrspace(3) - store i64 %"18", ptr addrspace(3) %"29", align 8 - %"20" = load i32, ptr addrspace(5) %"7", align 4 - %"30" = inttoptr i32 %"20" to ptr addrspace(3) - %"34" = getelementptr inbounds i8, ptr addrspace(3) %"30", i64 0 - %"19" = load i64, ptr addrspace(3) %"34", align 8 - store i64 %"19", ptr addrspace(5) %"9", align 8 - %"21" = load i64, ptr addrspace(5) %"6", align 8 - %"22" = load i64, ptr addrspace(5) %"9", align 8 - %"31" = inttoptr i64 %"21" to ptr addrspace(1) - store i64 %"22", ptr addrspace(1) %"31", align 8 + %"13" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"27" = inttoptr i64 %"15" to ptr addrspace(1) + %"14" = load i64, ptr addrspace(1) %"27", align 8 + store i64 %"14", ptr addrspace(5) %"8", align 8 + %"16" = load i32, ptr addrspace(5) %"7", align 4 + %"17" = load i64, ptr addrspace(5) %"8", align 8 + %"28" = inttoptr i32 %"16" to ptr addrspace(3) + store i64 %"17", ptr addrspace(3) %"28", align 8 + %"19" = load i32, ptr addrspace(5) %"7", align 4 + %"29" = inttoptr i32 %"19" to ptr addrspace(3) + %"33" = getelementptr inbounds i8, ptr addrspace(3) %"29", i64 0 + %"18" = load i64, ptr addrspace(3) %"33", align 8 + store i64 %"18", ptr addrspace(5) %"9", align 8 + %"20" = load i64, ptr addrspace(5) 
%"6", align 8 + %"21" = load i64, ptr addrspace(5) %"9", align 8 + %"30" = inttoptr i64 %"20" to ptr addrspace(1) + store i64 %"21", ptr addrspace(1) %"30", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shared_ptr_take_address.ll b/ptx/src/test/spirv_run/shared_ptr_take_address.ll index a3d3e5d..6c430a2 100644 --- a/ptx/src/test/spirv_run/shared_ptr_take_address.ll +++ b/ptx/src/test/spirv_run/shared_ptr_take_address.ll @@ -3,41 +3,39 @@ target triple = "amdgcn-amd-amdhsa" @shared_mem = external hidden addrspace(3) global [0 x i8], align 4 -define protected amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"30": +define protected amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"29": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 %"12" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"13", ptr addrspace(5) %"6", align 8 + store i64 %"12", ptr addrspace(5) %"6", align 8 %0 = alloca i64, align 8, addrspace(5) store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %0, align 8 - %"14" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"26" = inttoptr i64 %"16" to ptr addrspace(1) - %"15" = load i64, ptr addrspace(1) %"26", align 8 - store i64 %"15", ptr addrspace(5) 
%"8", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"18" = load i64, ptr addrspace(5) %"8", align 8 - %"27" = inttoptr i64 %"17" to ptr addrspace(3) - store i64 %"18", ptr addrspace(3) %"27", align 8 - %"20" = load i64, ptr addrspace(5) %"7", align 8 - %"28" = inttoptr i64 %"20" to ptr addrspace(3) - %"19" = load i64, ptr addrspace(3) %"28", align 8 - store i64 %"19", ptr addrspace(5) %"9", align 8 - %"21" = load i64, ptr addrspace(5) %"6", align 8 - %"22" = load i64, ptr addrspace(5) %"9", align 8 - %"29" = inttoptr i64 %"21" to ptr addrspace(1) - store i64 %"22", ptr addrspace(1) %"29", align 8 + %"13" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"25" = inttoptr i64 %"15" to ptr addrspace(1) + %"14" = load i64, ptr addrspace(1) %"25", align 8 + store i64 %"14", ptr addrspace(5) %"8", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"17" = load i64, ptr addrspace(5) %"8", align 8 + %"26" = inttoptr i64 %"16" to ptr addrspace(3) + store i64 %"17", ptr addrspace(3) %"26", align 8 + %"19" = load i64, ptr addrspace(5) %"7", align 8 + %"27" = inttoptr i64 %"19" to ptr addrspace(3) + %"18" = load i64, ptr addrspace(3) %"27", align 8 + store i64 %"18", ptr addrspace(5) %"9", align 8 + %"20" = load i64, ptr addrspace(5) %"6", align 8 + %"21" = load i64, ptr addrspace(5) %"9", align 8 + %"28" = inttoptr i64 %"20" to ptr addrspace(1) + store i64 %"21", ptr addrspace(1) %"28", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shared_unify_decl.ll b/ptx/src/test/spirv_run/shared_unify_decl.ll index 1079e59..4cc24fb 100644 --- a/ptx/src/test/spirv_run/shared_unify_decl.ll +++ b/ptx/src/test/spirv_run/shared_unify_decl.ll @@ -4,76 +4,70 @@ target triple = "amdgcn-amd-amdhsa" @shared_ex = external hidden addrspace(3) global [0 x i32] @shared_mod = private addrspace(3) global [4 x i32] undef -define private i64 @"3"(ptr addrspace(3) %"69", ptr 
addrspace(3) %"70") #0 { -"62": +define private i64 @"3"(ptr addrspace(3) %"66", ptr addrspace(3) %"67") #0 { +"59": %"8" = alloca i64, align 8, addrspace(5) %"20" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"20", align 1 - %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) - %"26" = load i64, ptr addrspace(3) %"70", align 8 - store i64 %"26", ptr addrspace(5) %"9", align 8 - %"27" = load i64, ptr addrspace(3) %"69", align 8 - store i64 %"27", ptr addrspace(5) %"10", align 8 - %"29" = load i64, ptr addrspace(5) %"10", align 8 - %"30" = load i64, ptr addrspace(5) %"9", align 8 - %"53" = add i64 %"29", %"30" - store i64 %"53", ptr addrspace(5) %"8", align 8 - %"31" = load i64, ptr addrspace(5) %"8", align 8 - ret i64 %"31" + %"23" = load i64, ptr addrspace(3) %"67", align 8 + store i64 %"23", ptr addrspace(5) %"9", align 8 + %"24" = load i64, ptr addrspace(3) %"66", align 8 + store i64 %"24", ptr addrspace(5) %"10", align 8 + %"26" = load i64, ptr addrspace(5) %"10", align 8 + %"27" = load i64, ptr addrspace(5) %"9", align 8 + %"50" = add i64 %"26", %"27" + store i64 %"50", ptr addrspace(5) %"8", align 8 + %"28" = load i64, ptr addrspace(5) %"8", align 8 + ret i64 %"28" } -define private i64 @"5"(i64 %"32", ptr addrspace(3) %"71", ptr addrspace(3) %"72") #0 { -"63": +define private i64 @"5"(i64 %"29", ptr addrspace(3) %"68", ptr addrspace(3) %"69") #0 { +"60": %"12" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) - %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 - %"23" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"23", align 1 - store i64 %"32", ptr addrspace(5) %"12", align 8 - %"33" = load i64, ptr addrspace(5) %"12", align 8 - store i64 %"33", ptr addrspace(3) %"71", align 8 - %"34" = call i64 @"3"(ptr addrspace(3) 
%"71", ptr addrspace(3) %"72") - store i64 %"34", ptr addrspace(5) %"11", align 8 - %"35" = load i64, ptr addrspace(5) %"11", align 8 - ret i64 %"35" + %"21" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"21", align 1 + store i64 %"29", ptr addrspace(5) %"12", align 8 + %"30" = load i64, ptr addrspace(5) %"12", align 8 + store i64 %"30", ptr addrspace(3) %"68", align 8 + %"31" = call i64 @"3"(ptr addrspace(3) %"68", ptr addrspace(3) %"69") + store i64 %"31", ptr addrspace(5) %"11", align 8 + %"32" = load i64, ptr addrspace(5) %"11", align 8 + ret i64 %"32" } -define protected amdgpu_kernel void @shared_unify_decl(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #0 { -"64": - %"24" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"24", align 1 - %"25" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"25", align 1 +define protected amdgpu_kernel void @shared_unify_decl(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { +"61": + %"22" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"22", align 1 %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) %"18" = alloca i64, align 8, addrspace(5) %"19" = alloca i64, align 8, addrspace(5) - %"36" = load i64, ptr addrspace(4) %"49", align 8 - store i64 %"36", ptr addrspace(5) %"16", align 8 - %"37" = load i64, ptr addrspace(4) %"50", align 8 - store i64 %"37", ptr addrspace(5) %"17", align 8 - %"39" = load i64, ptr addrspace(5) %"16", align 8 - %"56" = inttoptr i64 %"39" to ptr addrspace(1) - %"38" = load i64, ptr addrspace(1) %"56", align 8 - store i64 %"38", ptr addrspace(5) %"18", align 8 - %"41" = load i64, ptr addrspace(5) %"16", align 8 - %"57" = inttoptr i64 %"41" to ptr addrspace(1) - %"74" = getelementptr inbounds i8, ptr addrspace(1) %"57", i64 8 - %"40" = load i64, ptr addrspace(1) %"74", align 8 - store i64 %"40", ptr addrspace(5) %"19", align 8 - 
%"42" = load i64, ptr addrspace(5) %"19", align 8 - store i64 %"42", ptr addrspace(3) @shared_mod, align 8 - %"44" = load i64, ptr addrspace(5) %"18", align 8 - %"59" = call i64 @"5"(i64 %"44", ptr addrspace(3) @shared_ex, ptr addrspace(3) @shared_mod) - store i64 %"59", ptr addrspace(5) %"19", align 8 - %"45" = load i64, ptr addrspace(5) %"17", align 8 - %"46" = load i64, ptr addrspace(5) %"19", align 8 - %"61" = inttoptr i64 %"45" to ptr - store i64 %"46", ptr %"61", align 8 + %"33" = load i64, ptr addrspace(4) %"46", align 8 + store i64 %"33", ptr addrspace(5) %"16", align 8 + %"34" = load i64, ptr addrspace(4) %"47", align 8 + store i64 %"34", ptr addrspace(5) %"17", align 8 + %"36" = load i64, ptr addrspace(5) %"16", align 8 + %"53" = inttoptr i64 %"36" to ptr addrspace(1) + %"35" = load i64, ptr addrspace(1) %"53", align 8 + store i64 %"35", ptr addrspace(5) %"18", align 8 + %"38" = load i64, ptr addrspace(5) %"16", align 8 + %"54" = inttoptr i64 %"38" to ptr addrspace(1) + %"71" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 8 + %"37" = load i64, ptr addrspace(1) %"71", align 8 + store i64 %"37", ptr addrspace(5) %"19", align 8 + %"39" = load i64, ptr addrspace(5) %"19", align 8 + store i64 %"39", ptr addrspace(3) @shared_mod, align 8 + %"41" = load i64, ptr addrspace(5) %"18", align 8 + %"56" = call i64 @"5"(i64 %"41", ptr addrspace(3) @shared_ex, ptr addrspace(3) @shared_mod) + store i64 %"56", ptr addrspace(5) %"19", align 8 + %"42" = load i64, ptr addrspace(5) %"17", align 8 + %"43" = load i64, ptr addrspace(5) %"19", align 8 + %"58" = inttoptr i64 %"42" to ptr + store i64 %"43", ptr %"58", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shared_unify_extern.ll b/ptx/src/test/spirv_run/shared_unify_extern.ll index d83ea7a..819e8a1 100644 --- a/ptx/src/test/spirv_run/shared_unify_extern.ll +++ b/ptx/src/test/spirv_run/shared_unify_extern.ll @@ -4,76 +4,70 @@ target triple = "amdgcn-amd-amdhsa" @shared_ex = external hidden addrspace(3) 
global [0 x i32] @shared_mod = private addrspace(3) global [4 x i32] undef -define private i64 @"3"(ptr addrspace(3) %"62", ptr addrspace(3) %"63") #0 { -"59": +define private i64 @"3"(ptr addrspace(3) %"59", ptr addrspace(3) %"60") #0 { +"56": %"4" = alloca i64, align 8, addrspace(5) %"17" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"17", align 1 - %"18" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"18", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) - %"23" = load i64, ptr addrspace(3) %"63", align 8 - store i64 %"23", ptr addrspace(5) %"5", align 8 - %"24" = load i64, ptr addrspace(3) %"62", align 8 - store i64 %"24", ptr addrspace(5) %"6", align 8 - %"26" = load i64, ptr addrspace(5) %"6", align 8 - %"27" = load i64, ptr addrspace(5) %"5", align 8 - %"50" = add i64 %"26", %"27" - store i64 %"50", ptr addrspace(5) %"4", align 8 - %"28" = load i64, ptr addrspace(5) %"4", align 8 - ret i64 %"28" + %"20" = load i64, ptr addrspace(3) %"60", align 8 + store i64 %"20", ptr addrspace(5) %"5", align 8 + %"21" = load i64, ptr addrspace(3) %"59", align 8 + store i64 %"21", ptr addrspace(5) %"6", align 8 + %"23" = load i64, ptr addrspace(5) %"6", align 8 + %"24" = load i64, ptr addrspace(5) %"5", align 8 + %"47" = add i64 %"23", %"24" + store i64 %"47", ptr addrspace(5) %"4", align 8 + %"25" = load i64, ptr addrspace(5) %"4", align 8 + ret i64 %"25" } -define private i64 @"7"(i64 %"29", ptr addrspace(3) %"64", ptr addrspace(3) %"65") #0 { -"60": +define private i64 @"7"(i64 %"26", ptr addrspace(3) %"61", ptr addrspace(3) %"62") #0 { +"57": %"9" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) - %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 - %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 - store i64 %"29", ptr addrspace(5) %"9", align 8 - %"30" = load i64, ptr 
addrspace(5) %"9", align 8 - store i64 %"30", ptr addrspace(3) %"64", align 8 - %"31" = call i64 @"3"(ptr addrspace(3) %"64", ptr addrspace(3) %"65") - store i64 %"31", ptr addrspace(5) %"8", align 8 - %"32" = load i64, ptr addrspace(5) %"8", align 8 - ret i64 %"32" + %"18" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"18", align 1 + store i64 %"26", ptr addrspace(5) %"9", align 8 + %"27" = load i64, ptr addrspace(5) %"9", align 8 + store i64 %"27", ptr addrspace(3) %"61", align 8 + %"28" = call i64 @"3"(ptr addrspace(3) %"61", ptr addrspace(3) %"62") + store i64 %"28", ptr addrspace(5) %"8", align 8 + %"29" = load i64, ptr addrspace(5) %"8", align 8 + ret i64 %"29" } -define protected amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { -"61": - %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 - %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 +define protected amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #0 { +"58": + %"19" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"19", align 1 %"13" = alloca i64, align 8, addrspace(5) %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) - %"33" = load i64, ptr addrspace(4) %"46", align 8 - store i64 %"33", ptr addrspace(5) %"13", align 8 - %"34" = load i64, ptr addrspace(4) %"47", align 8 - store i64 %"34", ptr addrspace(5) %"14", align 8 - %"36" = load i64, ptr addrspace(5) %"13", align 8 - %"53" = inttoptr i64 %"36" to ptr addrspace(1) - %"35" = load i64, ptr addrspace(1) %"53", align 8 - store i64 %"35", ptr addrspace(5) %"15", align 8 - %"38" = load i64, ptr addrspace(5) %"13", align 8 - %"54" = inttoptr i64 %"38" to ptr addrspace(1) - %"67" = getelementptr inbounds i8, ptr addrspace(1) 
%"54", i64 8 - %"37" = load i64, ptr addrspace(1) %"67", align 8 - store i64 %"37", ptr addrspace(5) %"16", align 8 - %"39" = load i64, ptr addrspace(5) %"16", align 8 - store i64 %"39", ptr addrspace(3) @shared_mod, align 8 - %"41" = load i64, ptr addrspace(5) %"15", align 8 - %"56" = call i64 @"7"(i64 %"41", ptr addrspace(3) @shared_ex, ptr addrspace(3) @shared_mod) - store i64 %"56", ptr addrspace(5) %"16", align 8 - %"42" = load i64, ptr addrspace(5) %"14", align 8 - %"43" = load i64, ptr addrspace(5) %"16", align 8 - %"58" = inttoptr i64 %"42" to ptr - store i64 %"43", ptr %"58", align 8 + %"30" = load i64, ptr addrspace(4) %"43", align 8 + store i64 %"30", ptr addrspace(5) %"13", align 8 + %"31" = load i64, ptr addrspace(4) %"44", align 8 + store i64 %"31", ptr addrspace(5) %"14", align 8 + %"33" = load i64, ptr addrspace(5) %"13", align 8 + %"50" = inttoptr i64 %"33" to ptr addrspace(1) + %"32" = load i64, ptr addrspace(1) %"50", align 8 + store i64 %"32", ptr addrspace(5) %"15", align 8 + %"35" = load i64, ptr addrspace(5) %"13", align 8 + %"51" = inttoptr i64 %"35" to ptr addrspace(1) + %"64" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 8 + %"34" = load i64, ptr addrspace(1) %"64", align 8 + store i64 %"34", ptr addrspace(5) %"16", align 8 + %"36" = load i64, ptr addrspace(5) %"16", align 8 + store i64 %"36", ptr addrspace(3) @shared_mod, align 8 + %"38" = load i64, ptr addrspace(5) %"15", align 8 + %"53" = call i64 @"7"(i64 %"38", ptr addrspace(3) @shared_ex, ptr addrspace(3) @shared_mod) + store i64 %"53", ptr addrspace(5) %"16", align 8 + %"39" = load i64, ptr addrspace(5) %"14", align 8 + %"40" = load i64, ptr addrspace(5) %"16", align 8 + %"55" = inttoptr i64 %"39" to ptr + store i64 %"40", ptr %"55", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shared_unify_local.ll b/ptx/src/test/spirv_run/shared_unify_local.ll index e3a1db7..b98b280 100644 --- a/ptx/src/test/spirv_run/shared_unify_local.ll +++ 
b/ptx/src/test/spirv_run/shared_unify_local.ll @@ -4,81 +4,75 @@ target triple = "amdgcn-amd-amdhsa" @shared_ex = external hidden addrspace(3) global [0 x i32] @"5" = private addrspace(3) global i64 undef, align 4 -define private i64 @"2"(i64 %"24", ptr addrspace(3) %"65", ptr addrspace(3) %"66") #0 { -"62": +define private i64 @"2"(i64 %"21", ptr addrspace(3) %"62", ptr addrspace(3) %"63") #0 { +"59": %"4" = alloca i64, align 8, addrspace(5) %"3" = alloca i64, align 8, addrspace(5) %"18" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"18", align 1 - %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 %"6" = alloca i64, align 8, addrspace(5) + store i64 %"21", ptr addrspace(5) %"4", align 8 + %"22" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"22", ptr addrspace(3) %"63", align 8 + %"23" = load i64, ptr addrspace(3) %"63", align 8 + store i64 %"23", ptr addrspace(5) %"6", align 8 + %"24" = load i64, ptr addrspace(3) %"62", align 8 store i64 %"24", ptr addrspace(5) %"4", align 8 - %"25" = load i64, ptr addrspace(5) %"4", align 8 - store i64 %"25", ptr addrspace(3) %"66", align 8 - %"26" = load i64, ptr addrspace(3) %"66", align 8 - store i64 %"26", ptr addrspace(5) %"6", align 8 - %"27" = load i64, ptr addrspace(3) %"65", align 8 - store i64 %"27", ptr addrspace(5) %"4", align 8 - %"29" = load i64, ptr addrspace(5) %"4", align 8 - %"30" = load i64, ptr addrspace(5) %"6", align 8 - %"54" = add i64 %"29", %"30" - store i64 %"54", ptr addrspace(5) %"3", align 8 - %"31" = load i64, ptr addrspace(5) %"3", align 8 - ret i64 %"31" + %"26" = load i64, ptr addrspace(5) %"4", align 8 + %"27" = load i64, ptr addrspace(5) %"6", align 8 + %"51" = add i64 %"26", %"27" + store i64 %"51", ptr addrspace(5) %"3", align 8 + %"28" = load i64, ptr addrspace(5) %"3", align 8 + ret i64 %"28" } -define private i64 @"7"(i64 %"32", i64 %"33", ptr addrspace(3) %"67", ptr addrspace(3) %"68") #0 { -"63": +define private i64 
@"7"(i64 %"29", i64 %"30", ptr addrspace(3) %"64", ptr addrspace(3) %"65") #0 { +"60": %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) - %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 - %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 - store i64 %"32", ptr addrspace(5) %"9", align 8 - store i64 %"33", ptr addrspace(5) %"10", align 8 - %"34" = load i64, ptr addrspace(5) %"9", align 8 - store i64 %"34", ptr addrspace(3) %"67", align 8 - %"36" = load i64, ptr addrspace(5) %"10", align 8 - %"35" = call i64 @"2"(i64 %"36", ptr addrspace(3) %"67", ptr addrspace(3) %"68") - store i64 %"35", ptr addrspace(5) %"8", align 8 - %"37" = load i64, ptr addrspace(5) %"8", align 8 - ret i64 %"37" + %"19" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"19", align 1 + store i64 %"29", ptr addrspace(5) %"9", align 8 + store i64 %"30", ptr addrspace(5) %"10", align 8 + %"31" = load i64, ptr addrspace(5) %"9", align 8 + store i64 %"31", ptr addrspace(3) %"64", align 8 + %"33" = load i64, ptr addrspace(5) %"10", align 8 + %"32" = call i64 @"2"(i64 %"33", ptr addrspace(3) %"64", ptr addrspace(3) %"65") + store i64 %"32", ptr addrspace(5) %"8", align 8 + %"34" = load i64, ptr addrspace(5) %"8", align 8 + ret i64 %"34" } -define protected amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"51", ptr addrspace(4) byref(i64) %"52") #0 { -"64": - %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 - %"23" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"23", align 1 +define protected amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #0 { +"61": + %"20" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"20", align 1 %"14" = alloca i64, align 8, 
addrspace(5) %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) - %"38" = load i64, ptr addrspace(4) %"51", align 8 - store i64 %"38", ptr addrspace(5) %"14", align 8 - %"39" = load i64, ptr addrspace(4) %"52", align 8 - store i64 %"39", ptr addrspace(5) %"15", align 8 - %"41" = load i64, ptr addrspace(5) %"14", align 8 - %"57" = inttoptr i64 %"41" to ptr addrspace(1) - %"40" = load i64, ptr addrspace(1) %"57", align 8 - store i64 %"40", ptr addrspace(5) %"16", align 8 - %"43" = load i64, ptr addrspace(5) %"14", align 8 - %"58" = inttoptr i64 %"43" to ptr addrspace(1) - %"70" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 8 - %"42" = load i64, ptr addrspace(1) %"70", align 8 - store i64 %"42", ptr addrspace(5) %"17", align 8 - %"45" = load i64, ptr addrspace(5) %"16", align 8 - %"46" = load i64, ptr addrspace(5) %"17", align 8 - %"59" = call i64 @"7"(i64 %"45", i64 %"46", ptr addrspace(3) @shared_ex, ptr addrspace(3) @"5") - store i64 %"59", ptr addrspace(5) %"17", align 8 - %"47" = load i64, ptr addrspace(5) %"15", align 8 - %"48" = load i64, ptr addrspace(5) %"17", align 8 - %"61" = inttoptr i64 %"47" to ptr - store i64 %"48", ptr %"61", align 8 + %"35" = load i64, ptr addrspace(4) %"48", align 8 + store i64 %"35", ptr addrspace(5) %"14", align 8 + %"36" = load i64, ptr addrspace(4) %"49", align 8 + store i64 %"36", ptr addrspace(5) %"15", align 8 + %"38" = load i64, ptr addrspace(5) %"14", align 8 + %"54" = inttoptr i64 %"38" to ptr addrspace(1) + %"37" = load i64, ptr addrspace(1) %"54", align 8 + store i64 %"37", ptr addrspace(5) %"16", align 8 + %"40" = load i64, ptr addrspace(5) %"14", align 8 + %"55" = inttoptr i64 %"40" to ptr addrspace(1) + %"67" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 8 + %"39" = load i64, ptr addrspace(1) %"67", align 8 + store i64 %"39", ptr addrspace(5) %"17", align 8 + %"42" = load i64, ptr addrspace(5) %"16", align 8 + %"43" = load 
i64, ptr addrspace(5) %"17", align 8 + %"56" = call i64 @"7"(i64 %"42", i64 %"43", ptr addrspace(3) @shared_ex, ptr addrspace(3) @"5") + store i64 %"56", ptr addrspace(5) %"17", align 8 + %"44" = load i64, ptr addrspace(5) %"15", align 8 + %"45" = load i64, ptr addrspace(5) %"17", align 8 + %"58" = inttoptr i64 %"44" to ptr + store i64 %"45", ptr %"58", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shared_variable.ll b/ptx/src/test/spirv_run/shared_variable.ll index 2c2678a..859a767 100644 --- a/ptx/src/test/spirv_run/shared_variable.ll +++ b/ptx/src/test/spirv_run/shared_variable.ll @@ -3,32 +3,30 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [128 x i8] undef, align 4 -define protected amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"25": +define protected amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"24": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = inttoptr i64 %"14" to ptr addrspace(1) - %"13" = load i64, ptr addrspace(1) %"21", align 8 - store i64 %"13", ptr addrspace(5) %"7", align 8 - %"15" = load i64, ptr addrspace(5) %"7", align 8 - store i64 %"15", ptr addrspace(3) @"4", align 8 - %"16" = load i64, ptr addrspace(3) @"4", align 8 - store 
i64 %"16", ptr addrspace(5) %"8", align 8 - %"17" = load i64, ptr addrspace(5) %"6", align 8 - %"18" = load i64, ptr addrspace(5) %"8", align 8 - %"24" = inttoptr i64 %"17" to ptr addrspace(1) - store i64 %"18", ptr addrspace(1) %"24", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = inttoptr i64 %"13" to ptr addrspace(1) + %"12" = load i64, ptr addrspace(1) %"20", align 8 + store i64 %"12", ptr addrspace(5) %"7", align 8 + %"14" = load i64, ptr addrspace(5) %"7", align 8 + store i64 %"14", ptr addrspace(3) @"4", align 8 + %"15" = load i64, ptr addrspace(3) @"4", align 8 + store i64 %"15", ptr addrspace(5) %"8", align 8 + %"16" = load i64, ptr addrspace(5) %"6", align 8 + %"17" = load i64, ptr addrspace(5) %"8", align 8 + %"23" = inttoptr i64 %"16" to ptr addrspace(1) + store i64 %"17", ptr addrspace(1) %"23", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shf.ll b/ptx/src/test/spirv_run/shf.ll index 6eb5aa0..22be32a 100644 --- a/ptx/src/test/spirv_run/shf.ll +++ b/ptx/src/test/spirv_run/shf.ll @@ -1,38 +1,36 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @shf(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 { -"33": +define protected amdgpu_kernel void @shf(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { +"32": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr 
addrspace(4) %"24", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"25", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"26", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"4", align 8 - %"27" = inttoptr i64 %"14" to ptr - %"13" = load i32, ptr %"27", align 4 - store i32 %"13", ptr addrspace(5) %"6", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"28" = inttoptr i64 %"16" to ptr - %"35" = getelementptr inbounds i8, ptr %"28", i64 4 - %"15" = load i32, ptr %"35", align 4 - store i32 %"15", ptr addrspace(5) %"7", align 4 - %"18" = load i32, ptr addrspace(5) %"6", align 4 - %"19" = load i32, ptr addrspace(5) %"7", align 4 - %"29" = call i32 @llvm.fshl.i32(i32 %"19", i32 %"18", i32 14) - store i32 %"29", ptr addrspace(5) %"8", align 4 - %"20" = load i64, ptr addrspace(5) %"5", align 8 - %"21" = load i32, ptr addrspace(5) %"8", align 4 - %"32" = inttoptr i64 %"20" to ptr - store i32 %"21", ptr %"32", align 4 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"4", align 8 + %"26" = inttoptr i64 %"13" to ptr + %"12" = load i32, ptr %"26", align 4 + store i32 %"12", ptr addrspace(5) %"6", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"27" = inttoptr i64 %"15" to ptr + %"34" = getelementptr inbounds i8, ptr %"27", i64 4 + %"14" = load i32, ptr %"34", align 4 + store i32 %"14", ptr addrspace(5) %"7", align 4 + %"17" = load i32, ptr addrspace(5) %"6", align 4 + %"18" = load i32, ptr addrspace(5) %"7", align 4 + %"28" = call i32 @llvm.fshl.i32(i32 %"18", i32 %"17", i32 14) + store i32 %"28", ptr addrspace(5) %"8", align 4 + %"19" = load i64, ptr addrspace(5) %"5", align 8 + %"20" = load i32, ptr addrspace(5) %"8", align 4 + %"31" = inttoptr i64 %"19" to ptr + store i32 %"20", ptr %"31", align 4 ret void } diff --git a/ptx/src/test/spirv_run/shl.ll 
b/ptx/src/test/spirv_run/shl.ll index a353e07..40c3365 100644 --- a/ptx/src/test/spirv_run/shl.ll +++ b/ptx/src/test/spirv_run/shl.ll @@ -1,32 +1,30 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"25": +define protected amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"24": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %0 = shl i64 %"15", 2 - %"22" = select i1 false, i64 0, i64 %0 - store i64 %"22", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"24" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"24", align 8 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr 
addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %0 = shl i64 %"14", 2 + %"21" = select i1 false, i64 0, i64 %0 + store i64 %"21", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"23" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"23", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shl_link_hack.ll b/ptx/src/test/spirv_run/shl_link_hack.ll index 8d695ad..9ac3883 100644 --- a/ptx/src/test/spirv_run/shl_link_hack.ll +++ b/ptx/src/test/spirv_run/shl_link_hack.ll @@ -3,37 +3,35 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr, i32) #0 -define protected amdgpu_kernel void @shl_link_hack(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #1 { -"30": +define protected amdgpu_kernel void @shl_link_hack(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #1 { +"29": %"9" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"9", align 1 - %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %"10" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"11", ptr addrspace(5) %"4", align 8 - %"12" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"14" = load i64, ptr addrspace(5) %"5", align 8 - %"25" = inttoptr i64 %"14" to ptr - %"13" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr %"25", i32 2000000) - store i32 %"13", ptr addrspace(5) %"8", align 4 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"16" to ptr - 
%"15" = load i64, ptr %"26", align 8 - store i64 %"15", ptr addrspace(5) %"6", align 8 - %"18" = load i64, ptr addrspace(5) %"6", align 8 - %0 = shl i64 %"18", 2 - %"27" = select i1 false, i64 0, i64 %0 - store i64 %"27", ptr addrspace(5) %"7", align 8 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i64, ptr addrspace(5) %"7", align 8 - %"29" = inttoptr i64 %"19" to ptr - store i64 %"20", ptr %"29", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"13" = load i64, ptr addrspace(5) %"5", align 8 + %"24" = inttoptr i64 %"13" to ptr + %"12" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr %"24", i32 2000000) + store i32 %"12", ptr addrspace(5) %"8", align 4 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"15" to ptr + %"14" = load i64, ptr %"25", align 8 + store i64 %"14", ptr addrspace(5) %"6", align 8 + %"17" = load i64, ptr addrspace(5) %"6", align 8 + %0 = shl i64 %"17", 2 + %"26" = select i1 false, i64 0, i64 %0 + store i64 %"26", ptr addrspace(5) %"7", align 8 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i64, ptr addrspace(5) %"7", align 8 + %"28" = inttoptr i64 %"18" to ptr + store i64 %"19", ptr %"28", align 8 ret void } diff --git a/ptx/src/test/spirv_run/shl_overflow.ll b/ptx/src/test/spirv_run/shl_overflow.ll index 0213149..80d4871 100644 --- a/ptx/src/test/spirv_run/shl_overflow.ll +++ b/ptx/src/test/spirv_run/shl_overflow.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #0 { -"63": +define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #0 { +"62": %"11" = alloca i1, align 1, 
addrspace(5) store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -14,61 +12,61 @@ define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %" %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + %"12" = load i64, ptr addrspace(4) %"47", align 8 + store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"48", align 8 - store i64 %"13", ptr addrspace(5) %"4", align 8 - %"14" = load i64, ptr addrspace(4) %"49", align 8 - store i64 %"14", ptr addrspace(5) %"5", align 8 - %"16" = load i64, ptr addrspace(5) %"4", align 8 - %"50" = inttoptr i64 %"16" to ptr - %"15" = load i32, ptr %"50", align 4 - store i32 %"15", ptr addrspace(5) %"6", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"51" = inttoptr i64 %"18" to ptr - %"65" = getelementptr inbounds i8, ptr %"51", i64 4 - %"17" = load i32, ptr %"65", align 4 - store i32 %"17", ptr addrspace(5) %"8", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"52" = inttoptr i64 %"20" to ptr - %"67" = getelementptr inbounds i8, ptr %"52", i64 8 - %"19" = load i32, ptr %"67", align 4 - store i32 %"19", ptr addrspace(5) %"9", align 4 - %"22" = load i64, ptr addrspace(5) %"4", align 8 - %"53" = inttoptr i64 %"22" to ptr - %"69" = getelementptr inbounds i8, ptr %"53", i64 12 - %"21" = load i32, ptr %"69", align 4 - store i32 %"21", ptr addrspace(5) %"10", align 4 - %"24" = load i32, ptr addrspace(5) %"6", align 4 - %"25" = load i32, ptr addrspace(5) %"8", align 4 - %0 = icmp ugt i32 %"25", 31 - %1 = shl i32 %"24", %"25" - %"54" = select i1 %0, i32 0, i32 %1 - store i32 %"54", ptr addrspace(5) %"7", align 4 - %"26" = load i64, ptr addrspace(5) %"5", align 8 - %"27" = load i32, ptr 
addrspace(5) %"7", align 4 - %"56" = inttoptr i64 %"26" to ptr - store i32 %"27", ptr %"56", align 4 - %"29" = load i32, ptr addrspace(5) %"6", align 4 - %"30" = load i32, ptr addrspace(5) %"9", align 4 - %2 = icmp ugt i32 %"30", 31 - %3 = shl i32 %"29", %"30" - %"57" = select i1 %2, i32 0, i32 %3 - store i32 %"57", ptr addrspace(5) %"7", align 4 - %"31" = load i64, ptr addrspace(5) %"5", align 8 - %"32" = load i32, ptr addrspace(5) %"7", align 4 - %"59" = inttoptr i64 %"31" to ptr - %"71" = getelementptr inbounds i8, ptr %"59", i64 4 - store i32 %"32", ptr %"71", align 4 - %"34" = load i32, ptr addrspace(5) %"6", align 4 - %"35" = load i32, ptr addrspace(5) %"10", align 4 - %4 = icmp ugt i32 %"35", 31 - %5 = shl i32 %"34", %"35" - %"60" = select i1 %4, i32 0, i32 %5 - store i32 %"60", ptr addrspace(5) %"7", align 4 - %"36" = load i64, ptr addrspace(5) %"5", align 8 - %"37" = load i32, ptr addrspace(5) %"7", align 4 - %"62" = inttoptr i64 %"36" to ptr - %"73" = getelementptr inbounds i8, ptr %"62", i64 8 - store i32 %"37", ptr %"73", align 4 + store i64 %"13", ptr addrspace(5) %"5", align 8 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"49" = inttoptr i64 %"15" to ptr + %"14" = load i32, ptr %"49", align 4 + store i32 %"14", ptr addrspace(5) %"6", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"50" = inttoptr i64 %"17" to ptr + %"64" = getelementptr inbounds i8, ptr %"50", i64 4 + %"16" = load i32, ptr %"64", align 4 + store i32 %"16", ptr addrspace(5) %"8", align 4 + %"19" = load i64, ptr addrspace(5) %"4", align 8 + %"51" = inttoptr i64 %"19" to ptr + %"66" = getelementptr inbounds i8, ptr %"51", i64 8 + %"18" = load i32, ptr %"66", align 4 + store i32 %"18", ptr addrspace(5) %"9", align 4 + %"21" = load i64, ptr addrspace(5) %"4", align 8 + %"52" = inttoptr i64 %"21" to ptr + %"68" = getelementptr inbounds i8, ptr %"52", i64 12 + %"20" = load i32, ptr %"68", align 4 + store i32 %"20", ptr addrspace(5) %"10", align 4 + %"23" = load i32, ptr 
addrspace(5) %"6", align 4 + %"24" = load i32, ptr addrspace(5) %"8", align 4 + %0 = icmp ugt i32 %"24", 31 + %1 = shl i32 %"23", %"24" + %"53" = select i1 %0, i32 0, i32 %1 + store i32 %"53", ptr addrspace(5) %"7", align 4 + %"25" = load i64, ptr addrspace(5) %"5", align 8 + %"26" = load i32, ptr addrspace(5) %"7", align 4 + %"55" = inttoptr i64 %"25" to ptr + store i32 %"26", ptr %"55", align 4 + %"28" = load i32, ptr addrspace(5) %"6", align 4 + %"29" = load i32, ptr addrspace(5) %"9", align 4 + %2 = icmp ugt i32 %"29", 31 + %3 = shl i32 %"28", %"29" + %"56" = select i1 %2, i32 0, i32 %3 + store i32 %"56", ptr addrspace(5) %"7", align 4 + %"30" = load i64, ptr addrspace(5) %"5", align 8 + %"31" = load i32, ptr addrspace(5) %"7", align 4 + %"58" = inttoptr i64 %"30" to ptr + %"70" = getelementptr inbounds i8, ptr %"58", i64 4 + store i32 %"31", ptr %"70", align 4 + %"33" = load i32, ptr addrspace(5) %"6", align 4 + %"34" = load i32, ptr addrspace(5) %"10", align 4 + %4 = icmp ugt i32 %"34", 31 + %5 = shl i32 %"33", %"34" + %"59" = select i1 %4, i32 0, i32 %5 + store i32 %"59", ptr addrspace(5) %"7", align 4 + %"35" = load i64, ptr addrspace(5) %"5", align 8 + %"36" = load i32, ptr addrspace(5) %"7", align 4 + %"61" = inttoptr i64 %"35" to ptr + %"72" = getelementptr inbounds i8, ptr %"61", i64 8 + store i32 %"36", ptr %"72", align 4 ret void } diff --git a/ptx/src/test/spirv_run/shr_s32.ll b/ptx/src/test/spirv_run/shr_s32.ll index 7bc5489..77c71f9 100644 --- a/ptx/src/test/spirv_run/shr_s32.ll +++ b/ptx/src/test/spirv_run/shr_s32.ll @@ -1,39 +1,37 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @shr_s32(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"29": +define protected amdgpu_kernel void @shr_s32(ptr 
addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"28": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"31" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load i32, ptr %"31", align 4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %0 = icmp ugt i32 %"18", 31 - %1 = ashr i32 %"17", %"18" - %"16" = select i1 %0, i32 -1, i32 %1 - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"28" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"28", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"30" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"30", align 4 + store i32 %"13", ptr addrspace(5) 
%"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %0 = icmp ugt i32 %"17", 31 + %1 = ashr i32 %"16", %"17" + %"15" = select i1 %0, i32 -1, i32 %1 + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"27" = inttoptr i64 %"18" to ptr + store i32 %"19", ptr %"27", align 4 ret void } diff --git a/ptx/src/test/spirv_run/shr_u32.ll b/ptx/src/test/spirv_run/shr_u32.ll index f337c1b..22c8761 100644 --- a/ptx/src/test/spirv_run/shr_u32.ll +++ b/ptx/src/test/spirv_run/shr_u32.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { -"46": +define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { +"45": %"11" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"11", align 1 - %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -14,45 +12,45 @@ define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"37", %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + %"12" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"37", align 8 - store i64 %"13", ptr addrspace(5) %"4", align 8 - %"14" = load i64, ptr addrspace(4) %"38", align 8 - store i64 %"14", ptr addrspace(5) %"5", align 8 - %"16" = 
load i64, ptr addrspace(5) %"4", align 8 - %"39" = inttoptr i64 %"16" to ptr - %"15" = load i32, ptr %"39", align 4 - store i32 %"15", ptr addrspace(5) %"6", align 4 - %"18" = load i64, ptr addrspace(5) %"4", align 8 - %"40" = inttoptr i64 %"18" to ptr - %"48" = getelementptr inbounds i8, ptr %"40", i64 4 - %"17" = load i32, ptr %"48", align 4 - store i32 %"17", ptr addrspace(5) %"7", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"41" = inttoptr i64 %"20" to ptr - %"50" = getelementptr inbounds i8, ptr %"41", i64 8 - %"19" = load i32, ptr %"50", align 4 - store i32 %"19", ptr addrspace(5) %"8", align 4 - %"22" = load i32, ptr addrspace(5) %"6", align 4 - %"23" = load i32, ptr addrspace(5) %"7", align 4 - %0 = icmp ugt i32 %"23", 31 - %1 = lshr i32 %"22", %"23" - %"21" = select i1 %0, i32 0, i32 %1 - store i32 %"21", ptr addrspace(5) %"9", align 4 - %"25" = load i32, ptr addrspace(5) %"6", align 4 - %"26" = load i32, ptr addrspace(5) %"8", align 4 - %2 = icmp ugt i32 %"26", 31 - %3 = lshr i32 %"25", %"26" - %"24" = select i1 %2, i32 0, i32 %3 - store i32 %"24", ptr addrspace(5) %"10", align 4 - %"27" = load i64, ptr addrspace(5) %"5", align 8 - %"28" = load i32, ptr addrspace(5) %"9", align 4 - %"44" = inttoptr i64 %"27" to ptr - store i32 %"28", ptr %"44", align 4 - %"29" = load i64, ptr addrspace(5) %"5", align 8 - %"30" = load i32, ptr addrspace(5) %"10", align 4 - %"45" = inttoptr i64 %"29" to ptr - %"52" = getelementptr inbounds i8, ptr %"45", i64 4 - store i32 %"30", ptr %"52", align 4 + store i64 %"13", ptr addrspace(5) %"5", align 8 + %"15" = load i64, ptr addrspace(5) %"4", align 8 + %"38" = inttoptr i64 %"15" to ptr + %"14" = load i32, ptr %"38", align 4 + store i32 %"14", ptr addrspace(5) %"6", align 4 + %"17" = load i64, ptr addrspace(5) %"4", align 8 + %"39" = inttoptr i64 %"17" to ptr + %"47" = getelementptr inbounds i8, ptr %"39", i64 4 + %"16" = load i32, ptr %"47", align 4 + store i32 %"16", ptr addrspace(5) %"7", align 4 + %"19" = 
load i64, ptr addrspace(5) %"4", align 8 + %"40" = inttoptr i64 %"19" to ptr + %"49" = getelementptr inbounds i8, ptr %"40", i64 8 + %"18" = load i32, ptr %"49", align 4 + store i32 %"18", ptr addrspace(5) %"8", align 4 + %"21" = load i32, ptr addrspace(5) %"6", align 4 + %"22" = load i32, ptr addrspace(5) %"7", align 4 + %0 = icmp ugt i32 %"22", 31 + %1 = lshr i32 %"21", %"22" + %"20" = select i1 %0, i32 0, i32 %1 + store i32 %"20", ptr addrspace(5) %"9", align 4 + %"24" = load i32, ptr addrspace(5) %"6", align 4 + %"25" = load i32, ptr addrspace(5) %"8", align 4 + %2 = icmp ugt i32 %"25", 31 + %3 = lshr i32 %"24", %"25" + %"23" = select i1 %2, i32 0, i32 %3 + store i32 %"23", ptr addrspace(5) %"10", align 4 + %"26" = load i64, ptr addrspace(5) %"5", align 8 + %"27" = load i32, ptr addrspace(5) %"9", align 4 + %"43" = inttoptr i64 %"26" to ptr + store i32 %"27", ptr %"43", align 4 + %"28" = load i64, ptr addrspace(5) %"5", align 8 + %"29" = load i32, ptr addrspace(5) %"10", align 4 + %"44" = inttoptr i64 %"28" to ptr + %"51" = getelementptr inbounds i8, ptr %"44", i64 4 + store i32 %"29", ptr %"51", align 4 ret void } diff --git a/ptx/src/test/spirv_run/sign_extend.ll b/ptx/src/test/spirv_run/sign_extend.ll index bb72576..ef26261 100644 --- a/ptx/src/test/spirv_run/sign_extend.ll +++ b/ptx/src/test/spirv_run/sign_extend.ll @@ -1,28 +1,26 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 { -"20": +define protected amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { +"19": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - 
store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"14", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"16", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"17" = inttoptr i64 %"11" to ptr + %"16" = load i16, ptr %"17", align 2 + %"10" = sext i16 %"16" to i32 + store i32 %"10", ptr addrspace(5) %"6", align 4 + %"12" = load i64, ptr addrspace(5) %"5", align 8 + %"13" = load i32, ptr addrspace(5) %"6", align 4 %"18" = inttoptr i64 %"12" to ptr - %"17" = load i16, ptr %"18", align 2 - %"11" = sext i16 %"17" to i32 - store i32 %"11", ptr addrspace(5) %"6", align 4 - %"13" = load i64, ptr addrspace(5) %"5", align 8 - %"14" = load i32, ptr addrspace(5) %"6", align 4 - %"19" = inttoptr i64 %"13" to ptr - store i32 %"14", ptr %"19", align 4 + store i32 %"13", ptr %"18", align 4 ret void } diff --git a/ptx/src/test/spirv_run/sin.ll b/ptx/src/test/spirv_run/sin.ll index 40ce553..f38aedd 100644 --- a/ptx/src/test/spirv_run/sin.ll +++ b/ptx/src/test/spirv_run/sin.ll @@ -1,30 +1,28 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr 
addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"19", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = call afn float @llvm.sin.f32(float %"14") - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"18", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = call afn float @llvm.sin.f32(float %"13") + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/sqrt.ll b/ptx/src/test/spirv_run/sqrt.ll index 332f67a..c8e4ec0 100644 --- a/ptx/src/test/spirv_run/sqrt.ll +++ b/ptx/src/test/spirv_run/sqrt.ll @@ -1,30 +1,28 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": +define protected amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { +"20": %"7" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"7", align 1 - %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + %"8" = load i64, ptr addrspace(4) %"16", align 8 + store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 - store i64 %"9", ptr addrspace(5) %"4", align 8 - %"10" = load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"5", align 8 - %"12" = load i64, ptr addrspace(5) %"4", align 8 - %"19" = inttoptr i64 %"12" to ptr - %"11" = load float, ptr %"19", align 4 - store float %"11", ptr addrspace(5) %"6", align 4 - %"14" = load float, ptr addrspace(5) %"6", align 4 - %"13" = call afn float @llvm.sqrt.f32(float %"14") - store float %"13", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"5", align 8 - %"16" = load float, ptr addrspace(5) %"6", align 4 - %"20" = inttoptr i64 %"15" to ptr - store float %"16", ptr %"20", align 4 + store i64 %"9", ptr addrspace(5) %"5", align 8 + %"11" = load i64, ptr addrspace(5) %"4", align 8 + %"18" = inttoptr i64 %"11" to ptr + %"10" = load float, ptr %"18", align 4 + store float %"10", ptr addrspace(5) %"6", align 4 + %"13" = load float, ptr addrspace(5) %"6", align 4 + %"12" = call afn float @llvm.sqrt.f32(float %"13") + store float %"12", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr 
addrspace(5) %"5", align 8 + %"15" = load float, ptr addrspace(5) %"6", align 4 + %"19" = inttoptr i64 %"14" to ptr + store float %"15", ptr %"19", align 4 ret void } diff --git a/ptx/src/test/spirv_run/sub.ll b/ptx/src/test/spirv_run/sub.ll index 2383be0..83fec5f 100644 --- a/ptx/src/test/spirv_run/sub.ll +++ b/ptx/src/test/spirv_run/sub.ll @@ -1,31 +1,29 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": +define protected amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { +"22": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"18", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"20", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"21" = inttoptr i64 %"13" to ptr - %"12" = load i64, ptr %"21", align 8 - store i64 %"12", ptr addrspace(5) %"6", align 8 - %"15" = load i64, ptr addrspace(5) %"6", align 8 - %"14" = sub i64 %"15", 1 - store i64 %"14", ptr addrspace(5) %"7", align 8 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i64, ptr addrspace(5) %"7", align 8 - %"22" = inttoptr i64 %"16" to ptr - store i64 %"17", ptr %"22", align 8 + store i64 %"10", ptr 
addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"20" = inttoptr i64 %"12" to ptr + %"11" = load i64, ptr %"20", align 8 + store i64 %"11", ptr addrspace(5) %"6", align 8 + %"14" = load i64, ptr addrspace(5) %"6", align 8 + %"13" = sub i64 %"14", 1 + store i64 %"13", ptr addrspace(5) %"7", align 8 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i64, ptr addrspace(5) %"7", align 8 + %"21" = inttoptr i64 %"15" to ptr + store i64 %"16", ptr %"21", align 8 ret void } diff --git a/ptx/src/test/spirv_run/subc_cc.ll b/ptx/src/test/spirv_run/subc_cc.ll index 9a08872..0101b83 100644 --- a/ptx/src/test/spirv_run/subc_cc.ll +++ b/ptx/src/test/spirv_run/subc_cc.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"54", ptr addrspace(4) byref(i64) %"55") #0 { -"69": +define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #0 { +"72": %"13" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"13", align 1 - %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -16,70 +14,74 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"54", %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i32, align 4, addrspace(5) - %"15" = load i64, ptr addrspace(4) %"54", align 8 - store i64 %"15", ptr addrspace(5) %"4", align 8 - %"16" = load i64, ptr addrspace(4) %"55", align 8 - store i64 %"16", ptr addrspace(5) %"5", align 8 - %"18" = load i64, ptr addrspace(5) %"4", 
align 8 - %"57" = inttoptr i64 %"18" to ptr - %"56" = load i32, ptr %"57", align 4 - store i32 %"56", ptr addrspace(5) %"9", align 4 - %"20" = load i64, ptr addrspace(5) %"4", align 8 - %"58" = inttoptr i64 %"20" to ptr - %"71" = getelementptr inbounds i8, ptr %"58", i64 4 - %"59" = load i32, ptr %"71", align 4 - store i32 %"59", ptr addrspace(5) %"10", align 4 - %"22" = load i64, ptr addrspace(5) %"4", align 8 - %"60" = inttoptr i64 %"22" to ptr - %"73" = getelementptr inbounds i8, ptr %"60", i64 8 - %"21" = load i32, ptr %"73", align 4 - store i32 %"21", ptr addrspace(5) %"11", align 4 - %"24" = load i64, ptr addrspace(5) %"4", align 8 - %"61" = inttoptr i64 %"24" to ptr - %"75" = getelementptr inbounds i8, ptr %"61", i64 12 - %"23" = load i32, ptr %"75", align 4 - store i32 %"23", ptr addrspace(5) %"12", align 4 - %"27" = load i32, ptr addrspace(5) %"9", align 4 - %"28" = load i32, ptr addrspace(5) %"10", align 4 - %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"27", i32 %"28") - %"25" = extractvalue { i32, i1 } %0, 0 - %"26" = extractvalue { i32, i1 } %0, 1 - store i32 %"25", ptr addrspace(5) %"6", align 4 - store i1 %"26", ptr addrspace(5) %"14", align 1 - %"31" = load i1, ptr addrspace(5) %"14", align 1 - %"32" = load i32, ptr addrspace(5) %"6", align 4 - %"33" = load i32, ptr addrspace(5) %"11", align 4 - %1 = zext i1 %"31" to i32 - %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"32", i32 %"33") + %"18" = load i64, ptr addrspace(4) %"57", align 8 + store i64 %"18", ptr addrspace(5) %"4", align 8 + %"19" = load i64, ptr addrspace(4) %"58", align 8 + store i64 %"19", ptr addrspace(5) %"5", align 8 + %"21" = load i64, ptr addrspace(5) %"4", align 8 + %"60" = inttoptr i64 %"21" to ptr + %"59" = load i32, ptr %"60", align 4 + store i32 %"59", ptr addrspace(5) %"9", align 4 + %"23" = load i64, ptr addrspace(5) %"4", align 8 + %"61" = inttoptr i64 %"23" to ptr + %"74" = getelementptr inbounds i8, ptr %"61", i64 4 + %"62" = load i32, ptr %"74", 
align 4 + store i32 %"62", ptr addrspace(5) %"10", align 4 + %"25" = load i64, ptr addrspace(5) %"4", align 8 + %"63" = inttoptr i64 %"25" to ptr + %"76" = getelementptr inbounds i8, ptr %"63", i64 8 + %"24" = load i32, ptr %"76", align 4 + store i32 %"24", ptr addrspace(5) %"11", align 4 + %"27" = load i64, ptr addrspace(5) %"4", align 8 + %"64" = inttoptr i64 %"27" to ptr + %"78" = getelementptr inbounds i8, ptr %"64", i64 12 + %"26" = load i32, ptr %"78", align 4 + store i32 %"26", ptr addrspace(5) %"12", align 4 + %"29" = load i32, ptr addrspace(5) %"9", align 4 + %"30" = load i32, ptr addrspace(5) %"10", align 4 + %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"29", i32 %"30") + %"28" = extractvalue { i32, i1 } %0, 0 + %"14" = extractvalue { i32, i1 } %0, 1 + store i32 %"28", ptr addrspace(5) %"6", align 4 + %"31" = xor i1 %"14", true + store i1 %"31", ptr addrspace(5) %"13", align 1 + %"32" = load i1, ptr addrspace(5) %"13", align 1 + %"15" = xor i1 %"32", true + %"34" = load i32, ptr addrspace(5) %"6", align 4 + %"35" = load i32, ptr addrspace(5) %"11", align 4 + %1 = zext i1 %"15" to i32 + %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"34", i32 %"35") %3 = extractvalue { i32, i1 } %2, 0 %4 = extractvalue { i32, i1 } %2, 1 %5 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %3, i32 %1) - %"29" = extractvalue { i32, i1 } %5, 0 + %"33" = extractvalue { i32, i1 } %5, 0 %6 = extractvalue { i32, i1 } %5, 1 - %"30" = xor i1 %4, %6 - store i32 %"29", ptr addrspace(5) %"7", align 4 - store i1 %"30", ptr addrspace(5) %"14", align 1 - %"35" = load i1, ptr addrspace(5) %"14", align 1 - %"36" = load i32, ptr addrspace(5) %"7", align 4 - %"37" = load i32, ptr addrspace(5) %"12", align 4 - %7 = zext i1 %"35" to i32 - %8 = sub i32 %"36", %"37" - %"34" = sub i32 %8, %7 - store i32 %"34", ptr addrspace(5) %"8", align 4 - %"38" = load i64, ptr addrspace(5) %"5", align 8 - %"39" = load i32, ptr addrspace(5) %"6", align 4 - %"66" = inttoptr i64 %"38" to 
ptr - store i32 %"39", ptr %"66", align 4 - %"40" = load i64, ptr addrspace(5) %"5", align 8 - %"41" = load i32, ptr addrspace(5) %"7", align 4 - %"67" = inttoptr i64 %"40" to ptr - %"77" = getelementptr inbounds i8, ptr %"67", i64 4 - store i32 %"41", ptr %"77", align 4 - %"42" = load i64, ptr addrspace(5) %"5", align 8 - %"43" = load i32, ptr addrspace(5) %"8", align 4 - %"68" = inttoptr i64 %"42" to ptr - %"79" = getelementptr inbounds i8, ptr %"68", i64 8 - store i32 %"43", ptr %"79", align 4 + %"16" = xor i1 %4, %6 + store i32 %"33", ptr addrspace(5) %"7", align 4 + %"36" = xor i1 %"16", true + store i1 %"36", ptr addrspace(5) %"13", align 1 + %"37" = load i1, ptr addrspace(5) %"13", align 1 + %"17" = xor i1 %"37", true + %"39" = load i32, ptr addrspace(5) %"7", align 4 + %"40" = load i32, ptr addrspace(5) %"12", align 4 + %7 = zext i1 %"17" to i32 + %8 = sub i32 %"39", %"40" + %"38" = sub i32 %8, %7 + store i32 %"38", ptr addrspace(5) %"8", align 4 + %"41" = load i64, ptr addrspace(5) %"5", align 8 + %"42" = load i32, ptr addrspace(5) %"6", align 4 + %"69" = inttoptr i64 %"41" to ptr + store i32 %"42", ptr %"69", align 4 + %"43" = load i64, ptr addrspace(5) %"5", align 8 + %"44" = load i32, ptr addrspace(5) %"7", align 4 + %"70" = inttoptr i64 %"43" to ptr + %"80" = getelementptr inbounds i8, ptr %"70", i64 4 + store i32 %"44", ptr %"80", align 4 + %"45" = load i64, ptr addrspace(5) %"5", align 8 + %"46" = load i32, ptr addrspace(5) %"8", align 4 + %"71" = inttoptr i64 %"45" to ptr + %"82" = getelementptr inbounds i8, ptr %"71", i64 8 + store i32 %"46", ptr %"82", align 4 ret void } diff --git a/ptx/src/test/spirv_run/subc_cc2.ll b/ptx/src/test/spirv_run/subc_cc2.ll deleted file mode 100644 index aded371..0000000 --- a/ptx/src/test/spirv_run/subc_cc2.ll +++ /dev/null @@ -1,127 +0,0 @@ -target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" -target triple = "amdgcn-amd-amdhsa" - -define protected amdgpu_kernel void @subc_cc2(ptr addrspace(4) byref(i64) %"86", ptr addrspace(4) byref(i64) %"87") #0 { -"112": - %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 - %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 - %"4" = alloca i64, align 8, addrspace(5) - %"5" = alloca i64, align 8, addrspace(5) - %"6" = alloca i32, align 4, addrspace(5) - %"7" = alloca i32, align 4, addrspace(5) - %"8" = alloca i32, align 4, addrspace(5) - %"9" = alloca i32, align 4, addrspace(5) - %"10" = alloca i32, align 4, addrspace(5) - %"11" = alloca i32, align 4, addrspace(5) - %"12" = alloca i32, align 4, addrspace(5) - %"13" = alloca i32, align 4, addrspace(5) - %"16" = load i64, ptr addrspace(4) %"87", align 8 - store i64 %"16", ptr addrspace(5) %"5", align 8 - %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %"88" = extractvalue { i32, i1 } %0, 0 - %"18" = extractvalue { i32, i1 } %0, 1 - store i32 %"88", ptr addrspace(5) %"6", align 4 - store i1 %"18", ptr addrspace(5) %"15", align 1 - %"21" = load i1, ptr addrspace(5) %"15", align 1 - %1 = zext i1 %"21" to i32 - %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 -1) - %3 = extractvalue { i32, i1 } %2, 0 - %4 = extractvalue { i32, i1 } %2, 1 - %5 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %3, i32 %1) - %"89" = extractvalue { i32, i1 } %5, 0 - %6 = extractvalue { i32, i1 } %5, 1 - %"20" = xor i1 %4, %6 - store i32 %"89", ptr addrspace(5) %"7", align 4 - store i1 %"20", ptr addrspace(5) %"15", align 1 - %"23" = load i1, ptr addrspace(5) %"15", align 1 - %7 = zext i1 %"23" to i32 - %"90" = sub i32 2, %7 - store i32 %"90", ptr addrspace(5) %"8", align 4 - %"25" = load i1, ptr 
addrspace(5) %"14", align 1 - %8 = zext i1 %"25" to i32 - %"91" = add i32 0, %8 - store i32 %"91", ptr addrspace(5) %"9", align 4 - %9 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %"92" = extractvalue { i32, i1 } %9, 0 - %"27" = extractvalue { i32, i1 } %9, 1 - store i32 %"92", ptr addrspace(5) %"6", align 4 - store i1 %"27", ptr addrspace(5) %"15", align 1 - %"30" = load i1, ptr addrspace(5) %"15", align 1 - %10 = zext i1 %"30" to i32 - %11 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) - %12 = extractvalue { i32, i1 } %11, 0 - %13 = extractvalue { i32, i1 } %11, 1 - %14 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %12, i32 %10) - %"93" = extractvalue { i32, i1 } %14, 0 - %15 = extractvalue { i32, i1 } %14, 1 - %"29" = xor i1 %13, %15 - store i32 %"93", ptr addrspace(5) %"10", align 4 - store i1 %"29", ptr addrspace(5) %"15", align 1 - %"32" = load i1, ptr addrspace(5) %"15", align 1 - %16 = zext i1 %"32" to i32 - %"94" = sub i32 2, %16 - store i32 %"94", ptr addrspace(5) %"11", align 4 - %17 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) - %"95" = extractvalue { i32, i1 } %17, 0 - %"34" = extractvalue { i32, i1 } %17, 1 - store i32 %"95", ptr addrspace(5) %"6", align 4 - store i1 %"34", ptr addrspace(5) %"15", align 1 - %"37" = load i1, ptr addrspace(5) %"15", align 1 - %18 = zext i1 %"37" to i32 - %19 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %20 = extractvalue { i32, i1 } %19, 0 - %21 = extractvalue { i32, i1 } %19, 1 - %22 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %20, i32 %18) - %"96" = extractvalue { i32, i1 } %22, 0 - %23 = extractvalue { i32, i1 } %22, 1 - %"36" = xor i1 %21, %23 - store i32 %"96", ptr addrspace(5) %"12", align 4 - store i1 %"36", ptr addrspace(5) %"15", align 1 - %"39" = load i1, ptr addrspace(5) %"15", align 1 - %24 = zext i1 %"39" to i32 - %"97" = sub i32 2, %24 - store i32 %"97", ptr addrspace(5) %"13", align 4 - %"40" = load i64, ptr addrspace(5) 
%"5", align 8 - %"41" = load i32, ptr addrspace(5) %"7", align 4 - %"98" = inttoptr i64 %"40" to ptr - store i32 %"41", ptr %"98", align 4 - %"42" = load i64, ptr addrspace(5) %"5", align 8 - %"43" = load i32, ptr addrspace(5) %"8", align 4 - %"100" = inttoptr i64 %"42" to ptr - %"114" = getelementptr inbounds i8, ptr %"100", i64 4 - store i32 %"43", ptr %"114", align 4 - %"44" = load i64, ptr addrspace(5) %"5", align 8 - %"45" = load i32, ptr addrspace(5) %"9", align 4 - %"102" = inttoptr i64 %"44" to ptr - %"116" = getelementptr inbounds i8, ptr %"102", i64 8 - store i32 %"45", ptr %"116", align 4 - %"46" = load i64, ptr addrspace(5) %"5", align 8 - %"47" = load i32, ptr addrspace(5) %"10", align 4 - %"104" = inttoptr i64 %"46" to ptr - %"118" = getelementptr inbounds i8, ptr %"104", i64 12 - store i32 %"47", ptr %"118", align 4 - %"48" = load i64, ptr addrspace(5) %"5", align 8 - %"49" = load i32, ptr addrspace(5) %"11", align 4 - %"106" = inttoptr i64 %"48" to ptr - %"120" = getelementptr inbounds i8, ptr %"106", i64 16 - store i32 %"49", ptr %"120", align 4 - %"50" = load i64, ptr addrspace(5) %"5", align 8 - %"51" = load i32, ptr addrspace(5) %"12", align 4 - %"108" = inttoptr i64 %"50" to ptr - %"122" = getelementptr inbounds i8, ptr %"108", i64 20 - store i32 %"51", ptr %"122", align 4 - %"52" = load i64, ptr addrspace(5) %"5", align 8 - %"53" = load i32, ptr addrspace(5) %"13", align 4 - %"110" = inttoptr i64 %"52" to ptr - %"124" = getelementptr inbounds i8, ptr %"110", i64 24 - store i32 %"53", ptr %"124", align 4 - ret void -} - -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1 - -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } diff --git 
a/ptx/src/test/spirv_run/subc_cc2.ptx b/ptx/src/test/spirv_run/subc_cc2.ptx deleted file mode 100644 index 2c776a4..0000000 --- a/ptx/src/test/spirv_run/subc_cc2.ptx +++ /dev/null @@ -1,55 +0,0 @@ -.version 6.5 -.target sm_30 -.address_size 64 - -.visible .entry subc_cc2( - .param .u64 input, - .param .u64 output -) -{ - .reg .u64 in_addr; - .reg .u64 out_addr; - .reg .b32 unused; - - .reg .b32 result_1; - .reg .b32 carry_out_1_1; - .reg .b32 carry_out_1_2; - .reg .b32 result_2; - .reg .b32 carry_out_2; - .reg .b32 result_3; - .reg .b32 carry_out_3; - - ld.param.u64 out_addr, [output]; - - // set carry=1 - sub.cc.s32 unused, 0, 1; - // overflow (b + CC.CF), no underflow in whole operation - subc.cc.s32 result_1, 0, 4294967295; - // write carry - subc.s32 carry_out_1_1, 2, 0; - // make sure the overflow in (b + CC.CF) is not detected by addc - addc.s32 carry_out_1_2, 0, 0; - - // set carry=1 - sub.cc.s32 unused, 0, 1; - // underflow in substraction, underflow in whole operation - subc.cc.s32 result_2, 0, 0; - // write carry - subc.s32 carry_out_2, 2, 0; - - // set carry=0 - sub.cc.s32 unused, 0, 0; - // same operation as bove, but 0-1-0 instead of 0-0-1 - subc.cc.s32 result_3, 0, 1; - // write carry - subc.s32 carry_out_3, 2, 0; - - st.s32 [out_addr], result_1; - st.s32 [out_addr+4], carry_out_1_1; - st.s32 [out_addr+8], carry_out_1_2; - st.s32 [out_addr+12], result_2; - st.s32 [out_addr+16], carry_out_2; - st.s32 [out_addr+20], result_3; - st.s32 [out_addr+24], carry_out_3; - ret; -} diff --git a/ptx/src/test/spirv_run/vector.ll b/ptx/src/test/spirv_run/vector.ll index a53904e..b60aaec 100644 --- a/ptx/src/test/spirv_run/vector.ll +++ b/ptx/src/test/spirv_run/vector.ll @@ -1,95 +1,91 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define private <2 x i32> @"1"(<2 x i32> %"20") 
#0 { -"52": +define private <2 x i32> @"1"(<2 x i32> %"18") #0 { +"50": %"3" = alloca <2 x i32>, align 8, addrspace(5) %"2" = alloca <2 x i32>, align 8, addrspace(5) %"16" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"16", align 1 - %"17" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"17", align 1 %"4" = alloca <2 x i32>, align 8, addrspace(5) %"5" = alloca i32, align 4, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) - store <2 x i32> %"20", ptr addrspace(5) %"3", align 8 + store <2 x i32> %"18", ptr addrspace(5) %"3", align 8 %0 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 0 - %"22" = load i32, ptr addrspace(5) %0, align 4 + %"20" = load i32, ptr addrspace(5) %0, align 4 %1 = alloca i32, align 4, addrspace(5) - store i32 %"22", ptr addrspace(5) %1, align 4 - %"21" = load i32, ptr addrspace(5) %1, align 4 - store i32 %"21", ptr addrspace(5) %"5", align 4 + store i32 %"20", ptr addrspace(5) %1, align 4 + %"19" = load i32, ptr addrspace(5) %1, align 4 + store i32 %"19", ptr addrspace(5) %"5", align 4 %2 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 1 - %"24" = load i32, ptr addrspace(5) %2, align 4 + %"22" = load i32, ptr addrspace(5) %2, align 4 %3 = alloca i32, align 4, addrspace(5) - store i32 %"24", ptr addrspace(5) %3, align 4 - %"23" = load i32, ptr addrspace(5) %3, align 4 + store i32 %"22", ptr addrspace(5) %3, align 4 + %"21" = load i32, ptr addrspace(5) %3, align 4 + store i32 %"21", ptr addrspace(5) %"6", align 4 + %"24" = load i32, ptr addrspace(5) %"5", align 4 + %"25" = load i32, ptr addrspace(5) %"6", align 4 + %"23" = add i32 %"24", %"25" store i32 %"23", ptr addrspace(5) %"6", align 4 - %"26" = load i32, ptr addrspace(5) %"5", align 4 %"27" = load i32, ptr addrspace(5) %"6", align 4 - %"25" = add i32 %"26", %"27" - store i32 %"25", ptr addrspace(5) %"6", align 4 - %"29" = load i32, ptr addrspace(5) %"6", align 4 %4 = alloca i32, align 4, 
addrspace(5) - store i32 %"29", ptr addrspace(5) %4, align 4 - %"28" = load i32, ptr addrspace(5) %4, align 4 + store i32 %"27", ptr addrspace(5) %4, align 4 + %"26" = load i32, ptr addrspace(5) %4, align 4 %5 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 0 - store i32 %"28", ptr addrspace(5) %5, align 4 - %"31" = load i32, ptr addrspace(5) %"6", align 4 + store i32 %"26", ptr addrspace(5) %5, align 4 + %"29" = load i32, ptr addrspace(5) %"6", align 4 %6 = alloca i32, align 4, addrspace(5) - store i32 %"31", ptr addrspace(5) %6, align 4 - %"30" = load i32, ptr addrspace(5) %6, align 4 + store i32 %"29", ptr addrspace(5) %6, align 4 + %"28" = load i32, ptr addrspace(5) %6, align 4 %7 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1 - store i32 %"30", ptr addrspace(5) %7, align 4 + store i32 %"28", ptr addrspace(5) %7, align 4 %8 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1 - %"33" = load i32, ptr addrspace(5) %8, align 4 + %"31" = load i32, ptr addrspace(5) %8, align 4 %9 = alloca i32, align 4, addrspace(5) - store i32 %"33", ptr addrspace(5) %9, align 4 - %"32" = load i32, ptr addrspace(5) %9, align 4 + store i32 %"31", ptr addrspace(5) %9, align 4 + %"30" = load i32, ptr addrspace(5) %9, align 4 %10 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 0 - store i32 %"32", ptr addrspace(5) %10, align 4 - %"35" = load <2 x i32>, ptr addrspace(5) %"4", align 8 + store i32 %"30", ptr addrspace(5) %10, align 4 + %"33" = load <2 x i32>, ptr addrspace(5) %"4", align 8 %11 = alloca <2 x i32>, align 8, addrspace(5) - store <2 x i32> %"35", ptr addrspace(5) %11, align 8 - %"34" = load <2 x i32>, ptr addrspace(5) %11, align 8 - store <2 x i32> %"34", ptr addrspace(5) %"2", align 8 - %"36" = load <2 x i32>, ptr addrspace(5) %"2", align 8 - ret <2 x i32> %"36" + store <2 x i32> %"33", ptr addrspace(5) %11, align 8 + %"32" = load <2 x i32>, ptr addrspace(5) %11, align 8 + store <2 x i32> 
%"32", ptr addrspace(5) %"2", align 8 + %"34" = load <2 x i32>, ptr addrspace(5) %"2", align 8 + ret <2 x i32> %"34" } -define protected amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #0 { -"53": - %"18" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"18", align 1 - %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 +define protected amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 { +"51": + %"17" = alloca i1, align 1, addrspace(5) + store i1 false, ptr addrspace(5) %"17", align 1 %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) %"12" = alloca <2 x i32>, align 8, addrspace(5) %"13" = alloca i32, align 4, addrspace(5) %"14" = alloca i32, align 4, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) - %"37" = load i64, ptr addrspace(4) %"47", align 8 - store i64 %"37", ptr addrspace(5) %"10", align 8 - %"38" = load i64, ptr addrspace(4) %"48", align 8 - store i64 %"38", ptr addrspace(5) %"11", align 8 - %"40" = load i64, ptr addrspace(5) %"10", align 8 - %"49" = inttoptr i64 %"40" to ptr - %"39" = load <2 x i32>, ptr %"49", align 8 + %"35" = load i64, ptr addrspace(4) %"45", align 8 + store i64 %"35", ptr addrspace(5) %"10", align 8 + %"36" = load i64, ptr addrspace(4) %"46", align 8 + store i64 %"36", ptr addrspace(5) %"11", align 8 + %"38" = load i64, ptr addrspace(5) %"10", align 8 + %"47" = inttoptr i64 %"38" to ptr + %"37" = load <2 x i32>, ptr %"47", align 8 + store <2 x i32> %"37", ptr addrspace(5) %"12", align 8 + %"40" = load <2 x i32>, ptr addrspace(5) %"12", align 8 + %"39" = call <2 x i32> @"1"(<2 x i32> %"40") store <2 x i32> %"39", ptr addrspace(5) %"12", align 8 %"42" = load <2 x i32>, ptr addrspace(5) %"12", align 8 - %"41" = call <2 x i32> @"1"(<2 x i32> %"42") - store <2 x i32> %"41", ptr addrspace(5) %"12", align 8 - %"44" = load <2 x i32>, ptr 
addrspace(5) %"12", align 8 - %"50" = bitcast <2 x i32> %"44" to i64 + %"48" = bitcast <2 x i32> %"42" to i64 %0 = alloca i64, align 8, addrspace(5) - store i64 %"50", ptr addrspace(5) %0, align 8 - %"43" = load i64, ptr addrspace(5) %0, align 8 - store i64 %"43", ptr addrspace(5) %"15", align 8 - %"45" = load i64, ptr addrspace(5) %"11", align 8 - %"46" = load <2 x i32>, ptr addrspace(5) %"12", align 8 - %"51" = inttoptr i64 %"45" to ptr - store <2 x i32> %"46", ptr %"51", align 8 + store i64 %"48", ptr addrspace(5) %0, align 8 + %"41" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"41", ptr addrspace(5) %"15", align 8 + %"43" = load i64, ptr addrspace(5) %"11", align 8 + %"44" = load <2 x i32>, ptr addrspace(5) %"12", align 8 + %"49" = inttoptr i64 %"43" to ptr + store <2 x i32> %"44", ptr %"49", align 8 ret void } diff --git a/ptx/src/test/spirv_run/vector4.ll b/ptx/src/test/spirv_run/vector4.ll index 53187f7..494b1af 100644 --- a/ptx/src/test/spirv_run/vector4.ll +++ b/ptx/src/test/spirv_run/vector4.ll @@ -1,34 +1,32 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": +define protected amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { +"23": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca <4 x i32>, align 16, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"17", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = 
load i64, ptr addrspace(4) %"18", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"19", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"20" = inttoptr i64 %"13" to ptr - %"12" = load <4 x i32>, ptr %"20", align 16 - store <4 x i32> %"12", ptr addrspace(5) %"6", align 16 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"19" = inttoptr i64 %"12" to ptr + %"11" = load <4 x i32>, ptr %"19", align 16 + store <4 x i32> %"11", ptr addrspace(5) %"6", align 16 %0 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %"6", i32 0, i32 3 - %"15" = load i32, ptr addrspace(5) %0, align 4 + %"14" = load i32, ptr addrspace(5) %0, align 4 %1 = alloca i32, align 4, addrspace(5) - store i32 %"15", ptr addrspace(5) %1, align 4 - %"21" = load i32, ptr addrspace(5) %1, align 4 - store i32 %"21", ptr addrspace(5) %"7", align 4 - %"16" = load i64, ptr addrspace(5) %"5", align 8 - %"17" = load i32, ptr addrspace(5) %"7", align 4 - %"23" = inttoptr i64 %"16" to ptr - store i32 %"17", ptr %"23", align 4 + store i32 %"14", ptr addrspace(5) %1, align 4 + %"20" = load i32, ptr addrspace(5) %1, align 4 + store i32 %"20", ptr addrspace(5) %"7", align 4 + %"15" = load i64, ptr addrspace(5) %"5", align 8 + %"16" = load i32, ptr addrspace(5) %"7", align 4 + %"22" = inttoptr i64 %"15" to ptr + store i32 %"16", ptr %"22", align 4 ret void } diff --git a/ptx/src/test/spirv_run/vector_extract.ll b/ptx/src/test/spirv_run/vector_extract.ll index bceac42..d877dc7 100644 --- a/ptx/src/test/spirv_run/vector_extract.ll +++ b/ptx/src/test/spirv_run/vector_extract.ll @@ -1,12 +1,10 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel 
void @vector_extract(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #0 { -"61": +define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #0 { +"60": %"17" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"17", align 1 - %"18" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"18", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) @@ -14,83 +12,83 @@ define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"8" = alloca i16, align 2, addrspace(5) %"9" = alloca i16, align 2, addrspace(5) %"10" = alloca <4 x i16>, align 8, addrspace(5) + %"18" = load i64, ptr addrspace(4) %"48", align 8 + store i64 %"18", ptr addrspace(5) %"4", align 8 %"19" = load i64, ptr addrspace(4) %"49", align 8 - store i64 %"19", ptr addrspace(5) %"4", align 8 - %"20" = load i64, ptr addrspace(4) %"50", align 8 - store i64 %"20", ptr addrspace(5) %"5", align 8 - %"21" = load i64, ptr addrspace(5) %"4", align 8 - %"51" = inttoptr i64 %"21" to ptr addrspace(1) - %"11" = load <4 x i8>, ptr addrspace(1) %"51", align 4 - %"52" = extractelement <4 x i8> %"11", i32 0 - %"53" = extractelement <4 x i8> %"11", i32 1 - %"54" = extractelement <4 x i8> %"11", i32 2 - %"55" = extractelement <4 x i8> %"11", i32 3 + store i64 %"19", ptr addrspace(5) %"5", align 8 + %"20" = load i64, ptr addrspace(5) %"4", align 8 + %"50" = inttoptr i64 %"20" to ptr addrspace(1) + %"11" = load <4 x i8>, ptr addrspace(1) %"50", align 4 + %"51" = extractelement <4 x i8> %"11", i32 0 + %"52" = extractelement <4 x i8> %"11", i32 1 + %"53" = extractelement <4 x i8> %"11", i32 2 + %"54" = extractelement <4 x i8> %"11", i32 3 + %"21" = zext i8 %"51" to i16 %"22" = zext i8 %"52" to i16 %"23" = zext i8 %"53" to i16 %"24" = zext i8 %"54" to i16 - %"25" = zext i8 %"55" to i16 - store i16 %"22", ptr 
addrspace(5) %"6", align 2 - store i16 %"23", ptr addrspace(5) %"7", align 2 - store i16 %"24", ptr addrspace(5) %"8", align 2 - store i16 %"25", ptr addrspace(5) %"9", align 2 - %"26" = load i16, ptr addrspace(5) %"7", align 2 - %"27" = load i16, ptr addrspace(5) %"8", align 2 - %"28" = load i16, ptr addrspace(5) %"9", align 2 - %"29" = load i16, ptr addrspace(5) %"6", align 2 - %0 = insertelement <4 x i16> undef, i16 %"26", i32 0 - %1 = insertelement <4 x i16> %0, i16 %"27", i32 1 - %2 = insertelement <4 x i16> %1, i16 %"28", i32 2 - %"12" = insertelement <4 x i16> %2, i16 %"29", i32 3 + store i16 %"21", ptr addrspace(5) %"6", align 2 + store i16 %"22", ptr addrspace(5) %"7", align 2 + store i16 %"23", ptr addrspace(5) %"8", align 2 + store i16 %"24", ptr addrspace(5) %"9", align 2 + %"25" = load i16, ptr addrspace(5) %"7", align 2 + %"26" = load i16, ptr addrspace(5) %"8", align 2 + %"27" = load i16, ptr addrspace(5) %"9", align 2 + %"28" = load i16, ptr addrspace(5) %"6", align 2 + %0 = insertelement <4 x i16> undef, i16 %"25", i32 0 + %1 = insertelement <4 x i16> %0, i16 %"26", i32 1 + %2 = insertelement <4 x i16> %1, i16 %"27", i32 2 + %"12" = insertelement <4 x i16> %2, i16 %"28", i32 3 %3 = alloca <4 x i16>, align 8, addrspace(5) store <4 x i16> %"12", ptr addrspace(5) %3, align 8 - %"30" = load <4 x i16>, ptr addrspace(5) %3, align 8 - store <4 x i16> %"30", ptr addrspace(5) %"10", align 8 - %"31" = load <4 x i16>, ptr addrspace(5) %"10", align 8 + %"29" = load <4 x i16>, ptr addrspace(5) %3, align 8 + store <4 x i16> %"29", ptr addrspace(5) %"10", align 8 + %"30" = load <4 x i16>, ptr addrspace(5) %"10", align 8 %4 = alloca <4 x i16>, align 8, addrspace(5) - store <4 x i16> %"31", ptr addrspace(5) %4, align 8 + store <4 x i16> %"30", ptr addrspace(5) %4, align 8 %"13" = load <4 x i16>, ptr addrspace(5) %4, align 8 - %"32" = extractelement <4 x i16> %"13", i32 0 - %"33" = extractelement <4 x i16> %"13", i32 1 - %"34" = extractelement <4 x i16> %"13", i32 2 
- %"35" = extractelement <4 x i16> %"13", i32 3 - store i16 %"32", ptr addrspace(5) %"8", align 2 - store i16 %"33", ptr addrspace(5) %"9", align 2 - store i16 %"34", ptr addrspace(5) %"6", align 2 - store i16 %"35", ptr addrspace(5) %"7", align 2 - %"36" = load i16, ptr addrspace(5) %"8", align 2 - %"37" = load i16, ptr addrspace(5) %"9", align 2 - %"38" = load i16, ptr addrspace(5) %"6", align 2 - %"39" = load i16, ptr addrspace(5) %"7", align 2 - %5 = insertelement <4 x i16> undef, i16 %"36", i32 0 - %6 = insertelement <4 x i16> %5, i16 %"37", i32 1 - %7 = insertelement <4 x i16> %6, i16 %"38", i32 2 - %"15" = insertelement <4 x i16> %7, i16 %"39", i32 3 + %"31" = extractelement <4 x i16> %"13", i32 0 + %"32" = extractelement <4 x i16> %"13", i32 1 + %"33" = extractelement <4 x i16> %"13", i32 2 + %"34" = extractelement <4 x i16> %"13", i32 3 + store i16 %"31", ptr addrspace(5) %"8", align 2 + store i16 %"32", ptr addrspace(5) %"9", align 2 + store i16 %"33", ptr addrspace(5) %"6", align 2 + store i16 %"34", ptr addrspace(5) %"7", align 2 + %"35" = load i16, ptr addrspace(5) %"8", align 2 + %"36" = load i16, ptr addrspace(5) %"9", align 2 + %"37" = load i16, ptr addrspace(5) %"6", align 2 + %"38" = load i16, ptr addrspace(5) %"7", align 2 + %5 = insertelement <4 x i16> undef, i16 %"35", i32 0 + %6 = insertelement <4 x i16> %5, i16 %"36", i32 1 + %7 = insertelement <4 x i16> %6, i16 %"37", i32 2 + %"15" = insertelement <4 x i16> %7, i16 %"38", i32 3 %8 = alloca <4 x i16>, align 8, addrspace(5) store <4 x i16> %"15", ptr addrspace(5) %8, align 8 %"14" = load <4 x i16>, ptr addrspace(5) %8, align 8 - %"40" = extractelement <4 x i16> %"14", i32 0 - %"41" = extractelement <4 x i16> %"14", i32 1 - %"42" = extractelement <4 x i16> %"14", i32 2 - %"43" = extractelement <4 x i16> %"14", i32 3 - store i16 %"40", ptr addrspace(5) %"9", align 2 - store i16 %"41", ptr addrspace(5) %"6", align 2 - store i16 %"42", ptr addrspace(5) %"7", align 2 - store i16 %"43", ptr 
addrspace(5) %"8", align 2 - %"44" = load i16, ptr addrspace(5) %"6", align 2 - %"45" = load i16, ptr addrspace(5) %"7", align 2 - %"46" = load i16, ptr addrspace(5) %"8", align 2 - %"47" = load i16, ptr addrspace(5) %"9", align 2 + %"39" = extractelement <4 x i16> %"14", i32 0 + %"40" = extractelement <4 x i16> %"14", i32 1 + %"41" = extractelement <4 x i16> %"14", i32 2 + %"42" = extractelement <4 x i16> %"14", i32 3 + store i16 %"39", ptr addrspace(5) %"9", align 2 + store i16 %"40", ptr addrspace(5) %"6", align 2 + store i16 %"41", ptr addrspace(5) %"7", align 2 + store i16 %"42", ptr addrspace(5) %"8", align 2 + %"43" = load i16, ptr addrspace(5) %"6", align 2 + %"44" = load i16, ptr addrspace(5) %"7", align 2 + %"45" = load i16, ptr addrspace(5) %"8", align 2 + %"46" = load i16, ptr addrspace(5) %"9", align 2 + %"55" = trunc i16 %"43" to i8 %"56" = trunc i16 %"44" to i8 %"57" = trunc i16 %"45" to i8 %"58" = trunc i16 %"46" to i8 - %"59" = trunc i16 %"47" to i8 - %9 = insertelement <4 x i8> undef, i8 %"56", i32 0 - %10 = insertelement <4 x i8> %9, i8 %"57", i32 1 - %11 = insertelement <4 x i8> %10, i8 %"58", i32 2 - %"16" = insertelement <4 x i8> %11, i8 %"59", i32 3 - %"48" = load i64, ptr addrspace(5) %"5", align 8 - %"60" = inttoptr i64 %"48" to ptr addrspace(1) - store <4 x i8> %"16", ptr addrspace(1) %"60", align 4 + %9 = insertelement <4 x i8> undef, i8 %"55", i32 0 + %10 = insertelement <4 x i8> %9, i8 %"56", i32 1 + %11 = insertelement <4 x i8> %10, i8 %"57", i32 2 + %"16" = insertelement <4 x i8> %11, i8 %"58", i32 3 + %"47" = load i64, ptr addrspace(5) %"5", align 8 + %"59" = inttoptr i64 %"47" to ptr addrspace(1) + store <4 x i8> %"16", ptr addrspace(1) %"59", align 4 ret void } diff --git a/ptx/src/test/spirv_run/vote_ballot.ll b/ptx/src/test/spirv_run/vote_ballot.ll index 200eccc..fd31f1a 100644 --- a/ptx/src/test/spirv_run/vote_ballot.ll +++ b/ptx/src/test/spirv_run/vote_ballot.ll @@ -3,48 +3,46 @@ target triple = "amdgcn-amd-amdhsa" declare i32 
@__zluda_ptx_impl__vote_sync_ballot_b32_32(i1, i32) #0 -define protected amdgpu_kernel void @vote_ballot(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 { -"51": +define protected amdgpu_kernel void @vote_ballot(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 { +"50": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) - %"12" = load i64, ptr addrspace(4) %"42", align 8 - store i64 %"12", ptr addrspace(5) %"5", align 8 - %"43" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 1) - store i32 %"43", ptr addrspace(5) %"6", align 4 - %"44" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 false, i32 16777215) - store i32 %"44", ptr addrspace(5) %"7", align 4 - %"45" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 2) - store i32 %"45", ptr addrspace(5) %"8", align 4 - %"46" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 3) - store i32 %"46", ptr addrspace(5) %"9", align 4 - %"17" = load i64, ptr addrspace(5) %"5", align 8 - %"18" = load i32, ptr addrspace(5) %"6", align 4 - %"47" = inttoptr i64 %"17" to ptr - %"57" = getelementptr inbounds i8, ptr %"47", i64 0 - store i32 %"18", ptr %"57", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"7", align 4 - %"48" = inttoptr i64 %"19" to ptr - %"59" = getelementptr inbounds i8, ptr %"48", i64 4 - store i32 %"20", ptr %"59", align 4 - %"21" = load i64, ptr addrspace(5) %"5", align 8 - %"22" = load i32, ptr addrspace(5) %"8", align 4 - %"49" = inttoptr i64 %"21" to ptr - %"61" = getelementptr inbounds 
i8, ptr %"49", i64 8 - store i32 %"22", ptr %"61", align 4 - %"23" = load i64, ptr addrspace(5) %"5", align 8 - %"24" = load i32, ptr addrspace(5) %"9", align 4 - %"50" = inttoptr i64 %"23" to ptr - %"63" = getelementptr inbounds i8, ptr %"50", i64 12 - store i32 %"24", ptr %"63", align 4 + %"11" = load i64, ptr addrspace(4) %"41", align 8 + store i64 %"11", ptr addrspace(5) %"5", align 8 + %"42" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 1) + store i32 %"42", ptr addrspace(5) %"6", align 4 + %"43" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 false, i32 16777215) + store i32 %"43", ptr addrspace(5) %"7", align 4 + %"44" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 2) + store i32 %"44", ptr addrspace(5) %"8", align 4 + %"45" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 3) + store i32 %"45", ptr addrspace(5) %"9", align 4 + %"16" = load i64, ptr addrspace(5) %"5", align 8 + %"17" = load i32, ptr addrspace(5) %"6", align 4 + %"46" = inttoptr i64 %"16" to ptr + %"56" = getelementptr inbounds i8, ptr %"46", i64 0 + store i32 %"17", ptr %"56", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"7", align 4 + %"47" = inttoptr i64 %"18" to ptr + %"58" = getelementptr inbounds i8, ptr %"47", i64 4 + store i32 %"19", ptr %"58", align 4 + %"20" = load i64, ptr addrspace(5) %"5", align 8 + %"21" = load i32, ptr addrspace(5) %"8", align 4 + %"48" = inttoptr i64 %"20" to ptr + %"60" = getelementptr inbounds i8, ptr %"48", i64 8 + store i32 %"21", ptr %"60", align 4 + %"22" = load i64, ptr addrspace(5) %"5", align 8 + %"23" = load i32, ptr addrspace(5) %"9", align 4 + %"49" = inttoptr i64 %"22" to ptr + %"62" = getelementptr inbounds i8, ptr %"49", i64 12 + store i32 %"23", ptr %"62", align 4 ret void } diff --git a/ptx/src/test/spirv_run/vshr.ll b/ptx/src/test/spirv_run/vshr.ll index e3b6b5e..4433bf2 100644 --- a/ptx/src/test/spirv_run/vshr.ll +++ 
b/ptx/src/test/spirv_run/vshr.ll @@ -1,48 +1,46 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -define protected amdgpu_kernel void @vshr(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 { -"39": +define protected amdgpu_kernel void @vshr(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { +"38": %"10" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"10", align 1 - %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + %"11" = load i64, ptr addrspace(4) %"29", align 8 + store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"30", align 8 - store i64 %"12", ptr addrspace(5) %"4", align 8 - %"13" = load i64, ptr addrspace(4) %"31", align 8 - store i64 %"13", ptr addrspace(5) %"5", align 8 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"33" = inttoptr i64 %"15" to ptr - %"32" = load i32, ptr %"33", align 4 - store i32 %"32", ptr addrspace(5) %"7", align 4 - %"17" = load i64, ptr addrspace(5) %"4", align 8 - %"34" = inttoptr i64 %"17" to ptr - %"41" = getelementptr inbounds i8, ptr %"34", i64 4 - %"35" = load i32, ptr %"41", align 4 - store i32 %"35", ptr addrspace(5) %"8", align 4 - %"19" = load i64, ptr addrspace(5) %"4", align 8 - %"36" = inttoptr i64 %"19" to ptr - %"43" = getelementptr inbounds i8, ptr %"36", i64 8 - %"37" = load i32, ptr %"43", align 4 - store i32 %"37", ptr addrspace(5) %"9", align 4 - %"21" = load i32, ptr addrspace(5) %"7", align 4 - %"22" = load i32, ptr addrspace(5) %"8", 
align 4 - %"23" = load i32, ptr addrspace(5) %"9", align 4 - %0 = icmp ugt i32 %"22", 31 - %1 = lshr i32 %"21", %"22" + store i64 %"12", ptr addrspace(5) %"5", align 8 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"32" = inttoptr i64 %"14" to ptr + %"31" = load i32, ptr %"32", align 4 + store i32 %"31", ptr addrspace(5) %"7", align 4 + %"16" = load i64, ptr addrspace(5) %"4", align 8 + %"33" = inttoptr i64 %"16" to ptr + %"40" = getelementptr inbounds i8, ptr %"33", i64 4 + %"34" = load i32, ptr %"40", align 4 + store i32 %"34", ptr addrspace(5) %"8", align 4 + %"18" = load i64, ptr addrspace(5) %"4", align 8 + %"35" = inttoptr i64 %"18" to ptr + %"42" = getelementptr inbounds i8, ptr %"35", i64 8 + %"36" = load i32, ptr %"42", align 4 + store i32 %"36", ptr addrspace(5) %"9", align 4 + %"20" = load i32, ptr addrspace(5) %"7", align 4 + %"21" = load i32, ptr addrspace(5) %"8", align 4 + %"22" = load i32, ptr addrspace(5) %"9", align 4 + %0 = icmp ugt i32 %"21", 31 + %1 = lshr i32 %"20", %"21" %2 = select i1 %0, i32 0, i32 %1 - %"20" = add i32 %2, %"23" - store i32 %"20", ptr addrspace(5) %"6", align 4 - %"24" = load i64, ptr addrspace(5) %"5", align 8 - %"25" = load i32, ptr addrspace(5) %"6", align 4 - %"38" = inttoptr i64 %"24" to ptr - store i32 %"25", ptr %"38", align 4 + %"19" = add i32 %2, %"22" + store i32 %"19", ptr addrspace(5) %"6", align 4 + %"23" = load i64, ptr addrspace(5) %"5", align 8 + %"24" = load i32, ptr addrspace(5) %"6", align 4 + %"37" = inttoptr i64 %"23" to ptr + store i32 %"24", ptr %"37", align 4 ret void } diff --git a/ptx/src/test/spirv_run/xor.ll b/ptx/src/test/spirv_run/xor.ll index 7181bd1..96b2914 100644 --- a/ptx/src/test/spirv_run/xor.ll +++ b/ptx/src/test/spirv_run/xor.ll @@ -1,37 +1,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" 
-define protected amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": +define protected amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { +"27": %"8" = alloca i1, align 1, addrspace(5) store i1 false, ptr addrspace(5) %"8", align 1 - %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %"9" = load i64, ptr addrspace(4) %"22", align 8 + store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 - store i64 %"10", ptr addrspace(5) %"4", align 8 - %"11" = load i64, ptr addrspace(4) %"24", align 8 - store i64 %"11", ptr addrspace(5) %"5", align 8 - %"13" = load i64, ptr addrspace(5) %"4", align 8 - %"25" = inttoptr i64 %"13" to ptr - %"12" = load i32, ptr %"25", align 4 - store i32 %"12", ptr addrspace(5) %"6", align 4 - %"15" = load i64, ptr addrspace(5) %"4", align 8 - %"26" = inttoptr i64 %"15" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 4 - %"14" = load i32, ptr %"30", align 4 - store i32 %"14", ptr addrspace(5) %"7", align 4 - %"17" = load i32, ptr addrspace(5) %"6", align 4 - %"18" = load i32, ptr addrspace(5) %"7", align 4 - %"16" = xor i32 %"17", %"18" - store i32 %"16", ptr addrspace(5) %"6", align 4 - %"19" = load i64, ptr addrspace(5) %"5", align 8 - %"20" = load i32, ptr addrspace(5) %"6", align 4 - %"27" = inttoptr i64 %"19" to ptr - store i32 %"20", ptr %"27", align 4 + store i64 %"10", ptr addrspace(5) %"5", align 8 + %"12" = load i64, ptr addrspace(5) %"4", align 8 + %"24" = inttoptr i64 %"12" to ptr + %"11" = load i32, ptr %"24", align 4 + store i32 %"11", ptr addrspace(5) %"6", align 4 + %"14" = load i64, ptr addrspace(5) %"4", align 8 + %"25" = inttoptr i64 %"14" to ptr + %"29" = getelementptr 
inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"29", align 4 + store i32 %"13", ptr addrspace(5) %"7", align 4 + %"16" = load i32, ptr addrspace(5) %"6", align 4 + %"17" = load i32, ptr addrspace(5) %"7", align 4 + %"15" = xor i32 %"16", %"17" + store i32 %"15", ptr addrspace(5) %"6", align 4 + %"18" = load i64, ptr addrspace(5) %"5", align 8 + %"19" = load i32, ptr addrspace(5) %"6", align 4 + %"26" = inttoptr i64 %"18" to ptr + store i32 %"19", ptr %"26", align 4 ret void } diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 1a203bd..3b75ec9 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -1963,30 +1963,26 @@ fn insert_hardware_registers<'input>( }
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#extended-precision-integer-arithmetic-instructions
-// NVIDIA documentation is misleading. In fact there is no single CC.CF,
-// but separate registers for overflow (`add` and `mad`) and underflow (`sub`)
-// For reference check the .ptx tests
+// NVIDIA documentation is slightly misleading when it comes to subc and sub.cc.
+// They both invert the CC flag (carry is stored as the complement of the borrow).
+// This means that for sub.cc:
+// * sub.cc x, 0, 1 will set CC to 0
+// * sub.cc x, 0, 0 will set CC to 1
+// and for subc:
+// * if CC is 1 then subc computes d = a - b; if CC is 0 it computes d = a - b - 1
fn insert_hardware_registers_impl<'input>(
id_defs: &mut IdNameMapBuilder<'input>,
typed_statements: Vec<TypedStatement>,
) -> Result<Vec<TypedStatement>, TranslateError> {
let mut result = Vec::with_capacity(typed_statements.len());
- let overflow_flag_var = id_defs.register_variable_def(
+ let carry_flag_variable = id_defs.register_variable_def(
None,
ast::Type::Scalar(ast::ScalarType::Pred),
ast::StateSpace::Reg,
Some(ast::Initializer::Constant(ast::ImmediateValue::U64(0))),
);
- let underflow_flag_var = id_defs.register_variable_def(
- None,
- ast::Type::Scalar(ast::ScalarType::Pred),
- ast::StateSpace::Reg,
- Some(ast::Initializer::Constant(ast::ImmediateValue::U64(0))),
- );
- let overflow_flag = overflow_flag_var.name;
- let underflow_flag = underflow_flag_var.name;
- result.push(Statement::Variable(overflow_flag_var));
- result.push(Statement::Variable(underflow_flag_var));
+ let carry_flag = carry_flag_variable.name;
+ result.push(Statement::Variable(carry_flag_variable));
for statement in typed_statements {
match statement {
Statement::Instruction(ast::Instruction::MadC {
@@ -1997,38 +1993,88 @@ fn insert_hardware_registers_impl<'input>( }) => result.push(Statement::MadC(MadCDetails {
type_,
is_hi,
- arg: Arg4CarryIn::new(arg, carry_out, TypedOperand::Reg(overflow_flag)),
+ arg: Arg4CarryIn::new(arg, carry_out, TypedOperand::Reg(carry_flag)),
})),
Statement::Instruction(ast::Instruction::MadCC { type_, is_hi, arg }) => {
result.push(Statement::MadCC(MadCCDetails {
type_,
is_hi,
- arg: Arg4CarryOut::new(arg, TypedOperand::Reg(overflow_flag)),
+ arg: Arg4CarryOut::new(arg, TypedOperand::Reg(carry_flag)),
}))
}
Statement::Instruction(ast::Instruction::AddC(details, args)) => {
result.push(Statement::AddC(
details.type_,
- Arg3CarryIn::new(args, details.carry_out, TypedOperand::Reg(overflow_flag)),
+ Arg3CarryIn::new(args, details.carry_out, TypedOperand::Reg(carry_flag)),
))
}
Statement::Instruction(ast::Instruction::AddCC(details, args)) => {
result.push(Statement::AddCC(
details,
- Arg3CarryOut::new(args, TypedOperand::Reg(overflow_flag)),
+ Arg3CarryOut::new(args, TypedOperand::Reg(carry_flag)),
))
}
Statement::Instruction(ast::Instruction::SubC(details, args)) => {
+ let inverted_carry_in = id_defs.register_intermediate(Some((
+ ast::Type::Scalar(ast::ScalarType::Pred),
+ ast::StateSpace::Reg,
+ )));
+ result.push(Statement::Instruction(ast::Instruction::Not(
+ ast::ScalarType::Pred,
+ ast::Arg2 {
+ dst: TypedOperand::Reg(inverted_carry_in),
+ src: TypedOperand::Reg(carry_flag),
+ },
+ )));
+ let (carry_out_id, carry_out_postprocess) = if details.carry_out {
+ let inverted_carry_out = id_defs.register_intermediate(Some((
+ ast::Type::Scalar(ast::ScalarType::Pred),
+ ast::StateSpace::Reg,
+ )));
+ let invert_statement = Statement::Instruction(ast::Instruction::Not(
+ ast::ScalarType::Pred,
+ ast::Arg2 {
+ dst: TypedOperand::Reg(carry_flag),
+ src: TypedOperand::Reg(inverted_carry_out),
+ },
+ ));
+ (
+ Some(TypedOperand::Reg(inverted_carry_out)),
+ Some(invert_statement),
+ )
+ } else {
+ (None, None)
+ };
result.push(Statement::SubC(
details.type_,
- Arg3CarryIn::new(args, details.carry_out, TypedOperand::Reg(underflow_flag)),
- ))
+ Arg3CarryIn {
+ dst: args.dst,
+ carry_out: carry_out_id,
+ carry_in: TypedOperand::Reg(inverted_carry_in),
+ src1: args.src1,
+ src2: args.src2,
+ },
+ ));
+ if let Some(carry_out_postprocess) = carry_out_postprocess {
+ result.push(carry_out_postprocess);
+ }
}
- Statement::Instruction(ast::Instruction::SubCC(details, args)) => {
+ Statement::Instruction(ast::Instruction::SubCC(type_, args)) => {
+ let temp = id_defs.register_intermediate(Some((
+ ast::Type::Scalar(ast::ScalarType::Pred),
+ ast::StateSpace::Reg,
+ )));
result.push(Statement::SubCC(
- details,
- Arg3CarryOut::new(args, TypedOperand::Reg(underflow_flag)),
- ))
+ type_,
+ Arg3CarryOut::new(args, TypedOperand::Reg(temp)),
+ ));
+ result.push(Statement::Instruction(ast::Instruction::Not(
+ ast::ScalarType::Pred,
+ ast::Arg2 {
+ dst: TypedOperand::Reg(carry_flag),
+ src: TypedOperand::Reg(temp),
+ },
+ )));
}
s => result.push(s),
}
|