about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Andrzej Janik <[email protected]> 2024-04-05 23:23:01 +0000
committer: Andrzej Janik <[email protected]> 2024-04-05 23:23:01 +0000
commit: d6017e450952966ed2b5754c76a851bd7ac0fdfe (patch)
tree: 6c257a53205ca27669bf4fb24817f6cff886e25b
parent: fca92e31e44a94931242295536d8f207ad157dd6 (diff)
download: ZLUDA-d6017e450952966ed2b5754c76a851bd7ac0fdfe.tar.gz
download: ZLUDA-d6017e450952966ed2b5754c76a851bd7ac0fdfe.zip
Tweak LLVM IR to actually get v_sad_u16/u32 (sad_inst)
-rw-r--r-- ptx/src/emit.rs 6
-rw-r--r-- ptx/src/test/spirv_run/sad.ll 12
2 files changed, 9 insertions, 9 deletions
diff --git a/ptx/src/emit.rs b/ptx/src/emit.rs
index 3f46680..9e62d5b 100644
--- a/ptx/src/emit.rs
+++ b/ptx/src/emit.rs
@@ -1174,7 +1174,7 @@ fn emit_inst_sad(
&SetpData {
typ: type_,
flush_to_zero: None,
- cmp_op: ast::SetpCompareOp::Less,
+ cmp_op: ast::SetpCompareOp::Greater,
},
None,
arg.src1,
@@ -1182,9 +1182,9 @@ fn emit_inst_sad(
)?;
let a = ctx.names.value(arg.src1)?;
let b = ctx.names.value(arg.src2)?;
- let b_minus_a = unsafe { LLVMBuildSub(builder, b, a, LLVM_UNNAMED) };
let a_minus_b = unsafe { LLVMBuildSub(builder, a, b, LLVM_UNNAMED) };
- let a_or_b = unsafe { LLVMBuildSelect(builder, less_than, b_minus_a, a_minus_b, LLVM_UNNAMED) };
+ let b_minus_a = unsafe { LLVMBuildSub(builder, b, a, LLVM_UNNAMED) };
+ let a_or_b = unsafe { LLVMBuildSelect(builder, less_than, a_minus_b, b_minus_a, LLVM_UNNAMED) };
let src3 = ctx.names.value(arg.src3)?;
ctx.names.register_result(arg.dst, |dst_name| unsafe {
LLVMBuildAdd(builder, src3, a_or_b, dst_name)
diff --git a/ptx/src/test/spirv_run/sad.ll b/ptx/src/test/spirv_run/sad.ll
index 5467b5e..c7a5726 100644
--- a/ptx/src/test/spirv_run/sad.ll
+++ b/ptx/src/test/spirv_run/sad.ll
@@ -33,18 +33,18 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr
%"21" = load i32, ptr addrspace(5) %"6", align 4
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"23" = load i32, ptr addrspace(5) %"8", align 4
- %0 = icmp ult i32 %"21", %"22"
- %1 = sub i32 %"22", %"21"
- %2 = sub i32 %"21", %"22"
+ %0 = icmp ugt i32 %"21", %"22"
+ %1 = sub i32 %"21", %"22"
+ %2 = sub i32 %"22", %"21"
%3 = select i1 %0, i32 %1, i32 %2
%"46" = add i32 %"23", %3
store i32 %"46", ptr addrspace(5) %"9", align 4
%"25" = load i32, ptr addrspace(5) %"6", align 4
%"26" = load i32, ptr addrspace(5) %"7", align 4
%"27" = load i32, ptr addrspace(5) %"8", align 4
- %4 = icmp slt i32 %"25", %"26"
- %5 = sub i32 %"26", %"25"
- %6 = sub i32 %"25", %"26"
+ %4 = icmp sgt i32 %"25", %"26"
+ %5 = sub i32 %"25", %"26"
+ %6 = sub i32 %"26", %"25"
%7 = select i1 %4, i32 %5, i32 %6
%"50" = add i32 %"27", %7
store i32 %"50", ptr addrspace(5) %"10", align 4