diff options
author | Andrzej Janik <[email protected]> | 2024-04-05 23:23:01 +0000 |
---|---|---|
committer | Andrzej Janik <[email protected]> | 2024-04-05 23:23:01 +0000 |
commit | d6017e450952966ed2b5754c76a851bd7ac0fdfe (patch) | |
tree | 6c257a53205ca27669bf4fb24817f6cff886e25b | |
parent | fca92e31e44a94931242295536d8f207ad157dd6 (diff) | |
download | ZLUDA-sad_inst.tar.gz ZLUDA-sad_inst.zip |
Tweak LLVM IR to actually get v_sad_u16/u32 (branch: sad_inst)
-rw-r--r-- | ptx/src/emit.rs | 6 | ||||
-rw-r--r-- | ptx/src/test/spirv_run/sad.ll | 12 |
2 files changed, 9 insertions, 9 deletions
diff --git a/ptx/src/emit.rs b/ptx/src/emit.rs index 3f46680..9e62d5b 100644 --- a/ptx/src/emit.rs +++ b/ptx/src/emit.rs @@ -1174,7 +1174,7 @@ fn emit_inst_sad( &SetpData { typ: type_, flush_to_zero: None, - cmp_op: ast::SetpCompareOp::Less, + cmp_op: ast::SetpCompareOp::Greater, }, None, arg.src1, @@ -1182,9 +1182,9 @@ fn emit_inst_sad( )?; let a = ctx.names.value(arg.src1)?; let b = ctx.names.value(arg.src2)?; - let b_minus_a = unsafe { LLVMBuildSub(builder, b, a, LLVM_UNNAMED) }; let a_minus_b = unsafe { LLVMBuildSub(builder, a, b, LLVM_UNNAMED) }; - let a_or_b = unsafe { LLVMBuildSelect(builder, less_than, b_minus_a, a_minus_b, LLVM_UNNAMED) }; + let b_minus_a = unsafe { LLVMBuildSub(builder, b, a, LLVM_UNNAMED) }; + let a_or_b = unsafe { LLVMBuildSelect(builder, less_than, a_minus_b, b_minus_a, LLVM_UNNAMED) }; let src3 = ctx.names.value(arg.src3)?; ctx.names.register_result(arg.dst, |dst_name| unsafe { LLVMBuildAdd(builder, src3, a_or_b, dst_name) diff --git a/ptx/src/test/spirv_run/sad.ll b/ptx/src/test/spirv_run/sad.ll index 5467b5e..c7a5726 100644 --- a/ptx/src/test/spirv_run/sad.ll +++ b/ptx/src/test/spirv_run/sad.ll @@ -33,18 +33,18 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr %"21" = load i32, ptr addrspace(5) %"6", align 4 %"22" = load i32, ptr addrspace(5) %"7", align 4 %"23" = load i32, ptr addrspace(5) %"8", align 4 - %0 = icmp ult i32 %"21", %"22" - %1 = sub i32 %"22", %"21" - %2 = sub i32 %"21", %"22" + %0 = icmp ugt i32 %"21", %"22" + %1 = sub i32 %"21", %"22" + %2 = sub i32 %"22", %"21" %3 = select i1 %0, i32 %1, i32 %2 %"46" = add i32 %"23", %3 store i32 %"46", ptr addrspace(5) %"9", align 4 %"25" = load i32, ptr addrspace(5) %"6", align 4 %"26" = load i32, ptr addrspace(5) %"7", align 4 %"27" = load i32, ptr addrspace(5) %"8", align 4 - %4 = icmp slt i32 %"25", %"26" - %5 = sub i32 %"26", %"25" - %6 = sub i32 %"25", %"26" + %4 = icmp sgt i32 %"25", %"26" + %5 = sub i32 %"25", %"26" + %6 = sub i32 %"26", 
%"25" %7 = select i1 %4, i32 %5, i32 %6 %"50" = add i32 %"27", %7 store i32 %"50", ptr addrspace(5) %"10", align 4 |