aboutsummaryrefslogtreecommitdiffhomepage
path: root/ptx
diff options
context:
space:
mode:
author: Andrzej Janik <[email protected]> 2021-01-26 21:05:09 +0100
committer: GitHub <[email protected]> 2021-01-26 21:05:09 +0100
commit: 972f612562dc534ad605bfc5a00bc908ddd8b3de (patch)
tree: 7e4b764357965ab52892dbd79299afa308265710 /ptx
parent: 3e2e73ac33273fc23a6183b1e5bc0b2f754fa4fb (diff)
download: ZLUDA-972f612562dc534ad605bfc5a00bc908ddd8b3de.tar.gz
download: ZLUDA-972f612562dc534ad605bfc5a00bc908ddd8b3de.zip
Fix signed integer conversion (#36)
This fixes the last remaining bug preventing an end-to-end Geekbench run, so the Geekbench results in the README are updated as well.
Diffstat (limited to 'ptx')
-rw-r--r-- ptx/src/test/spirv_run/cvt_rzi.ptx 25
-rw-r--r-- ptx/src/test/spirv_run/cvt_rzi.spvtxt 63
-rw-r--r-- ptx/src/test/spirv_run/cvt_s32_f32.ptx 25
-rw-r--r-- ptx/src/test/spirv_run/cvt_s32_f32.spvtxt 75
-rw-r--r-- ptx/src/test/spirv_run/cvt_s64_s32.ptx 22
-rw-r--r-- ptx/src/test/spirv_run/cvt_s64_s32.spvtxt 53
-rw-r--r-- ptx/src/test/spirv_run/mod.rs 36
-rw-r--r-- ptx/src/test/spirv_run/setp_gt.ptx 27
-rw-r--r-- ptx/src/test/spirv_run/setp_gt.spvtxt 75
-rw-r--r-- ptx/src/test/spirv_run/setp_leu.ptx 27
-rw-r--r-- ptx/src/test/spirv_run/setp_leu.spvtxt 75
-rw-r--r-- ptx/src/translate.rs 9
12 files changed, 499 insertions, 13 deletions
diff --git a/ptx/src/test/spirv_run/cvt_rzi.ptx b/ptx/src/test/spirv_run/cvt_rzi.ptx
new file mode 100644
index 0000000..ba5cc0e
--- /dev/null
+++ b/ptx/src/test/spirv_run/cvt_rzi.ptx
@@ -0,0 +1,25 @@
+.version 6.5
+.target sm_30
+.address_size 64
+
+.visible .entry cvt_rzi(
+ .param .u64 input,
+ .param .u64 output
+)
+{
+ .reg .u64 in_addr;
+ .reg .u64 out_addr;
+ .reg .f32 temp1;
+ .reg .f32 temp2;
+
+ ld.param.u64 in_addr, [input];
+ ld.param.u64 out_addr, [output];
+
+ ld.f32 temp1, [in_addr];
+ ld.f32 temp2, [in_addr+4];
+ cvt.rzi.f32.f32 temp1, temp1;
+ cvt.rzi.f32.f32 temp2, temp2;
+ st.f32 [out_addr], temp1;
+ st.f32 [out_addr+4], temp2;
+ ret;
+}
diff --git a/ptx/src/test/spirv_run/cvt_rzi.spvtxt b/ptx/src/test/spirv_run/cvt_rzi.spvtxt
new file mode 100644
index 0000000..68c12c6
--- /dev/null
+++ b/ptx/src/test/spirv_run/cvt_rzi.spvtxt
@@ -0,0 +1,63 @@
+ OpCapability GenericPointer
+ OpCapability Linkage
+ OpCapability Addresses
+ OpCapability Kernel
+ OpCapability Int8
+ OpCapability Int16
+ OpCapability Int64
+ OpCapability Float16
+ OpCapability Float64
+ %34 = OpExtInstImport "OpenCL.std"
+ OpMemoryModel Physical64 OpenCL
+ OpEntryPoint Kernel %1 "cvt_rzi"
+ %void = OpTypeVoid
+ %ulong = OpTypeInt 64 0
+ %37 = OpTypeFunction %void %ulong %ulong
+%_ptr_Function_ulong = OpTypePointer Function %ulong
+ %float = OpTypeFloat 32
+%_ptr_Function_float = OpTypePointer Function %float
+%_ptr_Generic_float = OpTypePointer Generic %float
+ %ulong_4 = OpConstant %ulong 4
+ %ulong_4_0 = OpConstant %ulong 4
+ %1 = OpFunction %void None %37
+ %8 = OpFunctionParameter %ulong
+ %9 = OpFunctionParameter %ulong
+ %32 = OpLabel
+ %2 = OpVariable %_ptr_Function_ulong Function
+ %3 = OpVariable %_ptr_Function_ulong Function
+ %4 = OpVariable %_ptr_Function_ulong Function
+ %5 = OpVariable %_ptr_Function_ulong Function
+ %6 = OpVariable %_ptr_Function_float Function
+ %7 = OpVariable %_ptr_Function_float Function
+ OpStore %2 %8
+ OpStore %3 %9
+ %10 = OpLoad %ulong %2 Aligned 8
+ OpStore %4 %10
+ %11 = OpLoad %ulong %3 Aligned 8
+ OpStore %5 %11
+ %13 = OpLoad %ulong %4
+ %28 = OpConvertUToPtr %_ptr_Generic_float %13
+ %12 = OpLoad %float %28 Aligned 4
+ OpStore %6 %12
+ %15 = OpLoad %ulong %4
+ %25 = OpIAdd %ulong %15 %ulong_4
+ %29 = OpConvertUToPtr %_ptr_Generic_float %25
+ %14 = OpLoad %float %29 Aligned 4
+ OpStore %7 %14
+ %17 = OpLoad %float %6
+ %16 = OpExtInst %float %34 trunc %17
+ OpStore %6 %16
+ %19 = OpLoad %float %7
+ %18 = OpExtInst %float %34 trunc %19
+ OpStore %7 %18
+ %20 = OpLoad %ulong %5
+ %21 = OpLoad %float %6
+ %30 = OpConvertUToPtr %_ptr_Generic_float %20
+ OpStore %30 %21 Aligned 4
+ %22 = OpLoad %ulong %5
+ %23 = OpLoad %float %7
+ %27 = OpIAdd %ulong %22 %ulong_4_0
+ %31 = OpConvertUToPtr %_ptr_Generic_float %27
+ OpStore %31 %23 Aligned 4
+ OpReturn
+ OpFunctionEnd
diff --git a/ptx/src/test/spirv_run/cvt_s32_f32.ptx b/ptx/src/test/spirv_run/cvt_s32_f32.ptx
new file mode 100644
index 0000000..d432a91
--- /dev/null
+++ b/ptx/src/test/spirv_run/cvt_s32_f32.ptx
@@ -0,0 +1,25 @@
+.version 6.5
+.target sm_30
+.address_size 64
+
+.visible .entry cvt_s32_f32(
+ .param .u64 input,
+ .param .u64 output
+)
+{
+ .reg .u64 in_addr;
+ .reg .u64 out_addr;
+ .reg .b32 temp1;
+ .reg .b32 temp2;
+
+ ld.param.u64 in_addr, [input];
+ ld.param.u64 out_addr, [output];
+
+ ld.f32 temp1, [in_addr];
+ ld.f32 temp2, [in_addr+4];
+ cvt.rpi.ftz.s32.f32 temp1, temp1;
+ cvt.rpi.ftz.s32.f32 temp2, temp2;
+ st.global.s32 [out_addr], temp1;
+ st.global.s32 [out_addr+4], temp2;
+ ret;
+}
diff --git a/ptx/src/test/spirv_run/cvt_s32_f32.spvtxt b/ptx/src/test/spirv_run/cvt_s32_f32.spvtxt
new file mode 100644
index 0000000..d9ae053
--- /dev/null
+++ b/ptx/src/test/spirv_run/cvt_s32_f32.spvtxt
@@ -0,0 +1,75 @@
+ OpCapability GenericPointer
+ OpCapability Linkage
+ OpCapability Addresses
+ OpCapability Kernel
+ OpCapability Int8
+ OpCapability Int16
+ OpCapability Int64
+ OpCapability Float16
+ OpCapability Float64
+ %42 = OpExtInstImport "OpenCL.std"
+ OpMemoryModel Physical64 OpenCL
+ OpEntryPoint Kernel %1 "cvt_s32_f32"
+ OpDecorate %32 FPRoundingMode RTP
+ OpDecorate %34 FPRoundingMode RTP
+ %void = OpTypeVoid
+ %ulong = OpTypeInt 64 0
+ %45 = OpTypeFunction %void %ulong %ulong
+%_ptr_Function_ulong = OpTypePointer Function %ulong
+ %uint = OpTypeInt 32 0
+%_ptr_Function_uint = OpTypePointer Function %uint
+ %float = OpTypeFloat 32
+%_ptr_Generic_float = OpTypePointer Generic %float
+ %ulong_4 = OpConstant %ulong 4
+%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
+ %ulong_4_0 = OpConstant %ulong 4
+ %1 = OpFunction %void None %45
+ %8 = OpFunctionParameter %ulong
+ %9 = OpFunctionParameter %ulong
+ %40 = OpLabel
+ %2 = OpVariable %_ptr_Function_ulong Function
+ %3 = OpVariable %_ptr_Function_ulong Function
+ %4 = OpVariable %_ptr_Function_ulong Function
+ %5 = OpVariable %_ptr_Function_ulong Function
+ %6 = OpVariable %_ptr_Function_uint Function
+ %7 = OpVariable %_ptr_Function_uint Function
+ OpStore %2 %8
+ OpStore %3 %9
+ %10 = OpLoad %ulong %2 Aligned 8
+ OpStore %4 %10
+ %11 = OpLoad %ulong %3 Aligned 8
+ OpStore %5 %11
+ %13 = OpLoad %ulong %4
+ %29 = OpConvertUToPtr %_ptr_Generic_float %13
+ %28 = OpLoad %float %29 Aligned 4
+ %12 = OpBitcast %uint %28
+ OpStore %6 %12
+ %15 = OpLoad %ulong %4
+ %25 = OpIAdd %ulong %15 %ulong_4
+ %31 = OpConvertUToPtr %_ptr_Generic_float %25
+ %30 = OpLoad %float %31 Aligned 4
+ %14 = OpBitcast %uint %30
+ OpStore %7 %14
+ %17 = OpLoad %uint %6
+ %33 = OpBitcast %float %17
+ %32 = OpConvertFToS %uint %33
+ %16 = OpCopyObject %uint %32
+ OpStore %6 %16
+ %19 = OpLoad %uint %7
+ %35 = OpBitcast %float %19
+ %34 = OpConvertFToS %uint %35
+ %18 = OpCopyObject %uint %34
+ OpStore %7 %18
+ %20 = OpLoad %ulong %5
+ %21 = OpLoad %uint %6
+ %36 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %20
+ %37 = OpCopyObject %uint %21
+ OpStore %36 %37 Aligned 4
+ %22 = OpLoad %ulong %5
+ %23 = OpLoad %uint %7
+ %27 = OpIAdd %ulong %22 %ulong_4_0
+ %38 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %27
+ %39 = OpCopyObject %uint %23
+ OpStore %38 %39 Aligned 4
+ OpReturn
+ OpFunctionEnd
diff --git a/ptx/src/test/spirv_run/cvt_s64_s32.ptx b/ptx/src/test/spirv_run/cvt_s64_s32.ptx
new file mode 100644
index 0000000..5242864
--- /dev/null
+++ b/ptx/src/test/spirv_run/cvt_s64_s32.ptx
@@ -0,0 +1,22 @@
+.version 6.5
+.target sm_30
+.address_size 64
+
+.visible .entry cvt_s64_s32(
+ .param .u64 input,
+ .param .u64 output
+)
+{
+ .reg .u64 in_addr;
+ .reg .u64 out_addr;
+ .reg .s32 r_32;
+ .reg .s64 r_64;
+
+ ld.param.u64 in_addr, [input];
+ ld.param.u64 out_addr, [output];
+
+ ld.b32 r_32, [in_addr];
+ cvt.s64.s32 r_64, r_32;
+ st.b64 [out_addr], r_64;
+ ret;
+}
diff --git a/ptx/src/test/spirv_run/cvt_s64_s32.spvtxt b/ptx/src/test/spirv_run/cvt_s64_s32.spvtxt
new file mode 100644
index 0000000..3f46103
--- /dev/null
+++ b/ptx/src/test/spirv_run/cvt_s64_s32.spvtxt
@@ -0,0 +1,53 @@
+ OpCapability GenericPointer
+ OpCapability Linkage
+ OpCapability Addresses
+ OpCapability Kernel
+ OpCapability Int8
+ OpCapability Int16
+ OpCapability Int64
+ OpCapability Float16
+ OpCapability Float64
+ %24 = OpExtInstImport "OpenCL.std"
+ OpMemoryModel Physical64 OpenCL
+ OpEntryPoint Kernel %1 "cvt_s64_s32"
+ %void = OpTypeVoid
+ %ulong = OpTypeInt 64 0
+ %27 = OpTypeFunction %void %ulong %ulong
+%_ptr_Function_ulong = OpTypePointer Function %ulong
+ %uint = OpTypeInt 32 0
+%_ptr_Function_uint = OpTypePointer Function %uint
+%_ptr_Generic_uint = OpTypePointer Generic %uint
+%_ptr_Generic_ulong = OpTypePointer Generic %ulong
+ %1 = OpFunction %void None %27
+ %8 = OpFunctionParameter %ulong
+ %9 = OpFunctionParameter %ulong
+ %22 = OpLabel
+ %2 = OpVariable %_ptr_Function_ulong Function
+ %3 = OpVariable %_ptr_Function_ulong Function
+ %4 = OpVariable %_ptr_Function_ulong Function
+ %5 = OpVariable %_ptr_Function_ulong Function
+ %6 = OpVariable %_ptr_Function_uint Function
+ %7 = OpVariable %_ptr_Function_ulong Function
+ OpStore %2 %8
+ OpStore %3 %9
+ %10 = OpLoad %ulong %2 Aligned 8
+ OpStore %4 %10
+ %11 = OpLoad %ulong %3 Aligned 8
+ OpStore %5 %11
+ %13 = OpLoad %ulong %4
+ %19 = OpConvertUToPtr %_ptr_Generic_uint %13
+ %18 = OpLoad %uint %19 Aligned 4
+ %12 = OpCopyObject %uint %18
+ OpStore %6 %12
+ %15 = OpLoad %uint %6
+ %32 = OpBitcast %uint %15
+ %33 = OpSConvert %ulong %32
+ %14 = OpCopyObject %ulong %33
+ OpStore %7 %14
+ %16 = OpLoad %ulong %5
+ %17 = OpLoad %ulong %7
+ %20 = OpConvertUToPtr %_ptr_Generic_ulong %16
+ %21 = OpCopyObject %ulong %17
+ OpStore %20 %21 Aligned 8
+ OpReturn
+ OpFunctionEnd
diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs
index 86f9c16..3976c76 100644
--- a/ptx/src/test/spirv_run/mod.rs
+++ b/ptx/src/test/spirv_run/mod.rs
@@ -49,6 +49,8 @@ test_ptx!(mul_lo, [1u64], [2u64]);
test_ptx!(mul_hi, [u64::max_value()], [1u64]);
test_ptx!(add, [1u64], [2u64]);
test_ptx!(setp, [10u64, 11u64], [1u64, 0u64]);
+test_ptx!(setp_gt, [f32::NAN, 1f32], [1f32]);
+test_ptx!(setp_leu, [1f32, f32::NAN], [1f32]);
test_ptx!(bra, [10u64], [11u64]);
test_ptx!(not, [0u64], [u64::max_value()]);
test_ptx!(shl, [11u64], [44u64]);
@@ -114,6 +116,8 @@ test_ptx!(cos, [std::f32::consts::PI], [-1f32]);
test_ptx!(lg2, [512f32], [9f32]);
test_ptx!(ex2, [10f32], [1024f32]);
test_ptx!(cvt_rni, [9.5f32, 10.5f32], [10f32, 10f32]);
+test_ptx!(cvt_rzi, [-13.8f32, 12.9f32], [-13f32, 12f32]);
+test_ptx!(cvt_s32_f32, [-13.8f32, 12.9f32], [-13i32, 13i32]);
test_ptx!(clz, [0b00000101_00101101_00010011_10101011u32], [5u32]);
test_ptx!(popc, [0b10111100_10010010_01001001_10001010u32], [14u32]);
test_ptx!(
@@ -140,8 +144,9 @@ test_ptx!(stateful_ld_st_ntid, [123u64], [123u64]);
test_ptx!(stateful_ld_st_ntid_chain, [12651u64], [12651u64]);
test_ptx!(stateful_ld_st_ntid_sub, [96311u64], [96311u64]);
test_ptx!(shared_ptr_take_address, [97815231u64], [97815231u64]);
-// For now, we just that it builds and links
+// For now, we just make sure that it builds and links
test_ptx!(assertfail, [716523871u64], [716523872u64]);
+test_ptx!(cvt_s64_s32, [-1i32], [-1i64]);
struct DisplayError<T: Debug> {
err: T,
@@ -161,11 +166,15 @@ impl<T: Debug> Debug for DisplayError<T> {
impl<T: Debug> error::Error for DisplayError<T> {}
-fn test_ptx_assert<'a, T: From<u8> + ze::SafeRepr + Debug + Copy + PartialEq>(
+fn test_ptx_assert<
+ 'a,
+ Input: From<u8> + ze::SafeRepr + Debug + Copy + PartialEq,
+ Output: From<u8> + ze::SafeRepr + Debug + Copy + PartialEq,
+>(
name: &str,
ptx_text: &'a str,
- input: &[T],
- output: &mut [T],
+ input: &[Input],
+ output: &mut [Output],
) -> Result<(), Box<dyn error::Error + 'a>> {
let mut errors = Vec::new();
let ast = ptx::ModuleParser::new().parse(&mut errors, ptx_text)?;
@@ -178,12 +187,15 @@ fn test_ptx_assert<'a, T: From<u8> + ze::SafeRepr + Debug + Copy + PartialEq>(
Ok(())
}
-fn run_spirv<T: From<u8> + ze::SafeRepr + Copy + Debug>(
+fn run_spirv<
+ Input: From<u8> + ze::SafeRepr + Copy + Debug,
+ Output: From<u8> + ze::SafeRepr + Copy + Debug,
+>(
name: &CStr,
module: translate::Module,
- input: &[T],
- output: &mut [T],
-) -> ze::Result<Vec<T>> {
+ input: &[Input],
+ output: &mut [Output],
+) -> ze::Result<Vec<Output>> {
ze::init()?;
let spirv = module.spirv.assemble();
let byte_il = unsafe {
@@ -237,15 +249,15 @@ fn run_spirv<T: From<u8> + ze::SafeRepr + Copy + Debug>(
kernel.set_indirect_access(
ze::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE,
)?;
- let mut inp_b = ze::DeviceBuffer::<T>::new(&mut ctx, &dev, cmp::max(input.len(), 1))?;
- let mut out_b = ze::DeviceBuffer::<T>::new(&mut ctx, &dev, cmp::max(output.len(), 1))?;
- let inp_b_ptr_mut: ze::BufferPtrMut<T> = (&mut inp_b).into();
+ let mut inp_b = ze::DeviceBuffer::<Input>::new(&mut ctx, &dev, cmp::max(input.len(), 1))?;
+ let mut out_b = ze::DeviceBuffer::<Output>::new(&mut ctx, &dev, cmp::max(output.len(), 1))?;
+ let inp_b_ptr_mut: ze::BufferPtrMut<Input> = (&mut inp_b).into();
let event_pool = ze::EventPool::new(&mut ctx, 3, Some(&[&dev]))?;
let ev0 = ze::Event::new(&event_pool, 0)?;
let ev1 = ze::Event::new(&event_pool, 1)?;
let mut ev2 = ze::Event::new(&event_pool, 2)?;
let mut cmd_list = ze::CommandList::new(&mut ctx, &dev)?;
- let out_b_ptr_mut: ze::BufferPtrMut<T> = (&mut out_b).into();
+ let out_b_ptr_mut: ze::BufferPtrMut<Output> = (&mut out_b).into();
let mut init_evs = [ev0, ev1];
cmd_list.append_memory_copy(inp_b_ptr_mut, input, Some(&mut init_evs[0]), &mut [])?;
cmd_list.append_memory_fill(out_b_ptr_mut, 0, Some(&mut init_evs[1]), &mut [])?;
diff --git a/ptx/src/test/spirv_run/setp_gt.ptx b/ptx/src/test/spirv_run/setp_gt.ptx
new file mode 100644
index 0000000..5f45300
--- /dev/null
+++ b/ptx/src/test/spirv_run/setp_gt.ptx
@@ -0,0 +1,27 @@
+.version 6.5
+.target sm_30
+.address_size 64
+
+.visible .entry setp_gt(
+ .param .u64 input,
+ .param .u64 output
+)
+{
+ .reg .u64 in_addr;
+ .reg .u64 out_addr;
+ .reg .f32 r1;
+ .reg .f32 r2;
+ .reg .f32 r3;
+ .reg .pred pred;
+
+ ld.param.u64 in_addr, [input];
+ ld.param.u64 out_addr, [output];
+
+ ld.f32 r1, [in_addr];
+ ld.f32 r2, [in_addr + 4];
+ setp.gt.ftz.f32 pred, r1, r2;
+ @pred mov.f32 r3, r1;
+ @!pred mov.f32 r3, r2;
+ st.f32 [out_addr], r3;
+ ret;
+}
diff --git a/ptx/src/test/spirv_run/setp_gt.spvtxt b/ptx/src/test/spirv_run/setp_gt.spvtxt
new file mode 100644
index 0000000..77f6546
--- /dev/null
+++ b/ptx/src/test/spirv_run/setp_gt.spvtxt
@@ -0,0 +1,75 @@
+ OpCapability GenericPointer
+ OpCapability Linkage
+ OpCapability Addresses
+ OpCapability Kernel
+ OpCapability Int8
+ OpCapability Int16
+ OpCapability Int64
+ OpCapability Float16
+ OpCapability Float64
+ %40 = OpExtInstImport "OpenCL.std"
+ OpMemoryModel Physical64 OpenCL
+ OpEntryPoint Kernel %1 "setp_gt"
+ %void = OpTypeVoid
+ %ulong = OpTypeInt 64 0
+ %43 = OpTypeFunction %void %ulong %ulong
+%_ptr_Function_ulong = OpTypePointer Function %ulong
+ %float = OpTypeFloat 32
+%_ptr_Function_float = OpTypePointer Function %float
+ %bool = OpTypeBool
+%_ptr_Function_bool = OpTypePointer Function %bool
+%_ptr_Generic_float = OpTypePointer Generic %float
+ %ulong_4 = OpConstant %ulong 4
+ %1 = OpFunction %void None %43
+ %14 = OpFunctionParameter %ulong
+ %15 = OpFunctionParameter %ulong
+ %38 = OpLabel
+ %2 = OpVariable %_ptr_Function_ulong Function
+ %3 = OpVariable %_ptr_Function_ulong Function
+ %4 = OpVariable %_ptr_Function_ulong Function
+ %5 = OpVariable %_ptr_Function_ulong Function
+ %6 = OpVariable %_ptr_Function_float Function
+ %7 = OpVariable %_ptr_Function_float Function
+ %8 = OpVariable %_ptr_Function_float Function
+ %9 = OpVariable %_ptr_Function_bool Function
+ OpStore %2 %14
+ OpStore %3 %15
+ %16 = OpLoad %ulong %2 Aligned 8
+ OpStore %4 %16
+ %17 = OpLoad %ulong %3 Aligned 8
+ OpStore %5 %17
+ %19 = OpLoad %ulong %4
+ %35 = OpConvertUToPtr %_ptr_Generic_float %19
+ %18 = OpLoad %float %35 Aligned 4
+ OpStore %6 %18
+ %21 = OpLoad %ulong %4
+ %34 = OpIAdd %ulong %21 %ulong_4
+ %36 = OpConvertUToPtr %_ptr_Generic_float %34
+ %20 = OpLoad %float %36 Aligned 4
+ OpStore %7 %20
+ %23 = OpLoad %float %6
+ %24 = OpLoad %float %7
+ %22 = OpFOrdGreaterThan %bool %23 %24
+ OpStore %9 %22
+ %25 = OpLoad %bool %9
+ OpBranchConditional %25 %10 %11
+ %10 = OpLabel
+ %27 = OpLoad %float %6
+ %26 = OpCopyObject %float %27
+ OpStore %8 %26
+ OpBranch %11
+ %11 = OpLabel
+ %28 = OpLoad %bool %9
+ OpBranchConditional %28 %13 %12
+ %12 = OpLabel
+ %30 = OpLoad %float %7
+ %29 = OpCopyObject %float %30
+ OpStore %8 %29
+ OpBranch %13
+ %13 = OpLabel
+ %31 = OpLoad %ulong %5
+ %32 = OpLoad %float %8
+ %37 = OpConvertUToPtr %_ptr_Generic_float %31
+ OpStore %37 %32 Aligned 4
+ OpReturn
+ OpFunctionEnd
diff --git a/ptx/src/test/spirv_run/setp_leu.ptx b/ptx/src/test/spirv_run/setp_leu.ptx
new file mode 100644
index 0000000..be7538a
--- /dev/null
+++ b/ptx/src/test/spirv_run/setp_leu.ptx
@@ -0,0 +1,27 @@
+.version 6.5
+.target sm_30
+.address_size 64
+
+.visible .entry setp_leu(
+ .param .u64 input,
+ .param .u64 output
+)
+{
+ .reg .u64 in_addr;
+ .reg .u64 out_addr;
+ .reg .f32 r1;
+ .reg .f32 r2;
+ .reg .f32 r3;
+ .reg .pred pred;
+
+ ld.param.u64 in_addr, [input];
+ ld.param.u64 out_addr, [output];
+
+ ld.f32 r1, [in_addr];
+ ld.f32 r2, [in_addr + 4];
+ setp.leu.ftz.f32 pred, r1, r2;
+ @pred mov.f32 r3, r1;
+ @!pred mov.f32 r3, r2;
+ st.f32 [out_addr], r3;
+ ret;
+}
diff --git a/ptx/src/test/spirv_run/setp_leu.spvtxt b/ptx/src/test/spirv_run/setp_leu.spvtxt
new file mode 100644
index 0000000..f80880a
--- /dev/null
+++ b/ptx/src/test/spirv_run/setp_leu.spvtxt
@@ -0,0 +1,75 @@
+ OpCapability GenericPointer
+ OpCapability Linkage
+ OpCapability Addresses
+ OpCapability Kernel
+ OpCapability Int8
+ OpCapability Int16
+ OpCapability Int64
+ OpCapability Float16
+ OpCapability Float64
+ %40 = OpExtInstImport "OpenCL.std"
+ OpMemoryModel Physical64 OpenCL
+ OpEntryPoint Kernel %1 "setp_leu"
+ %void = OpTypeVoid
+ %ulong = OpTypeInt 64 0
+ %43 = OpTypeFunction %void %ulong %ulong
+%_ptr_Function_ulong = OpTypePointer Function %ulong
+ %float = OpTypeFloat 32
+%_ptr_Function_float = OpTypePointer Function %float
+ %bool = OpTypeBool
+%_ptr_Function_bool = OpTypePointer Function %bool
+%_ptr_Generic_float = OpTypePointer Generic %float
+ %ulong_4 = OpConstant %ulong 4
+ %1 = OpFunction %void None %43
+ %14 = OpFunctionParameter %ulong
+ %15 = OpFunctionParameter %ulong
+ %38 = OpLabel
+ %2 = OpVariable %_ptr_Function_ulong Function
+ %3 = OpVariable %_ptr_Function_ulong Function
+ %4 = OpVariable %_ptr_Function_ulong Function
+ %5 = OpVariable %_ptr_Function_ulong Function
+ %6 = OpVariable %_ptr_Function_float Function
+ %7 = OpVariable %_ptr_Function_float Function
+ %8 = OpVariable %_ptr_Function_float Function
+ %9 = OpVariable %_ptr_Function_bool Function
+ OpStore %2 %14
+ OpStore %3 %15
+ %16 = OpLoad %ulong %2 Aligned 8
+ OpStore %4 %16
+ %17 = OpLoad %ulong %3 Aligned 8
+ OpStore %5 %17
+ %19 = OpLoad %ulong %4
+ %35 = OpConvertUToPtr %_ptr_Generic_float %19
+ %18 = OpLoad %float %35 Aligned 4
+ OpStore %6 %18
+ %21 = OpLoad %ulong %4
+ %34 = OpIAdd %ulong %21 %ulong_4
+ %36 = OpConvertUToPtr %_ptr_Generic_float %34
+ %20 = OpLoad %float %36 Aligned 4
+ OpStore %7 %20
+ %23 = OpLoad %float %6
+ %24 = OpLoad %float %7
+ %22 = OpFUnordLessThanEqual %bool %23 %24
+ OpStore %9 %22
+ %25 = OpLoad %bool %9
+ OpBranchConditional %25 %10 %11
+ %10 = OpLabel
+ %27 = OpLoad %float %6
+ %26 = OpCopyObject %float %27
+ OpStore %8 %26
+ OpBranch %11
+ %11 = OpLabel
+ %28 = OpLoad %bool %9
+ OpBranchConditional %28 %13 %12
+ %12 = OpLabel
+ %30 = OpLoad %float %7
+ %29 = OpCopyObject %float %30
+ OpStore %8 %29
+ OpBranch %13
+ %13 = OpLabel
+ %31 = OpLoad %ulong %5
+ %32 = OpLoad %float %8
+ %37 = OpConvertUToPtr %_ptr_Generic_float %31
+ OpStore %37 %32 Aligned 4
+ OpReturn
+ OpFunctionEnd
diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs
index 471a2d7..18d750f 100644
--- a/ptx/src/translate.rs
+++ b/ptx/src/translate.rs
@@ -4087,8 +4087,15 @@ fn emit_implicit_conversion(
{
builder.u_convert(wide_bit_type_spirv, Some(cv.dst), same_width_bit_value)?;
} else {
+ let conversion_fn = if from_parts.scalar_kind == ScalarKind::Signed
+ && to_parts.scalar_kind == ScalarKind::Signed
+ {
+ dr::Builder::s_convert
+ } else {
+ dr::Builder::u_convert
+ };
let wide_bit_value =
- builder.u_convert(wide_bit_type_spirv, None, same_width_bit_value)?;
+ conversion_fn(builder, wide_bit_type_spirv, None, same_width_bit_value)?;
emit_implicit_conversion(
builder,
map,