diff options
author | Andrzej Janik <[email protected]> | 2021-07-03 02:13:38 +0200 |
---|---|---|
committer | Andrzej Janik <[email protected]> | 2021-07-03 02:13:38 +0200 |
commit | ad2059872a43315ecb5361db1ab2c74d55363fae (patch) | |
tree | 8229288f9b6f67134d33db4666dc21d51edee20e /ptx | |
parent | e328ecc55072bb3f871b3f27982573a0f1931e65 (diff) | |
download | ZLUDA-ad2059872a43315ecb5361db1ab2c74d55363fae.tar.gz ZLUDA-ad2059872a43315ecb5361db1ab2c74d55363fae.zip |
Regenerate SPIR-V for ptx_impl and fix weird handling of ptr-ptr add or sub
Diffstat (limited to 'ptx')
-rw-r--r-- | ptx/lib/zluda_ptx_impl.spv | bin | 105668 -> 106076 bytes | |||
-rw-r--r-- | ptx/src/test/spirv_run/mod.rs | 1 | ||||
-rw-r--r-- | ptx/src/test/spirv_run/stateful_neg_offset.ptx | 29 | ||||
-rw-r--r-- | ptx/src/test/spirv_run/stateful_neg_offset.spvtxt | 80 | ||||
-rw-r--r-- | ptx/src/translate.rs | 76 |
5 files changed, 164 insertions, 22 deletions
diff --git a/ptx/lib/zluda_ptx_impl.spv b/ptx/lib/zluda_ptx_impl.spv Binary files differindex ca16447..e9fc938 100644 --- a/ptx/lib/zluda_ptx_impl.spv +++ b/ptx/lib/zluda_ptx_impl.spv diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index ff48ae9..4f8b252 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -181,6 +181,7 @@ test_ptx!( [0u32, 0u32, 0u32, 2u32]
);
test_ptx!(non_scalar_ptr_offset, [1u32, 2u32, 3u32, 4u32], [7u32]);
+test_ptx!(stateful_neg_offset, [1237518u64], [1237518u64]);
struct DisplayError<T: Debug> {
err: T,
diff --git a/ptx/src/test/spirv_run/stateful_neg_offset.ptx b/ptx/src/test/spirv_run/stateful_neg_offset.ptx new file mode 100644 index 0000000..9c56f31 --- /dev/null +++ b/ptx/src/test/spirv_run/stateful_neg_offset.ptx @@ -0,0 +1,29 @@ +.version 6.5
+.target sm_30
+.address_size 64
+
+.visible .entry stateful_neg_offset(
+ .param .u64 input,
+ .param .u64 output
+)
+{
+ .reg .u64 in_addr;
+ .reg .u64 out_addr;
+ .reg .u64 in_addr2;
+ .reg .u64 out_addr2;
+ .reg .u64 offset;
+ .reg .u64 temp;
+
+ ld.param.u64 in_addr, [input];
+ ld.param.u64 out_addr, [output];
+
+ cvta.to.global.u64 in_addr2, in_addr;
+ cvta.to.global.u64 out_addr2, out_addr;
+
+ add.u64 offset, in_addr2, out_addr2;
+ sub.u64 offset, in_addr2, out_addr2;
+
+ ld.global.u64 temp, [in_addr2];
+ st.global.u64 [out_addr2], temp;
+ ret;
+}
\ No newline at end of file diff --git a/ptx/src/test/spirv_run/stateful_neg_offset.spvtxt b/ptx/src/test/spirv_run/stateful_neg_offset.spvtxt new file mode 100644 index 0000000..62843ca --- /dev/null +++ b/ptx/src/test/spirv_run/stateful_neg_offset.spvtxt @@ -0,0 +1,80 @@ + OpCapability GenericPointer + OpCapability Linkage + OpCapability Addresses + OpCapability Kernel + OpCapability Int8 + OpCapability Int16 + OpCapability Int64 + OpCapability Float16 + OpCapability Float64 + %57 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "stateful_neg_offset" + %void = OpTypeVoid + %uchar = OpTypeInt 8 0 +%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar + %61 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_uchar +%_ptr_Function__ptr_CrossWorkgroup_uchar = OpTypePointer Function %_ptr_CrossWorkgroup_uchar + %ulong = OpTypeInt 64 0 +%_ptr_Function_ulong = OpTypePointer Function %ulong +%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong + %1 = OpFunction %void None %61 + %29 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar + %30 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar + %55 = OpLabel + %15 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function + %16 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function + %10 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function + %11 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function + %12 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function + %13 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function + %14 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function + %9 = OpVariable %_ptr_Function_ulong Function + OpStore %15 %29 + OpStore %16 %30 + %47 = OpBitcast %_ptr_Function_ulong %15 + %17 = OpLoad %ulong %47 Aligned 8 + %31 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %17 + OpStore %10 %31 + %48 = OpBitcast %_ptr_Function_ulong %16 + %18 = OpLoad %ulong %48 Aligned 8 + %32 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %18 + OpStore %11 %32 + %33 = OpLoad %_ptr_CrossWorkgroup_uchar %10 + %20 = OpConvertPtrToU %ulong %33 + %50 = OpCopyObject %ulong %20 + %49 = OpCopyObject %ulong %50 + %19 = OpCopyObject %ulong %49 + %34 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %19 + OpStore %12 %34 + %35 = OpLoad %_ptr_CrossWorkgroup_uchar %11 + %22 = OpConvertPtrToU %ulong %35 + %52 = OpCopyObject %ulong %22 + %51 = OpCopyObject %ulong %52 + %21 = OpCopyObject %ulong %51 + %36 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %21 + OpStore %13 %36 + %37 = OpLoad %_ptr_CrossWorkgroup_uchar %12 + %24 = OpConvertPtrToU %ulong %37 + %38 = OpLoad %_ptr_CrossWorkgroup_uchar %13 + %25 = OpConvertPtrToU %ulong %38 + %23 = OpIAdd %ulong %24 %25 + %39 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %23 + OpStore %14 %39 + %40 = OpLoad %_ptr_CrossWorkgroup_uchar %12 + %27 = OpConvertPtrToU %ulong %40 + %41 = OpLoad %_ptr_CrossWorkgroup_uchar %13 + %28 = OpConvertPtrToU %ulong %41 + %26 = OpISub %ulong %27 %28 + %42 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %26 + OpStore %14 %42 + %44 = OpLoad %_ptr_CrossWorkgroup_uchar %12 + %53 = OpBitcast %_ptr_CrossWorkgroup_ulong %44 + %43 = OpLoad %ulong %53 Aligned 8 + OpStore %9 %43 + %45 = OpLoad %_ptr_CrossWorkgroup_uchar %13 + %46 = OpLoad %ulong %9 + %54 = OpBitcast %_ptr_CrossWorkgroup_ulong %45 + OpStore %54 %46 Aligned 8 + OpReturn + OpFunctionEnd diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 16a4cfb..2885f52 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -4234,7 +4234,7 @@ fn convert_to_stateful_memory_access<'a, 'input>( arg,
)) => {
if let (TypedOperand::Reg(dst), Some(src)) =
- (arg.dst, arg.src.upcast().underlying())
+ (arg.dst, arg.src.upcast().underlying_register())
{
if is_64_bit_integer(id_defs, *src) && is_64_bit_integer(id_defs, dst) {
stateful_markers.push((dst, *src));
@@ -4266,7 +4266,7 @@ fn convert_to_stateful_memory_access<'a, 'input>( arg,
)) => {
if let (TypedOperand::Reg(dst), Some(src)) =
- (&arg.dst, arg.src.upcast().underlying())
+ (&arg.dst, arg.src.upcast().underlying_register())
{
if func_args_64bit.contains(src) {
multi_hash_map_append(&mut stateful_init_reg, *dst, *src);
@@ -4320,14 +4320,16 @@ fn convert_to_stateful_memory_access<'a, 'input>( }),
arg,
)) => {
+ // TODO: don't mark result of double pointer sub or double
+ // pointer add as ptr result
if let (TypedOperand::Reg(dst), Some(src1)) =
- (arg.dst, arg.src1.upcast().underlying())
+ (arg.dst, arg.src1.upcast().underlying_register())
{
if regs_ptr_current.contains(src1) && !regs_ptr_seen.contains(src1) {
regs_ptr_new.insert(dst);
}
} else if let (TypedOperand::Reg(dst), Some(src2)) =
- (arg.dst, arg.src2.upcast().underlying())
+ (arg.dst, arg.src2.upcast().underlying_register())
{
if regs_ptr_current.contains(src2) && !regs_ptr_seen.contains(src2) {
regs_ptr_new.insert(dst);
@@ -4392,7 +4394,7 @@ fn convert_to_stateful_memory_access<'a, 'input>( }),
arg,
)) if is_add_ptr_direct(&remapped_ids, &arg) => {
- let (ptr, offset) = match arg.src1.upcast().underlying() {
+ let (ptr, offset) = match arg.src1.upcast().underlying_register() {
Some(src1) if remapped_ids.contains_key(src1) => {
(remapped_ids.get(src1).unwrap(), arg.src2)
}
@@ -4420,14 +4422,9 @@ fn convert_to_stateful_memory_access<'a, 'input>( saturate: false,
}),
arg,
- )) if is_add_ptr_direct(&remapped_ids, &arg) => {
- let (ptr, offset) = match arg.src1.upcast().underlying() {
- Some(src1) if remapped_ids.contains_key(src1) => {
- (remapped_ids.get(src1).unwrap(), arg.src2)
- }
- Some(src2) if remapped_ids.contains_key(src2) => {
- (remapped_ids.get(src2).unwrap(), arg.src1)
- }
+ )) if is_sub_ptr_direct(&remapped_ids, &arg) => {
+ let (ptr, offset) = match arg.src1.upcast().underlying_register() {
+ Some(src1) => (remapped_ids.get(src1).unwrap(), arg.src2),
_ => return Err(error_unreachable()),
};
let offset_neg = id_defs.register_intermediate(Some((
@@ -4577,10 +4574,45 @@ fn is_add_ptr_direct(remapped_ids: &HashMap<u32, u32>, arg: &ast::Arg3<TypedArgP if !remapped_ids.contains_key(&dst) {
return false;
}
- match arg.src1.upcast().underlying() {
- Some(src1) if remapped_ids.contains_key(src1) => true,
- Some(src2) if remapped_ids.contains_key(src2) => true,
- _ => false,
+ if let Some(src1_reg) = arg.src1.upcast().underlying_register() {
+ if remapped_ids.contains_key(src1_reg) {
+ // don't trigger optimization when adding two pointers
+ if let Some(src2_reg) = arg.src2.upcast().underlying_register() {
+ return !remapped_ids.contains_key(src2_reg);
+ }
+ }
+ }
+ if let Some(src2_reg) = arg.src2.upcast().underlying_register() {
+ remapped_ids.contains_key(src2_reg)
+ } else {
+ false
+ }
+ }
+ }
+}
+
+fn is_sub_ptr_direct(remapped_ids: &HashMap<u32, u32>, arg: &ast::Arg3<TypedArgParams>) -> bool {
+ match arg.dst {
+ TypedOperand::Imm(..) | TypedOperand::RegOffset(..) | TypedOperand::VecMember(..) => {
+ return false
+ }
+ TypedOperand::Reg(dst) => {
+ if !remapped_ids.contains_key(&dst) {
+ return false;
+ }
+ match arg.src1.upcast().underlying_register() {
+ Some(src1_reg) => {
+ if remapped_ids.contains_key(src1_reg) {
+ // don't trigger optimization when subtracting two pointers
+ arg.src2
+ .upcast()
+ .underlying_register()
+ .map_or(true, |src2_reg| !remapped_ids.contains_key(src2_reg))
+ } else {
+ false
+ }
+ }
+ None => false,
}
}
}
@@ -7099,12 +7131,12 @@ impl ast::StateSpace { }
impl<T> ast::Operand<T> {
- fn underlying(&self) -> Option<&T> {
+ fn underlying_register(&self) -> Option<&T> {
match self {
- ast::Operand::Reg(r) | ast::Operand::RegOffset(r, _) => Some(r),
- ast::Operand::Imm(_) => None,
- ast::Operand::VecMember(reg, _) => Some(reg),
- ast::Operand::VecPack(..) => None,
+ ast::Operand::Reg(r)
+ | ast::Operand::RegOffset(r, _)
+ | ast::Operand::VecMember(r, _) => Some(r),
+ ast::Operand::Imm(_) | ast::Operand::VecPack(..) => None,
}
}
}
|