diff options
Diffstat (limited to 'zluda_rt/src/tests/alloca_bug.ptx')
-rw-r--r-- | zluda_rt/src/tests/alloca_bug.ptx | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/zluda_rt/src/tests/alloca_bug.ptx b/zluda_rt/src/tests/alloca_bug.ptx new file mode 100644 index 0000000..f2ae4d1 --- /dev/null +++ b/zluda_rt/src/tests/alloca_bug.ptx @@ -0,0 +1,126 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-31833905 +// Cuda compilation tools, release 11.8, V11.8.89 +// Based on NVVM 7.0.1 +// + +.version 7.8 +.target sm_52 +.address_size 64 + + // .globl _Z11closest_hitv +/* Module-scope declarations. The instructions of _Z11closest_hitv reference only two of them: sysBRDFEval (4 bytes, loaded as a u32 and handed to _rt_callable_program_from_id_v2_64) and sysMaterialParameters (its generic address is handed to _rt_buffer_get_64). The rti_internal_* symbols are declared but never referenced by the kernel's instructions; presumably they are metadata consumed by the host runtime - TODO confirm. The typeinfo initializer {82, 97, 121, 0, 4, 0, 0, 0} is the ASCII bytes "Ray" plus NUL followed by the bytes 4, 0, 0, 0. */ .visible .global .align 4 .b8 sysBRDFEval[4]; +.visible .global .align 1 .b8 sysMaterialParameters[1]; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo11sysBRDFEvalE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.visible .global .align 4 .u32 
_ZN21rti_internal_register21reg_exception_detail4E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; +/* The 54 initializer bytes below decode to the NUL-terminated ASCII string "rtCallableProgramId<float3(float3 &mat, float3 &prd)>". That shape (two pointer-like arguments, a 12-byte result) matches prototype_0 used for the indirect call in the kernel. */ .visible .global .align 1 .b8 _ZN21rti_internal_typename11sysBRDFEvalE[54] = {114, 116, 67, 97, 108, 108, 97, 98, 108, 101, 80, 114, 111, 103, 114, 97, 109, 73, 100, 60, 102, 108, 111, 97, 116, 51, 40, 102, 108, 111, 97, 116, 51, 32, 38, 109, 97, 116, 44, 32, 102, 108, 111, 97, 116, 51, 32, 38, 112, 114, 100, 41, 62, 0}; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum11sysBRDFEvalE = 4920; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic11sysBRDFEvalE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation11sysBRDFEvalE[1]; + +/* Kernel _Z11closest_hitv, no parameters. Summary of what the instructions below do: (1) reserve a 24-byte .local buffer __local_depot0, keeping its .local address in %SPL and its generic address (cvta.local) in %SP; (2) call _rt_buffer_get_64 with the generic address of sysMaterialParameters, the constants 1 and 12, and four zero u64 arguments, then copy three f32 values from the returned pointer into local bytes 0..11; (3) when the first of those floats does not compare equal to zero, overwrite local bytes 0..11 with zero; (4) load a u32 from sysBRDFEval, turn it into a call target with _rt_callable_program_from_id_v2_64, and call that target indirectly with the generic addresses %SP+0 and %SP+12, receiving three f32 results; (5) read the f32 at local offset 12 and, only when it is greater than zero, multiply the three floats at local bytes 12..23 in place by the three results. This kernel never stores to local bytes 12..23 before the call, so the values read in step 5 are presumably written by the callee through the generic pointer passed as param1 - TODO confirm. NOTE(review): the file name suggests a regression fixture for local-memory (alloca) handling; confirm against the test harness before changing any instruction, register or ordering here. */ .visible .entry _Z11closest_hitv() +{ + .local .align 4 .b8 __local_depot0[24]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .pred %p<3>; + .reg .f32 %f<13>; + .reg .b32 %r<5>; + .reg .b64 %rd<16>; + + + mov.u64 %SPL, __local_depot0; + cvta.local.u64 %SP, %SPL; + add.u64 %rd9, %SP, 0; + add.u64 %rd1, %SPL, 0; + mov.u64 %rd10, sysMaterialParameters; + cvta.global.u64 %rd4, %rd10; + mov.u32 %r1, 1; + mov.u32 %r2, 12; + mov.u64 %rd8, 0; + // begin inline asm + call (%rd3), _rt_buffer_get_64, (%rd4, %r1, %r2, %rd8, %rd8, %rd8, %rd8); + // end inline asm + ld.f32 %f5, [%rd3]; + ld.f32 %f6, [%rd3+4]; + ld.f32 %f7, [%rd3+8]; + st.local.f32 [%rd1], %f5; + st.local.f32 [%rd1+4], %f6; + st.local.f32 [%rd1+8], %f7; + setp.eq.f32 %p1, %f5, 
0f00000000; /* %p1 is true only when %f5 compares equal to zero; the branch below then skips the three zeroing stores. For any other value, including NaN (eq is an ordered comparison), local bytes 0..11 are overwritten with zero. */ + @%p1 bra $L__BB0_2; + + mov.u32 %r3, 0; + st.local.u32 [%rd1], %r3; + st.local.u32 [%rd1+4], %r3; + st.local.u32 [%rd1+8], %r3; + +$L__BB0_2: + /* Resolve the callable: the u32 stored in sysBRDFEval and a zero u64 go to _rt_callable_program_from_id_v2_64, which yields the indirect call target %rd11. */ ld.global.u32 %r4, [sysBRDFEval]; + // begin inline asm + call (%rd11), _rt_callable_program_from_id_v2_64, (%r4, %rd8); + // end inline asm + add.u64 %rd15, %SP, 12; + { // callseq 0, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd9; + .param .b64 param1; + st.param.b64 [param1+0], %rd15; + .param .align 4 .b8 retval0[12]; + prototype_0 : .callprototype (.param .align 4 .b8 _[12]) _ (.param .b64 _, .param .b64 _); + call (retval0), + %rd11, + ( + param0, + param1 + ) + , prototype_0; + ld.param.f32 %f1, [retval0+0]; + ld.param.f32 %f2, [retval0+4]; + ld.param.f32 %f3, [retval0+8]; + } // callseq 0 + /* %rd2 is the .local address of the same bytes whose generic address (%SP+12) was passed as param1. setp.leu is true when %f4 is less than or equal to zero or unordered, so the multiplies below run only for %f4 greater than zero. */ add.u64 %rd2, %SPL, 12; + ld.local.f32 %f4, [%rd2]; + setp.leu.f32 %p2, %f4, 0f00000000; + @%p2 bra $L__BB0_4; + + mul.f32 %f8, %f1, %f4; + st.local.f32 [%rd2], %f8; + ld.local.f32 %f9, [%rd2+4]; + mul.f32 %f10, %f2, %f9; + st.local.f32 [%rd2+4], %f10; + ld.local.f32 %f11, [%rd2+8]; + mul.f32 %f12, %f3, %f11; + st.local.f32 [%rd2+8], %f12; + +$L__BB0_4: + ret; + +} + |