/* This collection of functions is here to assist with debugging You use it by manually pasting into a module.ptx that was generated by zluda_dump and inspecting content of additional debug buffer in replay.py */ .func debug_dump_from_thread_16(.reg.b64 debug_addr, .reg.u32 global_id_0, .reg.b16 value) { .reg.u32 local_id; mov.u32 local_id, %tid.x; .reg.u32 local_size; mov.u32 local_size, %ntid.x; .reg.u32 group_id; mov.u32 group_id, %ctaid.x; .reg.b32 global_id; mad.lo.u32 global_id, group_id, local_size, local_id; .reg.pred should_exit; setp.ne.u32 should_exit, global_id, global_id_0; @should_exit bra END; .reg.b32 index; ld.global.u32 index, [debug_addr]; st.global.u32 [debug_addr], index+1; .reg.u64 st_offset; cvt.u64.u32 st_offset, index; mad.lo.u64 st_offset, st_offset, 2, 4; // sizeof(b16), sizeof(32) add.u64 debug_addr, debug_addr, st_offset; st.global.u16 [debug_addr], value; END: ret; } .func debug_dump_from_thread_32(.reg.b64 debug_addr, .reg.u32 global_id_0, .reg.b32 value) { .reg.u32 local_id; mov.u32 local_id, %tid.x; .reg.u32 local_size; mov.u32 local_size, %ntid.x; .reg.u32 group_id; mov.u32 group_id, %ctaid.x; .reg.b32 global_id; mad.lo.u32 global_id, group_id, local_size, local_id; .reg.pred should_exit; setp.ne.u32 should_exit, global_id, global_id_0; @should_exit bra END; .reg.b32 index; ld.global.u32 index, [debug_addr]; st.global.u32 [debug_addr], index+1; .reg.u64 st_offset; cvt.u64.u32 st_offset, index; mad.lo.u64 st_offset, st_offset, 4, 4; // sizeof(b32), sizeof(32) add.u64 debug_addr, debug_addr, st_offset; st.global.u32 [debug_addr], value; END: ret; }