1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
|
// Exercise as many features of vector types as possible
.version 6.5
.target sm_60
.address_size 64
.func (.reg .v2 .u32 output) impl(
.reg .v2 .u32 input
)
{
.reg .v2 .u32 temp_v;
.reg .u32 temp1;
.reg .u32 temp2;
mov.u32 temp1, input.x;
mov.u32 temp2, input.y;
add.u32 temp2, temp1, temp2;
mov.u32 temp_v.x, temp2;
mov.u32 temp_v.y, temp2;
mov.u32 temp_v.x, temp_v.y;
mov.v2.u32 output, temp_v;
ret;
}
.visible .entry vector(
.param .u64 input_p,
.param .u64 output_p
)
{
.reg .u64 in_addr;
.reg .u64 out_addr;
.reg .v2 .u32 temp;
.reg .u32 temp1;
.reg .u32 temp2;
.reg .b64 packed;
ld.param.u64 in_addr, [input_p];
ld.param.u64 out_addr, [output_p];
ld.v2.u32 temp, [in_addr];
call (temp), impl, (temp);
mov.b64 packed, temp;
st.v2.u32 [out_addr], temp;
ret;
}
|