1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
import os, sys, subprocess
# State-space qualifiers to probe; each one is placed in front of the
# variable declaration by the generator functions below.
SPACE = [
    ".reg",
    ".sreg",
    ".param",
    ".param::entry",
    ".param::func",
    ".local",
    ".global",
    ".const",
    ".shared",
    ".shared::cta",
    ".shared::cluster",
]

# Text emitted after the variable name: nothing, a scalar initializer,
# an array extent, or an array extent with an initializer list.
TYPE_AND_INIT = [
    "",
    " = 1",
    "[1]",
    "[1] = {1}",
]

# Suffix glued directly onto the variable name (none, or "<1>").
MULTIVAR = [
    "",
    "<1>",
]

# Vector qualifier placed between the state space and the ".b32" type.
VECTOR = [
    "",
    ".v2",
]

# Module preamble that every generated PTX snippet starts with.
HEADER = (
    "\n"
    ".version 8.5\n"
    ".target sm_90\n"
    ".address_size 64\n"
)
def directive(space, variable, multivar, vector):
    """Build a PTX module containing one module-scope variable declaration.

    Args:
        space: State-space qualifier written first on the declaration line.
        variable: Text written after the variable name (generate() passes
            an entry of TYPE_AND_INIT here, e.g. "[1] = {1}").
        multivar: Suffix appended directly to the name ``variable``.
        vector: Vector qualifier written before the ``.b32`` type.

    Returns:
        HEADER followed by the declaration line, newline-terminated.
    """
    declaration = f"{space} {vector} .b32 variable{multivar} {variable};"
    # The trailing empty item yields the final newline of the snippet.
    return "\n".join((HEADER, declaration, ""))
def entry_arg(space, variable, multivar, vector):
    """Build a PTX module whose ``.entry`` takes the variable as a parameter.

    Args:
        space: State-space qualifier written first in the parameter.
        variable: Text written after the variable name (generate() passes
            an entry of TYPE_AND_INIT here).
        multivar: Suffix appended directly to the name ``variable``.
        vector: Vector qualifier written before the ``.b32`` type.

    Returns:
        HEADER followed by an ``.entry foobar`` definition whose body is a
        single ``ret;``, newline-terminated.
    """
    parameter = f"{space} {vector} .b32 variable{multivar} {variable}"
    snippet_lines = (
        HEADER,
        f".entry foobar ( {parameter})",
        "{",
        "ret;",
        "}",
        "",  # produces the trailing newline
    )
    return "\n".join(snippet_lines)
def fn_arg(space, variable, multivar, vector):
    """Build a PTX module whose ``.func`` takes the variable as a parameter.

    Args:
        space: State-space qualifier written first in the parameter.
        variable: Text written after the variable name (generate() passes
            an entry of TYPE_AND_INIT here).
        multivar: Suffix appended directly to the name ``variable``.
        vector: Vector qualifier written before the ``.b32`` type.

    Returns:
        HEADER followed by a ``.func foobar`` definition whose body is a
        single ``ret;``, newline-terminated.
    """
    parameter = f"{space} {vector} .b32 variable{multivar} {variable}"
    snippet_lines = (
        HEADER,
        f".func foobar ( {parameter})",
        "{",
        "ret;",
        "}",
        "",  # produces the trailing newline
    )
    return "\n".join(snippet_lines)
def fn_body(space, variable, multivar, vector):
    """Build a PTX module declaring the variable inside a function body.

    Args:
        space: State-space qualifier written first on the declaration line.
        variable: Text written after the variable name (generate() passes
            an entry of TYPE_AND_INIT here).
        multivar: Suffix appended directly to the name ``variable``.
        vector: Vector qualifier written before the ``.b32`` type.

    Returns:
        HEADER followed by a parameterless ``.func foobar`` whose body is
        the declaration and then ``ret;``, newline-terminated.
    """
    declaration = f"{space} {vector} .b32 variable{multivar} {variable};"
    snippet_lines = (
        HEADER,
        ".func foobar ()",
        "{",
        declaration,
        "ret;",
        "}",
        "",  # produces the trailing newline
    )
    return "\n".join(snippet_lines)
# Default assembler location; kept identical to the path this script has
# always used so existing callers behave the same.
_DEFAULT_PTXAS = "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6\\bin\\ptxas.exe"


def generate(generator, ptxas=_DEFAULT_PTXAS):
    """Try every declaration variant with ptxas and print the accepted ones.

    Every combination of SPACE x TYPE_AND_INIT x MULTIVAR x VECTOR is
    rendered to PTX text by ``generator`` and handed to ptxas. A combination
    counts as legal when ptxas exits with status 0.

    Args:
        generator: Callable ``(space, init, multi, vector) -> str`` returning
            the PTX text to assemble (directive, entry_arg, fn_arg, fn_body).
        ptxas: Path or command name of the ptxas executable. Defaults to the
            CUDA 12.6 Windows install location that was previously
            hard-coded, so other toolkits can be probed without editing
            this function.

    Returns:
        None. Prints ``generator.__name__`` and then the list of accepted
        ``(space, vector, init, multi)`` tuples.

    Raises:
        OSError: Propagated from ``subprocess.call`` when ``ptxas`` cannot
            be launched (e.g. the executable does not exist).
    """
    # Local import so this block does not depend on the file's import line.
    import itertools

    legal = []
    # product() walks the combinations in the same order as the nested
    # loops it replaces: space outermost, vector innermost.
    combinations = itertools.product(SPACE, TYPE_AND_INIT, MULTIVAR, VECTOR)
    for space, init, multi, vector in combinations:
        ptx = generator(space, init, multi, vector)
        # The PTX text itself is passed on the command line after "-ias"
        # (input-as-string), so no temporary file is written. Only stdout
        # is silenced; stderr stays attached to the console.
        status = subprocess.call(
            [ptxas, "-arch", "sm_90", "-ias", ptx],
            stdout=subprocess.DEVNULL,
        )
        if status == 0:
            # NOTE: the result tuple order differs from the argument order.
            legal.append((space, vector, init, multi))
    print(generator.__name__)
    print(legal)
def main():
    """Run the ptxas probe once for each declaration context, in order."""
    for generator in (directive, entry_arg, fn_arg, fn_body):
        generate(generator)
# Run the probes only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
|