summary refs log tree commit diff homepage
path: root/ptx/src/ptx.lalrpop
diff options
context:
space:
mode:
Diffstat (limited to 'ptx/src/ptx.lalrpop')
-rw-r--r-- ptx/src/ptx.lalrpop 34
1 files changed, 29 insertions, 5 deletions
diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop
index 31c2356..cd1c642 100644
--- a/ptx/src/ptx.lalrpop
+++ b/ptx/src/ptx.lalrpop
@@ -143,7 +143,9 @@ match {
"bar",
"barrier",
"bra",
+ "brev",
"call",
+ "clz",
"cos",
"cvt",
"cvta",
@@ -162,6 +164,7 @@ match {
"neg",
"not",
"or",
+ "popc",
"rcp",
"ret",
"rsqrt",
@@ -190,7 +193,9 @@ ExtendedID : &'input str = {
"bar",
"barrier",
"bra",
+ "brev",
"call",
+ "clz",
"cos",
"cvt",
"cvta",
@@ -209,6 +214,7 @@ ExtendedID : &'input str = {
"neg",
"not",
"or",
+ "popc",
"rcp",
"ret",
"rsqrt",
@@ -699,6 +705,9 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
InstCos,
InstLg2,
InstEx2,
+ InstClz,
+ InstBrev,
+ InstPopc,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
@@ -1395,7 +1404,7 @@ InstBar: ast::Instruction<ast::ParsedArgParams<'input>> = {
// * Operation .dec requires .u32 type for instruction
// Otherwise as documented
InstAtom: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op:AtomBitOp> <typ:AtomBitType> <a:Arg3Atom> => {
+ "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op:AtomBitOp> <typ:BitType> <a:Arg3Atom> => {
let details = ast::AtomDetails {
semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
scope: scope.unwrap_or(ast::MemScope::Gpu),
@@ -1459,7 +1468,7 @@ InstAtom: ast::Instruction<ast::ParsedArgParams<'input>> = {
}
InstAtomCas: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".cas" <typ:AtomBitType> <a:Arg4Atom> => {
+ "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".cas" <typ:BitType> <a:Arg4Atom> => {
let details = ast::AtomCasDetails {
semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
scope: scope.unwrap_or(ast::MemScope::Gpu),
@@ -1501,7 +1510,7 @@ AtomSIntOp: ast::AtomSIntOp = {
".max" => ast::AtomSIntOp::Max,
}
-AtomBitType: ast::BitType = {
+BitType: ast::BitType = {
".b32" => ast::BitType::B32,
".b64" => ast::BitType::B64,
}
@@ -1640,6 +1649,21 @@ InstEx2: ast::Instruction<ast::ParsedArgParams<'input>> = {
},
}
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-clz
+InstClz: ast::Instruction<ast::ParsedArgParams<'input>> = {
+ "clz" <typ:BitType> <arg:Arg2> => ast::Instruction::Clz{ <> }
+}
+
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-brev
+InstBrev: ast::Instruction<ast::ParsedArgParams<'input>> = {
+ "brev" <typ:BitType> <arg:Arg2> => ast::Instruction::Brev{ <> }
+}
+
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-popc
+InstPopc: ast::Instruction<ast::ParsedArgParams<'input>> = {
+ "popc" <typ:BitType> <arg:Arg2> => ast::Instruction::Popc{ <> }
+}
+
NegTypeFtz: ast::ScalarType = {
".f16" => ast::ScalarType::F16,
".f16x2" => ast::ScalarType::F16x2,
@@ -1858,7 +1882,7 @@ Section = {
};
SectionDwarfLines: () = {
- BitType Comma<U32Num>,
+ AnyBitType Comma<U32Num>,
".b32" SectionLabel,
".b64" SectionLabel,
".b32" SectionLabel "+" U32Num,
@@ -1870,7 +1894,7 @@ SectionLabel = {
DotID
};
-BitType = {
+AnyBitType = {
".b8", ".b16", ".b32", ".b64"
};