diff options
Diffstat (limited to 'ptx/src/ptx.lalrpop')
-rw-r--r-- | ptx/src/ptx.lalrpop | 63 |
1 files changed, 44 insertions, 19 deletions
diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop index cd1c642..6c231b2 100644 --- a/ptx/src/ptx.lalrpop +++ b/ptx/src/ptx.lalrpop @@ -142,6 +142,7 @@ match { "atom", "bar", "barrier", + "bfe", "bra", "brev", "call", @@ -166,6 +167,7 @@ match { "or", "popc", "rcp", + "rem", "ret", "rsqrt", "selp", @@ -179,6 +181,7 @@ match { "sub", "texmode_independent", "texmode_unified", + "xor", } else { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#identifiers r"[a-zA-Z][a-zA-Z0-9_$]*|[_$%][a-zA-Z0-9_$]+" => ID, @@ -192,6 +195,7 @@ ExtendedID : &'input str = { "atom", "bar", "barrier", + "bfe", "bra", "brev", "call", @@ -216,6 +220,7 @@ ExtendedID : &'input str = { "or", "popc", "rcp", + "rem", "ret", "rsqrt", "selp", @@ -229,6 +234,7 @@ ExtendedID : &'input str = { "sub", "texmode_independent", "texmode_unified", + "xor", ID } @@ -708,6 +714,9 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = { InstClz, InstBrev, InstPopc, + InstXor, + InstRem, + InstBfe, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld @@ -874,6 +883,13 @@ IntType : ast::IntType = { ".s64" => ast::IntType::S64, }; +IntType3264: ast::IntType = { + ".u32" => ast::IntType::U32, + ".u64" => ast::IntType::U64, + ".s32" => ast::IntType::S32, + ".s64" => ast::IntType::S64, +} + UIntType: ast::UIntType = { ".u16" => ast::UIntType::U16, ".u32" => ast::UIntType::U32, @@ -979,14 +995,14 @@ SetpTypeNoF32: ast::ScalarType = { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-not InstNot: ast::Instruction<ast::ParsedArgParams<'input>> = { - "not" <t:NotType> <a:Arg2> => ast::Instruction::Not(t, a) + "not" <t:BooleanType> <a:Arg2> => ast::Instruction::Not(t, a) }; -NotType: ast::NotType = { - ".pred" => ast::NotType::Pred, - ".b16" => ast::NotType::B16, - ".b32" => ast::NotType::B32, - ".b64" => ast::NotType::B64, +BooleanType: ast::BooleanType = { + ".pred" => ast::BooleanType::Pred, + ".b16" => ast::BooleanType::B16, + ".b32" => ast::BooleanType::B32, + ".b64" => ast::BooleanType::B64, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-at @@ -1294,19 +1310,12 @@ SignedIntType: ast::ScalarType = { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-or InstOr: ast::Instruction<ast::ParsedArgParams<'input>> = { - "or" <d:OrAndType> <a:Arg3> => ast::Instruction::Or(d, a), + "or" <d:BooleanType> <a:Arg3> => ast::Instruction::Or(d, a), }; -OrAndType: ast::OrAndType = { - ".pred" => ast::OrAndType::Pred, - ".b16" => ast::OrAndType::B16, - ".b32" => ast::OrAndType::B32, - ".b64" => ast::OrAndType::B64, -} - // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-and InstAnd: ast::Instruction<ast::ParsedArgParams<'input>> = { - "and" <d:OrAndType> <a:Arg3> => ast::Instruction::And(d, a), + "and" <d:BooleanType> <a:Arg3> => ast::Instruction::And(d, a), }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rcp @@ -1447,7 +1456,7 @@ InstAtom: ast::Instruction<ast::ParsedArgParams<'input>> = { }; ast::Instruction::Atom(details,a) }, - "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op: AtomUIntOp> <typ:AtomUIntType> <a:Arg3Atom> => { + "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op: AtomUIntOp> <typ:UIntType3264> <a:Arg3Atom> => { let details = ast::AtomDetails { semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), scope: scope.unwrap_or(ast::MemScope::Gpu), @@ -1456,7 +1465,7 @@ InstAtom: ast::Instruction<ast::ParsedArgParams<'input>> = { }; ast::Instruction::Atom(details,a) }, - "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op: AtomSIntOp> <typ:AtomSIntType> <a:Arg3Atom> => { + "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op: AtomSIntOp> <typ:SIntType3264> <a:Arg3Atom> => { let details = ast::AtomDetails { semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), scope: scope.unwrap_or(ast::MemScope::Gpu), @@ -1515,12 +1524,12 @@ BitType: ast::BitType = { ".b64" => ast::BitType::B64, } -AtomUIntType: ast::UIntType = { +UIntType3264: ast::UIntType = { ".u32" => ast::UIntType::U32, ".u64" => ast::UIntType::U64, } -AtomSIntType: ast::SIntType = { +SIntType3264: ast::SIntType = { ".s32" => ast::SIntType::S32, ".s64" => ast::SIntType::S64, } @@ -1664,6 +1673,22 @@ InstPopc: ast::Instruction<ast::ParsedArgParams<'input>> = { "popc" <typ:BitType> <arg:Arg2> => ast::Instruction::Popc{ <> } } +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-xor +InstXor: ast::Instruction<ast::ParsedArgParams<'input>> = { + "xor" <typ:BooleanType> <arg:Arg3> => ast::Instruction::Xor{ <> } +} + +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfe +InstBfe: ast::Instruction<ast::ParsedArgParams<'input>> = { + "bfe" <typ:IntType3264> <arg:Arg4> => ast::Instruction::Bfe{ <> } +} + +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-rem +InstRem: ast::Instruction<ast::ParsedArgParams<'input>> = { + "rem" <typ:IntType> <arg:Arg3> => ast::Instruction::Rem{ <> } +} + + NegTypeFtz: ast::ScalarType = { ".f16" => ast::ScalarType::F16, ".f16x2" => ast::ScalarType::F16x2, |