diff options
author | Andrzej Janik <[email protected]> | 2020-10-25 11:21:51 +0100 |
---|---|---|
committer | Andrzej Janik <[email protected]> | 2020-10-25 11:21:51 +0100 |
commit | 6480cccc4fb129eb0f9bfd0a0ade6895d04ff55e (patch) | |
tree | 07d9f8181910e31d0bf3418af96edaf1209f6a3e /ptx/src/ptx.lalrpop | |
parent | eb9053a42f7246e93bd12ee557de700ddade347c (diff) | |
download | ZLUDA-6480cccc4fb129eb0f9bfd0a0ade6895d04ff55e.tar.gz ZLUDA-6480cccc4fb129eb0f9bfd0a0ade6895d04ff55e.zip |
Implement rcp instruction
Diffstat (limited to 'ptx/src/ptx.lalrpop')
-rw-r--r-- | ptx/src/ptx.lalrpop | 29 |
1 files changed, 29 insertions, 0 deletions
```diff
diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop
index c29d16b..a132705 100644
--- a/ptx/src/ptx.lalrpop
+++ b/ptx/src/ptx.lalrpop
@@ -35,6 +35,7 @@ match {
     ".address_size",
     ".align",
     ".and",
+    ".approx",
     ".b16",
     ".b32",
     ".b64",
@@ -134,6 +135,7 @@ match {
     "mul",
     "not",
     "or",
+    "rcp",
     "ret",
     "setp",
     "shl",
@@ -166,6 +168,7 @@ ExtendedID : &'input str = {
     "mul",
     "not",
     "or",
+    "rcp",
     "ret",
     "setp",
     "shl",
@@ -542,6 +545,7 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
     InstSub,
     InstMin,
     InstMax,
+    InstRcp
 };
 
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
@@ -1119,6 +1123,31 @@ OrType: ast::OrType = {
     ".b64" => ast::OrType::B64,
 }
 
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rcp
+InstRcp: ast::Instruction<ast::ParsedArgParams<'input>> = {
+    "rcp" <rounding:RcpRoundingMode> <ftz:".ftz"?> ".f32" <a:Arg2> => {
+        let details = ast::RcpDetails {
+            rounding,
+            flush_to_zero: ftz.is_some(),
+            is_f64: false,
+        };
+        ast::Instruction::Rcp(details, a)
+    },
+    "rcp" <rn:RoundingModeFloat> ".f64" <a:Arg2> => {
+        let details = ast::RcpDetails {
+            rounding: Some(rn),
+            flush_to_zero: false,
+            is_f64: true,
+        };
+        ast::Instruction::Rcp(details, a)
+    }
+};
+
+RcpRoundingMode: Option<ast::RoundingMode> = {
+    ".approx" => None,
+    <r:RoundingModeFloat> => Some(r)
+};
+
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-sub
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sub
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-sub
```