Implement rcp instruction

author: Andrzej Janik <[email protected]> 2020-10-25 11:21:51 +0100
committer: Andrzej Janik <[email protected]> 2020-10-25 11:21:51 +0100
commit: 6480cccc4fb129eb0f9bfd0a0ade6895d04ff55e (patch)
tree: 07d9f8181910e31d0bf3418af96edaf1209f6a3e /ptx/src/ptx.lalrpop
parent: eb9053a42f7246e93bd12ee557de700ddade347c (diff)
download: ZLUDA-6480cccc4fb129eb0f9bfd0a0ade6895d04ff55e.tar.gz
ZLUDA-6480cccc4fb129eb0f9bfd0a0ade6895d04ff55e.zip
1 file changed, 29 insertions, 0 deletions
diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop
index c29d16b..a132705 100644
--- a/ptx/src/ptx.lalrpop
+++ b/ptx/src/ptx.lalrpop
@@ -35,6 +35,7 @@ match {
     ".address_size",
     ".align",
     ".and",
+    ".approx",
     ".b16",
     ".b32",
     ".b64",
@@ -134,6 +135,7 @@ match {
     "mul",
     "not",
     "or",
+    "rcp",
     "ret",
     "setp",
     "shl",
@@ -166,6 +168,7 @@ ExtendedID : &'input str = {
     "mul",
     "not",
     "or",
+    "rcp",
     "ret",
     "setp",
     "shl",
@@ -542,6 +545,7 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
     InstSub,
     InstMin,
     InstMax,
+    InstRcp
 };
 
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
@@ -1119,6 +1123,31 @@ OrType: ast::OrType = {
     ".b64" => ast::OrType::B64,
 }
 
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rcp
+// Grammar for the PTX `rcp` instruction in its `.f32` and `.f64` forms.
+// Both alternatives produce `ast::Instruction::Rcp`, pairing an
+// `ast::RcpDetails` built from the parsed modifiers with the operands
+// matched by `Arg2`.
+InstRcp: ast::Instruction<ast::ParsedArgParams<'input>> = {
+    // `.f32`: `.approx` or a rounding mode, followed by an optional `.ftz`.
+    "rcp" <mode:RcpRoundingMode> <flush:".ftz"?> ".f32" <args:Arg2> =>
+        ast::Instruction::Rcp(
+            ast::RcpDetails { rounding: mode, flush_to_zero: flush.is_some(), is_f64: false },
+            args,
+        ),
+    // `.f64`: a rounding mode is required and `flush_to_zero` is always false.
+    "rcp" <mode:RoundingModeFloat> ".f64" <args:Arg2> =>
+        ast::Instruction::Rcp(
+            ast::RcpDetails { rounding: Some(mode), flush_to_zero: false, is_f64: true },
+            args,
+        )
+};
+
+RcpRoundingMode: Option<ast::RoundingMode> = { // `.approx` is represented as `None`
+    <RoundingModeFloat> => Some(<>),
+    ".approx" => None,
+};
+
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-sub
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sub
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-sub
author	Andrzej Janik <[email protected]>	2020-10-25 11:21:51 +0100
committer	Andrzej Janik <[email protected]>	2020-10-25 11:21:51 +0100
commit	6480cccc4fb129eb0f9bfd0a0ade6895d04ff55e (patch)
tree	07d9f8181910e31d0bf3418af96edaf1209f6a3e /ptx/src/ptx.lalrpop
parent	eb9053a42f7246e93bd12ee557de700ddade347c (diff)
download	ZLUDA-6480cccc4fb129eb0f9bfd0a0ade6895d04ff55e.tar.gz ZLUDA-6480cccc4fb129eb0f9bfd0a0ade6895d04ff55e.zip