Add support for fma instruction

author: Andrzej Janik <[email protected]> 2020-10-26 23:46:28 +0100
committer: Andrzej Janik <[email protected]> 2020-10-26 23:46:28 +0100
commit: 861116f223081528cf1e32f5e1eddb733ac00241 (patch)
tree: 44c49ae69754a814cb30f9af916a4030b860f5c6 /ptx
parent: c8dadca7d259996e3eb170f5b34cfb054dba4a11 (diff)
download: ZLUDA-861116f223081528cf1e32f5e1eddb733ac00241.tar.gz
ZLUDA-861116f223081528cf1e32f5e1eddb733ac00241.zip
5 files changed, 142 insertions, 3 deletions
diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop
index 025f0be..dfe5a5f 100644
--- a/ptx/src/ptx.lalrpop
+++ b/ptx/src/ptx.lalrpop
@@ -131,6 +131,7 @@ match {
     "cvt",
     "cvta",
     "debug",
+    "fma",
     "ld",
     "mad",
     "map_f64_to_f32",
@@ -166,6 +167,7 @@ ExtendedID : &'input str = {
     "cvt",
     "cvta",
     "debug",
+    "fma",
     "ld",
     "mad",
     "map_f64_to_f32",
@@ -1185,7 +1187,8 @@ InstAbs: ast::Instruction<ast::ParsedArgParams<'input>> = {
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mad
 InstMad: ast::Instruction<ast::ParsedArgParams<'input>> = {
     "mad" <d:MulDetails> <a:Arg4> => ast::Instruction::Mad(d, a),
-    "mad" ".hi" ".sat" ".s32" => todo!()
+    "mad" ".hi" ".sat" ".s32" => todo!(), // this form is recognized but still unimplemented (todo!())
+    "fma" <f:ArithFloatMustRound> <a:Arg4> => ast::Instruction::Mad(ast::MulDetails::Float(f), a), // "fma" produces the same Mad instruction, always with MulDetails::Float
 };
 
 SignedIntType: ast::ScalarType = {
@@ -1333,6 +1336,33 @@ ArithFloat: ast::ArithFloat = {
     },
 }
 
+ArithFloatMustRound: ast::ArithFloat = { // float modifiers with a mandatory rounding mode: every alternative sets `rounding` to Some(..)
+    <rn:RoundingModeFloat> <ftz:".ftz"?> <sat:".sat"?> ".f32" => ast::ArithFloat {
+        typ: ast::FloatType::F32,
+        rounding: Some(rn),
+        flush_to_zero: Some(ftz.is_some()),
+        saturate: sat.is_some(),
+    }, // .f32 form: any RoundingModeFloat, then optional ".ftz" and ".sat"
+    <rn:RoundingModeFloat> ".f64" => ast::ArithFloat {
+        typ: ast::FloatType::F64,
+        rounding: Some(rn),
+        flush_to_zero: None,
+        saturate: false,
+    }, // .f64 form: rounding mode only; ".ftz" and ".sat" are not accepted by this alternative
+    ".rn" <ftz:".ftz"?> <sat:".sat"?> ".f16" => ast::ArithFloat {
+        typ: ast::FloatType::F16,
+        rounding: Some(ast::RoundingMode::NearestEven),
+        flush_to_zero: Some(ftz.is_some()),
+        saturate: sat.is_some(),
+    }, // .f16 form: only ".rn" is accepted, recorded as RoundingMode::NearestEven
+    ".rn" <ftz:".ftz"?> <sat:".sat"?> ".f16x2" => ast::ArithFloat {
+        typ: ast::FloatType::F16x2,
+        rounding: Some(ast::RoundingMode::NearestEven),
+        flush_to_zero: Some(ftz.is_some()),
+        saturate: sat.is_some(),
+    }, // .f16x2 form: same modifiers as the .f16 form
+}
+
 Operand: ast::Operand<&'input str> = {
     <r:ExtendedID> => ast::Operand::Reg(r),
     <r:ExtendedID> "+" <offset:S32Num> => ast::Operand::RegOffset(r, offset),
diff --git a/ptx/src/test/spirv_run/fma.ptx b/ptx/src/test/spirv_run/fma.ptx
new file mode 100644
index 0000000..171d306
--- /dev/null
+++ b/ptx/src/test/spirv_run/fma.ptx
@@ -0,0 +1,25 @@
+.version 6.5
+.target sm_30
+.address_size 64
+
+.visible .entry fma( // test kernel: loads three f32 values through `input`, stores one f32 through `output`
+	.param .u64 input,
+	.param .u64 output
+)
+{
+	.reg .u64 	    in_addr;
+    .reg .u64 	    out_addr;
+    .reg .f32 	    temp1;
+    .reg .f32 	    temp2;
+    .reg .f32 	    temp3;
+
+	ld.param.u64 	in_addr, [input];
+    ld.param.u64 	out_addr, [output];
+
+    ld.f32          temp1, [in_addr]; // first operand, byte offset 0
+    ld.f32          temp2, [in_addr+4]; // second operand, byte offset 4
+    ld.f32          temp3, [in_addr+8]; // third operand, byte offset 8
+	fma.rn.f32		temp1, temp1, temp2, temp3; // temp1 = temp1 * temp2 + temp3 (instruction under test)
+    st.f32          [out_addr], temp1; // write the single result
+	ret;
+}
diff --git a/ptx/src/test/spirv_run/fma.spvtxt b/ptx/src/test/spirv_run/fma.spvtxt
new file mode 100644
index 0000000..734bf0f
--- /dev/null
+++ b/ptx/src/test/spirv_run/fma.spvtxt
@@ -0,0 +1,72 @@
+; SPIR-V
+; Version: 1.3
+; Generator: rspirv
+; Bound: 45
+OpCapability GenericPointer
+OpCapability Linkage
+OpCapability Addresses
+OpCapability Kernel
+OpCapability Int8
+OpCapability Int16
+OpCapability Int64
+OpCapability Float16
+OpCapability Float64
+OpCapability FunctionFloatControlINTEL
+OpExtension "SPV_INTEL_float_controls2"
+%37 = OpExtInstImport "OpenCL.std"
+OpMemoryModel Physical64 OpenCL
+OpEntryPoint Kernel %1 "fma"
+OpDecorate %1 FunctionDenormModeINTEL 32 Preserve
+%38 = OpTypeVoid
+%39 = OpTypeInt 64 0
+%40 = OpTypeFunction %38 %39 %39
+%41 = OpTypePointer Function %39
+%42 = OpTypeFloat 32
+%43 = OpTypePointer Function %42
+%44 = OpTypePointer Generic %42
+%27 = OpConstant  %39  4
+%29 = OpConstant  %39  8
+%1 = OpFunction  %38  None %40
+%9 = OpFunctionParameter  %39 
+%10 = OpFunctionParameter  %39 
+%35 = OpLabel
+%2 = OpVariable  %41  Function
+%3 = OpVariable  %41  Function
+%4 = OpVariable  %41  Function
+%5 = OpVariable  %41  Function
+%6 = OpVariable  %43  Function
+%7 = OpVariable  %43  Function
+%8 = OpVariable  %43  Function
+OpStore %2 %9
+OpStore %3 %10
+%12 = OpLoad  %39  %2
+%11 = OpCopyObject  %39  %12
+OpStore %4 %11
+%14 = OpLoad  %39  %3
+%13 = OpCopyObject  %39  %14
+OpStore %5 %13
+%16 = OpLoad  %39  %4
+%31 = OpConvertUToPtr  %44  %16
+%15 = OpLoad  %42  %31
+OpStore %6 %15
+%18 = OpLoad  %39  %4
+%28 = OpIAdd  %39  %18 %27
+%32 = OpConvertUToPtr  %44  %28
+%17 = OpLoad  %42  %32
+OpStore %7 %17
+%20 = OpLoad  %39  %4
+%30 = OpIAdd  %39  %20 %29
+%33 = OpConvertUToPtr  %44  %30
+%19 = OpLoad  %42  %33
+OpStore %8 %19
+%22 = OpLoad  %42  %6
+%23 = OpLoad  %42  %7
+%24 = OpLoad  %42  %8
+%21 = OpExtInst  %42  %37 mad %22 %23 %24
+OpStore %6 %21
+%25 = OpLoad  %39  %5
+%26 = OpLoad  %42  %6
+%34 = OpConvertUToPtr  %44  %25
+OpStore %34 %26
+OpReturn
+OpFunctionEnd
+\ No newline at end of file
diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs
index f336055..98b9630 100644
--- a/ptx/src/test/spirv_run/mod.rs
+++ b/ptx/src/test/spirv_run/mod.rs
@@ -91,6 +91,7 @@ test_ptx!(constant_f32, [10f32], [5f32]);
 test_ptx!(constant_negative, [-101i32], [101i32]);
 test_ptx!(and, [6u32, 3u32], [2u32]);
 test_ptx!(selp, [100u16, 200u16], [200u16]);
+test_ptx!(fma,  [2f32, 3f32, 5f32], [11f32]); // fma.ptx multiplies the first two inputs and adds the third: 2 * 3 + 5 = 11
 
 struct DisplayError<T: Debug> {
     err: T,
diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs
index 9d73742..a7025b1 100644
--- a/ptx/src/translate.rs
+++ b/ptx/src/translate.rs
@@ -2343,7 +2343,9 @@ fn emit_function_body_ops(
                     ast::MulDetails::Unsigned(ref desc) => {
                         emit_mad_uint(builder, map, opencl, desc, arg)?
                     }
-                    ast::MulDetails::Float(desc) => emit_mad_float(builder, map, desc, arg)?,
+                    ast::MulDetails::Float(desc) => {
+                        emit_mad_float(builder, map, opencl, desc, arg)?
+                    }
                 },
                 ast::Instruction::Or(t, a) => {
                     let result_type = map.get_or_add_scalar(builder, ast::ScalarType::from(*t));
@@ -2560,10 +2562,19 @@ fn emit_mad_sint(
 fn emit_mad_float(
     builder: &mut dr::Builder,
     map: &mut TypeWordMap,
+    opencl: spirv::Word, // passed unchanged to ext_inst below as the extended instruction set to use
     desc: &ast::ArithFloat,
     arg: &ast::Arg4<ExpandedArgParams>,
 ) -> Result<(), dr::Error> {
-    todo!()
+    let inst_type = map.get_or_add(builder, SpirvType::from(ast::ScalarType::from(desc.typ))); // result type comes from desc.typ; NOTE(review): desc.rounding, desc.flush_to_zero and desc.saturate are never read here
+    builder.ext_inst( // emits a single extended instruction writing arg.dst from the three sources
+        inst_type,
+        Some(arg.dst),
+        opencl,
+        spirv::CLOp::mad as spirv::Word, // NOTE(review): the grammar parses PTX "fma" into Instruction::Mad, so fma lands here too; OpenCL mad has looser precision guarantees than a fused multiply-add — consider CLOp::fma, TODO confirm
+        [arg.src1, arg.src2, arg.src3],
+    )?; // a builder error is returned to the caller
+    Ok(())
 }
 
 fn emit_add_float(
author	Andrzej Janik <[email protected]>	2020-10-26 23:46:28 +0100
committer	Andrzej Janik <[email protected]>	2020-10-26 23:46:28 +0100
commit	861116f223081528cf1e32f5e1eddb733ac00241 (patch)
tree	44c49ae69754a814cb30f9af916a4030b860f5c6 /ptx
parent	c8dadca7d259996e3eb170f5b34cfb054dba4a11 (diff)
download	ZLUDA-861116f223081528cf1e32f5e1eddb733ac00241.tar.gz ZLUDA-861116f223081528cf1e32f5e1eddb733ac00241.zip