aboutsummaryrefslogtreecommitdiffhomepage
path: root/ptx/src/ptx.lalrpop
diff options
context:
space:
mode:
Diffstat (limited to 'ptx/src/ptx.lalrpop')
-rw-r--r--ptx/src/ptx.lalrpop2198
1 files changed, 0 insertions, 2198 deletions
diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop
deleted file mode 100644
index e3a4022..0000000
--- a/ptx/src/ptx.lalrpop
+++ /dev/null
@@ -1,2198 +0,0 @@
-use crate::ast;
-use crate::ast::UnwrapWithVec;
-use crate::{without_none, vector_index};
-
-use lalrpop_util::ParseError;
-use std::convert::TryInto;
-
-// Grammar parameter: `errors` is a caller-supplied vector. Semantic actions in this grammar
-// push `ParseError::User` values into it and substitute a placeholder result.
-grammar<'err>(errors: &'err mut Vec<ParseError<usize, Token<'input>, ast::PtxError>>);
-
-// The user error type carried inside `ParseError::User`.
-extern {
- type Error = ast::PtxError;
-}
-
-// Lexer definition. Per LALRPOP `match` semantics, each group takes priority over the
-// `else` groups that follow it: fixed punctuation and ".xxx" modifiers first, then the
-// opcode/keyword spellings, and finally the generic ID / DotID patterns.
-match {
- // Whitespace, `//` line comments and `/* */` block comments produce no token.
- r"\s+" => { },
- r"//[^\n\r]*[\n\r]*" => { },
- r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/" => { },
- // Numeric literals. These patterns accept any alphanumeric characters after the prefix;
- // digit validity is only checked when the text is parsed in the *Num rules.
- r"0[fF][0-9a-zA-Z]{8}" => F32NumToken,
- r"0[dD][0-9a-zA-Z]{16}" => F64NumToken,
- r"0[xX][0-9a-zA-Z]+U?" => HexNumToken,
- r"[0-9]+U?" => DecimalNumToken,
- r#""[^"]*""# => String,
- r"[0-9]+\.[0-9]+" => VersionNumber,
- "!",
- "(", ")",
- "+",
- "-",
- ",",
- ".",
- ":",
- ";",
- "@",
- "[", "]",
- "{", "}",
- "<", ">",
- "|",
- "=",
- ".acq_rel",
- ".acquire",
- ".add",
- ".address_size",
- ".align",
- ".aligned",
- ".and",
- ".approx",
- ".b16",
- ".b32",
- ".b64",
- ".b8",
- ".ca",
- ".cas",
- ".cg",
- ".const",
- ".cs",
- ".cta",
- ".cv",
- ".dec",
- ".entry",
- ".eq",
- ".equ",
- ".exch",
- ".extern",
- ".f16",
- ".f16x2",
- ".f32",
- ".f64",
- ".file",
- ".ftz",
- ".full",
- ".func",
- ".ge",
- ".geu",
- ".gl",
- ".global",
- ".gpu",
- ".gt",
- ".gtu",
- ".hi",
- ".hs",
- ".inc",
- ".le",
- ".leu",
- ".lo",
- ".loc",
- ".local",
- ".ls",
- ".lt",
- ".ltu",
- ".lu",
- ".max",
- ".maxnreg",
- ".maxntid",
- ".minnctapersm",
- ".min",
- ".nan",
- ".NaN",
- ".nc",
- ".ne",
- ".neu",
- ".num",
- ".or",
- ".param",
- ".pragma",
- ".pred",
- ".reg",
- ".relaxed",
- ".release",
- ".reqntid",
- ".rm",
- ".rmi",
- ".rn",
- ".rni",
- ".rp",
- ".rpi",
- ".rz",
- ".rzi",
- ".s16",
- ".s32",
- ".s64",
- ".s8" ,
- ".sat",
- ".section",
- ".shared",
- ".sync",
- ".sys",
- ".target",
- ".to",
- ".u16",
- ".u32",
- ".u64",
- ".u8" ,
- ".uni",
- ".v2",
- ".v4",
- ".version",
- ".visible",
- ".volatile",
- ".wb",
- ".weak",
- ".wide",
- ".wt",
- ".xor",
-} else {
- // IF YOU ARE ADDING A NEW TOKEN HERE ALSO ADD IT BELOW TO ExtendedID
- // Opcode and keyword spellings; ExtendedID lets the parser still accept them as names.
- "abs",
- "activemask",
- "add",
- "and",
- "atom",
- "bar",
- "barrier",
- "bfe",
- "bfi",
- "bra",
- "brev",
- "call",
- "clz",
- "cos",
- "cvt",
- "cvta",
- "debug",
- "div",
- "ex2",
- "fma",
- "ld",
- "lg2",
- "mad",
- "map_f64_to_f32",
- "max",
- "membar",
- "min",
- "mov",
- "mul",
- "neg",
- "not",
- "or",
- "popc",
- "prmt",
- "rcp",
- "rem",
- "ret",
- "rsqrt",
- "selp",
- "setp",
- "shl",
- "shr",
- "sin",
- r"sm_[0-9]+" => ShaderModel,
- "sqrt",
- "st",
- "sub",
- "texmode_independent",
- "texmode_unified",
- "xor",
-} else {
- // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#identifiers
- // Lowest priority: generic identifiers and generic dot-prefixed identifiers.
- r"[a-zA-Z][a-zA-Z0-9_$]*|[_$%][a-zA-Z0-9_$]+" => ID,
- r"\.[a-zA-Z][a-zA-Z0-9_$]*" => DotID,
-}
-
-// An identifier in the wide sense: a plain ID, a ShaderModel token, or any of the
-// opcode/keyword spellings from the second lexer group. This list has to be kept in sync
-// with that group (see the note in the `match` block).
-ExtendedID : &'input str = {
- "abs",
- "activemask",
- "add",
- "and",
- "atom",
- "bar",
- "barrier",
- "bfe",
- "bfi",
- "bra",
- "brev",
- "call",
- "clz",
- "cos",
- "cvt",
- "cvta",
- "debug",
- "div",
- "ex2",
- "fma",
- "ld",
- "lg2",
- "mad",
- "map_f64_to_f32",
- "max",
- "membar",
- "min",
- "mov",
- "mul",
- "neg",
- "not",
- "or",
- "popc",
- "prmt",
- "rcp",
- "rem",
- "ret",
- "rsqrt",
- "selp",
- "setp",
- "shl",
- "shr",
- "sin",
- ShaderModel,
- "sqrt",
- "st",
- "sub",
- "texmode_independent",
- "texmode_unified",
- "xor",
- ID
-}
-
-// Splits an integer literal into `(digits, radix, has_U_suffix)`.
-// Hex literals lose their two-character `0x`/`0X` prefix; decimal-looking literals that
-// start with `0` are reported with radix 8. A trailing `U` is removed from the digits.
-NumToken: (&'input str, u32, bool) = {
-    <s:HexNumToken> => {
-        let unsigned = s.ends_with('U');
-        let end = if unsigned { s.len() - 1 } else { s.len() };
-        (&s[2..end], 16, unsigned)
-    },
-    <s:DecimalNumToken> => {
-        let unsigned = s.ends_with('U');
-        let digits = if unsigned { &s[..s.len() - 1] } else { s };
-        let radix = if s.starts_with('0') { 8 } else { 10 };
-        (digits, radix, unsigned)
-    }
-}
-
-// Parses a `0f`/`0F` literal: the characters after the two-character prefix are read as
-// the hexadecimal bit pattern of an f32. If they are not valid hex, the error is pushed
-// to `errors` and 0.0 is returned instead.
-F32Num: f32 = {
-    <s:F32NumToken> => {
-        match u32::from_str_radix(&s[2..], 16) {
-            // `f32::from_bits` reinterprets the bits exactly like the former transmute,
-            // without needing `unsafe`.
-            Ok(bits) => f32::from_bits(bits),
-            Err(err) => {
-                errors.push(ParseError::User { error: ast::PtxError::from(err) });
-                0.0
-            }
-        }
-    }
-}
-
-// Parses a `0d`/`0D` literal: the characters after the two-character prefix are read as
-// the hexadecimal bit pattern of an f64. If they are not valid hex, the error is pushed
-// to `errors` and 0.0 is returned instead.
-F64Num: f64 = {
-    <s:F64NumToken> => {
-        match u64::from_str_radix(&s[2..], 16) {
-            // `f64::from_bits` reinterprets the bits exactly like the former transmute,
-            // without needing `unsafe`.
-            Ok(bits) => f64::from_bits(bits),
-            Err(err) => {
-                errors.push(ParseError::User { error: ast::PtxError::from(err) });
-                0.0
-            }
-        }
-    }
-}
-
-// Integer literal converted to u8. The `U` suffix flag from NumToken is ignored.
-// On a conversion failure the error is recorded in `errors` and 0 is produced.
-U8Num: u8 = {
-    <x:NumToken> => {
-        let (digits, base, _) = x;
-        u8::from_str_radix(digits, base).unwrap_or_else(|err| {
-            errors.push(ParseError::User { error: ast::PtxError::from(err) });
-            0
-        })
-    }
-}
-
-// Integer literal converted to u16. The `U` suffix flag from NumToken is ignored.
-// On a conversion failure the error is recorded in `errors` and 0 is produced.
-U16Num: u16 = {
-    <x:NumToken> => {
-        let (digits, base, _) = x;
-        u16::from_str_radix(digits, base).unwrap_or_else(|err| {
-            errors.push(ParseError::User { error: ast::PtxError::from(err) });
-            0
-        })
-    }
-}
-
-// Integer literal converted to u32. The `U` suffix flag from NumToken is ignored.
-// On a conversion failure the error is recorded in `errors` and 0 is produced.
-U32Num: u32 = {
-    <x:NumToken> => {
-        let (digits, base, _) = x;
-        u32::from_str_radix(digits, base).unwrap_or_else(|err| {
-            errors.push(ParseError::User { error: ast::PtxError::from(err) });
-            0
-        })
-    }
-}
-
-// Optionally negated integer literal converted to i32.
-// The sign is parsed together with the digits rather than negating afterwards, so the
-// most negative value (whose magnitude does not fit in i32) is accepted.
-// On a conversion failure the error is recorded in `errors` and 0 is produced.
-S32Num: i32 = {
-    <sign:"-"?> <x:NumToken> => {
-        let (text, radix, _) = x;
-        let parsed = match sign {
-            // `from_str_radix` accepts a leading `-` for signed integer types.
-            Some(_) => i32::from_str_radix(&format!("-{}", text), radix),
-            None => i32::from_str_radix(text, radix),
-        };
-        match parsed {
-            Ok(value) => value,
-            Err(err) => {
-                errors.push(ParseError::User { error: ast::PtxError::from(err) });
-                0
-            }
-        }
-    }
-}
-
-// Entry point of the grammar: a version line, a target line (parsed but not stored)
-// and any number of directives. Directives that evaluate to `None` are filtered out.
-pub Module: ast::Module<'input> = {
-    <version:Version> Target <parsed:Directive*> => {
-        let directives = without_none(parsed);
-        ast::Module { version, directives }
-    }
-};
-
-// `.version <major>.<minor>` as a `(major, minor)` pair of u8.
-// A component that does not fit in u8 is reported through `errors` and replaced by 0.
-Version: (u8, u8) = {
-    ".version" <v:VersionNumber> => {
-        // The VersionNumber pattern always contains a '.', so `find` cannot fail here.
-        let dot = v.find('.').unwrap();
-        let (major_text, minor_text) = (&v[..dot], &v[dot + 1..]);
-        let major = match major_text.parse::<u8>() {
-            Ok(number) => number,
-            Err(err) => {
-                errors.push(ParseError::User { error: ast::PtxError::from(err) });
-                0
-            }
-        };
-        let minor = match minor_text.parse::<u8>() {
-            Ok(number) => number,
-            Err(err) => {
-                errors.push(ParseError::User { error: ast::PtxError::from(err) });
-                0
-            }
-        };
-        (major, minor)
-    }
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#ptx-module-directives-target
-// `.target` followed by a `Comma<...>` list of target specifiers. The rule has no action
-// code, and Module does not bind its value.
-Target = {
- ".target" Comma<TargetSpecifier>
-};
-
-// One item of a `.target` list: a shader model token (`sm_<digits>`) or one of the
-// listed option keywords.
-TargetSpecifier = {
- ShaderModel,
- "texmode_unified",
- "texmode_independent",
- "debug",
- "map_f64_to_f32"
-};
-
-// A module-level directive. Address size, file and section directives are accepted but
-// yield `None`; functions and module variables are turned into AST nodes. The last
-// alternative is error recovery: it records the span of the unrecognized input.
-Directive: Option<ast::Directive<'input, ast::ParsedArgParams<'input>>> = {
-    AddressSize => None,
-    <f:Function> => {
-        let (linking, method) = f;
-        let directive = ast::Directive::Method(linking, method);
-        Some(directive)
-    },
-    File => None,
-    Section => None,
-    <v:ModuleVariable> ";" => {
-        let (linking, variable) = v;
-        let directive = ast::Directive::Variable(linking, variable);
-        Some(directive)
-    },
-    @L ! @R => {
-        let (start, _recovery, end) = (<>);
-        let error = ast::PtxError::UnrecognizedDirective { start, end };
-        errors.push(ParseError::User { error });
-        None
-    }
-};
-
-// `.address_size <n>`. The rule has no action code; Directive maps it to `None`.
-AddressSize = {
- ".address_size" U8Num
-};
-
-// A function or kernel: linking flags, the declaration, zero or more tuning directives
-// and the body. Returns the linking flags alongside the assembled `ast::Function`.
-Function: (ast::LinkingDirective, ast::Function<'input, &'input str, ast::Statement<ast::ParsedArgParams<'input>>>) = {
-    <linking:LinkingDirectives>
-    <func_directive:MethodDeclaration>
-    <tuning:TuningDirective*>
-    <body:FunctionBody> => {
-        let function = ast::Function { func_directive, tuning, body };
-        (linking, function)
-    }
-};
-
-// A single linking keyword mapped to its `ast::LinkingDirective` flag.
-LinkingDirective: ast::LinkingDirective = {
- ".extern" => ast::LinkingDirective::EXTERN,
- ".visible" => ast::LinkingDirective::VISIBLE,
- ".weak" => ast::LinkingDirective::WEAK,
-};
-
-// Performance-tuning directives. `.maxntid` and `.reqntid` take one to three
-// comma-separated values; dimensions that are not given are filled in with 1.
-TuningDirective: ast::TuningDirective = {
- ".maxnreg" <ncta:U32Num> => ast::TuningDirective::MaxNReg(ncta),
- ".maxntid" <nx:U32Num> => ast::TuningDirective::MaxNtid(nx, 1, 1),
- ".maxntid" <nx:U32Num> "," <ny:U32Num> => ast::TuningDirective::MaxNtid(nx, ny, 1),
- ".maxntid" <nx:U32Num> "," <ny:U32Num> "," <nz:U32Num> => ast::TuningDirective::MaxNtid(nx, ny, nz),
- ".reqntid" <nx:U32Num> => ast::TuningDirective::ReqNtid(nx, 1, 1),
- ".reqntid" <nx:U32Num> "," <ny:U32Num> => ast::TuningDirective::ReqNtid(nx, ny, 1),
- ".reqntid" <nx:U32Num> "," <ny:U32Num> "," <nz:U32Num> => ast::TuningDirective::ReqNtid(nx, ny, nz),
- ".minnctapersm" <ncta:U32Num> => ast::TuningDirective::MinNCtaPerSm(ncta),
-};
-
-// Zero or more linking keywords OR-ed together, starting from `NONE`.
-LinkingDirectives: ast::LinkingDirective = {
-    <ldirs:LinkingDirective*> => {
-        let mut combined = ast::LinkingDirective::NONE;
-        for flag in ldirs {
-            combined = combined | flag;
-        }
-        combined
-    }
-}
-
-// Header of a kernel (`.entry`) or function (`.func`).
-// Kernels never have return arguments; for functions the return list is optional and
-// defaults to empty. `shared_mem` is always `None` at this stage.
-MethodDeclaration: ast::MethodDeclaration<'input, &'input str> = {
-    ".entry" <name:ExtendedID> <input_arguments:KernelArguments> => {
-        ast::MethodDeclaration {
-            return_arguments: Vec::new(),
-            name: ast::MethodName::Kernel(name),
-            input_arguments,
-            shared_mem: None,
-        }
-    },
-    ".func" <return_arguments:FnArguments?> <name:ExtendedID> <input_arguments:FnArguments> => {
-        let return_arguments = match return_arguments {
-            Some(arguments) => arguments,
-            None => Vec::new(),
-        };
-        ast::MethodDeclaration {
-            return_arguments,
-            name: ast::MethodName::Func(name),
-            input_arguments,
-            shared_mem: None,
-        }
-    }
-};
-
-// Parenthesised `Comma<...>` list of kernel inputs (the `Comma` macro is defined elsewhere
-// in this grammar).
-KernelArguments: Vec<ast::Variable<&'input str>> = {
- "(" <args:Comma<KernelInput>> ")" => args
-};
-
-// Parenthesised `Comma<...>` list of function inputs; MethodDeclaration uses it for both
-// the return list and the parameter list of `.func`.
-FnArguments: Vec<ast::Variable<&'input str>> = {
- "(" <args:Comma<FnInput>> ")" => args
-};
-
-// One kernel parameter: a ParamDeclaration stored as a variable in the Param state
-// space with no initializer.
-KernelInput: ast::Variable<&'input str> = {
-    <v:ParamDeclaration> => {
-        let (align, v_type, name) = v;
-        let state_space = ast::StateSpace::Param;
-        ast::Variable { align, v_type, state_space, name, array_init: Vec::new() }
-    }
-}
-
-// One function argument: either a `.reg` variable (Reg state space) or a `.param`
-// declaration (Param state space). Neither carries an initializer.
-FnInput: ast::Variable<&'input str> = {
-    <v:RegVariable> => {
-        let (align, v_type, name) = v;
-        ast::Variable {
-            align,
-            v_type,
-            state_space: ast::StateSpace::Reg,
-            name,
-            array_init: Vec::new(),
-        }
-    },
-    <v:ParamDeclaration> => {
-        let (align, v_type, name) = v;
-        ast::Variable {
-            align,
-            v_type,
-            state_space: ast::StateSpace::Param,
-            name,
-            array_init: Vec::new(),
-        }
-    }
-}
-
-// Either a braced statement list (statements that evaluate to `None` are dropped) or
-// a bare `;`, which yields `None`.
-FunctionBody: Option<Vec<ast::Statement<ast::ParsedArgParams<'input>>>> = {
-    "{" <s:Statement*> "}" => {
-        let statements = without_none(s);
-        Some(statements)
-    },
-    ";" => None
-};
-
-// State-space keyword mapped to the matching `ast::StateSpace` variant.
-StateSpaceSpecifier: ast::StateSpace = {
- ".reg" => ast::StateSpace::Reg,
- ".const" => ast::StateSpace::Const,
- ".global" => ast::StateSpace::Global,
- ".local" => ast::StateSpace::Local,
- ".shared" => ast::StateSpace::Shared,
- ".param" => ast::StateSpace::Param, // used to prepare function call
-};
-
-// Every scalar type keyword, including `.pred`, mapped to `ast::ScalarType`.
-// Compare SizedScalarType, which lists the same keywords except `.pred`.
-#[inline]
-ScalarType: ast::ScalarType = {
- ".f16" => ast::ScalarType::F16,
- ".f16x2" => ast::ScalarType::F16x2,
- ".pred" => ast::ScalarType::Pred,
- ".b8" => ast::ScalarType::B8,
- ".b16" => ast::ScalarType::B16,
- ".b32" => ast::ScalarType::B32,
- ".b64" => ast::ScalarType::B64,
- ".u8" => ast::ScalarType::U8,
- ".u16" => ast::ScalarType::U16,
- ".u32" => ast::ScalarType::U32,
- ".u64" => ast::ScalarType::U64,
- ".s8" => ast::ScalarType::S8,
- ".s16" => ast::ScalarType::S16,
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
- ".f32" => ast::ScalarType::F32,
- ".f64" => ast::ScalarType::F64,
-};
-
-// One statement inside a function body or nested block. Debug directives and pragmas
-// are accepted but yield `None`. The last alternative is error recovery up to the next
-// `;`: it records the span of the unrecognized statement.
-Statement: Option<ast::Statement<ast::ParsedArgParams<'input>>> = {
-    <label:Label> => Some(ast::Statement::Label(label)),
-    DebugDirective => None,
-    <variable:MultiVariable> ";" => Some(ast::Statement::Variable(variable)),
-    <predicate:PredAt?> <instruction:Instruction> ";" => {
-        Some(ast::Statement::Instruction(predicate, instruction))
-    },
-    PragmaStatement => None,
-    "{" <nested:Statement*> "}" => {
-        let block = without_none(nested);
-        Some(ast::Statement::Block(block))
-    },
-    @L ! ";" @R => {
-        let (start, _recovery, _semicolon, end) = (<>);
-        let error = ast::PtxError::UnrecognizedStatement { start, end };
-        errors.push(ParseError::User { error });
-        None
-    }
-};
-
-// `.pragma "<text>";` — parsed and discarded.
-PragmaStatement: () = {
- ".pragma" String ";"
-}
-
-// Debug directives allowed in statement position; currently only `.loc`.
-DebugDirective: () = {
- DebugLocation
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-loc
-// `.loc` followed by three unsigned numbers. The rule has no action code.
-DebugLocation = {
- ".loc" U32Num U32Num U32Num
-};
-
-// `<name>:` — yields the label name without the colon.
-Label: &'input str = {
- <id:ExtendedID> ":" => id
-};
-
-// `.align <n>` — yields the numeric value.
-Align: u32 = {
- ".align" <x:U32Num> => x
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameterized-variable-names
-// A variable declaration with an optional `<n>` count after it.
-MultiVariable: ast::MultiVariable<&'input str> = {
-    <var:Variable> <count:VariableParam?> => ast::MultiVariable { var, count }
-}
-
-// `<n>` written in angle brackets — yields `n`.
-VariableParam: u32 = {
- "<" <n:U32Num> ">" => n
-}
-
-// A variable declared in statement position: `.reg`, `.local`, `.param` or `.shared`.
-// Only the `.param` form can carry initializer bytes here; `.reg` gets an empty one.
-Variable: ast::Variable<&'input str> = {
-    <v:RegVariable> => {
-        let (align, v_type, name) = v;
-        ast::Variable {
-            align,
-            v_type,
-            state_space: ast::StateSpace::Reg,
-            name,
-            array_init: Vec::new(),
-        }
-    },
-    LocalVariable,
-    <v:ParamVariable> => {
-        let (align, array_init, v_type, name) = v;
-        ast::Variable {
-            align,
-            v_type,
-            state_space: ast::StateSpace::Param,
-            name,
-            array_init,
-        }
-    },
-    SharedVariable,
-};
-
-// `.reg` declaration, scalar or vector, returned as `(alignment, type, name)`.
-// The scalar form accepts every ScalarType; the vector form only SizedScalarType.
-RegVariable: (Option<u32>, ast::Type, &'input str) = {
-    ".reg" <var:VariableScalar<ScalarType>> => {
-        let (align, scalar, name) = var;
-        (align, ast::Type::Scalar(scalar), name)
-    },
-    ".reg" <var:VariableVector<SizedScalarType>> => {
-        let (align, width, scalar, name) = var;
-        (align, ast::Type::Vector(scalar, width), name)
-    }
-}
-
-// `.local` declaration: scalar, vector, or array. The array form reuses the
-// array-or-pointer syntax; if it parses as a pointer, a ZeroDimensionArray error is
-// recorded and an array type with no dimensions is produced.
-LocalVariable: ast::Variable<&'input str> = {
-    ".local" <var:VariableScalar<SizedScalarType>> => {
-        let (align, scalar, name) = var;
-        ast::Variable {
-            align,
-            v_type: ast::Type::Scalar(scalar),
-            state_space: ast::StateSpace::Local,
-            name,
-            array_init: Vec::new(),
-        }
-    },
-    ".local" <var:VariableVector<SizedScalarType>> => {
-        let (align, width, scalar, name) = var;
-        ast::Variable {
-            align,
-            v_type: ast::Type::Vector(scalar, width),
-            state_space: ast::StateSpace::Local,
-            name,
-            array_init: Vec::new(),
-        }
-    },
-    ".local" <var:VariableArrayOrPointer<SizedScalarType>> => {
-        let (align, scalar, name, arr_or_ptr) = var;
-        let (dimensions, array_init) = match arr_or_ptr {
-            ast::ArrayOrPointer::Array { dimensions, init } => (dimensions, init),
-            ast::ArrayOrPointer::Pointer => {
-                errors.push(ParseError::User { error: ast::PtxError::ZeroDimensionArray });
-                (Vec::new(), Vec::new())
-            }
-        };
-        ast::Variable {
-            align,
-            v_type: ast::Type::Array(scalar, dimensions),
-            state_space: ast::StateSpace::Local,
-            name,
-            array_init,
-        }
-    }
-}
-
-// `.shared` declaration: scalar, vector, or array. The array form reuses the
-// array-or-pointer syntax; if it parses as a pointer, a ZeroDimensionArray error is
-// recorded and an array type with no dimensions is produced.
-SharedVariable: ast::Variable<&'input str> = {
-    ".shared" <var:VariableScalar<SizedScalarType>> => {
-        let (align, scalar, name) = var;
-        ast::Variable {
-            align,
-            v_type: ast::Type::Scalar(scalar),
-            state_space: ast::StateSpace::Shared,
-            name,
-            array_init: Vec::new(),
-        }
-    },
-    ".shared" <var:VariableVector<SizedScalarType>> => {
-        let (align, width, scalar, name) = var;
-        ast::Variable {
-            align,
-            v_type: ast::Type::Vector(scalar, width),
-            state_space: ast::StateSpace::Shared,
-            name,
-            array_init: Vec::new(),
-        }
-    },
-    ".shared" <var:VariableArrayOrPointer<SizedScalarType>> => {
-        let (align, scalar, name, arr_or_ptr) = var;
-        let (dimensions, array_init) = match arr_or_ptr {
-            ast::ArrayOrPointer::Array { dimensions, init } => (dimensions, init),
-            ast::ArrayOrPointer::Pointer => {
-                errors.push(ParseError::User { error: ast::PtxError::ZeroDimensionArray });
-                (Vec::new(), Vec::new())
-            }
-        };
-        ast::Variable {
-            align,
-            v_type: ast::Type::Array(scalar, dimensions),
-            state_space: ast::StateSpace::Shared,
-            name,
-            array_init,
-        }
-    }
-}
-
-// Module-scope variable: linking flags, a state space, then either a scalar/vector
-// definition or an array-or-pointer definition. The pointer form is only legal together
-// with `.extern`; otherwise a NonExternPointer error is recorded. Either way the
-// pointer form becomes an array type with no dimensions.
-ModuleVariable: (ast::LinkingDirective, ast::Variable<&'input str>) = {
-    <linking:LinkingDirectives> <state_space:VariableStateSpace> <def:GlobalVariableDefinitionNoArray> => {
-        let (align, v_type, name, array_init) = def;
-        let variable = ast::Variable { align, v_type, state_space, name, array_init };
-        (linking, variable)
-    },
-    <linking:LinkingDirectives> <space:VariableStateSpace> <var:VariableArrayOrPointer<SizedScalarType>> => {
-        let (align, scalar, name, arr_or_ptr) = var;
-        let (dimensions, array_init) = match arr_or_ptr {
-            ast::ArrayOrPointer::Array { dimensions, init } => (dimensions, init),
-            ast::ArrayOrPointer::Pointer => {
-                if !linking.contains(ast::LinkingDirective::EXTERN) {
-                    errors.push(ParseError::User { error: ast::PtxError::NonExternPointer });
-                }
-                (Vec::new(), Vec::new())
-            }
-        };
-        let variable = ast::Variable {
-            align,
-            v_type: ast::Type::Array(scalar, dimensions),
-            state_space: space,
-            name,
-            array_init,
-        };
-        (linking, variable)
-    }
-}
-
-// State spaces accepted for module-scope variables (see ModuleVariable).
-VariableStateSpace: ast::StateSpace = {
- ".const" => ast::StateSpace::Const,
- ".global" => ast::StateSpace::Global,
- ".shared" => ast::StateSpace::Shared,
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
-// `.param` declaration returned as `(alignment, initializer bytes, type, name)`.
-// The array-or-pointer form yields an array type with its initializer, or, for the
-// pointer case, a plain scalar type with an empty initializer.
-ParamVariable: (Option<u32>, Vec<u8>, ast::Type, &'input str) = {
-    ".param" <var:VariableScalar<LdStScalarType>> => {
-        let (align, scalar, name) = var;
-        (align, Vec::new(), ast::Type::Scalar(scalar), name)
-    },
-    ".param" <var:VariableArrayOrPointer<SizedScalarType>> => {
-        let (align, scalar, name, arr_or_ptr) = var;
-        match arr_or_ptr {
-            ast::ArrayOrPointer::Array { dimensions, init } => {
-                (align, init, ast::Type::Array(scalar, dimensions), name)
-            }
-            ast::ArrayOrPointer::Pointer => {
-                (align, Vec::new(), ast::Type::Scalar(scalar), name)
-            }
-        }
-    }
-}
-
-// A `.param` used as a kernel/function argument. Initializers are not allowed in this
-// position: a non-empty one records an ArrayInitalizer error and is then dropped.
-ParamDeclaration: (Option<u32>, ast::Type, &'input str) = {
-    <var:ParamVariable> => {
-        let (align, initializer, v_type, name) = var;
-        let has_initializer = initializer.len() > 0;
-        if has_initializer {
-            errors.push(ParseError::User { error: ast::PtxError::ArrayInitalizer });
-        }
-        (align, v_type, name)
-    }
-}
-
-// Scalar or vector definition for a module-scope variable, returned as
-// `(alignment, type, name, initializer)`; the initializer is always empty here.
-GlobalVariableDefinitionNoArray: (Option<u32>, ast::Type, &'input str, Vec<u8>) = {
-    <scalar:VariableScalar<SizedScalarType>> => {
-        let (align, element, name) = scalar;
-        (align, ast::Type::Scalar(element), name, Vec::new())
-    },
-    <var:VariableVector<SizedScalarType>> => {
-        let (align, width, element, name) = var;
-        (align, ast::Type::Vector(element, width), name, Vec::new())
-    },
-}
-
-// Scalar type keywords without `.pred`, mapped to `ast::ScalarType`.
-#[inline]
-SizedScalarType: ast::ScalarType = {
- ".b8" => ast::ScalarType::B8,
- ".b16" => ast::ScalarType::B16,
- ".b32" => ast::ScalarType::B32,
- ".b64" => ast::ScalarType::B64,
- ".u8" => ast::ScalarType::U8,
- ".u16" => ast::ScalarType::U16,
- ".u32" => ast::ScalarType::U32,
- ".u64" => ast::ScalarType::U64,
- ".s8" => ast::ScalarType::S8,
- ".s16" => ast::ScalarType::S16,
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
- ".f16" => ast::ScalarType::F16,
- ".f16x2" => ast::ScalarType::F16x2,
- ".f32" => ast::ScalarType::F32,
- ".f64" => ast::ScalarType::F64,
-}
-
-// Scalar type keywords used by `ld` (via LdStType) and by scalar `.param` declarations.
-// Same list as SizedScalarType minus `.f16x2`.
-#[inline]
-LdStScalarType: ast::ScalarType = {
- ".b8" => ast::ScalarType::B8,
- ".b16" => ast::ScalarType::B16,
- ".b32" => ast::ScalarType::B32,
- ".b64" => ast::ScalarType::B64,
- ".u8" => ast::ScalarType::U8,
- ".u16" => ast::ScalarType::U16,
- ".u32" => ast::ScalarType::U32,
- ".u64" => ast::ScalarType::U64,
- ".s8" => ast::ScalarType::S8,
- ".s16" => ast::ScalarType::S16,
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
- ".f16" => ast::ScalarType::F16,
- ".f32" => ast::ScalarType::F32,
- ".f64" => ast::ScalarType::F64,
-}
-
-// Any single instruction. Each alternative is a dedicated `Inst*` rule that already
-// produces an `ast::Instruction`, so no action code is needed here.
-Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
- InstLd,
- InstMov,
- InstMul,
- InstAdd,
- InstSetp,
- InstNot,
- InstBra,
- InstCvt,
- InstShl,
- InstShr,
- InstSt,
- InstRet,
- InstCvta,
- InstCall,
- InstAbs,
- InstMad,
- InstFma,
- InstOr,
- InstAnd,
- InstSub,
- InstMin,
- InstMax,
- InstRcp,
- InstSelp,
- InstBar,
- InstAtom,
- InstAtomCas,
- InstDiv,
- InstSqrt,
- InstRsqrt,
- InstNeg,
- InstSin,
- InstCos,
- InstLg2,
- InstEx2,
- InstClz,
- InstBrev,
- InstPopc,
- InstXor,
- InstRem,
- InstBfe,
- InstBfi,
- InstPrmt,
- InstActivemask,
- InstMembar,
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
-// Three accepted shapes:
-//   1. optional qualifier, optional non-global state space (Generic when absent);
-//   2. optional qualifier with an explicit `.global`;
-//   3. `.global` with `.nc` (non-coherent): no qualifier, restricted cache operators.
-// A missing qualifier means Weak and a missing cache operator means Cached.
-InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
-    "ld" <q:LdStQualifier?> <ss:LdNonGlobalStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
-        let details = ast::LdDetails {
-            qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
-            state_space: ss.unwrap_or(ast::StateSpace::Generic),
-            caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
-            typ: t,
-            non_coherent: false
-        };
-        ast::Instruction::Ld(details, ast::Arg2Ld { dst, src })
-    },
-    "ld" <q:LdStQualifier?> ".global" <cop:LdCacheOperator?> <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
-        let details = ast::LdDetails {
-            qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
-            state_space: ast::StateSpace::Global,
-            caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
-            typ: t,
-            non_coherent: false
-        };
-        ast::Instruction::Ld(details, ast::Arg2Ld { dst, src })
-    },
-    "ld" ".global" <cop:LdNcCacheOperator?> ".nc" <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
-        let details = ast::LdDetails {
-            qualifier: ast::LdStQualifier::Weak,
-            state_space: ast::StateSpace::Global,
-            caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
-            typ: t,
-            non_coherent: true
-        };
-        ast::Instruction::Ld(details, ast::Arg2Ld { dst, src })
-    }
-};
-
-// Operand type of a load/store: a scalar type, optionally preceded by a vector prefix.
-LdStType: ast::Type = {
- <v:VectorPrefix> <t:LdStScalarType> => ast::Type::Vector(t, v),
- <t:LdStScalarType> => ast::Type::Scalar(t),
-}
-
-// Memory-ordering qualifier of a load/store. `.relaxed` and `.acquire` must be followed
-// by a scope (MemScope).
-LdStQualifier: ast::LdStQualifier = {
- ".weak" => ast::LdStQualifier::Weak,
- ".volatile" => ast::LdStQualifier::Volatile,
- ".relaxed" <s:MemScope> => ast::LdStQualifier::Relaxed(s),
- ".acquire" <s:MemScope> => ast::LdStQualifier::Acquire(s),
-};
-
-// Scope keyword mapped to `ast::MemScope`.
-MemScope: ast::MemScope = {
- ".cta" => ast::MemScope::Cta,
- ".gpu" => ast::MemScope::Gpu,
- ".sys" => ast::MemScope::Sys
-};
-
-// Level keyword mapped to `ast::MemScope`. Differs from MemScope only in spelling:
-// here `.gl` (not `.gpu`) maps to the Gpu scope.
-MembarLevel: ast::MemScope = {
- ".cta" => ast::MemScope::Cta,
- ".gl" => ast::MemScope::Gpu,
- ".sys" => ast::MemScope::Sys
-};
-
-// State spaces accepted by the first `ld` form; `.global` is handled by separate InstLd
-// alternatives.
-LdNonGlobalStateSpace: ast::StateSpace = {
- ".const" => ast::StateSpace::Const,
- ".local" => ast::StateSpace::Local,
- ".param" => ast::StateSpace::Param,
- ".shared" => ast::StateSpace::Shared,
-};
-
-// Cache operator keyword of a regular load mapped to `ast::LdCacheOperator`.
-LdCacheOperator: ast::LdCacheOperator = {
- ".ca" => ast::LdCacheOperator::Cached,
- ".cg" => ast::LdCacheOperator::L2Only,
- ".cs" => ast::LdCacheOperator::Streaming,
- ".lu" => ast::LdCacheOperator::LastUse,
- ".cv" => ast::LdCacheOperator::Uncached,
-};
-
-// Cache operators accepted by the `.nc` load form: the LdCacheOperator list without
-// `.lu` and `.cv`.
-LdNcCacheOperator: ast::LdCacheOperator = {
- ".ca" => ast::LdCacheOperator::Cached,
- ".cg" => ast::LdCacheOperator::L2Only,
- ".cs" => ast::LdCacheOperator::Streaming,
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
-// `mov` with an optional vector prefix: the type is a Vector when the prefix is present
-// and a Scalar otherwise.
-InstMov: ast::Instruction<ast::ParsedArgParams<'input>> = {
-    "mov" <pref:VectorPrefix?> <t:MovScalarType> <dst:DstOperandVec> "," <src:SrcOperandVec> => {
-        let mov_type = if let Some(vec_width) = pref {
-            ast::Type::Vector(t, vec_width)
-        } else {
-            ast::Type::Scalar(t)
-        };
-        ast::Instruction::Mov(ast::MovDetails::new(mov_type), ast::Arg2Mov { dst, src })
-    }
-}
-
-// Scalar type keywords accepted by `mov`, mapped to `ast::ScalarType`.
-#[inline]
-MovScalarType: ast::ScalarType = {
- ".b16" => ast::ScalarType::B16,
- ".b32" => ast::ScalarType::B32,
- ".b64" => ast::ScalarType::B64,
- ".u16" => ast::ScalarType::U16,
- ".u32" => ast::ScalarType::U32,
- ".u64" => ast::ScalarType::U64,
- ".s16" => ast::ScalarType::S16,
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
- ".f32" => ast::ScalarType::F32,
- ".f64" => ast::ScalarType::F64,
- ".pred" => ast::ScalarType::Pred
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mul
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-mul
-// `mul` followed by its type/mode details and three operands.
-InstMul: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "mul" <d:MulDetails> <a:Arg3> => ast::Instruction::Mul(d, a)
-};
-
-// Mode of a `mul`: an integer control (`.hi`/`.lo`/`.wide`) with an unsigned or signed
-// integer type, or a floating-point description.
-MulDetails: ast::MulDetails = {
-    <control:MulIntControl> <typ:UIntType> => {
-        ast::MulDetails::Unsigned(ast::MulUInt { typ, control })
-    },
-    <control:MulIntControl> <typ:SIntType> => {
-        ast::MulDetails::Signed(ast::MulSInt { typ, control })
-    },
-    <float:ArithFloat> => ast::MulDetails::Float(float)
-};
-
-// Integer multiply control keyword mapped to `ast::MulIntControl`.
-MulIntControl: ast::MulIntControl = {
- ".hi" => ast::MulIntControl::High,
- ".lo" => ast::MulIntControl::Low,
- ".wide" => ast::MulIntControl::Wide
-};
-
-// Floating-point rounding keyword mapped to `ast::RoundingMode`.
-#[inline]
-RoundingModeFloat : ast::RoundingMode = {
- ".rn" => ast::RoundingMode::NearestEven,
- ".rz" => ast::RoundingMode::Zero,
- ".rm" => ast::RoundingMode::NegativeInf,
- ".rp" => ast::RoundingMode::PositiveInf,
-};
-
-// Integer rounding keyword (`i`-suffixed spelling) mapped to the same
-// `ast::RoundingMode` variants as RoundingModeFloat.
-RoundingModeInt : ast::RoundingMode = {
- ".rni" => ast::RoundingMode::NearestEven,
- ".rzi" => ast::RoundingMode::Zero,
- ".rmi" => ast::RoundingMode::NegativeInf,
- ".rpi" => ast::RoundingMode::PositiveInf,
-};
-
-// Signed and unsigned integer types of 16, 32 and 64 bits.
-IntType : ast::ScalarType = {
- ".u16" => ast::ScalarType::U16,
- ".u32" => ast::ScalarType::U32,
- ".u64" => ast::ScalarType::U64,
- ".s16" => ast::ScalarType::S16,
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
-};
-
-// Signed and unsigned integer types of 32 and 64 bits only.
-IntType3264: ast::ScalarType = {
- ".u32" => ast::ScalarType::U32,
- ".u64" => ast::ScalarType::U64,
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
-}
-
-// Unsigned integer types of 16, 32 and 64 bits.
-UIntType: ast::ScalarType = {
- ".u16" => ast::ScalarType::U16,
- ".u32" => ast::ScalarType::U32,
- ".u64" => ast::ScalarType::U64,
-};
-
-// Signed integer types of 16, 32 and 64 bits.
-SIntType: ast::ScalarType = {
- ".s16" => ast::ScalarType::S16,
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
-};
-
-// Floating-point type keywords, including the packed `.f16x2`.
-FloatType: ast::ScalarType = {
- ".f16" => ast::ScalarType::F16,
- ".f16x2" => ast::ScalarType::F16x2,
- ".f32" => ast::ScalarType::F32,
- ".f64" => ast::ScalarType::F64,
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-add
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-add
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-add
-// `add` followed by its arithmetic details and three operands.
-InstAdd: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "add" <d:ArithDetails> <a:Arg3> => ast::Instruction::Add(d, a)
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-setp
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-comparison-instructions-setp
-// TODO: support f16 setp
-// Two forms: the plain comparison (SetpMode) and the comparison combined with a boolean
-// post-operation (SetpBoolMode), which takes a different argument list.
-InstSetp: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "setp" <d:SetpMode> <a:Arg4Setp> => ast::Instruction::Setp(d, a),
- "setp" <d:SetpBoolMode> <a:Arg5Setp> => ast::Instruction::SetpBool(d, a),
-};
-
-// Comparison operator plus operand type for plain `setp`. `.f32` is handled separately
-// because only it may carry `.ftz`; for every other type `flush_to_zero` is `None`.
-SetpMode: ast::SetpData = {
-    <cmp_op:SetpCompareOp> <typ:SetpTypeNoF32> => {
-        ast::SetpData { typ, flush_to_zero: None, cmp_op }
-    },
-    <cmp_op:SetpCompareOp> <ftz:".ftz"?> ".f32" => {
-        let flush_to_zero = Some(ftz.is_some());
-        ast::SetpData { typ: ast::ScalarType::F32, flush_to_zero, cmp_op }
-    }
-};
-
-// Comparison operator, boolean post-operation and operand type for the combining form
-// of `setp`. `.f32` is handled separately because only it may carry `.ftz`; for every
-// other type `flush_to_zero` is `None`.
-SetpBoolMode: ast::SetpBoolData = {
-    <cmp_op:SetpCompareOp> <bool_op:SetpBoolPostOp> <typ:SetpTypeNoF32> => {
-        ast::SetpBoolData { typ, flush_to_zero: None, cmp_op, bool_op }
-    },
-    <cmp_op:SetpCompareOp> <bool_op:SetpBoolPostOp> <ftz:".ftz"?> ".f32" => {
-        let flush_to_zero = Some(ftz.is_some());
-        ast::SetpBoolData { typ: ast::ScalarType::F32, flush_to_zero, cmp_op, bool_op }
-    }
-};
-
-// Comparison keyword mapped to `ast::SetpCompareOp`. Note that `.lo`/`.ls`/`.hi`/`.hs`
-// are mapped to the same variants as `.lt`/`.le`/`.gt`/`.ge`.
-SetpCompareOp: ast::SetpCompareOp = {
- ".eq" => ast::SetpCompareOp::Eq,
- ".ne" => ast::SetpCompareOp::NotEq,
- ".lt" => ast::SetpCompareOp::Less,
- ".le" => ast::SetpCompareOp::LessOrEq,
- ".gt" => ast::SetpCompareOp::Greater,
- ".ge" => ast::SetpCompareOp::GreaterOrEq,
- ".lo" => ast::SetpCompareOp::Less,
- ".ls" => ast::SetpCompareOp::LessOrEq,
- ".hi" => ast::SetpCompareOp::Greater,
- ".hs" => ast::SetpCompareOp::GreaterOrEq,
- ".equ" => ast::SetpCompareOp::NanEq,
- ".neu" => ast::SetpCompareOp::NanNotEq,
- ".ltu" => ast::SetpCompareOp::NanLess,
- ".leu" => ast::SetpCompareOp::NanLessOrEq,
- ".gtu" => ast::SetpCompareOp::NanGreater,
- ".geu" => ast::SetpCompareOp::NanGreaterOrEq,
- ".num" => ast::SetpCompareOp::IsNotNan,
- ".nan" => ast::SetpCompareOp::IsAnyNan,
-};
-
-// Boolean post-operation keyword of the combining `setp` form.
-SetpBoolPostOp: ast::SetpBoolPostOp = {
- ".and" => ast::SetpBoolPostOp::And,
- ".or" => ast::SetpBoolPostOp::Or,
- ".xor" => ast::SetpBoolPostOp::Xor,
-};
-
-// Operand types of `setp` other than `.f32`, which SetpMode/SetpBoolMode match directly
-// so that it can be combined with `.ftz`.
-SetpTypeNoF32: ast::ScalarType = {
- ".b16" => ast::ScalarType::B16,
- ".b32" => ast::ScalarType::B32,
- ".b64" => ast::ScalarType::B64,
- ".u16" => ast::ScalarType::U16,
- ".u32" => ast::ScalarType::U32,
- ".u64" => ast::ScalarType::U64,
- ".s16" => ast::ScalarType::S16,
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
- ".f64" => ast::ScalarType::F64,
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-not
-// `not` followed by a boolean/bit type and two operands.
-InstNot: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "not" <t:BooleanType> <a:Arg2> => ast::Instruction::Not(t, a)
-};
-
-// Predicate and untyped-bit types of 16, 32 and 64 bits.
-BooleanType: ast::ScalarType = {
- ".pred" => ast::ScalarType::Pred,
- ".b16" => ast::ScalarType::B16,
- ".b32" => ast::ScalarType::B32,
- ".b64" => ast::ScalarType::B64,
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-at
-// Instruction guard: `@name`, or `@!name` for the negated form.
-PredAt: ast::PredAt<&'input str> = {
-    "@" <label:ExtendedID> => {
-        ast::PredAt { not: false, label }
-    },
-    "@" "!" <label:ExtendedID> => {
-        ast::PredAt { not: true, label }
-    }
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-bra
-// `bra` with an optional `.uni` modifier and one operand.
-InstBra: ast::Instruction<ast::ParsedArgParams<'input>> = {
-    "bra" <u:".uni"?> <a:Arg1> => {
-        let uniform = u.is_some();
-        ast::Instruction::Bra(ast::BraData { uniform }, a)
-    }
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt
-// `cvt` in all supported destination/source combinations.
-// The first three alternatives cover conversions with an integer on at least one side and
-// delegate validation to the `*_checked` constructors, which are handed `errors`.
-// The remaining alternatives spell out every float-to-float pair explicitly because the
-// permitted modifiers (rounding, `.ftz`, `.sat`) differ per pair. In the resulting CvtDesc,
-// `flush_to_zero` is `None` when the pair does not accept `.ftz`, and otherwise
-// `Some(<was .ftz written>)`.
-InstCvt: ast::Instruction<ast::ParsedArgParams<'input>> = {
-    "cvt" <s:".sat"?> <dst_t:CvtTypeInt> <src_t:CvtTypeInt> <a:Arg2> => {
-        ast::Instruction::Cvt(ast::CvtDetails::new_int_from_int_checked(
-                s.is_some(),
-                dst_t,
-                src_t,
-                errors
-            ),
-            a)
-    },
-    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> <dst_t:CvtTypeFloat> <src_t:CvtTypeInt> <a:Arg2> => {
-        ast::Instruction::Cvt(ast::CvtDetails::new_float_from_int_checked(
-                r,
-                f.is_some(),
-                s.is_some(),
-                dst_t,
-                src_t,
-                errors
-            ),
-            a)
-    },
-    "cvt" <r:RoundingModeInt> <f:".ftz"?> <s:".sat"?> <dst_t:CvtTypeInt> <src_t:CvtTypeFloat> <a:Arg2> => {
-        ast::Instruction::Cvt(ast::CvtDetails::new_int_from_float_checked(
-                r,
-                f.is_some(),
-                s.is_some(),
-                dst_t,
-                src_t,
-                errors
-            ),
-            a)
-    },
-    "cvt" <r:RoundingModeInt?> <s:".sat"?> ".f16" ".f16" <a:Arg2> => {
-        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
-            ast::CvtDesc {
-                rounding: r,
-                flush_to_zero: None,
-                saturate: s.is_some(),
-                dst: ast::ScalarType::F16,
-                src: ast::ScalarType::F16
-            }
-        ), a)
-    },
-    "cvt" <f:".ftz"?> <s:".sat"?> ".f32" ".f16" <a:Arg2> => {
-        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
-            ast::CvtDesc {
-                rounding: None,
-                flush_to_zero: Some(f.is_some()),
-                saturate: s.is_some(),
-                dst: ast::ScalarType::F32,
-                src: ast::ScalarType::F16
-            }
-        ), a)
-    },
-    "cvt" <s:".sat"?> ".f64" ".f16" <a:Arg2> => {
-        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
-            ast::CvtDesc {
-                rounding: None,
-                flush_to_zero: None,
-                saturate: s.is_some(),
-                dst: ast::ScalarType::F64,
-                src: ast::ScalarType::F16
-            }
-        ), a)
-    },
-    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> ".f16" ".f32" <a:Arg2> => {
-        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
-            ast::CvtDesc {
-                rounding: Some(r),
-                flush_to_zero: Some(f.is_some()),
-                saturate: s.is_some(),
-                dst: ast::ScalarType::F16,
-                src: ast::ScalarType::F32
-            }
-        ), a)
-    },
-    "cvt" <r:RoundingModeInt?> <f:".ftz"?> <s:".sat"?> ".f32" ".f32" <a:Arg2> => {
-        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
-            ast::CvtDesc {
-                rounding: r,
-                flush_to_zero: Some(f.is_some()),
-                saturate: s.is_some(),
-                dst: ast::ScalarType::F32,
-                src: ast::ScalarType::F32
-            }
-        ), a)
-    },
-    // NOTE(review): this alternative accepts `.sat` before `.ftz`, the reverse of every
-    // other alternative in this rule — confirm against the PTX ISA before changing it.
-    "cvt" <s:".sat"?> <f:".ftz"?> ".f64" ".f32" <a:Arg2> => {
-        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
-            ast::CvtDesc {
-                rounding: None,
-                flush_to_zero: Some(f.is_some()),
-                saturate: s.is_some(),
-                dst: ast::ScalarType::F64,
-                src: ast::ScalarType::F32
-            }
-        ), a)
-    },
-    "cvt" <r:RoundingModeFloat> <s:".sat"?> ".f16" ".f64" <a:Arg2> => {
-        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
-            ast::CvtDesc {
-                rounding: Some(r),
-                flush_to_zero: None,
-                saturate: s.is_some(),
-                dst: ast::ScalarType::F16,
-                src: ast::ScalarType::F64
-            }
-        ), a)
-    },
-    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> ".f32" ".f64" <a:Arg2> => {
-        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
-            ast::CvtDesc {
-                rounding: Some(r),
-                // Taken from the `.ftz` binding; it used to read the `.sat` binding by
-                // mistake, so `.ftz` was ignored and `.sat` implied flush-to-zero.
-                flush_to_zero: Some(f.is_some()),
-                saturate: s.is_some(),
-                dst: ast::ScalarType::F32,
-                src: ast::ScalarType::F64
-            }
-        ), a)
-    },
-    "cvt" <r:RoundingModeInt?> <s:".sat"?> ".f64" ".f64" <a:Arg2> => {
-        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
-            ast::CvtDesc {
-                rounding: r,
-                flush_to_zero: None,
-                saturate: s.is_some(),
-                dst: ast::ScalarType::F64,
-                src: ast::ScalarType::F64
-            }
-        ), a)
-    },
-};
-
-// Maps each 8/16/32/64-bit unsigned and signed integer type suffix to its
-// `ast::ScalarType` variant.
-CvtTypeInt: ast::ScalarType = {
- ".u8" => ast::ScalarType::U8,
- ".u16" => ast::ScalarType::U16,
- ".u32" => ast::ScalarType::U32,
- ".u64" => ast::ScalarType::U64,
- ".s8" => ast::ScalarType::S8,
- ".s16" => ast::ScalarType::S16,
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
-};
-
-// Maps the `.f16`, `.f32` and `.f64` type suffixes to their `ast::ScalarType` variants.
-CvtTypeFloat: ast::ScalarType = {
- ".f16" => ast::ScalarType::F16,
- ".f32" => ast::ScalarType::F32,
- ".f64" => ast::ScalarType::F64,
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shl
-// Parses `shl` followed by a `ShlType` suffix and three operands.
-InstShl: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "shl" <typ:ShlType> <args:Arg3> => {
- ast::Instruction::Shl(typ, args)
- }
-};
-
-// Type suffixes accepted by `shl`: untyped 16/32/64-bit values only.
-ShlType: ast::ScalarType = {
- ".b16" => ast::ScalarType::B16,
- ".b32" => ast::ScalarType::B32,
- ".b64" => ast::ScalarType::B64,
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shr
-// Parses `shr` followed by a `ShrType` suffix and three operands.
-InstShr: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "shr" <typ:ShrType> <args:Arg3> => {
- ast::Instruction::Shr(typ, args)
- }
-};
-
-// Type suffixes accepted by `shr`: 16/32/64-bit untyped, unsigned and signed values.
-ShrType: ast::ScalarType = {
- ".b16" => ast::ScalarType::B16,
- ".b32" => ast::ScalarType::B32,
- ".b64" => ast::ScalarType::B64,
- ".u16" => ast::ScalarType::U16,
- ".u32" => ast::ScalarType::U32,
- ".u64" => ast::ScalarType::U64,
- ".s16" => ast::ScalarType::S16,
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st
-// Warning: NVIDIA documentation is incorrect, you can specify scope only once
-// Parses `st` with optional qualifier, state space and cache operator, a mandatory
-// type, a bracketed destination address and a (possibly vector) source operand.
-InstSt: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "st" <qualifier:LdStQualifier?> <space:StStateSpace?> <cache_op:StCacheOperator?> <typ:LdStType> <src1:MemoryOperand> "," <src2:SrcOperandVec> => {
- // Omitted modifiers fall back to weak / generic / write-back.
- let data = ast::StData {
- qualifier: qualifier.unwrap_or(ast::LdStQualifier::Weak),
- state_space: space.unwrap_or(ast::StateSpace::Generic),
- caching: cache_op.unwrap_or(ast::StCacheOperator::Writeback),
- typ,
- };
- ast::Instruction::St(data, ast::Arg2St { src1, src2 })
- }
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#using-addresses-arrays-and-vectors
-// An `Operand` wrapped in square brackets; yields the inner operand unchanged.
-MemoryOperand: ast::Operand<&'input str> = {
- "[" <Operand> "]"
-}
-
-// State-space modifiers that may be written explicitly on `st`.
-StStateSpace: ast::StateSpace = {
- ".global" => ast::StateSpace::Global,
- ".local" => ast::StateSpace::Local,
- ".param" => ast::StateSpace::Param,
- ".shared" => ast::StateSpace::Shared,
-};
-
-// Cache-operator modifiers of `st`, mapped to `ast::StCacheOperator` variants.
-StCacheOperator: ast::StCacheOperator = {
- ".wb" => ast::StCacheOperator::Writeback,
- ".cg" => ast::StCacheOperator::L2Only,
- ".cs" => ast::StCacheOperator::Streaming,
- ".wt" => ast::StCacheOperator::Writethrough,
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret
-// Parses `ret` with an optional `.uni` modifier, recorded as `uniform`.
-InstRet: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "ret" <uni:".uni"?> => {
- let uniform = uni.is_some();
- ast::Instruction::Ret(ast::RetData { uniform })
- }
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvta
-// Parses `cvta`. Without `.to` the named space is the source and the target is
-// generic; with `.to` the named space is the target and the source is generic.
-InstCvta: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "cvta" <from:CvtaStateSpace> <size:CvtaSize> <args:Arg2> => {
- let details = ast::CvtaDetails { to: ast::StateSpace::Generic, from, size };
- ast::Instruction::Cvta(details, args)
- },
- "cvta" ".to" <to:CvtaStateSpace> <size:CvtaSize> <args:Arg2> => {
- let details = ast::CvtaDetails { to, from: ast::StateSpace::Generic, size };
- ast::Instruction::Cvta(details, args)
- }
-}
-
-// State spaces that can be named in a `cvta` instruction.
-CvtaStateSpace: ast::StateSpace = {
- ".const" => ast::StateSpace::Const,
- ".global" => ast::StateSpace::Global,
- ".local" => ast::StateSpace::Local,
- ".shared" => ast::StateSpace::Shared,
-}
-
-// Size suffix of `cvta`: `.u32` or `.u64`.
-CvtaSize: ast::CvtaSize = {
- ".u32" => ast::CvtaSize::U32,
- ".u64" => ast::CvtaSize::U64,
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-call
-// Parses `call` with an optional `.uni` modifier; `ArgCall` supplies the return
-// parameters, the callee name and the argument list.
-InstCall: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "call" <uni:".uni"?> <target:ArgCall> => {
- let uniform = uni.is_some();
- let (ret_params, func, param_list) = target;
- ast::Instruction::Call(ast::CallInst { uniform, ret_params, func, param_list })
- }
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-abs
-// Parses `abs`. `flush_to_zero` is `None` for the signed-integer and `.f64` forms and
-// `Some(<.ftz present>)` for the `.f32`, `.f16` and `.f16x2` forms.
-InstAbs: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "abs" <typ:SignedIntType> <args:Arg2> => {
- let details = ast::AbsDetails { flush_to_zero: None, typ };
- ast::Instruction::Abs(details, args)
- },
- "abs" <ftz:".ftz"?> ".f32" <args:Arg2> => {
- let details = ast::AbsDetails { flush_to_zero: Some(ftz.is_some()), typ: ast::ScalarType::F32 };
- ast::Instruction::Abs(details, args)
- },
- "abs" ".f64" <args:Arg2> => {
- let details = ast::AbsDetails { flush_to_zero: None, typ: ast::ScalarType::F64 };
- ast::Instruction::Abs(details, args)
- },
- "abs" <ftz:".ftz"?> ".f16" <args:Arg2> => {
- let details = ast::AbsDetails { flush_to_zero: Some(ftz.is_some()), typ: ast::ScalarType::F16 };
- ast::Instruction::Abs(details, args)
- },
- "abs" <ftz:".ftz"?> ".f16x2" <args:Arg2> => {
- let details = ast::AbsDetails { flush_to_zero: Some(ftz.is_some()), typ: ast::ScalarType::F16x2 };
- ast::Instruction::Abs(details, args)
- },
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mad
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mad
-// Parses `mad`. Only the first alternative builds an instruction.
-InstMad: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "mad" <d:MulDetails> <a:Arg4> => ast::Instruction::Mad(d, a),
- // NOTE(review): `mad.hi.sat.s32` is unimplemented. The action is `todo!()`, which
- // panics if this alternative is ever reduced, and the production takes no operands.
- "mad" ".hi" ".sat" ".s32" => todo!(),
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-fma
-// Parses `fma`; the float details come from `ArithFloatMustRound`.
-InstFma: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "fma" <details:ArithFloatMustRound> <args:Arg4> => {
- ast::Instruction::Fma(details, args)
- },
-};
-
-// Signed 16/32/64-bit integer type suffixes.
-SignedIntType: ast::ScalarType = {
- ".s16" => ast::ScalarType::S16,
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-or
-// Parses `or` with a `BooleanType` suffix and three operands.
-InstOr: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "or" <typ:BooleanType> <args:Arg3> => {
- ast::Instruction::Or(typ, args)
- },
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-and
-// Parses `and` with a `BooleanType` suffix and three operands.
-InstAnd: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "and" <typ:BooleanType> <args:Arg3> => {
- ast::Instruction::And(typ, args)
- },
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rcp
-// Parses `rcp`. The `.f32` form takes `.approx` or a rounding mode plus optional
-// `.ftz`; the `.f64` form requires a rounding mode and has no `.ftz`.
-InstRcp: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "rcp" <rounding:RcpRoundingMode> <ftz:".ftz"?> ".f32" <args:Arg2> => {
- ast::Instruction::Rcp(
- ast::RcpDetails { rounding, flush_to_zero: Some(ftz.is_some()), is_f64: false },
- args
- )
- },
- "rcp" <mode:RoundingModeFloat> ".f64" <args:Arg2> => {
- ast::Instruction::Rcp(
- ast::RcpDetails { rounding: Some(mode), flush_to_zero: None, is_f64: true },
- args
- )
- }
-};
-
-// `.approx` yields `None`; an explicit float rounding mode yields `Some(mode)`.
-RcpRoundingMode: Option<ast::RoundingMode> = {
- ".approx" => None,
- <mode:RoundingModeFloat> => {
- Some(mode)
- }
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-sub
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sub
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-sub
-// Parses `sub` with `ArithDetails` modifiers and three operands.
-InstSub: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "sub" <details:ArithDetails> <args:Arg3> => {
- ast::Instruction::Sub(details, args)
- },
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-min
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-min
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-min
-// Parses `min` with `MinMaxDetails` modifiers and three operands.
-InstMin: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "min" <details:MinMaxDetails> <args:Arg3> => {
- ast::Instruction::Min(details, args)
- },
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-max
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-max
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-max
-// Parses `max` with `MinMaxDetails` modifiers and three operands.
-InstMax: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "max" <details:MinMaxDetails> <args:Arg3> => {
- ast::Instruction::Max(details, args)
- },
-};
-
-// Modifiers shared by `min` and `max`. The `.f32`, `.f16` and `.f16x2` forms accept
-// optional `.ftz` and `.NaN`; the `.f64` form accepts neither.
-MinMaxDetails: ast::MinMaxDetails = {
- <typ:UIntType> => ast::MinMaxDetails::Unsigned(typ),
- <typ:SIntType> => ast::MinMaxDetails::Signed(typ),
- <ftz:".ftz"?> <nan:".NaN"?> ".f32" => {
- let float = ast::MinMaxFloat {
- typ: ast::ScalarType::F32,
- flush_to_zero: Some(ftz.is_some()),
- nan: nan.is_some(),
- };
- ast::MinMaxDetails::Float(float)
- },
- ".f64" => {
- let float = ast::MinMaxFloat {
- typ: ast::ScalarType::F64,
- flush_to_zero: None,
- nan: false,
- };
- ast::MinMaxDetails::Float(float)
- },
- <ftz:".ftz"?> <nan:".NaN"?> ".f16" => {
- let float = ast::MinMaxFloat {
- typ: ast::ScalarType::F16,
- flush_to_zero: Some(ftz.is_some()),
- nan: nan.is_some(),
- };
- ast::MinMaxDetails::Float(float)
- },
- <ftz:".ftz"?> <nan:".NaN"?> ".f16x2" => {
- let float = ast::MinMaxFloat {
- typ: ast::ScalarType::F16x2,
- flush_to_zero: Some(ftz.is_some()),
- nan: nan.is_some(),
- };
- ast::MinMaxDetails::Float(float)
- }
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-selp
-// Parses `selp` with a `SelpType` suffix and four operands.
-InstSelp: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "selp" <typ:SelpType> <args:Arg4> => {
- ast::Instruction::Selp(typ, args)
- },
-};
-
-// Type suffixes accepted by `selp`: 16/32/64-bit untyped, unsigned and signed
-// values, plus `.f32` and `.f64`.
-SelpType: ast::ScalarType = {
- ".b16" => ast::ScalarType::B16,
- ".b32" => ast::ScalarType::B32,
- ".b64" => ast::ScalarType::B64,
- ".u16" => ast::ScalarType::U16,
- ".u32" => ast::ScalarType::U32,
- ".u64" => ast::ScalarType::U64,
- ".s16" => ast::ScalarType::S16,
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
- ".f32" => ast::ScalarType::F32,
- ".f64" => ast::ScalarType::F64,
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar
-// Parses `bar.sync`, `barrier.sync` and `barrier.sync.aligned`; all three spellings
-// produce `BarDetails::SyncAligned`.
-InstBar: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "bar" ".sync" <arg:Arg1Bar> => {
- ast::Instruction::Bar(ast::BarDetails::SyncAligned, arg)
- },
- "barrier" ".sync" <arg:Arg1Bar> => {
- ast::Instruction::Bar(ast::BarDetails::SyncAligned, arg)
- },
- "barrier" ".sync" ".aligned" <arg:Arg1Bar> => {
- ast::Instruction::Bar(ast::BarDetails::SyncAligned, arg)
- },
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-atom
-// The documentation does not mention all supported operations:
-// * Operation .add requires .u32 or .s32 or .u64 or .f64 or .f16 or .f16x2 or .f32
-// * Operation .inc requires .u32 type for instruction
-// * Operation .dec requires .u32 type for instruction
-// Otherwise as documented
-// Parses `atom` (all operations except `.cas`). Every alternative accepts optional
-// semantics, scope and state space, defaulting to relaxed / gpu / generic, and differs
-// only in which operation/type pair it builds for `inner`.
-InstAtom: ast::Instruction<ast::ParsedArgParams<'input>> = {
- // Bitwise operations on `.b32` / `.b64`.
- "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op:AtomBitOp> <typ:BitType> <args:Arg3Atom> => {
- let inner = ast::AtomInnerDetails::Bit { op, typ };
- ast::Instruction::Atom(
- ast::AtomDetails {
- semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
- scope: scope.unwrap_or(ast::MemScope::Gpu),
- space: space.unwrap_or(ast::StateSpace::Generic),
- inner
- },
- args
- )
- },
- // `.inc` exists only for `.u32`.
- "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".inc" ".u32" <args:Arg3Atom> => {
- let inner = ast::AtomInnerDetails::Unsigned {
- op: ast::AtomUIntOp::Inc,
- typ: ast::ScalarType::U32
- };
- ast::Instruction::Atom(
- ast::AtomDetails {
- semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
- scope: scope.unwrap_or(ast::MemScope::Gpu),
- space: space.unwrap_or(ast::StateSpace::Generic),
- inner
- },
- args
- )
- },
- // `.dec` exists only for `.u32`.
- "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".dec" ".u32" <args:Arg3Atom> => {
- let inner = ast::AtomInnerDetails::Unsigned {
- op: ast::AtomUIntOp::Dec,
- typ: ast::ScalarType::U32
- };
- ast::Instruction::Atom(
- ast::AtomDetails {
- semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
- scope: scope.unwrap_or(ast::MemScope::Gpu),
- space: space.unwrap_or(ast::StateSpace::Generic),
- inner
- },
- args
- )
- },
- // Floating-point `.add`.
- "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".add" <typ:FloatType> <args:Arg3Atom> => {
- let inner = ast::AtomInnerDetails::Float { op: ast::AtomFloatOp::Add, typ };
- ast::Instruction::Atom(
- ast::AtomDetails {
- semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
- scope: scope.unwrap_or(ast::MemScope::Gpu),
- space: space.unwrap_or(ast::StateSpace::Generic),
- inner
- },
- args
- )
- },
- // Unsigned `.add` / `.min` / `.max` on `.u32` / `.u64`.
- "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op: AtomUIntOp> <typ:UIntType3264> <args:Arg3Atom> => {
- let inner = ast::AtomInnerDetails::Unsigned { op, typ };
- ast::Instruction::Atom(
- ast::AtomDetails {
- semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
- scope: scope.unwrap_or(ast::MemScope::Gpu),
- space: space.unwrap_or(ast::StateSpace::Generic),
- inner
- },
- args
- )
- },
- // Signed `.add` / `.min` / `.max` on `.s32` / `.s64`.
- "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op: AtomSIntOp> <typ:SIntType3264> <args:Arg3Atom> => {
- let inner = ast::AtomInnerDetails::Signed { op, typ };
- ast::Instruction::Atom(
- ast::AtomDetails {
- semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
- scope: scope.unwrap_or(ast::MemScope::Gpu),
- space: space.unwrap_or(ast::StateSpace::Generic),
- inner
- },
- args
- )
- }
-}
-
-// Parses `atom ... .cas` on `.b32` / `.b64` with four operands. Omitted semantics,
-// scope and state space default to relaxed / gpu / generic.
-InstAtomCas: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".cas" <typ:BitType> <args:Arg4Atom> => {
- ast::Instruction::AtomCas(
- ast::AtomCasDetails {
- semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
- scope: scope.unwrap_or(ast::MemScope::Gpu),
- space: space.unwrap_or(ast::StateSpace::Generic),
- typ,
- },
- args
- )
- },
-}
-
-// Memory-ordering modifiers accepted on `atom`.
-AtomSemantics: ast::AtomSemantics = {
- ".relaxed" => ast::AtomSemantics::Relaxed,
- ".acquire" => ast::AtomSemantics::Acquire,
- ".release" => ast::AtomSemantics::Release,
- ".acq_rel" => ast::AtomSemantics::AcquireRelease
-}
-
-// State spaces that may be named explicitly on `atom`.
-AtomSpace: ast::StateSpace = {
- ".global" => ast::StateSpace::Global,
- ".shared" => ast::StateSpace::Shared
-}
-
-// Atomic operations that are paired with a `BitType` in `InstAtom`.
-AtomBitOp: ast::AtomBitOp = {
- ".and" => ast::AtomBitOp::And,
- ".or" => ast::AtomBitOp::Or,
- ".xor" => ast::AtomBitOp::Xor,
- ".exch" => ast::AtomBitOp::Exchange,
-}
-
-// Atomic operations paired with an unsigned type in `InstAtom`.
-// `.inc` and `.dec` are handled by dedicated `InstAtom` alternatives, not here.
-AtomUIntOp: ast::AtomUIntOp = {
- ".add" => ast::AtomUIntOp::Add,
- ".min" => ast::AtomUIntOp::Min,
- ".max" => ast::AtomUIntOp::Max,
-}
-
-// Atomic operations paired with a signed type in `InstAtom`.
-AtomSIntOp: ast::AtomSIntOp = {
- ".add" => ast::AtomSIntOp::Add,
- ".min" => ast::AtomSIntOp::Min,
- ".max" => ast::AtomSIntOp::Max,
-}
-
-// Untyped 32- and 64-bit type suffixes.
-BitType: ast::ScalarType = {
- ".b32" => ast::ScalarType::B32,
- ".b64" => ast::ScalarType::B64,
-}
-
-// Unsigned 32- and 64-bit type suffixes.
-UIntType3264: ast::ScalarType = {
- ".u32" => ast::ScalarType::U32,
- ".u64" => ast::ScalarType::U64,
-}
-
-// Signed 32- and 64-bit type suffixes.
-SIntType3264: ast::ScalarType = {
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-div
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-div
-// Parses `div` in its unsigned, signed, `.f32` and `.f64` forms. The `.f32` form
-// takes a `DivFloatKind` and optional `.ftz`; the `.f64` form requires a rounding mode.
-InstDiv: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "div" <typ:UIntType> <args:Arg3> => {
- ast::Instruction::Div(ast::DivDetails::Unsigned(typ), args)
- },
- "div" <typ:SIntType> <args:Arg3> => {
- ast::Instruction::Div(ast::DivDetails::Signed(typ), args)
- },
- "div" <kind:DivFloatKind> <ftz:".ftz"?> ".f32" <args:Arg3> => {
- let float = ast::DivFloatDetails {
- typ: ast::ScalarType::F32,
- flush_to_zero: Some(ftz.is_some()),
- kind
- };
- ast::Instruction::Div(ast::DivDetails::Float(float), args)
- },
- "div" <mode:RoundingModeFloat> ".f64" <args:Arg3> => {
- let float = ast::DivFloatDetails {
- typ: ast::ScalarType::F64,
- flush_to_zero: None,
- kind: ast::DivFloatKind::Rounding(mode)
- };
- ast::Instruction::Div(ast::DivDetails::Float(float), args)
- },
-}
-
-// Kind of `.f32` division: `.approx`, `.full`, or an explicit rounding mode.
-DivFloatKind: ast::DivFloatKind = {
- ".approx" => ast::DivFloatKind::Approx,
- ".full" => ast::DivFloatKind::Full,
- <mode:RoundingModeFloat> => {
- ast::DivFloatKind::Rounding(mode)
- },
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sqrt
-// Parses `sqrt`: `.approx{.ftz}.f32`, `.rnd{.ftz}.f32`, or `.rnd.f64`.
-InstSqrt: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "sqrt" ".approx" <ftz:".ftz"?> ".f32" <args:Arg2> => {
- ast::Instruction::Sqrt(
- ast::SqrtDetails {
- typ: ast::ScalarType::F32,
- flush_to_zero: Some(ftz.is_some()),
- kind: ast::SqrtKind::Approx,
- },
- args
- )
- },
- "sqrt" <mode:RoundingModeFloat> <ftz:".ftz"?> ".f32" <args:Arg2> => {
- ast::Instruction::Sqrt(
- ast::SqrtDetails {
- typ: ast::ScalarType::F32,
- flush_to_zero: Some(ftz.is_some()),
- kind: ast::SqrtKind::Rounding(mode),
- },
- args
- )
- },
- "sqrt" <mode:RoundingModeFloat> ".f64" <args:Arg2> => {
- ast::Instruction::Sqrt(
- ast::SqrtDetails {
- typ: ast::ScalarType::F64,
- flush_to_zero: None,
- kind: ast::SqrtKind::Rounding(mode),
- },
- args
- )
- }
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rsqrt
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rsqrt-approx-ftz-f64
-// Parses `rsqrt.approx{.ftz}` for `.f32` and `.f64`; `flush_to_zero` is a plain bool.
-InstRsqrt: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "rsqrt" ".approx" <ftz:".ftz"?> ".f32" <args:Arg2> => {
- let flush_to_zero = ftz.is_some();
- ast::Instruction::Rsqrt(
- ast::RsqrtDetails { typ: ast::ScalarType::F32, flush_to_zero },
- args
- )
- },
- "rsqrt" ".approx" <ftz:".ftz"?> ".f64" <args:Arg2> => {
- let flush_to_zero = ftz.is_some();
- ast::Instruction::Rsqrt(
- ast::RsqrtDetails { typ: ast::ScalarType::F64, flush_to_zero },
- args
- )
- },
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-neg
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-neg
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-neg
-// Parses `neg`. Types in `NegTypeFtz` accept an optional `.ftz`; types in
-// `NegTypeNonFtz` do not and get `flush_to_zero: None`.
-InstNeg: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "neg" <ftz:".ftz"?> <typ:NegTypeFtz> <args:Arg2> => {
- ast::Instruction::Neg(
- ast::NegDetails { typ, flush_to_zero: Some(ftz.is_some()) },
- args
- )
- },
- "neg" <typ:NegTypeNonFtz> <args:Arg2> => {
- ast::Instruction::Neg(
- ast::NegDetails { typ, flush_to_zero: None },
- args
- )
- },
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sin
-// Parses `sin.approx{.ftz}.f32`.
-InstSin: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "sin" ".approx" <ftz:".ftz"?> ".f32" <arg:Arg2> => {
- let flush_to_zero = ftz.is_some();
- ast::Instruction::Sin { flush_to_zero, arg }
- },
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-cos
-// Parses `cos.approx{.ftz}.f32`.
-InstCos: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "cos" ".approx" <ftz:".ftz"?> ".f32" <arg:Arg2> => {
- let flush_to_zero = ftz.is_some();
- ast::Instruction::Cos { flush_to_zero, arg }
- },
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-lg2
-// Parses `lg2.approx{.ftz}.f32`.
-InstLg2: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "lg2" ".approx" <ftz:".ftz"?> ".f32" <arg:Arg2> => {
- let flush_to_zero = ftz.is_some();
- ast::Instruction::Lg2 { flush_to_zero, arg }
- },
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-ex2
-// Parses `ex2.approx{.ftz}.f32`.
-InstEx2: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "ex2" ".approx" <ftz:".ftz"?> ".f32" <arg:Arg2> => {
- let flush_to_zero = ftz.is_some();
- ast::Instruction::Ex2 { flush_to_zero, arg }
- },
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-clz
-// Parses `clz` with a `BitType` suffix and two operands.
-InstClz: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "clz" <typ:BitType> <arg:Arg2> => ast::Instruction::Clz { typ, arg }
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-brev
-// Parses `brev` with a `BitType` suffix and two operands.
-InstBrev: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "brev" <typ:BitType> <arg:Arg2> => ast::Instruction::Brev { typ, arg }
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-popc
-// Parses `popc` with a `BitType` suffix and two operands.
-InstPopc: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "popc" <typ:BitType> <arg:Arg2> => ast::Instruction::Popc { typ, arg }
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-xor
-// Parses `xor` with a `BooleanType` suffix and three operands.
-InstXor: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "xor" <typ:BooleanType> <arg:Arg3> => ast::Instruction::Xor { typ, arg }
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfe
-// Parses `bfe` with an `IntType3264` suffix and four operands.
-InstBfe: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "bfe" <typ:IntType3264> <arg:Arg4> => ast::Instruction::Bfe { typ, arg }
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfi
-// Parses `bfi` with a `BitType` suffix and five operands.
-InstBfi: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "bfi" <typ:BitType> <arg:Arg5> => ast::Instruction::Bfi { typ, arg }
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-prmt
-// Parses `prmt.b32` with three operands followed by a 16-bit numeric control value.
-InstPrmt: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "prmt" ".b32" <arg:Arg3> "," <control:U16Num> => ast::Instruction::Prmt { arg, control }
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-rem
-// Parses `rem` with an `IntType` suffix and three operands.
-InstRem: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "rem" <typ:IntType> <arg:Arg3> => ast::Instruction::Rem { typ, arg }
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-activemask
-// Parses `activemask.b32` with a single operand.
-InstActivemask: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "activemask" ".b32" <arg:Arg1> => ast::Instruction::Activemask { arg }
-}
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar
-// Parses `membar` followed by a `MembarLevel`.
-InstMembar: ast::Instruction<ast::ParsedArgParams<'input>> = {
- "membar" <level:MembarLevel> => ast::Instruction::Membar { level }
-}
-
-// `neg` types that `InstNeg` combines with an optional `.ftz` modifier.
-NegTypeFtz: ast::ScalarType = {
- ".f16" => ast::ScalarType::F16,
- ".f16x2" => ast::ScalarType::F16x2,
- ".f32" => ast::ScalarType::F32,
-}
-
-// `neg` types that `InstNeg` parses without any `.ftz` modifier.
-NegTypeNonFtz: ast::ScalarType = {
- ".s16" => ast::ScalarType::S16,
- ".s32" => ast::ScalarType::S32,
- ".s64" => ast::ScalarType::S64,
- ".f64" => ast::ScalarType::F64
-}
-
-// Type and modifier combinations for arithmetic instructions: unsigned, signed
-// (saturating only as `.sat.s32`), or one of the float forms from `ArithFloat`.
-ArithDetails: ast::ArithDetails = {
- <typ:UIntType> => ast::ArithDetails::Unsigned(typ),
- <typ:SIntType> => {
- let signed = ast::ArithSInt { typ, saturate: false };
- ast::ArithDetails::Signed(signed)
- },
- ".sat" ".s32" => {
- let signed = ast::ArithSInt { typ: ast::ScalarType::S32, saturate: true };
- ast::ArithDetails::Signed(signed)
- },
- <float:ArithFloat> => ast::ArithDetails::Float(float)
-}
-
-// Float arithmetic modifiers with an optional rounding mode. `.f32` takes any float
-// rounding mode plus `.ftz`/`.sat`; `.f64` takes only a rounding mode; `.f16` and
-// `.f16x2` accept only `.rn` (recorded as `NearestEven`) plus `.ftz`/`.sat`.
-ArithFloat: ast::ArithFloat = {
- <mode:RoundingModeFloat?> <ftz:".ftz"?> <sat:".sat"?> ".f32" => {
- ast::ArithFloat {
- typ: ast::ScalarType::F32,
- rounding: mode,
- flush_to_zero: Some(ftz.is_some()),
- saturate: sat.is_some(),
- }
- },
- <mode:RoundingModeFloat?> ".f64" => {
- ast::ArithFloat {
- typ: ast::ScalarType::F64,
- rounding: mode,
- flush_to_zero: None,
- saturate: false,
- }
- },
- <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16" => {
- let rounding = rn.map(|_token| ast::RoundingMode::NearestEven);
- ast::ArithFloat {
- typ: ast::ScalarType::F16,
- rounding,
- flush_to_zero: Some(ftz.is_some()),
- saturate: sat.is_some(),
- }
- },
- <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16x2" => {
- let rounding = rn.map(|_token| ast::RoundingMode::NearestEven);
- ast::ArithFloat {
- typ: ast::ScalarType::F16x2,
- rounding,
- flush_to_zero: Some(ftz.is_some()),
- saturate: sat.is_some(),
- }
- },
-}
-
-// Same shapes as `ArithFloat`, but the rounding mode is mandatory, so `rounding`
-// is always `Some(..)`.
-ArithFloatMustRound: ast::ArithFloat = {
- <mode:RoundingModeFloat> <ftz:".ftz"?> <sat:".sat"?> ".f32" => {
- ast::ArithFloat {
- typ: ast::ScalarType::F32,
- rounding: Some(mode),
- flush_to_zero: Some(ftz.is_some()),
- saturate: sat.is_some(),
- }
- },
- <mode:RoundingModeFloat> ".f64" => {
- ast::ArithFloat {
- typ: ast::ScalarType::F64,
- rounding: Some(mode),
- flush_to_zero: None,
- saturate: false,
- }
- },
- ".rn" <ftz:".ftz"?> <sat:".sat"?> ".f16" => {
- ast::ArithFloat {
- typ: ast::ScalarType::F16,
- rounding: Some(ast::RoundingMode::NearestEven),
- flush_to_zero: Some(ftz.is_some()),
- saturate: sat.is_some(),
- }
- },
- ".rn" <ftz:".ftz"?> <sat:".sat"?> ".f16x2" => {
- ast::ArithFloat {
- typ: ast::ScalarType::F16x2,
- rounding: Some(ast::RoundingMode::NearestEven),
- flush_to_zero: Some(ftz.is_some()),
- saturate: sat.is_some(),
- }
- },
-}
-
-// A general operand: a register name, a register plus signed 32-bit offset,
-// or an immediate value.
-Operand: ast::Operand<&'input str> = {
- <reg:ExtendedID> => ast::Operand::Reg(reg),
- <reg:ExtendedID> "+" <off:S32Num> => ast::Operand::RegOffset(reg, off),
- <imm:ImmediateValue> => ast::Operand::Imm(imm)
-};
-
-// An operand in a call argument list: a register name or an immediate value.
-CallOperand: ast::Operand<&'input str> = {
- <reg:ExtendedID> => ast::Operand::Reg(reg),
- <imm:ImmediateValue> => ast::Operand::Imm(imm)
-};
-
-// TODO: start parsing whole constants sub-language:
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#constants
-// An immediate constant: an integer literal with optional unary minus, or an
-// `F32Num` / `F64Num` float literal. Integer parse failures are pushed onto `errors`
-// and a zero placeholder value is returned.
-ImmediateValue: ast::ImmediateValue = {
- // TODO: treat negation correctly
- <neg:"-"?> <x:NumToken> => {
- let (num, radix, is_unsigned) = x;
- if neg.is_some() {
- // Parse sign and digits together. Parsing the magnitude as i64 and negating
- // afterwards rejected the most negative value, whose magnitude exceeds i64::MAX.
- // NOTE(review): assumes `num` holds bare digits with no sign of its own.
- match i64::from_str_radix(&format!("-{}", num), radix) {
- Ok(x) => ast::ImmediateValue::S64(x),
- Err(err) => {
- errors.push(ParseError::User { error: ast::PtxError::from(err) });
- ast::ImmediateValue::S64(0)
- }
- }
- } else if is_unsigned {
- match u64::from_str_radix(num, radix) {
- Ok(x) => ast::ImmediateValue::U64(x),
- Err(err) => {
- errors.push(ParseError::User { error: ast::PtxError::from(err) });
- ast::ImmediateValue::U64(0)
- }
- }
- } else {
- // Not marked unsigned: prefer a signed value, fall back to unsigned when the
- // literal does not fit in i64.
- match i64::from_str_radix(num, radix) {
- Ok(x) => ast::ImmediateValue::S64(x),
- Err(_) => {
- match u64::from_str_radix(num, radix) {
- Ok(x) => ast::ImmediateValue::U64(x),
- Err(err) => {
- errors.push(ParseError::User { error: ast::PtxError::from(err) });
- ast::ImmediateValue::U64(0)
- }
- }
- }
- }
- }
- },
- <f:F32Num> => {
- ast::ImmediateValue::F32(f)
- },
- <f:F64Num> => {
- ast::ImmediateValue::F64(f)
- }
-}
-
-// A single identifier operand.
-Arg1: ast::Arg1<ast::ParsedArgParams<'input>> = {
- <src:ExtendedID> => ast::Arg1 { src }
-};
-
-// The single operand of a barrier instruction.
-Arg1Bar: ast::Arg1Bar<ast::ParsedArgParams<'input>> = {
- <src:Operand> => ast::Arg1Bar { src }
-};
-
-// Destination followed by one source operand.
-Arg2: ast::Arg2<ast::ParsedArgParams<'input>> = {
- <dst:DstOperand> "," <src:Operand> => ast::Arg2 { dst, src }
-};
-
-// A vector member access such as `reg.x`, yielding the register name and the member
-// index computed by `vector_index`. The suffix may arrive either as "." followed by
-// an identifier or as a single `DotID` token (whose leading character is skipped).
-// A suffix rejected by `vector_index` is reported via `errors` and index 0 is used.
-MemberOperand: (&'input str, u8) = {
- <reg:ExtendedID> "." <member:ExtendedID> => {
- let index = vector_index(member).unwrap_or_else(|err| {
- errors.push(err);
- 0
- });
- (reg, index)
- },
- <reg:ExtendedID> <member:DotID> => {
- let index = vector_index(&member[1..]).unwrap_or_else(|err| {
- errors.push(err);
- 0
- });
- (reg, index)
- }
-};
-
-// A brace-enclosed list of exactly two or exactly four identifiers.
-VectorExtract: Vec<&'input str> = {
- "{" <a:ExtendedID> "," <b:ExtendedID> "}" => vec![a, b],
- "{" <a:ExtendedID> "," <b:ExtendedID> "," <c:ExtendedID> "," <d:ExtendedID> "}" => vec![a, b, c, d],
-};
-
-// Destination followed by two source operands.
-Arg3: ast::Arg3<ast::ParsedArgParams<'input>> = {
- <dst:DstOperand> "," <src1:Operand> "," <src2:Operand> => ast::Arg3 { dst, src1, src2 }
-};
-
-// Like `Arg3`, but the first source operand is written in square brackets.
-Arg3Atom: ast::Arg3<ast::ParsedArgParams<'input>> = {
- <dst:DstOperand> "," "[" <src1:Operand> "]" "," <src2:Operand> => ast::Arg3 { dst, src1, src2 }
-};
-
-// Destination followed by three source operands.
-Arg4: ast::Arg4<ast::ParsedArgParams<'input>> = {
- <dst:DstOperand> "," <src1:Operand> "," <src2:Operand> "," <src3:Operand> => ast::Arg4 { dst, src1, src2, src3 }
-};
-
-// Like `Arg4`, but the first source operand is written in square brackets.
-Arg4Atom: ast::Arg4<ast::ParsedArgParams<'input>> = {
- <dst:DstOperand> "," "[" <src1:Operand> "]" "," <src2:Operand> "," <src3:Operand> => ast::Arg4 { dst, src1, src2, src3 }
-};
-
-// One destination, an optional second destination (`|dst2`), and two source operands.
-Arg4Setp: ast::Arg4Setp<ast::ParsedArgParams<'input>> = {
- <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> => ast::Arg4Setp { dst1, dst2, src1, src2 }
-};
-
-// Destination followed by four source operands.
-Arg5: ast::Arg5<ast::ParsedArgParams<'input>> = {
- <dst:DstOperand> "," <src1:Operand> "," <src2:Operand> "," <src3:Operand> "," <src4:Operand> => ast::Arg5 { dst, src1, src2, src3, src4 }
-};
-
-// TODO: pass src3 negation somewhere
-// Like `Arg4Setp` with a third source operand. An optional "!" before the third
-// source is accepted by the grammar but is not recorded in the result.
-Arg5Setp: ast::Arg5Setp<ast::ParsedArgParams<'input>> = {
- <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> "," "!"? <src3:Operand> => ast::Arg5Setp { dst1, dst2, src1, src2, src3 }
-};
-
-// Operands of `call` as (return parameters, callee name, argument list). Both
-// parenthesised lists are optional; a missing list is returned as an empty vector.
-ArgCall: (Vec<&'input str>, &'input str, Vec<ast::Operand<&'input str>>) = {
- "(" <rets:Comma<ExtendedID>> ")" "," <callee:ExtendedID> "," "(" <params:Comma<CallOperand>> ")" => (rets, callee, params),
- "(" <rets:Comma<ExtendedID>> ")" "," <callee:ExtendedID> => (rets, callee, Vec::new()),
- <callee:ExtendedID> "," "(" <params:Comma<CallOperand>> ")" => {
- (Vec::new(), callee, params)
- },
- <callee:ExtendedID> => {
- (Vec::new(), callee, Vec::<ast::Operand<_>>::new())
- },
-};
-
-// A second destination written as "|" followed by an identifier; yields the identifier.
-OptionalDst: &'input str = {
- "|" <ExtendedID>
-}
-
-// A source operand: register, register plus offset, immediate, or vector member.
-SrcOperand: ast::Operand<&'input str> = {
- <reg:ExtendedID> => ast::Operand::Reg(reg),
- <reg:ExtendedID> "+" <off:S32Num> => ast::Operand::RegOffset(reg, off),
- <imm:ImmediateValue> => ast::Operand::Imm(imm),
- <member:MemberOperand> => ast::Operand::VecMember(member.0, member.1)
-}
-
-// A `SrcOperand`, or a brace-enclosed register list packed as `VecPack`.
-SrcOperandVec: ast::Operand<&'input str> = {
- <scalar:SrcOperand> => scalar,
- <regs:VectorExtract> => {
- ast::Operand::VecPack(regs)
- },
-}
-
-// A destination operand: a register or a vector member.
-DstOperand: ast::Operand<&'input str> = {
- <reg:ExtendedID> => ast::Operand::Reg(reg),
- <member:MemberOperand> => ast::Operand::VecMember(member.0, member.1)
-}
-
-// A `DstOperand`, or a brace-enclosed register list packed as `VecPack`.
-DstOperandVec: ast::Operand<&'input str> = {
- <scalar:DstOperand> => scalar,
- <regs:VectorExtract> => {
- ast::Operand::VecPack(regs)
- },
-}
-
-// Vector width prefix: `.v2` yields 2, `.v4` yields 4.
-VectorPrefix: u8 = {
- ".v2" => 2,
- ".v4" => 4
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-file
-// `.file` directive: a number and a string, optionally followed by two more
-// comma-separated numbers. No action code is attached.
-File = {
- ".file" U32Num String ("," U32Num "," U32Num)?
-};
-
-// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-section
-// `.section` directive: a dotted name and a brace-enclosed list of
-// `SectionDwarfLines`. No action code is attached.
-Section = {
- ".section" DotID "{" SectionDwarfLines* "}"
-};
-
-// One line inside a `.section` body: either a bit type with a comma-separated list
-// of numbers, or `.b32`/`.b64` with a label and an optional `+ offset`.
-// The parsed content is discarded (the rule yields `()`).
-SectionDwarfLines: () = {
- AnyBitType Comma<U32Num>,
- ".b32" SectionLabel,
- ".b64" SectionLabel,
- ".b32" SectionLabel "+" U32Num,
- ".b64" SectionLabel "+" U32Num,
-};
-
-// A label referenced from a section line: a plain or a dot-prefixed identifier.
-SectionLabel = {
- ID,
- DotID
-};
-
-// Any of the untyped bit-size suffixes `.b8`, `.b16`, `.b32`, `.b64`.
-AnyBitType = {
- ".b8", ".b16", ".b32", ".b64"
-};
-
-// A scalar variable declaration: optional alignment, a type parsed by `T`, and a
-// name, returned as a tuple in that order.
-VariableScalar<T>: (Option<u32>, T, &'input str) = {
- <align:Align?> <v_type:T> <name:ExtendedID> => (align, v_type, name)
-}
-
-// A vector variable declaration: optional alignment, vector width, a type parsed by
-// `T`, and a name, returned as a tuple in that order.
-VariableVector<T>: (Option<u32>, u8, T, &'input str) = {
- <align:Align?> <v_len:VectorPrefix> <v_type:T> <name:ExtendedID> => (align, v_len, v_type, name)
-}
-
-// empty dimensions [0] means it's a pointer
-// An array (or pointer) variable declaration with optional alignment and optional
-// initializer. Without an initializer, the single dimension `[0]` denotes a pointer.
-// NOTE(review): the type is always parsed with `SizedScalarType`, not with the
-// macro parameter `T` — confirm that is intended.
-VariableArrayOrPointer<T>: (Option<u32>, T, &'input str, ast::ArrayOrPointer) = {
- <align:Align?> <typ:SizedScalarType> <name:ExtendedID> <dims:ArrayDimensions> <init:ArrayInitializer?> => {
- let mut dimensions = dims;
- let kind = if let Some(initializer) = init {
- // A malformed initializer is reported and replaced by an empty one.
- let init = initializer.to_vec(typ, &mut dimensions).unwrap_or_else(|error| {
- errors.push(ParseError::User { error });
- Vec::new()
- });
- ast::ArrayOrPointer::Array { dimensions, init }
- } else {
- // A zero dimension is only allowed as the sole dimension.
- if dimensions.len() > 1 && dimensions.contains(&0) {
- errors.push(ParseError::User { error: ast::PtxError::ZeroDimensionArray });
- }
- if dimensions == [0] {
- ast::ArrayOrPointer::Pointer
- } else {
- ast::ArrayOrPointer::Array { dimensions, init: Vec::new() }
- }
- };
- (align, typ, name, kind)
- }
-}
-
-// [0] and [] are treated the same
-// A list of array dimensions. A leading empty `[]` is recorded as dimension 0.
-ArrayDimensions: Vec<u32> = {
- ArrayEmptyDimension => vec![0u32],
- ArrayEmptyDimension <rest:ArrayDimension+> => {
- let mut all = Vec::with_capacity(rest.len() + 1);
- all.push(0u32);
- all.extend(rest);
- all
- },
- <sized:ArrayDimension+> => sized
-}
-
-// An empty pair of square brackets, `[]`.
-ArrayEmptyDimension = {
- "[" "]"
-}
-
-// A single explicit dimension `[n]`; yields `n`.
-ArrayDimension: u32 = {
- "[" <U32Num> "]",
-}
-
-// An initializer: "=" followed by a brace-enclosed value list; yields the list.
-ArrayInitializer: ast::NumsOrArrays<'input> = {
- "=" <NumsOrArraysBracket>
-}
-
-// A brace-enclosed `NumsOrArrays`; yields the inner value.
-NumsOrArraysBracket: ast::NumsOrArrays<'input> = {
- "{" <NumsOrArrays> "}"
-}
-
-// Either a (possibly empty) comma-separated list of nested bracketed lists, or a
-// non-empty comma-separated list of numbers kept as (text, radix) pairs.
-NumsOrArrays: ast::NumsOrArrays<'input> = {
- <nested:Comma<NumsOrArraysBracket>> => ast::NumsOrArrays::Arrays(nested),
- <tokens:CommaNonEmpty<NumToken>> => {
- // The third tuple element of each number token is not kept.
- let nums = tokens.into_iter().map(|(text, radix, _)| (text, radix)).collect();
- ast::NumsOrArrays::Nums(nums)
- },
-}
-
-// Zero or more `T` separated by commas; a trailing comma is allowed.
-Comma<T>: Vec<T> = {
- <head:(<T> ",")*> <last:T?> => {
- let mut items = head;
- // Appends the final element only when one is present.
- items.extend(last);
- items
- }
-};
-
-// One or more `T` separated by commas, with no trailing comma.
-CommaNonEmpty<T>: Vec<T> = {
- <head:(<T> ",")*> <last:T> => {
- let mut items = head;
- items.push(last);
- items
- }
-};
-
-// Matches either `T1` or `T2`. The result type is declared as `T1`, so both
-// parameters must produce that type.
-#[inline]
-Or<T1, T2>: T1 = {
- T1,
- T2
-}
-
-// Matches any one of `T1`, `T2` or `T3`. The result type is declared as `T1`, so
-// all three parameters must produce that type.
-#[inline]
-Or3<T1, T2, T3>: T1 = {
- T1,
- T2,
- T3
-} \ No newline at end of file