diff options
Diffstat (limited to 'ptx/src/ptx.lalrpop')
-rw-r--r-- | ptx/src/ptx.lalrpop | 2198 |
1 files changed, 0 insertions, 2198 deletions
diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop deleted file mode 100644 index e3a4022..0000000 --- a/ptx/src/ptx.lalrpop +++ /dev/null @@ -1,2198 +0,0 @@ -use crate::ast; -use crate::ast::UnwrapWithVec; -use crate::{without_none, vector_index}; - -use lalrpop_util::ParseError; -use std::convert::TryInto; - -grammar<'err>(errors: &'err mut Vec<ParseError<usize, Token<'input>, ast::PtxError>>); - -extern { - type Error = ast::PtxError; -} - -match { - r"\s+" => { }, - r"//[^\n\r]*[\n\r]*" => { }, - r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/" => { }, - r"0[fF][0-9a-zA-Z]{8}" => F32NumToken, - r"0[dD][0-9a-zA-Z]{16}" => F64NumToken, - r"0[xX][0-9a-zA-Z]+U?" => HexNumToken, - r"[0-9]+U?" => DecimalNumToken, - r#""[^"]*""# => String, - r"[0-9]+\.[0-9]+" => VersionNumber, - "!", - "(", ")", - "+", - "-", - ",", - ".", - ":", - ";", - "@", - "[", "]", - "{", "}", - "<", ">", - "|", - "=", - ".acq_rel", - ".acquire", - ".add", - ".address_size", - ".align", - ".aligned", - ".and", - ".approx", - ".b16", - ".b32", - ".b64", - ".b8", - ".ca", - ".cas", - ".cg", - ".const", - ".cs", - ".cta", - ".cv", - ".dec", - ".entry", - ".eq", - ".equ", - ".exch", - ".extern", - ".f16", - ".f16x2", - ".f32", - ".f64", - ".file", - ".ftz", - ".full", - ".func", - ".ge", - ".geu", - ".gl", - ".global", - ".gpu", - ".gt", - ".gtu", - ".hi", - ".hs", - ".inc", - ".le", - ".leu", - ".lo", - ".loc", - ".local", - ".ls", - ".lt", - ".ltu", - ".lu", - ".max", - ".maxnreg", - ".maxntid", - ".minnctapersm", - ".min", - ".nan", - ".NaN", - ".nc", - ".ne", - ".neu", - ".num", - ".or", - ".param", - ".pragma", - ".pred", - ".reg", - ".relaxed", - ".release", - ".reqntid", - ".rm", - ".rmi", - ".rn", - ".rni", - ".rp", - ".rpi", - ".rz", - ".rzi", - ".s16", - ".s32", - ".s64", - ".s8" , - ".sat", - ".section", - ".shared", - ".sync", - ".sys", - ".target", - ".to", - ".u16", - ".u32", - ".u64", - ".u8" , - ".uni", - ".v2", - ".v4", - ".version", - ".visible", - ".volatile", - ".wb", - ".weak", - ".wide", 
".wt",
    ".xor",
} else {
    // IF YOU ARE ADDING A NEW TOKEN HERE ALSO ADD IT BELOW TO ExtendedID
    "abs",
    "activemask",
    "add",
    "and",
    "atom",
    "bar",
    "barrier",
    "bfe",
    "bfi",
    "bra",
    "brev",
    "call",
    "clz",
    "cos",
    "cvt",
    "cvta",
    "debug",
    "div",
    "ex2",
    "fma",
    "ld",
    "lg2",
    "mad",
    "map_f64_to_f32",
    "max",
    "membar",
    "min",
    "mov",
    "mul",
    "neg",
    "not",
    "or",
    "popc",
    "prmt",
    "rcp",
    "rem",
    "ret",
    "rsqrt",
    "selp",
    "setp",
    "shl",
    "shr",
    "sin",
    r"sm_[0-9]+" => ShaderModel,
    "sqrt",
    "st",
    "sub",
    "texmode_independent",
    "texmode_unified",
    "xor",
} else {
    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#identifiers
    r"[a-zA-Z][a-zA-Z0-9_$]*|[_$%][a-zA-Z0-9_$]+" => ID,
    r"\.[a-zA-Z][a-zA-Z0-9_$]*" => DotID,
}

// Identifier position: accepts a plain ID plus every keyword-like token from
// the middle lexer tier above, so that instruction mnemonics can still be
// used as names. Keep this list in sync with that tier.
ExtendedID : &'input str = {
    "abs",
    "activemask",
    "add",
    "and",
    "atom",
    "bar",
    "barrier",
    "bfe",
    "bfi",
    "bra",
    "brev",
    "call",
    "clz",
    "cos",
    "cvt",
    "cvta",
    "debug",
    "div",
    "ex2",
    "fma",
    "ld",
    "lg2",
    "mad",
    "map_f64_to_f32",
    "max",
    "membar",
    "min",
    "mov",
    "mul",
    "neg",
    "not",
    "or",
    "popc",
    "prmt",
    "rcp",
    "rem",
    "ret",
    "rsqrt",
    "selp",
    "setp",
    "shl",
    "shr",
    "sin",
    ShaderModel,
    "sqrt",
    "st",
    "sub",
    "texmode_independent",
    "texmode_unified",
    "xor",
    ID
}

// Splits an integer literal into (digits, radix, has_U_suffix).
// Hex literals lose their two-character `0x`/`0X` prefix; decimal-looking
// literals that start with `0` are treated as octal.
NumToken: (&'input str, u32, bool) = {
    <s:HexNumToken> => {
        if s.ends_with('U') {
            (&s[2..s.len() - 1], 16, true)
        } else {
            (&s[2..], 16, false)
        }
    },
    <s:DecimalNumToken> => {
        let radix = if s.starts_with('0') { 8 } else { 10 };
        if s.ends_with('U') {
            (&s[..s.len() - 1], radix, true)
        } else {
            (s, radix, false)
        }
    }
}

// Bit-pattern float literal: two-character `0f`/`0F` prefix followed by
// eight characters that are parsed as a hexadecimal u32.
F32Num: f32 = {
    <s:F32NumToken> => {
        // The lexer regex admits any alphanumeric character, so the hex
        // parse can fail; that is reported through `errors` and the value
        // defaults to 0.0.
        match u32::from_str_radix(&s[2..], 16) {
            // `from_bits` is the safe equivalent of transmuting u32 -> f32.
            Ok(x) => f32::from_bits(x),
            Err(err) => {
                errors.push(ParseError::User { error: ast::PtxError::from(err) });
                0.0
            }
        }

    }
}

F64Num: f64 = 
{ - <s:F64NumToken> => { - match u64::from_str_radix(&s[2..], 16) { - Ok(x) => unsafe { std::mem::transmute::<_, f64>(x) }, - Err(err) => { - errors.push(ParseError::User { error: ast::PtxError::from(err) }); - 0.0 - } - } - } -} - -U8Num: u8 = { - <x:NumToken> => { - let (text, radix, _) = x; - match u8::from_str_radix(text, radix) { - Ok(x) => x, - Err(err) => { - errors.push(ParseError::User { error: ast::PtxError::from(err) }); - 0 - } - } - } -} - -U16Num: u16 = { - <x:NumToken> => { - let (text, radix, _) = x; - match u16::from_str_radix(text, radix) { - Ok(x) => x, - Err(err) => { - errors.push(ParseError::User { error: ast::PtxError::from(err) }); - 0 - } - } - } -} - -U32Num: u32 = { - <x:NumToken> => { - let (text, radix, _) = x; - match u32::from_str_radix(text, radix) { - Ok(x) => x, - Err(err) => { - errors.push(ParseError::User { error: ast::PtxError::from(err) }); - 0 - } - } - } -} - -// TODO: handle negative number properly -S32Num: i32 = { - <sign:"-"?> <x:NumToken> => { - let (text, radix, _) = x; - match i32::from_str_radix(text, radix) { - Ok(x) => if sign.is_some() { -x } else { x }, - Err(err) => { - errors.push(ParseError::User { error: ast::PtxError::from(err) }); - 0 - } - } - } -} - -pub Module: ast::Module<'input> = { - <v:Version> Target <d:Directive*> => { - ast::Module { version: v, directives: without_none(d) } - } -}; - -Version: (u8, u8) = { - ".version" <v:VersionNumber> => { - let dot = v.find('.').unwrap(); - let major = v[..dot].parse::<u8>().unwrap_or_else(|err| { - errors.push(ParseError::User { error: ast::PtxError::from(err) }); - 0 - }); - let minor = v[dot+1..].parse::<u8>().unwrap_or_else(|err| { - errors.push(ParseError::User { error: ast::PtxError::from(err) }); - 0 - }); - (major,minor) - } -} - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#ptx-module-directives-target -Target = { - ".target" Comma<TargetSpecifier> -}; - -TargetSpecifier = { - ShaderModel, - "texmode_unified", - 
"texmode_independent", - "debug", - "map_f64_to_f32" -}; - -Directive: Option<ast::Directive<'input, ast::ParsedArgParams<'input>>> = { - AddressSize => None, - <f:Function> => { - let (linking, func) = f; - Some(ast::Directive::Method(linking, func)) - }, - File => None, - Section => None, - <v:ModuleVariable> ";" => { - let (linking, var) = v; - Some(ast::Directive::Variable(linking, var)) - }, - @L ! @R => { - let (start, _, end)= (<>); - errors.push(ParseError::User { error: - ast::PtxError::UnrecognizedDirective { start, end } - }); - None - } -}; - -AddressSize = { - ".address_size" U8Num -}; - -Function: (ast::LinkingDirective, ast::Function<'input, &'input str, ast::Statement<ast::ParsedArgParams<'input>>>) = { - <linking:LinkingDirectives> - <func_directive:MethodDeclaration> - <tuning:TuningDirective*> - <body:FunctionBody> => { - (linking, ast::Function{func_directive, tuning, body}) - } -}; - -LinkingDirective: ast::LinkingDirective = { - ".extern" => ast::LinkingDirective::EXTERN, - ".visible" => ast::LinkingDirective::VISIBLE, - ".weak" => ast::LinkingDirective::WEAK, -}; - -TuningDirective: ast::TuningDirective = { - ".maxnreg" <ncta:U32Num> => ast::TuningDirective::MaxNReg(ncta), - ".maxntid" <nx:U32Num> => ast::TuningDirective::MaxNtid(nx, 1, 1), - ".maxntid" <nx:U32Num> "," <ny:U32Num> => ast::TuningDirective::MaxNtid(nx, ny, 1), - ".maxntid" <nx:U32Num> "," <ny:U32Num> "," <nz:U32Num> => ast::TuningDirective::MaxNtid(nx, ny, nz), - ".reqntid" <nx:U32Num> => ast::TuningDirective::ReqNtid(nx, 1, 1), - ".reqntid" <nx:U32Num> "," <ny:U32Num> => ast::TuningDirective::ReqNtid(nx, ny, 1), - ".reqntid" <nx:U32Num> "," <ny:U32Num> "," <nz:U32Num> => ast::TuningDirective::ReqNtid(nx, ny, nz), - ".minnctapersm" <ncta:U32Num> => ast::TuningDirective::MinNCtaPerSm(ncta), -}; - -LinkingDirectives: ast::LinkingDirective = { - <ldirs:LinkingDirective*> => { - ldirs.into_iter().fold(ast::LinkingDirective::NONE, |x, y| x | y) - } -} - -MethodDeclaration: 
ast::MethodDeclaration<'input, &'input str> = { - ".entry" <name:ExtendedID> <input_arguments:KernelArguments> => { - let return_arguments = Vec::new(); - let name = ast::MethodName::Kernel(name); - ast::MethodDeclaration{ return_arguments, name, input_arguments, shared_mem: None } - }, - ".func" <return_arguments:FnArguments?> <name:ExtendedID> <input_arguments:FnArguments> => { - let return_arguments = return_arguments.unwrap_or_else(|| Vec::new()); - let name = ast::MethodName::Func(name); - ast::MethodDeclaration{ return_arguments, name, input_arguments, shared_mem: None } - } -}; - -KernelArguments: Vec<ast::Variable<&'input str>> = { - "(" <args:Comma<KernelInput>> ")" => args -}; - -FnArguments: Vec<ast::Variable<&'input str>> = { - "(" <args:Comma<FnInput>> ")" => args -}; - -KernelInput: ast::Variable<&'input str> = { - <v:ParamDeclaration> => { - let (align, v_type, name) = v; - ast::Variable { - align, - v_type, - state_space: ast::StateSpace::Param, - name, - array_init: Vec::new() - } - } -} - -FnInput: ast::Variable<&'input str> = { - <v:RegVariable> => { - let (align, v_type, name) = v; - let state_space = ast::StateSpace::Reg; - ast::Variable{ align, v_type, state_space, name, array_init: Vec::new() } - }, - <v:ParamDeclaration> => { - let (align, v_type, name) = v; - let state_space = ast::StateSpace::Param; - ast::Variable{ align, v_type, state_space, name, array_init: Vec::new() } - } -} - -FunctionBody: Option<Vec<ast::Statement<ast::ParsedArgParams<'input>>>> = { - "{" <s:Statement*> "}" => { Some(without_none(s)) }, - ";" => { None } -}; - -StateSpaceSpecifier: ast::StateSpace = { - ".reg" => ast::StateSpace::Reg, - ".const" => ast::StateSpace::Const, - ".global" => ast::StateSpace::Global, - ".local" => ast::StateSpace::Local, - ".shared" => ast::StateSpace::Shared, - ".param" => ast::StateSpace::Param, // used to prepare function call -}; - -#[inline] -ScalarType: ast::ScalarType = { - ".f16" => ast::ScalarType::F16, - ".f16x2" => 
ast::ScalarType::F16x2, - ".pred" => ast::ScalarType::Pred, - ".b8" => ast::ScalarType::B8, - ".b16" => ast::ScalarType::B16, - ".b32" => ast::ScalarType::B32, - ".b64" => ast::ScalarType::B64, - ".u8" => ast::ScalarType::U8, - ".u16" => ast::ScalarType::U16, - ".u32" => ast::ScalarType::U32, - ".u64" => ast::ScalarType::U64, - ".s8" => ast::ScalarType::S8, - ".s16" => ast::ScalarType::S16, - ".s32" => ast::ScalarType::S32, - ".s64" => ast::ScalarType::S64, - ".f32" => ast::ScalarType::F32, - ".f64" => ast::ScalarType::F64, -}; - -Statement: Option<ast::Statement<ast::ParsedArgParams<'input>>> = { - <l:Label> => Some(ast::Statement::Label(l)), - DebugDirective => None, - <v:MultiVariable> ";" => Some(ast::Statement::Variable(v)), - <p:PredAt?> <i:Instruction> ";" => Some(ast::Statement::Instruction(p, i)), - PragmaStatement => None, - "{" <s:Statement*> "}" => Some(ast::Statement::Block(without_none(s))), - @L ! ";" @R => { - let (start, _, _, end) = (<>); - errors.push(ParseError::User { error: - ast::PtxError::UnrecognizedStatement { start, end } - }); - None - } -}; - -PragmaStatement: () = { - ".pragma" String ";" -} - -DebugDirective: () = { - DebugLocation -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-loc -DebugLocation = { - ".loc" U32Num U32Num U32Num -}; - -Label: &'input str = { - <id:ExtendedID> ":" => id -}; - -Align: u32 = { - ".align" <x:U32Num> => x -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameterized-variable-names -MultiVariable: ast::MultiVariable<&'input str> = { - <var:Variable> <count:VariableParam?> => ast::MultiVariable{<>} -} - -VariableParam: u32 = { - "<" <n:U32Num> ">" => n -} - -Variable: ast::Variable<&'input str> = { - <v:RegVariable> => { - let (align, v_type, name) = v; - let state_space = ast::StateSpace::Reg; - ast::Variable {align, v_type, state_space, name, array_init: Vec::new()} - }, - LocalVariable, - <v:ParamVariable> => { - let (align, 
array_init, v_type, name) = v; - let state_space = ast::StateSpace::Param; - ast::Variable {align, v_type, state_space, name, array_init} - }, - SharedVariable, -}; - -RegVariable: (Option<u32>, ast::Type, &'input str) = { - ".reg" <var:VariableScalar<ScalarType>> => { - let (align, t, name) = var; - let v_type = ast::Type::Scalar(t); - (align, v_type, name) - }, - ".reg" <var:VariableVector<SizedScalarType>> => { - let (align, v_len, t, name) = var; - let v_type = ast::Type::Vector(t, v_len); - (align, v_type, name) - } -} - -LocalVariable: ast::Variable<&'input str> = { - ".local" <var:VariableScalar<SizedScalarType>> => { - let (align, t, name) = var; - let v_type = ast::Type::Scalar(t); - let state_space = ast::StateSpace::Local; - ast::Variable { align, v_type, state_space, name, array_init: Vec::new() } - }, - ".local" <var:VariableVector<SizedScalarType>> => { - let (align, v_len, t, name) = var; - let v_type = ast::Type::Vector(t, v_len); - let state_space = ast::StateSpace::Local; - ast::Variable { align, v_type, state_space, name, array_init: Vec::new() } - }, - ".local" <var:VariableArrayOrPointer<SizedScalarType>> => { - let (align, t, name, arr_or_ptr) = var; - let state_space = ast::StateSpace::Local; - let (v_type, array_init) = match arr_or_ptr { - ast::ArrayOrPointer::Array { dimensions, init } => { - (ast::Type::Array(t, dimensions), init) - } - ast::ArrayOrPointer::Pointer => { - errors.push(ParseError::User { error: ast::PtxError::ZeroDimensionArray }); - (ast::Type::Array(t, Vec::new()), Vec::new()) - } - }; - ast::Variable { align, v_type, state_space, name, array_init } - } -} - -SharedVariable: ast::Variable<&'input str> = { - ".shared" <var:VariableScalar<SizedScalarType>> => { - let (align, t, name) = var; - let state_space = ast::StateSpace::Shared; - let v_type = ast::Type::Scalar(t); - ast::Variable { align, v_type, state_space, name, array_init: Vec::new() } - }, - ".shared" <var:VariableVector<SizedScalarType>> => { - let (align, 
v_len, t, name) = var; - let state_space = ast::StateSpace::Shared; - let v_type = ast::Type::Vector(t, v_len); - ast::Variable { align, v_type, state_space, name, array_init: Vec::new() } - }, - ".shared" <var:VariableArrayOrPointer<SizedScalarType>> => { - let (align, t, name, arr_or_ptr) = var; - let state_space = ast::StateSpace::Shared; - let (v_type, array_init) = match arr_or_ptr { - ast::ArrayOrPointer::Array { dimensions, init } => { - (ast::Type::Array(t, dimensions), init) - } - ast::ArrayOrPointer::Pointer => { - errors.push(ParseError::User { error: ast::PtxError::ZeroDimensionArray }); - (ast::Type::Array(t, Vec::new()), Vec::new()) - } - }; - ast::Variable { align, v_type, state_space, name, array_init } - } -} - -ModuleVariable: (ast::LinkingDirective, ast::Variable<&'input str>) = { - <linking:LinkingDirectives> <state_space:VariableStateSpace> <def:GlobalVariableDefinitionNoArray> => { - let (align, v_type, name, array_init) = def; - (linking, ast::Variable { align, v_type, state_space, name, array_init }) - }, - <linking:LinkingDirectives> <space:VariableStateSpace> <var:VariableArrayOrPointer<SizedScalarType>> => { - let (align, t, name, arr_or_ptr) = var; - let (v_type, state_space, array_init) = match arr_or_ptr { - ast::ArrayOrPointer::Array { dimensions, init } => { - (ast::Type::Array(t, dimensions), space, init) - } - ast::ArrayOrPointer::Pointer => { - if !linking.contains(ast::LinkingDirective::EXTERN) { - errors.push(ParseError::User { error: ast::PtxError::NonExternPointer }); - } - (ast::Type::Array(t, Vec::new()), space, Vec::new()) - } - }; - (linking, ast::Variable{ align, v_type, state_space, name, array_init }) - } -} - -VariableStateSpace: ast::StateSpace = { - ".const" => ast::StateSpace::Const, - ".global" => ast::StateSpace::Global, - ".shared" => ast::StateSpace::Shared, -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space -ParamVariable: (Option<u32>, Vec<u8>, ast::Type, &'input 
str) = { - ".param" <var:VariableScalar<LdStScalarType>> => { - let (align, t, name) = var; - let v_type = ast::Type::Scalar(t); - (align, Vec::new(), v_type, name) - }, - ".param" <var:VariableArrayOrPointer<SizedScalarType>> => { - let (align, t, name, arr_or_ptr) = var; - let (v_type, array_init) = match arr_or_ptr { - ast::ArrayOrPointer::Array { dimensions, init } => { - (ast::Type::Array(t, dimensions), init) - } - ast::ArrayOrPointer::Pointer => { - (ast::Type::Scalar(t), Vec::new()) - } - }; - (align, array_init, v_type, name) - } -} - -ParamDeclaration: (Option<u32>, ast::Type, &'input str) = { - <var:ParamVariable> => { - let (align, array_init, v_type, name) = var; - if array_init.len() > 0 { - errors.push(ParseError::User { error: ast::PtxError::ArrayInitalizer }); - } - (align, v_type, name) - } -} - -GlobalVariableDefinitionNoArray: (Option<u32>, ast::Type, &'input str, Vec<u8>) = { - <scalar:VariableScalar<SizedScalarType>> => { - let (align, t, name) = scalar; - let v_type = ast::Type::Scalar(t); - (align, v_type, name, Vec::new()) - }, - <var:VariableVector<SizedScalarType>> => { - let (align, v_len, t, name) = var; - let v_type = ast::Type::Vector(t, v_len); - (align, v_type, name, Vec::new()) - }, -} - -#[inline] -SizedScalarType: ast::ScalarType = { - ".b8" => ast::ScalarType::B8, - ".b16" => ast::ScalarType::B16, - ".b32" => ast::ScalarType::B32, - ".b64" => ast::ScalarType::B64, - ".u8" => ast::ScalarType::U8, - ".u16" => ast::ScalarType::U16, - ".u32" => ast::ScalarType::U32, - ".u64" => ast::ScalarType::U64, - ".s8" => ast::ScalarType::S8, - ".s16" => ast::ScalarType::S16, - ".s32" => ast::ScalarType::S32, - ".s64" => ast::ScalarType::S64, - ".f16" => ast::ScalarType::F16, - ".f16x2" => ast::ScalarType::F16x2, - ".f32" => ast::ScalarType::F32, - ".f64" => ast::ScalarType::F64, -} - -#[inline] -LdStScalarType: ast::ScalarType = { - ".b8" => ast::ScalarType::B8, - ".b16" => ast::ScalarType::B16, - ".b32" => ast::ScalarType::B32, - ".b64" => 
ast::ScalarType::B64, - ".u8" => ast::ScalarType::U8, - ".u16" => ast::ScalarType::U16, - ".u32" => ast::ScalarType::U32, - ".u64" => ast::ScalarType::U64, - ".s8" => ast::ScalarType::S8, - ".s16" => ast::ScalarType::S16, - ".s32" => ast::ScalarType::S32, - ".s64" => ast::ScalarType::S64, - ".f16" => ast::ScalarType::F16, - ".f32" => ast::ScalarType::F32, - ".f64" => ast::ScalarType::F64, -} - -Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = { - InstLd, - InstMov, - InstMul, - InstAdd, - InstSetp, - InstNot, - InstBra, - InstCvt, - InstShl, - InstShr, - InstSt, - InstRet, - InstCvta, - InstCall, - InstAbs, - InstMad, - InstFma, - InstOr, - InstAnd, - InstSub, - InstMin, - InstMax, - InstRcp, - InstSelp, - InstBar, - InstAtom, - InstAtomCas, - InstDiv, - InstSqrt, - InstRsqrt, - InstNeg, - InstSin, - InstCos, - InstLg2, - InstEx2, - InstClz, - InstBrev, - InstPopc, - InstXor, - InstRem, - InstBfe, - InstBfi, - InstPrmt, - InstActivemask, - InstMembar, -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld -InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = { - "ld" <q:LdStQualifier?> <ss:LdNonGlobalStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => { - ast::Instruction::Ld( - ast::LdDetails { - qualifier: q.unwrap_or(ast::LdStQualifier::Weak), - state_space: ss.unwrap_or(ast::StateSpace::Generic), - caching: cop.unwrap_or(ast::LdCacheOperator::Cached), - typ: t, - non_coherent: false - }, - ast::Arg2Ld { dst:dst, src:src } - ) - }, - "ld" <q:LdStQualifier?> ".global" <cop:LdCacheOperator?> <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => { - ast::Instruction::Ld( - ast::LdDetails { - qualifier: q.unwrap_or(ast::LdStQualifier::Weak), - state_space: ast::StateSpace::Global, - caching: cop.unwrap_or(ast::LdCacheOperator::Cached), - typ: t, - non_coherent: false - }, - ast::Arg2Ld { dst:dst, src:src } - ) - }, - "ld" ".global" 
<cop:LdNcCacheOperator?> ".nc" <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => { - ast::Instruction::Ld( - ast::LdDetails { - qualifier: ast::LdStQualifier::Weak, - state_space: ast::StateSpace::Global, - caching: cop.unwrap_or(ast::LdCacheOperator::Cached), - typ: t, - non_coherent: true - }, - ast::Arg2Ld { dst:dst, src:src } - ) - } -}; - -LdStType: ast::Type = { - <v:VectorPrefix> <t:LdStScalarType> => ast::Type::Vector(t, v), - <t:LdStScalarType> => ast::Type::Scalar(t), -} - -LdStQualifier: ast::LdStQualifier = { - ".weak" => ast::LdStQualifier::Weak, - ".volatile" => ast::LdStQualifier::Volatile, - ".relaxed" <s:MemScope> => ast::LdStQualifier::Relaxed(s), - ".acquire" <s:MemScope> => ast::LdStQualifier::Acquire(s), -}; - -MemScope: ast::MemScope = { - ".cta" => ast::MemScope::Cta, - ".gpu" => ast::MemScope::Gpu, - ".sys" => ast::MemScope::Sys -}; - -MembarLevel: ast::MemScope = { - ".cta" => ast::MemScope::Cta, - ".gl" => ast::MemScope::Gpu, - ".sys" => ast::MemScope::Sys -}; - -LdNonGlobalStateSpace: ast::StateSpace = { - ".const" => ast::StateSpace::Const, - ".local" => ast::StateSpace::Local, - ".param" => ast::StateSpace::Param, - ".shared" => ast::StateSpace::Shared, -}; - -LdCacheOperator: ast::LdCacheOperator = { - ".ca" => ast::LdCacheOperator::Cached, - ".cg" => ast::LdCacheOperator::L2Only, - ".cs" => ast::LdCacheOperator::Streaming, - ".lu" => ast::LdCacheOperator::LastUse, - ".cv" => ast::LdCacheOperator::Uncached, -}; - -LdNcCacheOperator: ast::LdCacheOperator = { - ".ca" => ast::LdCacheOperator::Cached, - ".cg" => ast::LdCacheOperator::L2Only, - ".cs" => ast::LdCacheOperator::Streaming, -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov -InstMov: ast::Instruction<ast::ParsedArgParams<'input>> = { - "mov" <pref:VectorPrefix?> <t:MovScalarType> <dst:DstOperandVec> "," <src:SrcOperandVec> => { - let mov_type = match pref { - Some(vec_width) => ast::Type::Vector(t, 
vec_width), - None => ast::Type::Scalar(t) - }; - let details = ast::MovDetails::new(mov_type); - ast::Instruction::Mov( - details, - ast::Arg2Mov { dst, src } - ) - } -} - -#[inline] -MovScalarType: ast::ScalarType = { - ".b16" => ast::ScalarType::B16, - ".b32" => ast::ScalarType::B32, - ".b64" => ast::ScalarType::B64, - ".u16" => ast::ScalarType::U16, - ".u32" => ast::ScalarType::U32, - ".u64" => ast::ScalarType::U64, - ".s16" => ast::ScalarType::S16, - ".s32" => ast::ScalarType::S32, - ".s64" => ast::ScalarType::S64, - ".f32" => ast::ScalarType::F32, - ".f64" => ast::ScalarType::F64, - ".pred" => ast::ScalarType::Pred -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mul -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-mul -InstMul: ast::Instruction<ast::ParsedArgParams<'input>> = { - "mul" <d:MulDetails> <a:Arg3> => ast::Instruction::Mul(d, a) -}; - -MulDetails: ast::MulDetails = { - <ctr:MulIntControl> <t:UIntType> => ast::MulDetails::Unsigned(ast::MulUInt{ - typ: t, - control: ctr - }), - <ctr:MulIntControl> <t:SIntType> => ast::MulDetails::Signed(ast::MulSInt{ - typ: t, - control: ctr - }), - <f:ArithFloat> => ast::MulDetails::Float(f) -}; - -MulIntControl: ast::MulIntControl = { - ".hi" => ast::MulIntControl::High, - ".lo" => ast::MulIntControl::Low, - ".wide" => ast::MulIntControl::Wide -}; - -#[inline] -RoundingModeFloat : ast::RoundingMode = { - ".rn" => ast::RoundingMode::NearestEven, - ".rz" => ast::RoundingMode::Zero, - ".rm" => ast::RoundingMode::NegativeInf, - ".rp" => ast::RoundingMode::PositiveInf, -}; - -RoundingModeInt : ast::RoundingMode = { - ".rni" => ast::RoundingMode::NearestEven, - ".rzi" => ast::RoundingMode::Zero, - ".rmi" => ast::RoundingMode::NegativeInf, - ".rpi" => ast::RoundingMode::PositiveInf, -}; - -IntType : 
ast::ScalarType = { - ".u16" => ast::ScalarType::U16, - ".u32" => ast::ScalarType::U32, - ".u64" => ast::ScalarType::U64, - ".s16" => ast::ScalarType::S16, - ".s32" => ast::ScalarType::S32, - ".s64" => ast::ScalarType::S64, -}; - -IntType3264: ast::ScalarType = { - ".u32" => ast::ScalarType::U32, - ".u64" => ast::ScalarType::U64, - ".s32" => ast::ScalarType::S32, - ".s64" => ast::ScalarType::S64, -} - -UIntType: ast::ScalarType = { - ".u16" => ast::ScalarType::U16, - ".u32" => ast::ScalarType::U32, - ".u64" => ast::ScalarType::U64, -}; - -SIntType: ast::ScalarType = { - ".s16" => ast::ScalarType::S16, - ".s32" => ast::ScalarType::S32, - ".s64" => ast::ScalarType::S64, -}; - -FloatType: ast::ScalarType = { - ".f16" => ast::ScalarType::F16, - ".f16x2" => ast::ScalarType::F16x2, - ".f32" => ast::ScalarType::F32, - ".f64" => ast::ScalarType::F64, -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-add -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-add -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-add -InstAdd: ast::Instruction<ast::ParsedArgParams<'input>> = { - "add" <d:ArithDetails> <a:Arg3> => ast::Instruction::Add(d, a) -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-setp -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-comparison-instructions-setp -// TODO: support f16 setp -InstSetp: ast::Instruction<ast::ParsedArgParams<'input>> = { - "setp" <d:SetpMode> <a:Arg4Setp> => ast::Instruction::Setp(d, a), - "setp" <d:SetpBoolMode> <a:Arg5Setp> => ast::Instruction::SetpBool(d, a), -}; - -SetpMode: ast::SetpData = { - <cmp_op:SetpCompareOp> <t:SetpTypeNoF32> => ast::SetpData { - typ: t, - flush_to_zero: None, - cmp_op: cmp_op, - }, - <cmp_op:SetpCompareOp> <ftz:".ftz"?> ".f32" => ast::SetpData { - 
typ: ast::ScalarType::F32, - flush_to_zero: Some(ftz.is_some()), - cmp_op: cmp_op, - } - -}; - -SetpBoolMode: ast::SetpBoolData = { - <cmp_op:SetpCompareOp> <bool_op:SetpBoolPostOp> <t:SetpTypeNoF32> => ast::SetpBoolData { - typ: t, - flush_to_zero: None, - cmp_op: cmp_op, - bool_op: bool_op, - }, - <cmp_op:SetpCompareOp> <bool_op:SetpBoolPostOp> <ftz:".ftz"?> ".f32" => ast::SetpBoolData { - typ: ast::ScalarType::F32, - flush_to_zero: Some(ftz.is_some()), - cmp_op: cmp_op, - bool_op: bool_op, - } -}; - -SetpCompareOp: ast::SetpCompareOp = { - ".eq" => ast::SetpCompareOp::Eq, - ".ne" => ast::SetpCompareOp::NotEq, - ".lt" => ast::SetpCompareOp::Less, - ".le" => ast::SetpCompareOp::LessOrEq, - ".gt" => ast::SetpCompareOp::Greater, - ".ge" => ast::SetpCompareOp::GreaterOrEq, - ".lo" => ast::SetpCompareOp::Less, - ".ls" => ast::SetpCompareOp::LessOrEq, - ".hi" => ast::SetpCompareOp::Greater, - ".hs" => ast::SetpCompareOp::GreaterOrEq, - ".equ" => ast::SetpCompareOp::NanEq, - ".neu" => ast::SetpCompareOp::NanNotEq, - ".ltu" => ast::SetpCompareOp::NanLess, - ".leu" => ast::SetpCompareOp::NanLessOrEq, - ".gtu" => ast::SetpCompareOp::NanGreater, - ".geu" => ast::SetpCompareOp::NanGreaterOrEq, - ".num" => ast::SetpCompareOp::IsNotNan, - ".nan" => ast::SetpCompareOp::IsAnyNan, -}; - -SetpBoolPostOp: ast::SetpBoolPostOp = { - ".and" => ast::SetpBoolPostOp::And, - ".or" => ast::SetpBoolPostOp::Or, - ".xor" => ast::SetpBoolPostOp::Xor, -}; - -SetpTypeNoF32: ast::ScalarType = { - ".b16" => ast::ScalarType::B16, - ".b32" => ast::ScalarType::B32, - ".b64" => ast::ScalarType::B64, - ".u16" => ast::ScalarType::U16, - ".u32" => ast::ScalarType::U32, - ".u64" => ast::ScalarType::U64, - ".s16" => ast::ScalarType::S16, - ".s32" => ast::ScalarType::S32, - ".s64" => ast::ScalarType::S64, - ".f64" => ast::ScalarType::F64, -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-not -InstNot: ast::Instruction<ast::ParsedArgParams<'input>> = { - 
"not" <t:BooleanType> <a:Arg2> => ast::Instruction::Not(t, a) -}; - -BooleanType: ast::ScalarType = { - ".pred" => ast::ScalarType::Pred, - ".b16" => ast::ScalarType::B16, - ".b32" => ast::ScalarType::B32, - ".b64" => ast::ScalarType::B64, -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-at -PredAt: ast::PredAt<&'input str> = { - "@" <label:ExtendedID> => ast::PredAt { not: false, label:label }, - "@" "!" <label:ExtendedID> => ast::PredAt { not: true, label:label } -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-bra -InstBra: ast::Instruction<ast::ParsedArgParams<'input>> = { - "bra" <u:".uni"?> <a:Arg1> => ast::Instruction::Bra(ast::BraData{ uniform: u.is_some() }, a) -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt -InstCvt: ast::Instruction<ast::ParsedArgParams<'input>> = { - "cvt" <s:".sat"?> <dst_t:CvtTypeInt> <src_t:CvtTypeInt> <a:Arg2> => { - ast::Instruction::Cvt(ast::CvtDetails::new_int_from_int_checked( - s.is_some(), - dst_t, - src_t, - errors - ), - a) - }, - "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> <dst_t:CvtTypeFloat> <src_t:CvtTypeInt> <a:Arg2> => { - ast::Instruction::Cvt(ast::CvtDetails::new_float_from_int_checked( - r, - f.is_some(), - s.is_some(), - dst_t, - src_t, - errors - ), - a) - }, - "cvt" <r:RoundingModeInt> <f:".ftz"?> <s:".sat"?> <dst_t:CvtTypeInt> <src_t:CvtTypeFloat> <a:Arg2> => { - ast::Instruction::Cvt(ast::CvtDetails::new_int_from_float_checked( - r, - f.is_some(), - s.is_some(), - dst_t, - src_t, - errors - ), - a) - }, - "cvt" <r:RoundingModeInt?> <s:".sat"?> ".f16" ".f16" <a:Arg2> => { - ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat( - ast::CvtDesc { - rounding: r, - flush_to_zero: None, - saturate: s.is_some(), - dst: ast::ScalarType::F16, - src: ast::ScalarType::F16 - } - ), a) - }, - "cvt" <f:".ftz"?> <s:".sat"?> ".f32" ".f16" 
<a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F32,
                src: ast::ScalarType::F16
            }
        ), a)
    },
    // f16 -> f64: only `.sat` is accepted.
    "cvt" <s:".sat"?> ".f64" ".f16" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: None,
                saturate: s.is_some(),
                dst: ast::ScalarType::F64,
                src: ast::ScalarType::F16
            }
        ), a)
    },
    // f32 -> f16: a float rounding mode is mandatory.
    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> ".f16" ".f32" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F16,
                src: ast::ScalarType::F32
            }
        ), a)
    },
    // f32 -> f32: optional integer rounding mode.
    "cvt" <r:RoundingModeInt?> <f:".ftz"?> <s:".sat"?> ".f32" ".f32" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F32,
                src: ast::ScalarType::F32
            }
        ), a)
    },
    // f32 -> f64.
    // NOTE(review): this is the only production that takes `.sat` before
    // `.ftz`; every sibling takes `.ftz` first. Left as-is to avoid changing
    // the accepted syntax -- confirm whether the order is intentional.
    "cvt" <s:".sat"?> <f:".ftz"?> ".f64" ".f32" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F64,
                src: ast::ScalarType::F32
            }
        ), a)
    },
    // f64 -> f16: a float rounding mode is mandatory, no `.ftz`.
    "cvt" <r:RoundingModeFloat> <s:".sat"?> ".f16" ".f64" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                flush_to_zero: None,
                saturate: s.is_some(),
                dst: ast::ScalarType::F16,
                src: ast::ScalarType::F64
            }
        ), a)
    },
    // f64 -> f32: a float rounding mode is mandatory.
    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> ".f32" ".f64" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                // Must come from the `.ftz` flag (`f`); this previously read
                // the `.sat` flag (`s`), so `.ftz` was dropped and `.sat`
                // silently enabled flush-to-zero.
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F32,
                src: ast::ScalarType::F64
            }
        ),
a) - }, - "cvt" <r:RoundingModeInt?> <s:".sat"?> ".f64" ".f64" <a:Arg2> => { - ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat( - ast::CvtDesc { - rounding: r, - flush_to_zero: None, - saturate: s.is_some(), - dst: ast::ScalarType::F64, - src: ast::ScalarType::F64 - } - ), a) - }, -}; - -CvtTypeInt: ast::ScalarType = { - ".u8" => ast::ScalarType::U8, - ".u16" => ast::ScalarType::U16, - ".u32" => ast::ScalarType::U32, - ".u64" => ast::ScalarType::U64, - ".s8" => ast::ScalarType::S8, - ".s16" => ast::ScalarType::S16, - ".s32" => ast::ScalarType::S32, - ".s64" => ast::ScalarType::S64, -}; - -CvtTypeFloat: ast::ScalarType = { - ".f16" => ast::ScalarType::F16, - ".f32" => ast::ScalarType::F32, - ".f64" => ast::ScalarType::F64, -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shl -InstShl: ast::Instruction<ast::ParsedArgParams<'input>> = { - "shl" <t:ShlType> <a:Arg3> => ast::Instruction::Shl(t, a) -}; - -ShlType: ast::ScalarType = { - ".b16" => ast::ScalarType::B16, - ".b32" => ast::ScalarType::B32, - ".b64" => ast::ScalarType::B64, -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shr -InstShr: ast::Instruction<ast::ParsedArgParams<'input>> = { - "shr" <t:ShrType> <a:Arg3> => ast::Instruction::Shr(t, a) -}; - -ShrType: ast::ScalarType = { - ".b16" => ast::ScalarType::B16, - ".b32" => ast::ScalarType::B32, - ".b64" => ast::ScalarType::B64, - ".u16" => ast::ScalarType::U16, - ".u32" => ast::ScalarType::U32, - ".u64" => ast::ScalarType::U64, - ".s16" => ast::ScalarType::S16, - ".s32" => ast::ScalarType::S32, - ".s64" => ast::ScalarType::S64, -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st -// Warning: NVIDIA documentation is incorrect, you can specify scope only once -InstSt: ast::Instruction<ast::ParsedArgParams<'input>> = { - "st" <q:LdStQualifier?> <ss:StStateSpace?> 
<cop:StCacheOperator?> <t:LdStType> <src1:MemoryOperand> "," <src2:SrcOperandVec> => { - ast::Instruction::St( - ast::StData { - qualifier: q.unwrap_or(ast::LdStQualifier::Weak), - state_space: ss.unwrap_or(ast::StateSpace::Generic), - caching: cop.unwrap_or(ast::StCacheOperator::Writeback), - typ: t - }, - ast::Arg2St { src1:src1, src2:src2 } - ) - } -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#using-addresses-arrays-and-vectors -MemoryOperand: ast::Operand<&'input str> = { - "[" <o:Operand> "]" => o -} - -StStateSpace: ast::StateSpace = { - ".global" => ast::StateSpace::Global, - ".local" => ast::StateSpace::Local, - ".param" => ast::StateSpace::Param, - ".shared" => ast::StateSpace::Shared, -}; - -StCacheOperator: ast::StCacheOperator = { - ".wb" => ast::StCacheOperator::Writeback, - ".cg" => ast::StCacheOperator::L2Only, - ".cs" => ast::StCacheOperator::Streaming, - ".wt" => ast::StCacheOperator::Writethrough, -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret -InstRet: ast::Instruction<ast::ParsedArgParams<'input>> = { - "ret" <u:".uni"?> => ast::Instruction::Ret(ast::RetData { uniform: u.is_some() }) -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvta -InstCvta: ast::Instruction<ast::ParsedArgParams<'input>> = { - "cvta" <from:CvtaStateSpace> <s:CvtaSize> <a:Arg2> => { - ast::Instruction::Cvta(ast::CvtaDetails { - to: ast::StateSpace::Generic, - from, - size: s - }, - a) - }, - "cvta" ".to" <to:CvtaStateSpace> <s:CvtaSize> <a:Arg2> => { - ast::Instruction::Cvta(ast::CvtaDetails { - to, - from: ast::StateSpace::Generic, - size: s - }, - a) - } -} - -CvtaStateSpace: ast::StateSpace = { - ".const" => ast::StateSpace::Const, - ".global" => ast::StateSpace::Global, - ".local" => ast::StateSpace::Local, - ".shared" => ast::StateSpace::Shared, -} - -CvtaSize: ast::CvtaSize = { - ".u32" => ast::CvtaSize::U32, - 
".u64" => ast::CvtaSize::U64, -} - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-call -InstCall: ast::Instruction<ast::ParsedArgParams<'input>> = { - "call" <u:".uni"?> <args:ArgCall> => { - let (ret_params, func, param_list) = args; - ast::Instruction::Call(ast::CallInst { uniform: u.is_some(), ret_params, func, param_list }) - } -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-abs -InstAbs: ast::Instruction<ast::ParsedArgParams<'input>> = { - "abs" <t:SignedIntType> <a:Arg2> => { - ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: None, typ: t }, a) - }, - "abs" <f:".ftz"?> ".f32" <a:Arg2> => { - ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: Some(f.is_some()), typ: ast::ScalarType::F32 }, a) - }, - "abs" ".f64" <a:Arg2> => { - ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: None, typ: ast::ScalarType::F64 }, a) - }, - "abs" <f:".ftz"?> ".f16" <a:Arg2> => { - ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: Some(f.is_some()), typ: ast::ScalarType::F16 }, a) - }, - "abs" <f:".ftz"?> ".f16x2" <a:Arg2> => { - ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: Some(f.is_some()), typ: ast::ScalarType::F16x2 }, a) - }, -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mad -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mad -InstMad: ast::Instruction<ast::ParsedArgParams<'input>> = { - "mad" <d:MulDetails> <a:Arg4> => ast::Instruction::Mad(d, a), - "mad" ".hi" ".sat" ".s32" => todo!(), -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-fma -InstFma: ast::Instruction<ast::ParsedArgParams<'input>> = { - "fma" <f:ArithFloatMustRound> <a:Arg4> => ast::Instruction::Fma(f, a), -}; - -SignedIntType: ast::ScalarType = { - ".s16" => ast::ScalarType::S16, - ".s32" => 
ast::ScalarType::S32, - ".s64" => ast::ScalarType::S64, -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-or -InstOr: ast::Instruction<ast::ParsedArgParams<'input>> = { - "or" <d:BooleanType> <a:Arg3> => ast::Instruction::Or(d, a), -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-and -InstAnd: ast::Instruction<ast::ParsedArgParams<'input>> = { - "and" <d:BooleanType> <a:Arg3> => ast::Instruction::And(d, a), -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rcp -InstRcp: ast::Instruction<ast::ParsedArgParams<'input>> = { - "rcp" <rounding:RcpRoundingMode> <ftz:".ftz"?> ".f32" <a:Arg2> => { - let details = ast::RcpDetails { - rounding, - flush_to_zero: Some(ftz.is_some()), - is_f64: false, - }; - ast::Instruction::Rcp(details, a) - }, - "rcp" <rn:RoundingModeFloat> ".f64" <a:Arg2> => { - let details = ast::RcpDetails { - rounding: Some(rn), - flush_to_zero: None, - is_f64: true, - }; - ast::Instruction::Rcp(details, a) - } -}; - -RcpRoundingMode: Option<ast::RoundingMode> = { - ".approx" => None, - <r:RoundingModeFloat> => Some(r) -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-sub -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sub -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-sub -InstSub: ast::Instruction<ast::ParsedArgParams<'input>> = { - "sub" <d:ArithDetails> <a:Arg3> => ast::Instruction::Sub(d, a), -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-min -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-min -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-min 
-InstMin: ast::Instruction<ast::ParsedArgParams<'input>> = { - "min" <d:MinMaxDetails> <a:Arg3> => ast::Instruction::Min(d, a), -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-max -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-max -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-max -InstMax: ast::Instruction<ast::ParsedArgParams<'input>> = { - "max" <d:MinMaxDetails> <a:Arg3> => ast::Instruction::Max(d, a), -}; - -MinMaxDetails: ast::MinMaxDetails = { - <t:UIntType> => ast::MinMaxDetails::Unsigned(t), - <t:SIntType> => ast::MinMaxDetails::Signed(t), - <ftz:".ftz"?> <nan:".NaN"?> ".f32" => ast::MinMaxDetails::Float( - ast::MinMaxFloat{ flush_to_zero: Some(ftz.is_some()), nan: nan.is_some(), typ: ast::ScalarType::F32 } - ), - ".f64" => ast::MinMaxDetails::Float( - ast::MinMaxFloat{ flush_to_zero: None, nan: false, typ: ast::ScalarType::F64 } - ), - <ftz:".ftz"?> <nan:".NaN"?> ".f16" => ast::MinMaxDetails::Float( - ast::MinMaxFloat{ flush_to_zero: Some(ftz.is_some()), nan: nan.is_some(), typ: ast::ScalarType::F16 } - ), - <ftz:".ftz"?> <nan:".NaN"?> ".f16x2" => ast::MinMaxDetails::Float( - ast::MinMaxFloat{ flush_to_zero: Some(ftz.is_some()), nan: nan.is_some(), typ: ast::ScalarType::F16x2 } - ) -} - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-selp -InstSelp: ast::Instruction<ast::ParsedArgParams<'input>> = { - "selp" <t:SelpType> <a:Arg4> => ast::Instruction::Selp(t, a), -}; - -SelpType: ast::ScalarType = { - ".b16" => ast::ScalarType::B16, - ".b32" => ast::ScalarType::B32, - ".b64" => ast::ScalarType::B64, - ".u16" => ast::ScalarType::U16, - ".u32" => ast::ScalarType::U32, - ".u64" => ast::ScalarType::U64, - ".s16" => ast::ScalarType::S16, - ".s32" => ast::ScalarType::S32, - ".s64" => ast::ScalarType::S64, - ".f32" => 
ast::ScalarType::F32, - ".f64" => ast::ScalarType::F64, -}; - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar -InstBar: ast::Instruction<ast::ParsedArgParams<'input>> = { - "bar" ".sync" <a:Arg1Bar> => ast::Instruction::Bar(ast::BarDetails::SyncAligned, a), - "barrier" ".sync" <a:Arg1Bar> => ast::Instruction::Bar(ast::BarDetails::SyncAligned, a), - "barrier" ".sync" ".aligned" <a:Arg1Bar> => ast::Instruction::Bar(ast::BarDetails::SyncAligned, a), -} - -// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-atom -// The documentation does not mention all spported operations: -// * Operation .add requires .u32 or .s32 or .u64 or .f64 or f16 or f16x2 or .f32 -// * Operation .inc requires .u32 type for instuction -// * Operation .dec requires .u32 type for instuction -// Otherwise as documented -InstAtom: ast::Instruction<ast::ParsedArgParams<'input>> = { - "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op:AtomBitOp> <typ:BitType> <a:Arg3Atom> => { - let details = ast::AtomDetails { - semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), - scope: scope.unwrap_or(ast::MemScope::Gpu), - space: space.unwrap_or(ast::StateSpace::Generic), - inner: ast::AtomInnerDetails::Bit { op, typ } - }; - ast::Instruction::Atom(details,a) - }, - "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".inc" ".u32" <a:Arg3Atom> => { - let details = ast::AtomDetails { - semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), - scope: scope.unwrap_or(ast::MemScope::Gpu), - space: space.unwrap_or(ast::StateSpace::Generic), - inner: ast::AtomInnerDetails::Unsigned { - op: ast::AtomUIntOp::Inc, - typ: ast::ScalarType::U32 - } - }; - ast::Instruction::Atom(details,a) - }, - "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".dec" ".u32" <a:Arg3Atom> => { - let details = ast::AtomDetails { - 
semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), - scope: scope.unwrap_or(ast::MemScope::Gpu), - space: space.unwrap_or(ast::StateSpace::Generic), - inner: ast::AtomInnerDetails::Unsigned { - op: ast::AtomUIntOp::Dec, - typ: ast::ScalarType::U32 - } - }; - ast::Instruction::Atom(details,a) - }, - "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".add" <typ:FloatType> <a:Arg3Atom> => { - let op = ast::AtomFloatOp::Add; - let details = ast::AtomDetails { - semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), - scope: scope.unwrap_or(ast::MemScope::Gpu), - space: space.unwrap_or(ast::StateSpace::Generic), - inner: ast::AtomInnerDetails::Float { op, typ } - }; - ast::Instruction::Atom(details,a) - }, - "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op: AtomUIntOp> <typ:UIntType3264> <a:Arg3Atom> => { - let details = ast::AtomDetails { - semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), - scope: scope.unwrap_or(ast::MemScope::Gpu), - space: space.unwrap_or(ast::StateSpace::Generic), - inner: ast::AtomInnerDetails::Unsigned { op, typ } - }; - ast::Instruction::Atom(details,a) - }, - "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op: AtomSIntOp> <typ:SIntType3264> <a:Arg3Atom> => { - let details = ast::AtomDetails { - semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), - scope: scope.unwrap_or(ast::MemScope::Gpu), - space: space.unwrap_or(ast::StateSpace::Generic), - inner: ast::AtomInnerDetails::Signed { op, typ } - }; - ast::Instruction::Atom(details,a) - } -} - -InstAtomCas: ast::Instruction<ast::ParsedArgParams<'input>> = { - "atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".cas" <typ:BitType> <a:Arg4Atom> => { - let details = ast::AtomCasDetails { - semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), - scope: scope.unwrap_or(ast::MemScope::Gpu), - space: space.unwrap_or(ast::StateSpace::Generic), - typ, - }; - ast::Instruction::AtomCas(details,a) - }, -} - 
// Memory-ordering suffixes accepted by the `atom` rules.
AtomSemantics: ast::AtomSemantics = {
    ".relaxed" => ast::AtomSemantics::Relaxed,
    ".acquire" => ast::AtomSemantics::Acquire,
    ".release" => ast::AtomSemantics::Release,
    ".acq_rel" => ast::AtomSemantics::AcquireRelease
}

// State-space suffixes accepted by the `atom` rules.
AtomSpace: ast::StateSpace = {
    ".global" => ast::StateSpace::Global,
    ".shared" => ast::StateSpace::Shared
}

// Bitwise `atom` operations.
AtomBitOp: ast::AtomBitOp = {
    ".and" => ast::AtomBitOp::And,
    ".or" => ast::AtomBitOp::Or,
    ".xor" => ast::AtomBitOp::Xor,
    ".exch" => ast::AtomBitOp::Exchange,
}

// Unsigned-integer `atom` operations (`.inc`/`.dec` are matched literally in `InstAtom`).
AtomUIntOp: ast::AtomUIntOp = {
    ".add" => ast::AtomUIntOp::Add,
    ".min" => ast::AtomUIntOp::Min,
    ".max" => ast::AtomUIntOp::Max,
}

// Signed-integer `atom` operations.
AtomSIntOp: ast::AtomSIntOp = {
    ".add" => ast::AtomSIntOp::Add,
    ".min" => ast::AtomSIntOp::Min,
    ".max" => ast::AtomSIntOp::Max,
}

// 32- and 64-bit untyped suffixes.
BitType: ast::ScalarType = {
    ".b32" => ast::ScalarType::B32,
    ".b64" => ast::ScalarType::B64,
}

// 32- and 64-bit unsigned suffixes.
UIntType3264: ast::ScalarType = {
    ".u32" => ast::ScalarType::U32,
    ".u64" => ast::ScalarType::U64,
}

// 32- and 64-bit signed suffixes.
SIntType3264: ast::ScalarType = {
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-div
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-div
// The `.f64` form requires a rounding mode and never carries `.ftz`.
InstDiv: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "div" <t:UIntType> <a:Arg3> => ast::Instruction::Div(ast::DivDetails::Unsigned(t), a),
    "div" <t:SIntType> <a:Arg3> => ast::Instruction::Div(ast::DivDetails::Signed(t), a),
    "div" <kind:DivFloatKind> <ftz:".ftz"?> ".f32" <a:Arg3> => {
        let inner = ast::DivFloatDetails {
            typ: ast::ScalarType::F32,
            flush_to_zero: Some(ftz.is_some()),
            kind
        };
        ast::Instruction::Div(ast::DivDetails::Float(inner), a)
    },
    "div" <rnd:RoundingModeFloat> ".f64" <a:Arg3> => {
        let inner = ast::DivFloatDetails {
            typ: ast::ScalarType::F64,
            flush_to_zero: None,
            kind: ast::DivFloatKind::Rounding(rnd)
        };
        ast::Instruction::Div(ast::DivDetails::Float(inner), a)
    },
}

// Division flavour for `div.f32`: `.approx`, `.full`, or an explicit rounding mode.
DivFloatKind: ast::DivFloatKind = {
    ".approx" => ast::DivFloatKind::Approx,
    ".full" => ast::DivFloatKind::Full,
    <rnd:RoundingModeFloat> => ast::DivFloatKind::Rounding(rnd),
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sqrt
// `.approx` is only accepted for `.f32`; the `.f64` form requires a rounding mode.
InstSqrt: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "sqrt" ".approx" <ftz:".ftz"?> ".f32" <a:Arg2> => {
        let details = ast::SqrtDetails {
            typ: ast::ScalarType::F32,
            flush_to_zero: Some(ftz.is_some()),
            kind: ast::SqrtKind::Approx,
        };
        ast::Instruction::Sqrt(details, a)
    },
    "sqrt" <rnd:RoundingModeFloat> <ftz:".ftz"?> ".f32" <a:Arg2> => {
        let details = ast::SqrtDetails {
            typ: ast::ScalarType::F32,
            flush_to_zero: Some(ftz.is_some()),
            kind: ast::SqrtKind::Rounding(rnd),
        };
        ast::Instruction::Sqrt(details, a)
    },
    "sqrt" <rnd:RoundingModeFloat> ".f64" <a:Arg2> => {
        let details = ast::SqrtDetails {
            typ: ast::ScalarType::F64,
            flush_to_zero: None,
            kind: ast::SqrtKind::Rounding(rnd),
        };
        ast::Instruction::Sqrt(details, a)
    }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rsqrt
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rsqrt-approx-ftz-f64
// Unlike the neighbouring rules, `flush_to_zero` here is a plain `bool`, not an `Option`.
InstRsqrt: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "rsqrt" ".approx" <ftz:".ftz"?> ".f32" <a:Arg2> => {
        let details = ast::RsqrtDetails {
            typ: ast::ScalarType::F32,
            flush_to_zero: ftz.is_some(),
        };
        ast::Instruction::Rsqrt(details, a)
    },
    "rsqrt" ".approx" <ftz:".ftz"?> ".f64" <a:Arg2> => {
        let details = ast::RsqrtDetails {
            typ: ast::ScalarType::F64,
            flush_to_zero: ftz.is_some(),
        };
        ast::Instruction::Rsqrt(details, a)
    },
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-neg
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-neg
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-neg
// Types are split into those that may carry `.ftz` (`NegTypeFtz`) and those
// that may not (`NegTypeNonFtz`, where `flush_to_zero` is `None`).
InstNeg: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "neg" <ftz:".ftz"?> <typ:NegTypeFtz> <a:Arg2> => {
        let details = ast::NegDetails {
            typ,
            flush_to_zero: Some(ftz.is_some()),
        };
        ast::Instruction::Neg(details, a)
    },
    "neg" <typ:NegTypeNonFtz> <a:Arg2> => {
        let details = ast::NegDetails {
            typ,
            flush_to_zero: None,
        };
        ast::Instruction::Neg(details, a)
    },
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sin
InstSin: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "sin" ".approx" <ftz:".ftz"?> ".f32" <arg:Arg2> => {
        ast::Instruction::Sin{ flush_to_zero: ftz.is_some(), arg }
    },
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-cos
InstCos: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "cos" ".approx" <ftz:".ftz"?> ".f32" <arg:Arg2> => {
        ast::Instruction::Cos{ flush_to_zero: ftz.is_some(), arg }
    },
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-lg2
InstLg2: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "lg2" ".approx" <ftz:".ftz"?> ".f32" <arg:Arg2> => {
        ast::Instruction::Lg2{ flush_to_zero: ftz.is_some(), arg }
    },
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-ex2
InstEx2: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "ex2" ".approx" <ftz:".ftz"?> ".f32" <arg:Arg2> => {
        ast::Instruction::Ex2{ flush_to_zero: ftz.is_some(), arg }
    },
}

// In the rules below `{ <> }` expands to the named bindings of the
// alternative, so the binding names must match the AST field names.

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-clz
InstClz: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "clz" <typ:BitType> <arg:Arg2> => ast::Instruction::Clz{ <> }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-brev
InstBrev: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "brev" <typ:BitType> <arg:Arg2> => ast::Instruction::Brev{ <> }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-popc
InstPopc: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "popc" <typ:BitType> <arg:Arg2> => ast::Instruction::Popc{ <> }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-xor
InstXor: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "xor" <typ:BooleanType> <arg:Arg3> => ast::Instruction::Xor{ <> }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfe
InstBfe: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "bfe" <typ:IntType3264> <arg:Arg4> => ast::Instruction::Bfe{ <> }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfi
InstBfi: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "bfi" <typ:BitType> <arg:Arg5> => ast::Instruction::Bfi{ <> }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-prmt
// Only the form whose control operand is a literal (`U16Num`) is parsed here.
InstPrmt: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "prmt" ".b32" <arg:Arg3> "," <control:U16Num> => ast::Instruction::Prmt{ <> }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-rem
InstRem: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "rem" <typ:IntType> <arg:Arg3> => ast::Instruction::Rem{ <> }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-activemask
InstActivemask: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "activemask" ".b32" <arg:Arg1> => ast::Instruction::Activemask{ <> }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar
InstMembar: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "membar" <level:MembarLevel> => ast::Instruction::Membar{ <> }
}

// `neg` types that may be combined with `.ftz`.
NegTypeFtz: ast::ScalarType = {
    ".f16" => ast::ScalarType::F16,
    ".f16x2" => ast::ScalarType::F16x2,
    ".f32" => ast::ScalarType::F32,
}

// `neg` types that are parsed without `.ftz`.
NegTypeNonFtz: ast::ScalarType = {
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
    ".f64" => ast::ScalarType::F64
}

// Modifier/type suffixes used by `sub`. Integer saturation is only accepted
// as the literal pair `.sat .s32`.
ArithDetails: ast::ArithDetails = {
    <t:UIntType> => ast::ArithDetails::Unsigned(t),
    <t:SIntType> => ast::ArithDetails::Signed(ast::ArithSInt {
        typ: t,
        saturate: false,
    }),
    ".sat" ".s32" => ast::ArithDetails::Signed(ast::ArithSInt {
        typ: ast::ScalarType::S32,
        saturate: true,
    }),
    <f:ArithFloat> => ast::ArithDetails::Float(f)
}

// Floating-point arithmetic modifiers with an optional rounding mode.
// Half-precision types only accept `.rn`, which maps to `NearestEven`.
ArithFloat: ast::ArithFloat = {
    <rn:RoundingModeFloat?> <ftz:".ftz"?> <sat:".sat"?> ".f32" => ast::ArithFloat {
        typ: ast::ScalarType::F32,
        rounding: rn,
        flush_to_zero: Some(ftz.is_some()),
        saturate: sat.is_some(),
    },
    <rn:RoundingModeFloat?> ".f64" => ast::ArithFloat {
        typ: ast::ScalarType::F64,
        rounding: rn,
        flush_to_zero: None,
        saturate: false,
    },
    <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16" => ast::ArithFloat {
        typ: ast::ScalarType::F16,
        rounding: rn.map(|_| ast::RoundingMode::NearestEven),
        flush_to_zero: Some(ftz.is_some()),
        saturate: sat.is_some(),
    },
    <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16x2" => ast::ArithFloat {
        typ: ast::ScalarType::F16x2,
        rounding: rn.map(|_| ast::RoundingMode::NearestEven),
        flush_to_zero: Some(ftz.is_some()),
        saturate: sat.is_some(),
    },
}

// Same shape as `ArithFloat`, but the rounding mode is mandatory (used by `fma`).
ArithFloatMustRound: ast::ArithFloat = {
    <rn:RoundingModeFloat> <ftz:".ftz"?> <sat:".sat"?> ".f32" => ast::ArithFloat {
        typ: ast::ScalarType::F32,
        rounding: Some(rn),
        flush_to_zero: Some(ftz.is_some()),
        saturate: sat.is_some(),
    },
    <rn:RoundingModeFloat> ".f64" => ast::ArithFloat {
        typ: ast::ScalarType::F64,
        rounding: Some(rn),
        flush_to_zero: None,
        saturate: false,
    },
    ".rn" <ftz:".ftz"?> <sat:".sat"?> ".f16" => ast::ArithFloat {
        typ: ast::ScalarType::F16,
        rounding: Some(ast::RoundingMode::NearestEven),
        flush_to_zero: Some(ftz.is_some()),
        saturate: sat.is_some(),
    },
    ".rn" <ftz:".ftz"?> <sat:".sat"?> ".f16x2" => ast::ArithFloat {
        typ: ast::ScalarType::F16x2,
        rounding: Some(ast::RoundingMode::NearestEven),
        flush_to_zero: Some(ftz.is_some()),
        saturate: sat.is_some(),
    },
}

// General operand: a register, a register plus a literal offset, or an immediate.
Operand: ast::Operand<&'input str> = {
    <r:ExtendedID> => ast::Operand::Reg(r),
    <r:ExtendedID> "+" <offset:S32Num> => ast::Operand::RegOffset(r, offset),
    <x:ImmediateValue> => ast::Operand::Imm(x)
};

// Operand allowed in a `call` argument list: a register or an immediate.
CallOperand: ast::Operand<&'input str> = {
    <r:ExtendedID> => ast::Operand::Reg(r),
    <x:ImmediateValue> => ast::Operand::Imm(x)
};

// TODO: start parsing whole constants sub-language:
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#constants
// Integer literals: a leading `-` forces a signed parse; otherwise a token
// flagged unsigned is parsed as `u64`, and an unflagged token is tried as
// `i64` first and falls back to `u64`. Parse failures are pushed to `errors`
// and a zero placeholder is returned so parsing can continue.
ImmediateValue: ast::ImmediateValue = {
    // TODO: treat negation correctly
    <neg:"-"?> <x:NumToken> => {
        let (num, radix, is_unsigned) = x;
        if neg.is_some() {
            match i64::from_str_radix(num, radix) {
                Ok(x) => ast::ImmediateValue::S64(-x),
                Err(err) => {
                    errors.push(ParseError::User { error: ast::PtxError::from(err) });
                    ast::ImmediateValue::S64(0)
                }
            }
        } else if is_unsigned {
            match u64::from_str_radix(num, radix) {
                Ok(x) => ast::ImmediateValue::U64(x),
                Err(err) => {
                    errors.push(ParseError::User { error: ast::PtxError::from(err) });
                    ast::ImmediateValue::U64(0)
                }
            }
        } else {
            match i64::from_str_radix(num, radix) {
                Ok(x) => ast::ImmediateValue::S64(x),
                Err(_) => {
                    match u64::from_str_radix(num, radix) {
                        Ok(x) => ast::ImmediateValue::U64(x),
                        Err(err) => {
                            errors.push(ParseError::User { error: ast::PtxError::from(err) });
                            ast::ImmediateValue::U64(0)
                        }
                    }
                }
            }
        }
    },
    <f:F32Num> => {
        ast::ImmediateValue::F32(f)
    },
    <f:F64Num> => {
        ast::ImmediateValue::F64(f)
    }
}

// Operand-list rules. `{<>}` fills the AST struct from the named bindings,
// so the binding names below must match the struct field names.

Arg1: ast::Arg1<ast::ParsedArgParams<'input>> = {
    <src:ExtendedID> => ast::Arg1{<>}
};

Arg1Bar: ast::Arg1Bar<ast::ParsedArgParams<'input>> = {
    <src:Operand> => ast::Arg1Bar{<>}
};

Arg2: ast::Arg2<ast::ParsedArgParams<'input>> = {
    <dst:DstOperand> "," <src:Operand> => ast::Arg2{<>}
};

// `reg.member`, yielding (register name, member index). The two alternatives
// cover the lexer producing either a separate "." token or a single `DotID`
// token whose leading dot is stripped. Index errors from `vector_index` are
// pushed to `errors` and index 0 is substituted.
MemberOperand: (&'input str, u8) = {
    <pref:ExtendedID> "." <suf:ExtendedID> => {
        let suf_idx = match vector_index(suf) {
            Ok(x) => x,
            Err(err) => {
                errors.push(err);
                0
            }
        };
        (pref, suf_idx)
    },
    <pref:ExtendedID> <suf:DotID> => {
        let suf_idx = match vector_index(&suf[1..]) {
            Ok(x) => x,
            Err(err) => {
                errors.push(err);
                0
            }
        };
        (pref, suf_idx)
    }
};

// Brace-enclosed list of exactly two or exactly four register names.
VectorExtract: Vec<&'input str> = {
    "{" <r1:ExtendedID> "," <r2:ExtendedID> "}" => {
        vec![r1, r2]
    },
    "{" <r1:ExtendedID> "," <r2:ExtendedID> "," <r3:ExtendedID> "," <r4:ExtendedID> "}" => {
        vec![r1, r2, r3, r4]
    },
};

Arg3: ast::Arg3<ast::ParsedArgParams<'input>> = {
    <dst:DstOperand> "," <src1:Operand> "," <src2:Operand> => ast::Arg3{<>}
};

// Like `Arg3`, but the first source is written in square brackets.
Arg3Atom: ast::Arg3<ast::ParsedArgParams<'input>> = {
    <dst:DstOperand> "," "[" <src1:Operand> "]" "," <src2:Operand> => ast::Arg3{<>}
};

Arg4: ast::Arg4<ast::ParsedArgParams<'input>> = {
    <dst:DstOperand> "," <src1:Operand> "," <src2:Operand> "," <src3:Operand> => ast::Arg4{<>}
};

// Like `Arg4`, but the first source is written in square brackets.
Arg4Atom: ast::Arg4<ast::ParsedArgParams<'input>> = {
    <dst:DstOperand> "," "[" <src1:Operand> "]" "," <src2:Operand> "," <src3:Operand> => ast::Arg4{<>}
};

// Destination may be followed by an optional `|dst2` (see `OptionalDst`).
Arg4Setp: ast::Arg4Setp<ast::ParsedArgParams<'input>> = {
    <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> => ast::Arg4Setp{<>}
};

Arg5: ast::Arg5<ast::ParsedArgParams<'input>> = {
    <dst:DstOperand> "," <src1:Operand> "," <src2:Operand> "," <src3:Operand> "," <src4:Operand> => ast::Arg5{<>}
};

// TODO: pass src3 negation somewhere
// The optional `!` before `src3` is accepted but not bound, so it is discarded.
Arg5Setp: ast::Arg5Setp<ast::ParsedArgParams<'input>> = {
    <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> "," "!"? <src3:Operand> => ast::Arg5Setp{<>}
};

// Operands of `call`, yielding (return params, callee, arguments). The four
// alternatives cover the presence or absence of the return list and of the
// argument list; a missing list becomes an empty `Vec`.
ArgCall: (Vec<&'input str>, &'input str, Vec<ast::Operand<&'input str>>) = {
    "(" <ret_params:Comma<ExtendedID>> ")" "," <func:ExtendedID> "," "(" <param_list:Comma<CallOperand>> ")" => {
        (ret_params, func, param_list)
    },
    "(" <ret_params:Comma<ExtendedID>> ")" "," <func:ExtendedID> => {
        (ret_params, func, Vec::new())
    },
    <func:ExtendedID> "," "(" <param_list:Comma<CallOperand>> ")" => (Vec::new(), func, param_list),
    <func:ExtendedID> => (Vec::new(), func, Vec::<ast::Operand<_>>::new()),
};

// Second destination of the `setp` operand lists, written as `|name`.
OptionalDst: &'input str = {
    "|" <dst2:ExtendedID> => dst2
}

// Source operand: everything `Operand` accepts plus a vector member access.
SrcOperand: ast::Operand<&'input str> = {
    <r:ExtendedID> => ast::Operand::Reg(r),
    <r:ExtendedID> "+" <offset:S32Num> => ast::Operand::RegOffset(r, offset),
    <x:ImmediateValue> => ast::Operand::Imm(x),
    <mem_op:MemberOperand> => {
        let (reg, idx) = mem_op;
        ast::Operand::VecMember(reg, idx)
    }
}

// Source operand that may also be a brace-enclosed register pack.
SrcOperandVec: ast::Operand<&'input str> = {
    <normal:SrcOperand> => normal,
    <vec:VectorExtract> => ast::Operand::VecPack(vec),
}

// Destination operand: a register or a vector member access.
DstOperand: ast::Operand<&'input str> = {
    <r:ExtendedID> => ast::Operand::Reg(r),
    <mem_op:MemberOperand> => {
        let (reg, idx) = mem_op;
        ast::Operand::VecMember(reg, idx)
    }
}

// Destination operand that may also be a brace-enclosed register pack.
DstOperandVec: ast::Operand<&'input str> = {
    <normal:DstOperand> => normal,
    <vec:VectorExtract> => ast::Operand::VecPack(vec),
}

// Vector-width prefix, yielding the element count.
VectorPrefix: u8 = {
    ".v2" => 2,
    ".v4" => 4
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-file
// Recognised for syntax only; no action code is attached.
File = {
    ".file" U32Num String ("," U32Num "," U32Num)?
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-section
// Recognised for syntax only; no action code is attached.
Section = {
    ".section" DotID "{" SectionDwarfLines* "}"
};

// One line inside a `.section` body; the contents are parsed and dropped (type `()`).
SectionDwarfLines: () = {
    AnyBitType Comma<U32Num>,
    ".b32" SectionLabel,
    ".b64" SectionLabel,
    ".b32" SectionLabel "+" U32Num,
    ".b64" SectionLabel "+" U32Num,
};

SectionLabel = {
    ID,
    DotID
};

AnyBitType = {
    ".b8", ".b16", ".b32", ".b64"
};

// Scalar variable declaration: (alignment, type, name).
VariableScalar<T>: (Option<u32>, T, &'input str) = {
    <align:Align?> <v_type:T> <name:ExtendedID> => {
        (align, v_type, name)
    }
}

// Vector variable declaration: (alignment, vector length, element type, name).
VariableVector<T>: (Option<u32>, u8, T, &'input str) = {
    <align:Align?> <v_len:VectorPrefix> <v_type:T> <name:ExtendedID> => {
        (align, v_len, v_type, name)
    }
}

// empty dimensions [0] means it's a pointer
// NOTE(review): the element type is parsed with `SizedScalarType` rather than
// the macro parameter `T` - presumably every instantiation passes
// `SizedScalarType`; confirm against the callers.
// With an initializer, `to_vec` receives `&mut dims`; without one, a zero
// dimension in a multi-dimensional declaration is reported as
// `ZeroDimensionArray`, and exactly `[0]` is treated as a pointer.
VariableArrayOrPointer<T>: (Option<u32>, T, &'input str, ast::ArrayOrPointer) = {
    <align:Align?> <typ:SizedScalarType> <name:ExtendedID> <dims:ArrayDimensions> <init:ArrayInitializer?> => {
        let mut dims = dims;
        let array_init = match init {
            Some(init) => {
                let init_vec = match init.to_vec(typ, &mut dims) {
                    Err(error) => {
                        errors.push(ParseError::User { error });
                        Vec::new()
                    }
                    Ok(x) => x
                };
                ast::ArrayOrPointer::Array { dimensions: dims, init: init_vec }
            }
            None => {
                if dims.len() > 1 && dims.contains(&0) {
                    errors.push(ParseError::User { error: ast::PtxError::ZeroDimensionArray });
                }
                match &*dims {
                    [0] => ast::ArrayOrPointer::Pointer,
                    _ => ast::ArrayOrPointer::Array { dimensions: dims, init: Vec::new() }
                }
            }
        };
        (align, typ, name, array_init)
    }
}

// [0] and [] are treated the same
// Only the first dimension may be written as `[]`; it is recorded as 0.
ArrayDimensions: Vec<u32> = {
    ArrayEmptyDimension => vec![0u32],
    ArrayEmptyDimension <dims:ArrayDimension+> => {
        let mut dims = dims;
        let mut result = vec![0u32];
        result.append(&mut dims);
        result
    },
    <dims:ArrayDimension+> => dims
}

ArrayEmptyDimension = {
    "[" "]"
}

ArrayDimension: u32 = {
    "[" <n:U32Num> "]" => n,
}

// `= { ... }` following an array declaration; yields the bracketed initializer.
ArrayInitializer: ast::NumsOrArrays<'input> = {
    "=" <NumsOrArraysBracket>
}

// A brace-enclosed initializer; yields its contents.
NumsOrArraysBracket: ast::NumsOrArrays<'input> = {
    "{" <NumsOrArrays> "}"
}

// Contents of an initializer: either nested bracketed initializers or a
// non-empty list of number tokens. The third component of each number token
// is dropped; only the first two are kept.
NumsOrArrays: ast::NumsOrArrays<'input> = {
    <nested:Comma<NumsOrArraysBracket>> => ast::NumsOrArrays::Arrays(nested),
    <tokens:CommaNonEmpty<NumToken>> => {
        let nums = tokens
            .into_iter()
            .map(|(text, radix, _)| (text, radix))
            .collect();
        ast::NumsOrArrays::Nums(nums)
    },
}

// Possibly empty list of `T`: zero or more `T ","` pairs, then an optional final `T`.
Comma<T>: Vec<T> = {
    <head:(<T> ",")*> <last:T?> => {
        let mut items = head;
        // `Option<T>` iterates over zero or one element.
        items.extend(last);
        items
    }
};

// Non-empty list of `T`: zero or more `T ","` pairs, then a required final `T`.
CommaNonEmpty<T>: Vec<T> = {
    <head:(<T> ",")*> <last:T> => {
        let mut items = head;
        items.push(last);
        items
    }
};

// Either of two nonterminals; the declared result type is `T1`.
#[inline]
Or<T1, T2>: T1 = {
    T1,
    T2
}

// Any of three nonterminals; the declared result type is `T1`.
#[inline]
Or3<T1, T2, T3>: T1 = {
    T1,
    T2,
    T3
}
\ No newline at end of file |