// LALRPOP grammar for PTX modules: lexer definition, module-level directives,
// variable declarations and the ld / mov / mul / add / setp instruction forms.
//
// NOTE(review): this copy of the grammar shows no `<name:Symbol>` bindings and
// most generic arguments are absent (e.g. `Vec`, `parse::()`). The action code
// below references names such as `v`, `f`, `t`, `a` whose bindings are not
// visible here -- confirm against the canonical file before building.
use crate::ast;
use crate::ast::UnwrapWithVec;
use crate::{without_none, vector_index};

// `errors` is handed to `unwrap_with(..)` / `*_checked(..)` in the actions;
// presumably it collects recoverable parse errors -- confirm in `ast`.
grammar<'a>(errors: &mut Vec);

extern {
    type Error = ast::PtxError;
}

// Lexer definition. Entries in an earlier arm win over entries in a later
// `else` arm when two patterns match the same text, so fixed punctuation and
// dotted keywords beat instruction names, which in turn beat generic IDs.
match {
    // Whitespace, `//` line comments and `/* */` block comments are skipped.
    r"\s+" => { },
    r"//[^\n\r]*[\n\r]*" => { },
    r"/\*([^\*]*\*+[^\*/])*([^\*]*\*+|[^\*])*\*/" => { },
    // Integer literal with optional sign and optional `0x` prefix.
    // This used to be the character class `[?:0x]?`, which accepted a single
    // `?`, `:`, `0` or `x` before the digits and therefore lexed identifiers
    // such as `x1` as Num.
    // NOTE(review): hex digits a-f are still not accepted and the actions parse
    // the text with `str::parse`, so `0x..` literals are reported as errors.
    r"-?(0x)?[0-9]+" => Num,
    r#""[^"]*""# => String,
    r"[0-9]+\.[0-9]+" => VersionNumber,
    "!", "(", ")", "+", ",", ".", ":", ";", "@", "[", "]", "{", "}", "<", ">", "|",
    ".acquire", ".address_size", ".align", ".and",
    ".b16", ".b32", ".b64", ".b8",
    ".ca", ".cg", ".const", ".cs", ".cta", ".cv",
    ".entry", ".eq", ".equ", ".extern",
    ".f16", ".f16x2", ".f32", ".f64", ".file", ".ftz", ".func",
    ".ge", ".geu", ".global", ".gpu", ".gt", ".gtu",
    ".hi", ".hs",
    ".le", ".leu", ".lo", ".loc", ".local", ".ls", ".lt", ".ltu", ".lu",
    ".nan", ".ne", ".neu", ".num",
    ".or",
    ".param", ".pred",
    ".reg", ".relaxed", ".rm", ".rmi", ".rn", ".rni", ".rp", ".rpi", ".rz", ".rzi",
    ".s16", ".s32", ".s64", ".s8" , ".sat", ".section", ".shared", ".sreg", ".sys",
    ".target", ".to",
    ".u16", ".u32", ".u64", ".u8" , ".uni",
    ".v2", ".v4", ".version", ".visible", ".volatile",
    ".wb", ".weak", ".wide", ".wt",
    ".xor",
} else {
    // IF YOU ARE ADDING A NEW TOKEN HERE ALSO ADD IT BELOW TO ExtendedID
    "abs", "add", "bra", "call", "cvt", "cvta", "debug", "ld", "map_f64_to_f32",
    "mov", "mul", "not", "ret", "setp", "shl", "shr",
    r"sm_[0-9]+" => ShaderModel,
    "st", "texmode_independent", "texmode_unified",
} else {
    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#identifiers
    r"[a-zA-Z][a-zA-Z0-9_$]*|[_$%][a-zA-Z0-9_$]+" => ID,
    r"\.[a-zA-Z][a-zA-Z0-9_$]*" => DotID,
}

// An identifier, or any instruction/target keyword from the second lexer arm
// used in identifier position. Must list every token of that arm.
ExtendedID : &'input str = {
    "abs", "add", "bra", "call", "cvt", "cvta", "debug", "ld", "map_f64_to_f32",
    "mov", "mul", "not", "ret", "setp", "shl", "shr",
    ShaderModel,
    "st", "texmode_independent", "texmode_unified",
    ID
}

// Grammar entry point: builds the module from the parsed version `v` and the
// directive results `f`, passed through `without_none`.
pub Module: ast::Module<'input> = {
    Target => {
        ast::Module { version: v, functions: without_none(f) }
    }
};

// `.version` directive: splits the bound text `v` at its '.' and parses the
// two halves as (major, minor).
// NOTE(review): `find('.').unwrap()` relies on `v` always containing a dot --
// true if `v` is a VersionNumber token; the binding is not visible here.
Version: (u8, u8) = {
    ".version" => {
        let dot = v.find('.').unwrap();
        let major = v[..dot].parse::();
        let minor = v[dot+1..].parse::();
        (major,minor).unwrap_with(errors)
    }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#ptx-module-directives-target
Target = {
    ".target" Comma
};

TargetSpecifier = {
    ShaderModel,
    "texmode_unified",
    "texmode_independent",
    "debug",
    "map_f64_to_f32"
};

// Module-level directive. Only the alternative that binds `f` yields a value;
// address-size, file and section directives yield None.
Directive: Option, ast::Statement>>> = {
    AddressSize => None,
    => Some(f),
    File => None,
    Section => None
};

AddressSize = {
    ".address_size" Num
};

// Function definition or declaration; linking directives are accepted in front.
Function: ast::Function<'input, ast::ParsedArgParams<'input>, ast::Statement>> = {
    LinkingDirective* => ast::Function{<>}
};

LinkingDirective = {
    ".extern",
    ".visible",
    ".weak"
};

// `.entry` produces a kernel declaration, `.func` an ordinary function whose
// return-value list defaults to empty when absent.
MethodDecl: ast::MethodDecl<'input, ast::ParsedArgParams<'input>> = {
    ".entry" => ast::MethodDecl::Kernel(name, params),
    ".func" => {
        ast::MethodDecl::Func(ret_vals.unwrap_or_default(), name, params)
    }
};

// Parenthesised argument lists.
KernelArguments: Vec>> = {
    "(" > ")" => args
};

FnArguments: Vec>> = {
    "(" > ")" => args
};

// Kernel input: the (align, type, name) triple is used unchanged.
KernelInput: ast::Variable> = {
    => {
        let (align, v_type, name) = v;
        ast::Variable{ align, v_type, name }
    }
}

// Function input: the type is wrapped as FnArgumentType::Reg or ::Param
// depending on which alternative matched.
FnInput: ast::Variable> = {
    => {
        let (align, v_type, name) = v;
        let v_type = ast::FnArgumentType::Reg(v_type);
        ast::Variable{ align, v_type, name }
    },
    => {
        let (align, v_type, name) = v;
        let v_type = ast::FnArgumentType::Param(v_type);
        ast::Variable{ align, v_type, name }
    }
}

// Braced body (statements filtered through `without_none`) or a bare `;`,
// which yields None.
pub(crate) FunctionBody: Option>>> = {
    "{" "}" => { Some(without_none(s)) },
    ";" => { None }
};

StateSpaceSpecifier: ast::StateSpace = {
    ".reg" => ast::StateSpace::Reg,
    ".sreg" => ast::StateSpace::Sreg,
    ".const" => ast::StateSpace::Const,
    ".global" => ast::StateSpace::Global,
    ".local" => ast::StateSpace::Local,
    ".shared" => ast::StateSpace::Shared,
    ".param" => ast::StateSpace::Param, // used to prepare function call
};

// Every scalar type: the ld/st set plus .f16, .f16x2 and .pred.
ScalarType: ast::ScalarType = {
    ".f16" => ast::ScalarType::F16,
    ".f16x2" => ast::ScalarType::F16x2,
    ".pred" => ast::ScalarType::Pred,
    LdStScalarType
};

LdStScalarType: ast::ScalarType = {
    ".b8" => ast::ScalarType::B8,
    ".b16" => ast::ScalarType::B16,
    ".b32" => ast::ScalarType::B32,
    ".b64" => ast::ScalarType::B64,
    ".u8" => ast::ScalarType::U8,
    ".u16" => ast::ScalarType::U16,
    ".u32" => ast::ScalarType::U32,
    ".u64" => ast::ScalarType::U64,
    ".s8" => ast::ScalarType::S8,
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
    ".f32" => ast::ScalarType::F32,
    ".f64" => ast::ScalarType::F64,
};

// One statement of a function body. Debug directives yield None; labels,
// variable declarations, instructions and nested blocks yield Some(..).
Statement: Option>> = {
    => Some(ast::Statement::Label(l)),
    DebugDirective => None,
    ";" => Some(ast::Statement::Variable(v)),
    ";" => Some(ast::Statement::Instruction(p, i)),
    "{" "}" => Some(ast::Statement::Block(without_none(s)))
};

DebugDirective: () = {
    DebugLocation
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-loc
DebugLocation = {
    ".loc" Num Num Num
};

Label: &'input str = {
    ":" => id
};

// `.align` value, parsed from the bound text `a`.
Align: u32 = {
    ".align" => {
        let align = a.parse::();
        align.unwrap_with(errors)
    }
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameterized-variable-names
MultiVariable: ast::MultiVariable> = {
    => ast::MultiVariable{<>}
}

// The count between `<` and `>` of a parameterized variable name.
VariableParam: u32 = {
    "<" ">" => {
        let size = n.parse::();
        size.unwrap_with(errors)
    }
}

// Variable declaration: register and param triples are wrapped into
// VariableType::Reg / ::Param; local variables arrive already built.
Variable: ast::Variable> = {
    => {
        let (align, v_type, name) = v;
        let v_type = ast::VariableType::Reg(v_type);
        ast::Variable {align, v_type, name}
    },
    LocalVariable,
    => {
        let (align, v_type, name) = v;
        let v_type = ast::VariableType::Param(v_type);
        ast::Variable {align, v_type, name}
    },
};

// `.reg` declaration, scalar or vector.
RegVariable: (Option, ast::VariableRegType, &'input str) = {
    ".reg" => {
        let v_type = ast::VariableRegType::Scalar(t);
        (align, v_type, name)
    },
    ".reg" => {
        let v_type = ast::VariableRegType::Vector(t, v_len);
        (align, v_type, name)
    }
}

// `.local` declaration: scalar, vector or array.
LocalVariable: ast::Variable> = {
    ".local" => {
        let v_type = ast::VariableType::Local(ast::VariableLocalType::Scalar(t));
        ast::Variable {align, v_type, name}
    },
    ".local" => {
        let v_type = ast::VariableType::Local(ast::VariableLocalType::Vector(t, v_len));
        ast::Variable {align, v_type, name}
    },
    ".local" => {
        let v_type = ast::VariableType::Local(ast::VariableLocalType::Array(t, arr));
        ast::Variable {align, v_type, name}
    }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
ParamVariable: (Option, ast::VariableParamType, &'input str) = {
    ".param" => {
        let v_type = ast::VariableParamType::Scalar(t);
        (align, v_type, name)
    },
    ".param" => {
        let v_type = ast::VariableParamType::Array(t, arr);
        (align, v_type, name)
    }
}

#[inline]
SizedScalarType: ast::SizedScalarType = {
    ".b8" => ast::SizedScalarType::B8,
    ".b16" => ast::SizedScalarType::B16,
    ".b32" => ast::SizedScalarType::B32,
    ".b64" => ast::SizedScalarType::B64,
    ".u8" => ast::SizedScalarType::U8,
    ".u16" => ast::SizedScalarType::U16,
    ".u32" => ast::SizedScalarType::U32,
    ".u64" => ast::SizedScalarType::U64,
    ".s8" => ast::SizedScalarType::S8,
    ".s16" => ast::SizedScalarType::S16,
    ".s32" => ast::SizedScalarType::S32,
    ".s64" => ast::SizedScalarType::S64,
    ".f16" => ast::SizedScalarType::F16,
    ".f16x2" => ast::SizedScalarType::F16x2,
    ".f32" => ast::SizedScalarType::F32,
    ".f64" => ast::SizedScalarType::F64,
}

// Same list as SizedScalarType except that `.f16x2` is not accepted.
#[inline]
ParamScalarType: ast::ParamScalarType = {
    ".b8" => ast::ParamScalarType::B8,
    ".b16" => ast::ParamScalarType::B16,
    ".b32" => ast::ParamScalarType::B32,
    ".b64" => ast::ParamScalarType::B64,
    ".u8" => ast::ParamScalarType::U8,
    ".u16" => ast::ParamScalarType::U16,
    ".u32" => ast::ParamScalarType::U32,
    ".u64" => ast::ParamScalarType::U64,
    ".s8" => ast::ParamScalarType::S8,
    ".s16" => ast::ParamScalarType::S16,
    ".s32" => ast::ParamScalarType::S32,
    ".s64" => ast::ParamScalarType::S64,
    ".f16" => ast::ParamScalarType::F16,
    ".f32" => ast::ParamScalarType::F32,
    ".f64" => ast::ParamScalarType::F64,
}

// Array length between `[` and `]`.
ArraySpecifier: u32 = {
    "[" "]" => {
        let size = n.parse::();
        size.unwrap_with(errors)
    }
};

// Every instruction form this grammar accepts.
Instruction: ast::Instruction> = {
    InstLd,
    InstMov,
    InstMul,
    InstAdd,
    InstSetp,
    InstNot,
    InstBra,
    InstCvt,
    InstShl,
    InstSt,
    InstRet,
    InstCvta,
    InstCall,
    InstAbs,
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
// Omitted modifiers default to Weak / Generic / Cached.
InstLd: ast::Instruction> = {
    "ld" "," => {
        ast::Instruction::Ld(
            ast::LdData {
                qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
                state_space: ss.unwrap_or(ast::LdStateSpace::Generic),
                caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
                typ: t
            },
            ast::Arg2 { dst:dst, src:src }
        )
    }
};

LdStType: ast::Type = {
    => ast::Type::Vector(t, v),
    => ast::Type::Scalar(t),
}

// `.relaxed` and `.acquire` carry a scope `s`.
LdStQualifier: ast::LdStQualifier = {
    ".weak" => ast::LdStQualifier::Weak,
    ".volatile" => ast::LdStQualifier::Volatile,
    ".relaxed" => ast::LdStQualifier::Relaxed(s),
    ".acquire" => ast::LdStQualifier::Acquire(s),
};

LdScope: ast::LdScope = {
    ".cta" => ast::LdScope::Cta,
    ".gpu" => ast::LdScope::Gpu,
    ".sys" => ast::LdScope::Sys
};

LdStateSpace: ast::LdStateSpace = {
    ".const" => ast::LdStateSpace::Const,
    ".global" => ast::LdStateSpace::Global,
    ".local" => ast::LdStateSpace::Local,
    ".param" => ast::LdStateSpace::Param,
    ".shared" => ast::LdStateSpace::Shared,
};

LdCacheOperator: ast::LdCacheOperator = {
    ".ca" => ast::LdCacheOperator::Cached,
    ".cg" => ast::LdCacheOperator::L2Only,
    ".cs" => ast::LdCacheOperator::Streaming,
    ".lu" => ast::LdCacheOperator::LastUse,
    ".cv" => ast::LdCacheOperator::Uncached,
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
// NOTE(review): the vector form always stores `length: 0`; presumably it is
// filled in by a later pass -- confirm.
InstMov: ast::Instruction> = {
    "mov" => {
        ast::Instruction::Mov(t, a)
    },
    "mov" => {
        ast::Instruction::MovVector(ast::MovVectorDetails{typ: t, length: 0}, a)
    }
};

#[inline]
MovType: ast::MovType = {
    => ast::MovType::Scalar(t),
    => ast::MovType::Vector(t, pref)
}

#[inline]
MovScalarType: ast::MovScalarType = {
    ".b16" => ast::MovScalarType::B16,
    ".b32" => ast::MovScalarType::B32,
    ".b64" => ast::MovScalarType::B64,
    ".u16" => ast::MovScalarType::U16,
    ".u32" => ast::MovScalarType::U32,
    ".u64" => ast::MovScalarType::U64,
    ".s16" => ast::MovScalarType::S16,
    ".s32" => ast::MovScalarType::S32,
    ".s64" => ast::MovScalarType::S64,
    ".f32" => ast::MovScalarType::F32,
    ".f64" => ast::MovScalarType::F64,
    ".pred" => ast::MovScalarType::Pred
};

// Same list as MovScalarType without `.pred`.
#[inline]
MovVectorType: ast::MovVectorType = {
    ".b16" => ast::MovVectorType::B16,
    ".b32" => ast::MovVectorType::B32,
    ".b64" => ast::MovVectorType::B64,
    ".u16" => ast::MovVectorType::U16,
    ".u32" => ast::MovVectorType::U32,
    ".u64" => ast::MovVectorType::U64,
    ".s16" => ast::MovVectorType::S16,
    ".s32" => ast::MovVectorType::S32,
    ".s64" => ast::MovVectorType::S64,
    ".f32" => ast::MovVectorType::F32,
    ".f64" => ast::MovVectorType::F64,
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mul
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-mul
InstMul: ast::Instruction> = {
    "mul" => ast::Instruction::Mul(d, a)
};

// Modifier set of `mul`. The .f64 form never flushes or saturates; the half
// precision forms replace any given rounding mode with NearestEven.
InstMulMode: ast::MulDetails = {
    => ast::MulDetails::Int(ast::MulIntDesc {
        typ: t,
        control: ctr
    }),
    ".f32" => ast::MulDetails::Float(ast::MulFloatDesc {
        typ: ast::FloatType::F32,
        rounding: r,
        flush_to_zero: ftz.is_some(),
        saturate: s.is_some()
    }),
    ".f64" => ast::MulDetails::Float(ast::MulFloatDesc {
        typ: ast::FloatType::F64,
        rounding: r,
        flush_to_zero: false,
        saturate: false
    }),
    ".f16" => ast::MulDetails::Float(ast::MulFloatDesc {
        typ: ast::FloatType::F16,
        rounding: r.map(|_| ast::RoundingMode::NearestEven),
        flush_to_zero: ftz.is_some(),
        saturate: s.is_some()
    }),
    ".f16x2" => ast::MulDetails::Float(ast::MulFloatDesc {
        typ: ast::FloatType::F16x2,
        rounding: r.map(|_| ast::RoundingMode::NearestEven),
        flush_to_zero: ftz.is_some(),
        saturate: s.is_some()
    })
};

MulIntControl: ast::MulIntControl = {
    ".hi" => ast::MulIntControl::High,
    ".lo" => ast::MulIntControl::Low,
    ".wide" => ast::MulIntControl::Wide
};

#[inline]
RoundingModeFloat : ast::RoundingMode = {
    ".rn" => ast::RoundingMode::NearestEven,
    ".rz" => ast::RoundingMode::Zero,
    ".rm" => ast::RoundingMode::NegativeInf,
    ".rp" => ast::RoundingMode::PositiveInf,
};

// The `i`-suffixed spellings map onto the same RoundingMode variants.
RoundingModeInt : ast::RoundingMode = {
    ".rni" => ast::RoundingMode::NearestEven,
    ".rzi" => ast::RoundingMode::Zero,
    ".rmi" => ast::RoundingMode::NegativeInf,
    ".rpi" => ast::RoundingMode::PositiveInf,
};

IntType : ast::IntType = {
    ".u16" => ast::IntType::U16,
    ".u32" => ast::IntType::U32,
    ".u64" => ast::IntType::U64,
    ".s16" => ast::IntType::S16,
    ".s32" => ast::IntType::S32,
    ".s64" => ast::IntType::S64,
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-add
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-add
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-add
InstAdd: ast::Instruction> = {
    "add" => ast::Instruction::Add(d, a)
};

// Modifier set of `add`. Integer saturation is only produced by the explicit
// `.sat .s32` alternative.
// NOTE(review): the `.f16x2` alternative is `todo!()`, which panics inside the
// parser when that form is reduced instead of reporting an error.
InstAddMode: ast::AddDetails = {
    => ast::AddDetails::Int(ast::AddIntDesc {
        typ: t,
        saturate: false,
    }),
    ".sat" ".s32" => ast::AddDetails::Int(ast::AddIntDesc {
        typ: ast::IntType::S32,
        saturate: true,
    }),
    ".f32" => ast::AddDetails::Float(ast::AddFloatDesc {
        typ: ast::FloatType::F32,
        rounding: rn,
        flush_to_zero: ftz.is_some(),
        saturate: sat.is_some(),
    }),
    ".f64" => ast::AddDetails::Float(ast::AddFloatDesc {
        typ: ast::FloatType::F64,
        rounding: rn,
        flush_to_zero: false,
        saturate: false,
    }),
    ".f16" => ast::AddDetails::Float(ast::AddFloatDesc {
        typ: ast::FloatType::F16,
        rounding: rn.map(|_| ast::RoundingMode::NearestEven),
        flush_to_zero: ftz.is_some(),
        saturate: sat.is_some(),
    }),
    ".rn"? ".ftz"? ".sat"? ".f16x2" => todo!()
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-setp
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-comparison-instructions-setp
// TODO: support f16 setp
InstSetp: ast::Instruction> = {
    "setp" => ast::Instruction::Setp(d, a),
    "setp" => ast::Instruction::SetpBool(d, a),
};

SetpMode: ast::SetpData = {
    => ast::SetpData{
        typ: t,
        flush_to_zero: ftz.is_some(),
        cmp_op: cmp_op,
    }
};

// Like SetpMode, plus the boolean operator applied after the comparison.
SetpBoolMode: ast::SetpBoolData = {
    => ast::SetpBoolData{
        typ: t,
        flush_to_zero: ftz.is_some(),
        cmp_op: cmp_op,
        bool_op: bool_op,
    }
};

// `.lo/.ls/.hi/.hs` map onto the same variants as `.lt/.le/.gt/.ge`.
SetpCompareOp: ast::SetpCompareOp = {
    ".eq" => ast::SetpCompareOp::Eq,
    ".ne" => ast::SetpCompareOp::NotEq,
    ".lt" => ast::SetpCompareOp::Less,
    ".le" => ast::SetpCompareOp::LessOrEq,
    ".gt" => ast::SetpCompareOp::Greater,
    ".ge" => ast::SetpCompareOp::GreaterOrEq,
    ".lo" => ast::SetpCompareOp::Less,
    ".ls" => ast::SetpCompareOp::LessOrEq,
    ".hi" => ast::SetpCompareOp::Greater,
    ".hs" => ast::SetpCompareOp::GreaterOrEq,
    ".equ" => ast::SetpCompareOp::NanEq,
    ".neu" => ast::SetpCompareOp::NanNotEq,
    ".ltu" => ast::SetpCompareOp::NanLess,
    ".leu" => ast::SetpCompareOp::NanLessOrEq,
    ".gtu" => ast::SetpCompareOp::NanGreater,
    ".geu" => ast::SetpCompareOp::NanGreaterOrEq,
    ".num" => ast::SetpCompareOp::IsNotNan,
    ".nan" => ast::SetpCompareOp::IsNan,
};

SetpBoolPostOp: ast::SetpBoolPostOp = {
    ".and" => ast::SetpBoolPostOp::And,
    ".or" => ast::SetpBoolPostOp::Or,
    ".xor" => ast::SetpBoolPostOp::Xor,
};

SetpType: ast::ScalarType = {
    ".b16" => ast::ScalarType::B16,
    ".b32" => ast::ScalarType::B32,
    ".b64" => ast::ScalarType::B64,
    ".u16" => ast::ScalarType::U16,
    ".u32" => ast::ScalarType::U32,
    ".u64" => ast::ScalarType::U64,
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
    ".f32" => ast::ScalarType::F32,
    ".f64" => ast::ScalarType::F64,
};

//
// NOTE(review): this copy of the grammar shows no `<name:Symbol>` bindings and
// most generic arguments are absent. The actions below reference names such
// as `t`, `a`, `r`, `f`, `s` whose bindings are not visible here -- confirm
// against the canonical file before building.

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-not
InstNot: ast::Instruction> = {
    "not" => ast::Instruction::Not(t, a)
};

NotType: ast::NotType = {
    ".pred" => ast::NotType::Pred,
    ".b16" => ast::NotType::B16,
    ".b32" => ast::NotType::B32,
    ".b64" => ast::NotType::B64,
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-at
// Guard predicate in front of an instruction; `@!` sets `not`.
PredAt: ast::PredAt<&'input str> = {
    "@" => ast::PredAt { not: false, label:label },
    "@" "!" => ast::PredAt { not: true, label:label }
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-bra
InstBra: ast::Instruction> = {
    "bra" => ast::Instruction::Bra(ast::BraData{ uniform: u.is_some() }, a)
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt
// The first three alternatives delegate validation to the `*_checked`
// constructors (which receive `errors`); the float-to-float combinations are
// spelled out one by one. In those, `f` is used for the flush-to-zero flag
// and `s` for the saturate flag.
InstCvt: ast::Instruction> = {
    "cvt" => {
        ast::Instruction::Cvt(ast::CvtDetails::new_int_from_int_checked(
            s.is_some(),
            dst_t,
            src_t,
            errors
        ), a)
    },
    "cvt" => {
        ast::Instruction::Cvt(ast::CvtDetails::new_float_from_int_checked(
            r,
            f.is_some(),
            s.is_some(),
            dst_t,
            src_t,
            errors
        ), a)
    },
    "cvt" => {
        ast::Instruction::Cvt(ast::CvtDetails::new_int_from_float_checked(
            r,
            f.is_some(),
            s.is_some(),
            dst_t,
            src_t,
            errors
        ), a)
    },
    "cvt" ".f16" ".f16" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F16,
                src: ast::FloatType::F16
            }
        ), a)
    },
    "cvt" ".f32" ".f16" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: f.is_some(),
                saturate: s.is_some(),
                dst: ast::FloatType::F32,
                src: ast::FloatType::F16
            }
        ), a)
    },
    "cvt" ".f64" ".f16" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F64,
                src: ast::FloatType::F16
            }
        ), a)
    },
    "cvt" ".f16" ".f32" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                flush_to_zero: f.is_some(),
                saturate: s.is_some(),
                dst: ast::FloatType::F16,
                src: ast::FloatType::F32
            }
        ), a)
    },
    "cvt" ".f32" ".f32" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: f.is_some(),
                saturate: s.is_some(),
                dst: ast::FloatType::F32,
                src: ast::FloatType::F32
            }
        ), a)
    },
    "cvt" ".f64" ".f32" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F64,
                src: ast::FloatType::F32
            }
        ), a)
    },
    "cvt" ".f16" ".f64" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F16,
                src: ast::FloatType::F64
            }
        ), a)
    },
    "cvt" ".f32" ".f64" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                // Was `s.is_some()`, which tied flush-to-zero to the saturate
                // flag. NOTE(review): assumes `.ftz` is bound as `f` in this
                // alternative, as in the other .f32 forms -- confirm.
                flush_to_zero: f.is_some(),
                saturate: s.is_some(),
                dst: ast::FloatType::F32,
                src: ast::FloatType::F64
            }
        ), a)
    },
    "cvt" ".f64" ".f64" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F64,
                src: ast::FloatType::F64
            }
        ), a)
    },
};

CvtTypeInt: ast::IntType = {
    ".u8" => ast::IntType::U8,
    ".u16" => ast::IntType::U16,
    ".u32" => ast::IntType::U32,
    ".u64" => ast::IntType::U64,
    ".s8" => ast::IntType::S8,
    ".s16" => ast::IntType::S16,
    ".s32" => ast::IntType::S32,
    ".s64" => ast::IntType::S64,
};

CvtTypeFloat: ast::FloatType = {
    ".f16" => ast::FloatType::F16,
    ".f32" => ast::FloatType::F32,
    ".f64" => ast::FloatType::F64,
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shl
InstShl: ast::Instruction> = {
    "shl" => ast::Instruction::Shl(t, a)
};

ShlType: ast::ShlType = {
    ".b16" => ast::ShlType::B16,
    ".b32" => ast::ShlType::B32,
    ".b64" => ast::ShlType::B64,
};

//
// NOTE(review): this copy of the grammar shows no `<name:Symbol>` bindings and
// most generic arguments are absent. The actions below reference names such
// as `q`, `ss`, `cop`, `t`, `a` whose bindings are not visible here -- confirm
// against the canonical file before building.

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st
// Warning: NVIDIA documentation is incorrect, you can specify scope only once
// Omitted modifiers default to Weak / Generic / Writeback.
InstSt: ast::Instruction> = {
    "st" "," => {
        ast::Instruction::St(
            ast::StData {
                qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
                state_space: ss.unwrap_or(ast::StStateSpace::Generic),
                caching: cop.unwrap_or(ast::StCacheOperator::Writeback),
                typ: t
            },
            ast::Arg2St { src1:src1, src2:src2 }
        )
    }
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#using-addresses-arrays-and-vectors
// An operand wrapped in square brackets; yields the inner operand `o`.
MemoryOperand: ast::Operand<&'input str> = {
    "[" "]" => o
}

StStateSpace: ast::StStateSpace = {
    ".global" => ast::StStateSpace::Global,
    ".local" => ast::StStateSpace::Local,
    ".param" => ast::StStateSpace::Param,
    ".shared" => ast::StStateSpace::Shared,
};

StCacheOperator: ast::StCacheOperator = {
    ".wb" => ast::StCacheOperator::Writeback,
    ".cg" => ast::StCacheOperator::L2Only,
    ".cs" => ast::StCacheOperator::Streaming,
    ".wt" => ast::StCacheOperator::Writethrough,
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret
InstRet: ast::Instruction> = {
    "ret" => ast::Instruction::Ret(ast::RetData { uniform: u.is_some() })
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvta
// Without `.to` the result converts from Generic to the named space; with
// `.to` it converts from the named space to Generic.
InstCvta: ast::Instruction> = {
    "cvta" => {
        ast::Instruction::Cvta(ast::CvtaDetails {
            to: to,
            from: ast::CvtaStateSpace::Generic,
            size: s
        },
        a)
    },
    "cvta" ".to" => {
        ast::Instruction::Cvta(ast::CvtaDetails {
            to: ast::CvtaStateSpace::Generic,
            from: from,
            size: s
        },
        a)
    }
}

CvtaStateSpace: ast::CvtaStateSpace = {
    ".const" => ast::CvtaStateSpace::Const,
    ".global" => ast::CvtaStateSpace::Global,
    ".local" => ast::CvtaStateSpace::Local,
    ".shared" => ast::CvtaStateSpace::Shared,
}

CvtaSize: ast::CvtaSize = {
    ".u32" => ast::CvtaSize::U32,
    ".u64" => ast::CvtaSize::U64,
}

//
// NOTE(review): this copy of the grammar shows no `<name:Symbol>` bindings and
// most generic arguments are absent (e.g. `Vec`, `parse::()`). The actions
// below reference names such as `args`, `r`, `o`, `dst`, `src` whose bindings
// are not visible here -- confirm against the canonical file before building.

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-call
InstCall: ast::Instruction> = {
    "call" => {
        let (ret_params, func, param_list) = args;
        ast::Instruction::Call(ast::CallInst { uniform: u.is_some(), ret_params, func, param_list })
    }
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-abs
// The first alternative and the .f64 form never flush to zero.
// NOTE(review): the `.f16x2` alternative is `todo!()`, which panics inside the
// parser when that form is reduced instead of reporting an error.
InstAbs: ast::Instruction> = {
    "abs" => {
        ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: false, typ: t }, a)
    },
    "abs" ".f32" => {
        ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: f.is_some(), typ: ast::ScalarType::F32 }, a)
    },
    "abs" ".f64" => {
        ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: false, typ: ast::ScalarType::F64 }, a)
    },
    "abs" ".f16" => {
        ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: f.is_some(), typ: ast::ScalarType::F16 }, a)
    },
    "abs" ".f16x2" => {
        todo!()
    },
};

SignedIntType: ast::ScalarType = {
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
};

// Instruction operand: a register, a register plus numeric offset, or an
// immediate parsed from the bound text `o`.
Operand: ast::Operand<&'input str> = {
    => ast::Operand::Reg(r),
    "+" => {
        let offset = o.parse::();
        let offset = offset.unwrap_with(errors);
        ast::Operand::RegOffset(r, offset)
    },
    // TODO: start parsing whole constants sub-language:
    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#constants
    => {
        let offset = o.parse::();
        let offset = offset.unwrap_with(errors);
        ast::Operand::Imm(offset)
    }
};

// Operand of a `call` parameter list: register or immediate only.
CallOperand: ast::CallOperand<&'input str> = {
    => ast::CallOperand::Reg(r),
    => {
        let offset = o.parse::();
        let offset = offset.unwrap_with(errors);
        ast::CallOperand::Imm(offset)
    }
};

// Fixed-arity argument lists; `{<>}` fills the struct from the named bindings
// of the alternative.
Arg1: ast::Arg1> = {
    => ast::Arg1{<>}
};

Arg2: ast::Arg2> = {
    "," => ast::Arg2{<>}
};

// Two-argument form used by vector `mov`; the variant records which side is
// the vector operand. `dst.0` is the first element of the `dst` tuple.
Arg2Vec: ast::Arg2Vec> = {
    "," => ast::Arg2Vec::Dst(dst, dst.0, src),
    "," => ast::Arg2Vec::Src(dst, src),
    "," => ast::Arg2Vec::Both(dst, dst.0, src),
};

// A vector element reference, yielding (name, index from `vector_index`).
// The second alternative skips the first byte of `suf` before the lookup
// (presumably a leading '.' from a DotID token -- confirm).
VectorOperand: (&'input str, u8) = {
    "." =>? {
        let suf_idx = vector_index(suf)?;
        Ok((pref, suf_idx))
    },
    =>? {
        let suf_idx = vector_index(&suf[1..])?;
        Ok((pref, suf_idx))
    }
};

Arg3: ast::Arg3> = {
    "," "," => ast::Arg3{<>}
};

Arg4: ast::Arg4> = {
    "," "," => ast::Arg4{<>}
};

// TODO: pass src3 negation somewhere
Arg5: ast::Arg5> = {
    "," "," "," "!"? => ast::Arg5{<>}
};

// Arguments of `call`: (return params, function name, parameter list).
// Missing return params or parameter lists become empty vectors.
ArgCall: (Vec<&'input str>, &'input str, Vec>) = {
    "(" > ")" "," "," "(" > ")" => {
        (ret_params, func, param_list)
    },
    "," "(" > ")" => (Vec::new(), func, param_list),
    => (Vec::new(), func, Vec::>::new()),
};

// Second destination written after `|`.
OptionalDst: &'input str = {
    "|" => dst2
}

VectorPrefix: u8 = {
    ".v2" => 2,
    ".v4" => 4
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-file
File = {
    ".file" Num String ("," Num "," Num)?
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-section
Section = {
    ".section" DotID "{" SectionDwarfLines* "}"
};

// One line inside a `.section` block; parsed and discarded (type `()`).
SectionDwarfLines: () = {
    BitType Comma,
    ".b32" SectionLabel,
    ".b64" SectionLabel,
    ".b32" SectionLabel "+" Num,
    ".b64" SectionLabel "+" Num,
};

SectionLabel = {
    ID,
    DotID
};

BitType = {
    ".b8",
    ".b16",
    ".b32",
    ".b64"
};

// Comma-separated list helper: appends the optional trailing element `e` to
// the already collected items `v`.
Comma: Vec = {
    ",")*> => match e {
        None => v,
        Some(e) => {
            let mut v = v;
            v.push(e);
            v
        }
    }
};