use crate::ast;
use crate::ast::UnwrapWithVec;
use crate::{without_none, vector_index};

use lalrpop_util::ParseError;
use std::convert::TryInto;

grammar<'err>(errors: &'err mut Vec, ast::PtxError>>);

extern {
    type Error = ast::PtxError;
}

// Lexer definition. Tokens in an earlier `match`/`else` group take precedence
// over the groups that follow, so fixed keywords win over the generic ID regexes.
match {
    r"\s+" => { },
    r"//[^\n\r]*[\n\r]*" => { },
    r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/" => { },
    r"0[fF][0-9a-zA-Z]{8}" => F32NumToken,
    r"0[dD][0-9a-zA-Z]{16}" => F64NumToken,
    r"0[xX][0-9a-zA-Z]+U?" => HexNumToken,
    r"[0-9]+U?" => DecimalNumToken,
    r#""[^"]*""# => String,
    r"[0-9]+\.[0-9]+" => VersionNumber,
    "!",
    "(", ")",
    "+",
    "-",
    ",",
    ".",
    ":",
    ";",
    "@",
    "[", "]",
    "{", "}",
    "<", ">",
    "|",
    "=",
    ".acq_rel",
    ".acquire",
    ".add",
    ".address_size",
    ".align",
    ".aligned",
    ".and",
    ".approx",
    ".b16",
    ".b32",
    ".b64",
    ".b8",
    ".ca",
    ".cas",
    ".cg",
    ".const",
    ".cs",
    ".cta",
    ".cv",
    ".dec",
    ".entry",
    ".eq",
    ".equ",
    ".exch",
    ".extern",
    ".f16",
    ".f16x2",
    ".f32",
    ".f64",
    ".file",
    ".ftz",
    ".full",
    ".func",
    ".ge",
    ".geu",
    ".gl",
    ".global",
    ".gpu",
    ".gt",
    ".gtu",
    ".hi",
    ".hs",
    ".inc",
    ".le",
    ".leu",
    ".lo",
    ".loc",
    ".local",
    ".ls",
    ".lt",
    ".ltu",
    ".lu",
    ".max",
    ".maxnreg",
    ".maxntid",
    ".minnctapersm",
    ".min",
    ".nan",
    ".NaN",
    ".nc",
    ".ne",
    ".neu",
    ".num",
    ".or",
    ".param",
    ".pragma",
    ".pred",
    ".reg",
    ".relaxed",
    ".release",
    ".reqntid",
    ".rm",
    ".rmi",
    ".rn",
    ".rni",
    ".rp",
    ".rpi",
    ".rz",
    ".rzi",
    ".s16",
    ".s32",
    ".s64",
    ".s8" ,
    ".sat",
    ".section",
    ".shared",
    ".sync",
    ".sys",
    ".target",
    ".to",
    ".u16",
    ".u32",
    ".u64",
    ".u8" ,
    ".uni",
    ".v2",
    ".v4",
    ".version",
    ".visible",
    ".volatile",
    ".wb",
    ".weak",
    ".wide",
    ".wt",
    ".xor",
} else {
    // IF YOU ARE ADDING A NEW TOKEN HERE ALSO ADD IT BELOW TO ExtendedID
    "abs",
    "activemask",
    "add",
    "and",
    "atom",
    "bar",
    "barrier",
    "bfe",
    "bfi",
    "bra",
    "brev",
    "call",
    "clz",
    "cos",
    "cvt",
    "cvta",
    "debug",
    "div",
    "ex2",
    "fma",
    "ld",
    "lg2",
    "mad",
    "map_f64_to_f32",
    "max",
    "membar",
    "min",
    "mov",
    "mul",
    "neg",
    "not",
    "or",
    "popc",
    "prmt",
    "rcp",
    "rem",
    "ret",
    "rsqrt",
    "selp",
    "setp",
    "shl",
    "shr",
    "sin",
    r"sm_[0-9]+" => ShaderModel,
    "sqrt",
    "st",
    "sub",
    "texmode_independent",
    "texmode_unified",
    "xor",
} else {
    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#identifiers
    r"[a-zA-Z][a-zA-Z0-9_$]*|[_$%][a-zA-Z0-9_$]+" => ID,
    r"\.[a-zA-Z][a-zA-Z0-9_$]*" => DotID,
}

// Identifier that may also be spelled like one of the keyword tokens from the
// second `match` group above. The two lists must be kept in sync by hand.
ExtendedID : &'input str = {
    "abs",
    "activemask",
    "add",
    "and",
    "atom",
    "bar",
    "barrier",
    "bfe",
    "bfi",
    "bra",
    "brev",
    "call",
    "clz",
    "cos",
    "cvt",
    "cvta",
    "debug",
    "div",
    "ex2",
    "fma",
    "ld",
    "lg2",
    "mad",
    "map_f64_to_f32",
    "max",
    "membar",
    "min",
    "mov",
    "mul",
    "neg",
    "not",
    "or",
    "popc",
    "prmt",
    "rcp",
    "rem",
    "ret",
    "rsqrt",
    "selp",
    "setp",
    "shl",
    "shr",
    "sin",
    ShaderModel,
    "sqrt",
    "st",
    "sub",
    "texmode_independent",
    "texmode_unified",
    "xor",
    ID
}

// Splits an integer literal into `(digits, radix, has_U_suffix)`.
// First alternative: radix 16, the two-character prefix is skipped.
// Second alternative: radix 8 when the text starts with '0', otherwise radix 10.
// In both cases a trailing 'U' is removed from the digits and reported as `true`.
NumToken: (&'input str, u32, bool) = {
    => {
        if s.ends_with('U') {
            (&s[2..s.len() - 1], 16, true)
        } else {
            (&s[2..], 16, false)
        }
    },
    => {
        let radix = if s.starts_with('0') { 8 } else { 10 };
        if s.ends_with('U') {
            (&s[..s.len() - 1], radix, true)
        } else {
            (s, radix, false)
        }
    }
}

// Builds an f32 from the hex digits that follow the two-character prefix,
// interpreting them as the raw bit pattern of the float.
// On a parse failure the error is recorded in `errors` and 0.0 is returned so
// that parsing can continue.
F32Num: f32 = {
    => {
        match u32::from_str_radix(&s[2..], 16) {
            // `from_bits` is the safe equivalent of transmuting u32 -> f32.
            Ok(x) => f32::from_bits(x),
            Err(err) => {
                errors.push(ParseError::User { error: ast::PtxError::from(err) });
                0.0
            }
        }
    }
}

// Same as `F32Num`, but for a 64-bit pattern.
F64Num: f64 = {
    => {
        match u64::from_str_radix(&s[2..], 16) {
            // `from_bits` is the safe equivalent of transmuting u64 -> f64.
            Ok(x) => f64::from_bits(x),
            Err(err) => {
                errors.push(ParseError::User { error: ast::PtxError::from(err) });
                0.0
            }
        }
    }
}

// Unsigned 8-bit literal. Out-of-range or malformed text records an error and yields 0.
U8Num: u8 = {
    => {
        let (text, radix, _) = x;
        match u8::from_str_radix(text, radix) {
            Ok(x) => x,
            Err(err) => {
                errors.push(ParseError::User { error: ast::PtxError::from(err) });
                0
            }
        }
    }
}

// Unsigned 16-bit literal. Out-of-range or malformed text records an error and yields 0.
U16Num: u16 = {
    => {
        let (text, radix, _) = x;
        match u16::from_str_radix(text, radix) {
            Ok(x) => x,
            Err(err) => {
                errors.push(ParseError::User { error: ast::PtxError::from(err) });
                0
            }
        }
    }
}

// Unsigned 32-bit literal. Out-of-range or malformed text records an error and yields 0.
U32Num: u32 = {
    => {
        let (text, radix, _) = x;
        match u32::from_str_radix(text, radix) {
            Ok(x) => x,
            Err(err) => {
                errors.push(ParseError::User { error: ast::PtxError::from(err) });
                0
            }
        }
    }
}

// Signed 32-bit literal with an optional leading minus sign (`sign`).
// The sign is attached to the digits *before* parsing: the magnitude of
// i32::MIN (2147483648) does not fit in an i32, so parsing the magnitude first
// and negating afterwards would reject the valid literal -2147483648.
// Out-of-range or malformed text records an error and yields 0.
S32Num: i32 = {
    => {
        let (text, radix, _) = x;
        let parsed = if sign.is_some() {
            // `text` holds digits only, so prefixing '-' gives a well-formed signed literal.
            i32::from_str_radix(&format!("-{}", text), radix)
        } else {
            i32::from_str_radix(text, radix)
        };
        match parsed {
            Ok(x) => x,
            Err(err) => {
                errors.push(ParseError::User { error: ast::PtxError::from(err) });
                0
            }
        }
    }
}

// Entry point of the grammar: a version header, a target and the list of
// directives (directives that produce `None` are dropped by `without_none`).
pub Module: ast::Module<'input> = {
    Target => {
        ast::Module { version: v, directives: without_none(d) }
    }
};

// Parses `.version major.minor`. Each component that fails to parse records an
// error and is replaced by 0.
Version: (u8, u8) = {
    ".version" => {
        // The version text is expected to contain a '.' separating the two components.
        let dot = v.find('.').unwrap();
        let major = v[..dot].parse::().unwrap_or_else(|err| {
            errors.push(ParseError::User { error: ast::PtxError::from(err) });
            0
        });
        let minor = v[dot+1..].parse::().unwrap_or_else(|err| {
            errors.push(ParseError::User { error: ast::PtxError::from(err) });
            0
        });
        (major,minor)
    }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#ptx-module-directives-target
Target = {
    ".target" Comma
};

TargetSpecifier = {
    ShaderModel,
    "texmode_unified",
    "texmode_independent",
    "debug",
    "map_f64_to_f32"
};

// Top-level directive. Directives that carry no information for the AST
// produce `None`; functions and module-level variables produce `Some(..)`.
Directive: Option>> = {
    AddressSize => None,
    => {
        let (linking, func) = f;
        Some(ast::Directive::Method(linking, func))
    },
    File => None,
    Section => None,
    ";" => {
        let (linking, var) = v;
        Some(ast::Directive::Variable(linking, var))
    },
    @L !
@R => { let (start, _, end)= (<>); errors.push(ParseError::User { error: ast::PtxError::UnrecognizedDirective { start, end } }); None } }; AddressSize = { ".address_size" U8Num }; Function: (ast::LinkingDirective, ast::Function<'input, &'input str, ast::Statement>>) = { => { (linking, ast::Function{func_directive, tuning, body}) } }; LinkingDirective: ast::LinkingDirective = { ".extern" => ast::LinkingDirective::EXTERN, ".visible" => ast::LinkingDirective::VISIBLE, ".weak" => ast::LinkingDirective::WEAK, }; TuningDirective: ast::TuningDirective = { ".maxnreg" => ast::TuningDirective::MaxNReg(ncta), ".maxntid" => ast::TuningDirective::MaxNtid(nx, 1, 1), ".maxntid" "," => ast::TuningDirective::MaxNtid(nx, ny, 1), ".maxntid" "," "," => ast::TuningDirective::MaxNtid(nx, ny, nz), ".reqntid" => ast::TuningDirective::ReqNtid(nx, 1, 1), ".reqntid" "," => ast::TuningDirective::ReqNtid(nx, ny, 1), ".reqntid" "," "," => ast::TuningDirective::ReqNtid(nx, ny, nz), ".minnctapersm" => ast::TuningDirective::MinNCtaPerSm(ncta), }; LinkingDirectives: ast::LinkingDirective = { => { ldirs.into_iter().fold(ast::LinkingDirective::NONE, |x, y| x | y) } } MethodDeclaration: ast::MethodDeclaration<'input, &'input str> = { ".entry" => { let return_arguments = Vec::new(); let name = ast::MethodName::Kernel(name); ast::MethodDeclaration{ return_arguments, name, input_arguments, shared_mem: None } }, ".func" => { let return_arguments = return_arguments.unwrap_or_else(|| Vec::new()); let name = ast::MethodName::Func(name); ast::MethodDeclaration{ return_arguments, name, input_arguments, shared_mem: None } } }; KernelArguments: Vec> = { "(" > ")" => args }; FnArguments: Vec> = { "(" > ")" => args }; KernelInput: ast::Variable<&'input str> = { => { let (align, v_type, name) = v; ast::Variable { align, v_type, state_space: ast::StateSpace::Param, name, array_init: Vec::new() } } } FnInput: ast::Variable<&'input str> = { => { let (align, v_type, name) = v; let state_space = ast::StateSpace::Reg; 
ast::Variable{ align, v_type, state_space, name, array_init: Vec::new() } }, => { let (align, v_type, name) = v; let state_space = ast::StateSpace::Param; ast::Variable{ align, v_type, state_space, name, array_init: Vec::new() } } } FunctionBody: Option>>> = { "{" "}" => { Some(without_none(s)) }, ";" => { None } }; StateSpaceSpecifier: ast::StateSpace = { ".reg" => ast::StateSpace::Reg, ".const" => ast::StateSpace::Const, ".global" => ast::StateSpace::Global, ".local" => ast::StateSpace::Local, ".shared" => ast::StateSpace::Shared, ".param" => ast::StateSpace::Param, // used to prepare function call }; #[inline] ScalarType: ast::ScalarType = { ".f16" => ast::ScalarType::F16, ".f16x2" => ast::ScalarType::F16x2, ".pred" => ast::ScalarType::Pred, ".b8" => ast::ScalarType::B8, ".b16" => ast::ScalarType::B16, ".b32" => ast::ScalarType::B32, ".b64" => ast::ScalarType::B64, ".u8" => ast::ScalarType::U8, ".u16" => ast::ScalarType::U16, ".u32" => ast::ScalarType::U32, ".u64" => ast::ScalarType::U64, ".s8" => ast::ScalarType::S8, ".s16" => ast::ScalarType::S16, ".s32" => ast::ScalarType::S32, ".s64" => ast::ScalarType::S64, ".f32" => ast::ScalarType::F32, ".f64" => ast::ScalarType::F64, }; Statement: Option>> = { => Some(ast::Statement::Label(l)), DebugDirective => None, ";" => Some(ast::Statement::Variable(v)), ";" => Some(ast::Statement::Instruction(p, i)), PragmaStatement => None, "{" "}" => Some(ast::Statement::Block(without_none(s))), @L ! 
";" @R => { let (start, _, _, end) = (<>); errors.push(ParseError::User { error: ast::PtxError::UnrecognizedStatement { start, end } }); None } }; PragmaStatement: () = { ".pragma" String ";" } DebugDirective: () = { DebugLocation }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-loc DebugLocation = { ".loc" U32Num U32Num U32Num }; Label: &'input str = { ":" => id }; Align: u32 = { ".align" => x }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameterized-variable-names MultiVariable: ast::MultiVariable<&'input str> = { => ast::MultiVariable{<>} } VariableParam: u32 = { "<" ">" => n } Variable: ast::Variable<&'input str> = { => { let (align, v_type, name) = v; let state_space = ast::StateSpace::Reg; ast::Variable {align, v_type, state_space, name, array_init: Vec::new()} }, LocalVariable, => { let (align, array_init, v_type, name) = v; let state_space = ast::StateSpace::Param; ast::Variable {align, v_type, state_space, name, array_init} }, SharedVariable, }; RegVariable: (Option, ast::Type, &'input str) = { ".reg" > => { let (align, t, name) = var; let v_type = ast::Type::Scalar(t); (align, v_type, name) }, ".reg" > => { let (align, v_len, t, name) = var; let v_type = ast::Type::Vector(t, v_len); (align, v_type, name) } } LocalVariable: ast::Variable<&'input str> = { ".local" > => { let (align, t, name) = var; let v_type = ast::Type::Scalar(t); let state_space = ast::StateSpace::Local; ast::Variable { align, v_type, state_space, name, array_init: Vec::new() } }, ".local" > => { let (align, v_len, t, name) = var; let v_type = ast::Type::Vector(t, v_len); let state_space = ast::StateSpace::Local; ast::Variable { align, v_type, state_space, name, array_init: Vec::new() } }, ".local" > => { let (align, t, name, arr_or_ptr) = var; let state_space = ast::StateSpace::Local; let (v_type, array_init) = match arr_or_ptr { ast::ArrayOrPointer::Array { dimensions, init } => { (ast::Type::Array(t, dimensions), 
init) } ast::ArrayOrPointer::Pointer => { errors.push(ParseError::User { error: ast::PtxError::ZeroDimensionArray }); (ast::Type::Array(t, Vec::new()), Vec::new()) } }; ast::Variable { align, v_type, state_space, name, array_init } } } SharedVariable: ast::Variable<&'input str> = { ".shared" > => { let (align, t, name) = var; let state_space = ast::StateSpace::Shared; let v_type = ast::Type::Scalar(t); ast::Variable { align, v_type, state_space, name, array_init: Vec::new() } }, ".shared" > => { let (align, v_len, t, name) = var; let state_space = ast::StateSpace::Shared; let v_type = ast::Type::Vector(t, v_len); ast::Variable { align, v_type, state_space, name, array_init: Vec::new() } }, ".shared" > => { let (align, t, name, arr_or_ptr) = var; let state_space = ast::StateSpace::Shared; let (v_type, array_init) = match arr_or_ptr { ast::ArrayOrPointer::Array { dimensions, init } => { (ast::Type::Array(t, dimensions), init) } ast::ArrayOrPointer::Pointer => { errors.push(ParseError::User { error: ast::PtxError::ZeroDimensionArray }); (ast::Type::Array(t, Vec::new()), Vec::new()) } }; ast::Variable { align, v_type, state_space, name, array_init } } } ModuleVariable: (ast::LinkingDirective, ast::Variable<&'input str>) = { => { let (align, v_type, name, array_init) = def; (linking, ast::Variable { align, v_type, state_space, name, array_init }) }, > => { let (align, t, name, arr_or_ptr) = var; let (v_type, state_space, array_init) = match arr_or_ptr { ast::ArrayOrPointer::Array { dimensions, init } => { (ast::Type::Array(t, dimensions), space, init) } ast::ArrayOrPointer::Pointer => { if !linking.contains(ast::LinkingDirective::EXTERN) { errors.push(ParseError::User { error: ast::PtxError::NonExternPointer }); } (ast::Type::Array(t, Vec::new()), space, Vec::new()) } }; (linking, ast::Variable{ align, v_type, state_space, name, array_init }) } } VariableStateSpace: ast::StateSpace = { ".const" => ast::StateSpace::Const, ".global" => ast::StateSpace::Global, ".shared" 
=> ast::StateSpace::Shared, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space ParamVariable: (Option, Vec, ast::Type, &'input str) = { ".param" > => { let (align, t, name) = var; let v_type = ast::Type::Scalar(t); (align, Vec::new(), v_type, name) }, ".param" > => { let (align, t, name, arr_or_ptr) = var; let (v_type, array_init) = match arr_or_ptr { ast::ArrayOrPointer::Array { dimensions, init } => { (ast::Type::Array(t, dimensions), init) } ast::ArrayOrPointer::Pointer => { (ast::Type::Scalar(t), Vec::new()) } }; (align, array_init, v_type, name) } } ParamDeclaration: (Option, ast::Type, &'input str) = { => { let (align, array_init, v_type, name) = var; if array_init.len() > 0 { errors.push(ParseError::User { error: ast::PtxError::ArrayInitalizer }); } (align, v_type, name) } } GlobalVariableDefinitionNoArray: (Option, ast::Type, &'input str, Vec) = { > => { let (align, t, name) = scalar; let v_type = ast::Type::Scalar(t); (align, v_type, name, Vec::new()) }, > => { let (align, v_len, t, name) = var; let v_type = ast::Type::Vector(t, v_len); (align, v_type, name, Vec::new()) }, } #[inline] SizedScalarType: ast::ScalarType = { ".b8" => ast::ScalarType::B8, ".b16" => ast::ScalarType::B16, ".b32" => ast::ScalarType::B32, ".b64" => ast::ScalarType::B64, ".u8" => ast::ScalarType::U8, ".u16" => ast::ScalarType::U16, ".u32" => ast::ScalarType::U32, ".u64" => ast::ScalarType::U64, ".s8" => ast::ScalarType::S8, ".s16" => ast::ScalarType::S16, ".s32" => ast::ScalarType::S32, ".s64" => ast::ScalarType::S64, ".f16" => ast::ScalarType::F16, ".f16x2" => ast::ScalarType::F16x2, ".f32" => ast::ScalarType::F32, ".f64" => ast::ScalarType::F64, } #[inline] LdStScalarType: ast::ScalarType = { ".b8" => ast::ScalarType::B8, ".b16" => ast::ScalarType::B16, ".b32" => ast::ScalarType::B32, ".b64" => ast::ScalarType::B64, ".u8" => ast::ScalarType::U8, ".u16" => ast::ScalarType::U16, ".u32" => ast::ScalarType::U32, ".u64" => 
ast::ScalarType::U64, ".s8" => ast::ScalarType::S8, ".s16" => ast::ScalarType::S16, ".s32" => ast::ScalarType::S32, ".s64" => ast::ScalarType::S64, ".f16" => ast::ScalarType::F16, ".f32" => ast::ScalarType::F32, ".f64" => ast::ScalarType::F64, } Instruction: ast::Instruction> = { InstLd, InstMov, InstMul, InstAdd, InstSetp, InstNot, InstBra, InstCvt, InstShl, InstShr, InstSt, InstRet, InstCvta, InstCall, InstAbs, InstMad, InstFma, InstOr, InstAnd, InstSub, InstMin, InstMax, InstRcp, InstSelp, InstBar, InstAtom, InstAtomCas, InstDiv, InstSqrt, InstRsqrt, InstNeg, InstSin, InstCos, InstLg2, InstEx2, InstClz, InstBrev, InstPopc, InstXor, InstRem, InstBfe, InstBfi, InstPrmt, InstActivemask, InstMembar, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld InstLd: ast::Instruction> = { "ld" "," => { ast::Instruction::Ld( ast::LdDetails { qualifier: q.unwrap_or(ast::LdStQualifier::Weak), state_space: ss.unwrap_or(ast::StateSpace::Generic), caching: cop.unwrap_or(ast::LdCacheOperator::Cached), typ: t, non_coherent: false }, ast::Arg2Ld { dst:dst, src:src } ) }, "ld" ".global" "," => { ast::Instruction::Ld( ast::LdDetails { qualifier: q.unwrap_or(ast::LdStQualifier::Weak), state_space: ast::StateSpace::Global, caching: cop.unwrap_or(ast::LdCacheOperator::Cached), typ: t, non_coherent: false }, ast::Arg2Ld { dst:dst, src:src } ) }, "ld" ".global" ".nc" "," => { ast::Instruction::Ld( ast::LdDetails { qualifier: ast::LdStQualifier::Weak, state_space: ast::StateSpace::Global, caching: cop.unwrap_or(ast::LdCacheOperator::Cached), typ: t, non_coherent: true }, ast::Arg2Ld { dst:dst, src:src } ) } }; LdStType: ast::Type = { => ast::Type::Vector(t, v), => ast::Type::Scalar(t), } LdStQualifier: ast::LdStQualifier = { ".weak" => ast::LdStQualifier::Weak, ".volatile" => ast::LdStQualifier::Volatile, ".relaxed" => ast::LdStQualifier::Relaxed(s), ".acquire" => ast::LdStQualifier::Acquire(s), }; MemScope: ast::MemScope = { 
".cta" => ast::MemScope::Cta, ".gpu" => ast::MemScope::Gpu, ".sys" => ast::MemScope::Sys }; MembarLevel: ast::MemScope = { ".cta" => ast::MemScope::Cta, ".gl" => ast::MemScope::Gpu, ".sys" => ast::MemScope::Sys }; LdNonGlobalStateSpace: ast::StateSpace = { ".const" => ast::StateSpace::Const, ".local" => ast::StateSpace::Local, ".param" => ast::StateSpace::Param, ".shared" => ast::StateSpace::Shared, }; LdCacheOperator: ast::LdCacheOperator = { ".ca" => ast::LdCacheOperator::Cached, ".cg" => ast::LdCacheOperator::L2Only, ".cs" => ast::LdCacheOperator::Streaming, ".lu" => ast::LdCacheOperator::LastUse, ".cv" => ast::LdCacheOperator::Uncached, }; LdNcCacheOperator: ast::LdCacheOperator = { ".ca" => ast::LdCacheOperator::Cached, ".cg" => ast::LdCacheOperator::L2Only, ".cs" => ast::LdCacheOperator::Streaming, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov InstMov: ast::Instruction> = { "mov" "," => { let mov_type = match pref { Some(vec_width) => ast::Type::Vector(t, vec_width), None => ast::Type::Scalar(t) }; let details = ast::MovDetails::new(mov_type); ast::Instruction::Mov( details, ast::Arg2Mov { dst, src } ) } } #[inline] MovScalarType: ast::ScalarType = { ".b16" => ast::ScalarType::B16, ".b32" => ast::ScalarType::B32, ".b64" => ast::ScalarType::B64, ".u16" => ast::ScalarType::U16, ".u32" => ast::ScalarType::U32, ".u64" => ast::ScalarType::U64, ".s16" => ast::ScalarType::S16, ".s32" => ast::ScalarType::S32, ".s64" => ast::ScalarType::S64, ".f32" => ast::ScalarType::F32, ".f64" => ast::ScalarType::F64, ".pred" => ast::ScalarType::Pred }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mul // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-mul InstMul: ast::Instruction> = { "mul" => 
ast::Instruction::Mul(d, a) }; MulDetails: ast::MulDetails = { => ast::MulDetails::Unsigned(ast::MulUInt{ typ: t, control: ctr }), => ast::MulDetails::Signed(ast::MulSInt{ typ: t, control: ctr }), => ast::MulDetails::Float(f) }; MulIntControl: ast::MulIntControl = { ".hi" => ast::MulIntControl::High, ".lo" => ast::MulIntControl::Low, ".wide" => ast::MulIntControl::Wide }; #[inline] RoundingModeFloat : ast::RoundingMode = { ".rn" => ast::RoundingMode::NearestEven, ".rz" => ast::RoundingMode::Zero, ".rm" => ast::RoundingMode::NegativeInf, ".rp" => ast::RoundingMode::PositiveInf, }; RoundingModeInt : ast::RoundingMode = { ".rni" => ast::RoundingMode::NearestEven, ".rzi" => ast::RoundingMode::Zero, ".rmi" => ast::RoundingMode::NegativeInf, ".rpi" => ast::RoundingMode::PositiveInf, }; IntType : ast::ScalarType = { ".u16" => ast::ScalarType::U16, ".u32" => ast::ScalarType::U32, ".u64" => ast::ScalarType::U64, ".s16" => ast::ScalarType::S16, ".s32" => ast::ScalarType::S32, ".s64" => ast::ScalarType::S64, }; IntType3264: ast::ScalarType = { ".u32" => ast::ScalarType::U32, ".u64" => ast::ScalarType::U64, ".s32" => ast::ScalarType::S32, ".s64" => ast::ScalarType::S64, } UIntType: ast::ScalarType = { ".u16" => ast::ScalarType::U16, ".u32" => ast::ScalarType::U32, ".u64" => ast::ScalarType::U64, }; SIntType: ast::ScalarType = { ".s16" => ast::ScalarType::S16, ".s32" => ast::ScalarType::S32, ".s64" => ast::ScalarType::S64, }; FloatType: ast::ScalarType = { ".f16" => ast::ScalarType::F16, ".f16x2" => ast::ScalarType::F16x2, ".f32" => ast::ScalarType::F32, ".f64" => ast::ScalarType::F64, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-add // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-add // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-add InstAdd: ast::Instruction> = { "add" => ast::Instruction::Add(d, a) }; // 
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-setp // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-comparison-instructions-setp // TODO: support f16 setp InstSetp: ast::Instruction> = { "setp" => ast::Instruction::Setp(d, a), "setp" => ast::Instruction::SetpBool(d, a), }; SetpMode: ast::SetpData = { => ast::SetpData { typ: t, flush_to_zero: None, cmp_op: cmp_op, }, ".f32" => ast::SetpData { typ: ast::ScalarType::F32, flush_to_zero: Some(ftz.is_some()), cmp_op: cmp_op, } }; SetpBoolMode: ast::SetpBoolData = { => ast::SetpBoolData { typ: t, flush_to_zero: None, cmp_op: cmp_op, bool_op: bool_op, }, ".f32" => ast::SetpBoolData { typ: ast::ScalarType::F32, flush_to_zero: Some(ftz.is_some()), cmp_op: cmp_op, bool_op: bool_op, } }; SetpCompareOp: ast::SetpCompareOp = { ".eq" => ast::SetpCompareOp::Eq, ".ne" => ast::SetpCompareOp::NotEq, ".lt" => ast::SetpCompareOp::Less, ".le" => ast::SetpCompareOp::LessOrEq, ".gt" => ast::SetpCompareOp::Greater, ".ge" => ast::SetpCompareOp::GreaterOrEq, ".lo" => ast::SetpCompareOp::Less, ".ls" => ast::SetpCompareOp::LessOrEq, ".hi" => ast::SetpCompareOp::Greater, ".hs" => ast::SetpCompareOp::GreaterOrEq, ".equ" => ast::SetpCompareOp::NanEq, ".neu" => ast::SetpCompareOp::NanNotEq, ".ltu" => ast::SetpCompareOp::NanLess, ".leu" => ast::SetpCompareOp::NanLessOrEq, ".gtu" => ast::SetpCompareOp::NanGreater, ".geu" => ast::SetpCompareOp::NanGreaterOrEq, ".num" => ast::SetpCompareOp::IsNotNan, ".nan" => ast::SetpCompareOp::IsAnyNan, }; SetpBoolPostOp: ast::SetpBoolPostOp = { ".and" => ast::SetpBoolPostOp::And, ".or" => ast::SetpBoolPostOp::Or, ".xor" => ast::SetpBoolPostOp::Xor, }; SetpTypeNoF32: ast::ScalarType = { ".b16" => ast::ScalarType::B16, ".b32" => ast::ScalarType::B32, ".b64" => ast::ScalarType::B64, ".u16" => ast::ScalarType::U16, ".u32" => ast::ScalarType::U32, ".u64" => ast::ScalarType::U64, ".s16" => ast::ScalarType::S16, ".s32" => 
ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
    ".f64" => ast::ScalarType::F64,
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-not
InstNot: ast::Instruction> = {
    "not" => ast::Instruction::Not(t, a)
};

// Types accepted by `not`: predicate and untyped bit types.
BooleanType: ast::ScalarType = {
    ".pred" => ast::ScalarType::Pred,
    ".b16" => ast::ScalarType::B16,
    ".b32" => ast::ScalarType::B32,
    ".b64" => ast::ScalarType::B64,
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-at
// Guard predicate: `@p` (not: false) or `@!p` (not: true).
PredAt: ast::PredAt<&'input str> = {
    "@" => ast::PredAt { not: false, label:label },
    "@" "!" => ast::PredAt { not: true, label:label }
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-bra
InstBra: ast::Instruction> = {
    "bra" => ast::Instruction::Bra(ast::BraData{ uniform: u.is_some() }, a)
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt
// `cvt` is split by (destination, source) type class. The int/int, float/int
// and int/float forms delegate validation to the `*_checked` constructors
// (which receive `errors`); every float/float pair is spelled out so that each
// one only records the modifiers it builds into its `CvtDesc`.
// In the action code `r` is the rounding mode, `f` the optional `.ftz` flag and
// `s` the optional `.sat` flag.
InstCvt: ast::Instruction> = {
    "cvt" => {
        ast::Instruction::Cvt(ast::CvtDetails::new_int_from_int_checked(
            s.is_some(),
            dst_t,
            src_t,
            errors
        ), a)
    },
    "cvt" => {
        ast::Instruction::Cvt(ast::CvtDetails::new_float_from_int_checked(
            r,
            f.is_some(),
            s.is_some(),
            dst_t,
            src_t,
            errors
        ), a)
    },
    "cvt" => {
        ast::Instruction::Cvt(ast::CvtDetails::new_int_from_float_checked(
            r,
            f.is_some(),
            s.is_some(),
            dst_t,
            src_t,
            errors
        ), a)
    },
    "cvt" ".f16" ".f16" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: None,
                saturate: s.is_some(),
                dst: ast::ScalarType::F16,
                src: ast::ScalarType::F16
            }
        ), a)
    },
    "cvt" ".f32" ".f16" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F32,
                src: ast::ScalarType::F16
            }
        ), a)
    },
    "cvt" ".f64" ".f16" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: None,
                saturate: s.is_some(),
                dst: ast::ScalarType::F64,
                src: ast::ScalarType::F16
            }
        ), a)
    },
    "cvt" ".f16" ".f32" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F16,
                src: ast::ScalarType::F32
            }
        ), a)
    },
    "cvt" ".f32" ".f32" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F32,
                src: ast::ScalarType::F32
            }
        ), a)
    },
    "cvt" ".f64" ".f32" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F64,
                src: ast::ScalarType::F32
            }
        ), a)
    },
    "cvt" ".f16" ".f64" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                flush_to_zero: None,
                saturate: s.is_some(),
                dst: ast::ScalarType::F16,
                src: ast::ScalarType::F64
            }
        ), a)
    },
    "cvt" ".f32" ".f64" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                // Flush-to-zero follows the `.ftz` flag (`f`), not `.sat` (`s`);
                // previously `.sat` was copied into both fields.
                // NOTE(review): assumes this alternative binds the optional
                // `.ftz` as `f`, like the other f32 alternatives - confirm.
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F32,
                src: ast::ScalarType::F64
            }
        ), a)
    },
    "cvt" ".f64" ".f64" => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: None,
                saturate: s.is_some(),
                dst: ast::ScalarType::F64,
                src: ast::ScalarType::F64
            }
        ), a)
    },
};

// Integer types accepted by `cvt`.
CvtTypeInt: ast::ScalarType = {
    ".u8" => ast::ScalarType::U8,
    ".u16" => ast::ScalarType::U16,
    ".u32" => ast::ScalarType::U32,
    ".u64" => ast::ScalarType::U64,
    ".s8" => ast::ScalarType::S8,
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
};

// Floating-point types accepted by `cvt`.
CvtTypeFloat: ast::ScalarType = {
    ".f16" => ast::ScalarType::F16,
    ".f32" => ast::ScalarType::F32,
    ".f64" => ast::ScalarType::F64,
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shl
InstShl: ast::Instruction> =
{ "shl" => ast::Instruction::Shl(t, a) }; ShlType: ast::ScalarType = { ".b16" => ast::ScalarType::B16, ".b32" => ast::ScalarType::B32, ".b64" => ast::ScalarType::B64, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shr InstShr: ast::Instruction> = { "shr" => ast::Instruction::Shr(t, a) }; ShrType: ast::ScalarType = { ".b16" => ast::ScalarType::B16, ".b32" => ast::ScalarType::B32, ".b64" => ast::ScalarType::B64, ".u16" => ast::ScalarType::U16, ".u32" => ast::ScalarType::U32, ".u64" => ast::ScalarType::U64, ".s16" => ast::ScalarType::S16, ".s32" => ast::ScalarType::S32, ".s64" => ast::ScalarType::S64, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st // Warning: NVIDIA documentation is incorrect, you can specify scope only once InstSt: ast::Instruction> = { "st" "," => { ast::Instruction::St( ast::StData { qualifier: q.unwrap_or(ast::LdStQualifier::Weak), state_space: ss.unwrap_or(ast::StateSpace::Generic), caching: cop.unwrap_or(ast::StCacheOperator::Writeback), typ: t }, ast::Arg2St { src1:src1, src2:src2 } ) } }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#using-addresses-arrays-and-vectors MemoryOperand: ast::Operand<&'input str> = { "[" "]" => o } StStateSpace: ast::StateSpace = { ".global" => ast::StateSpace::Global, ".local" => ast::StateSpace::Local, ".param" => ast::StateSpace::Param, ".shared" => ast::StateSpace::Shared, }; StCacheOperator: ast::StCacheOperator = { ".wb" => ast::StCacheOperator::Writeback, ".cg" => ast::StCacheOperator::L2Only, ".cs" => ast::StCacheOperator::Streaming, ".wt" => ast::StCacheOperator::Writethrough, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret InstRet: ast::Instruction> = { "ret" => ast::Instruction::Ret(ast::RetData { uniform: u.is_some() }) }; // 
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvta InstCvta: ast::Instruction> = { "cvta" => { ast::Instruction::Cvta(ast::CvtaDetails { to: ast::StateSpace::Generic, from, size: s }, a) }, "cvta" ".to" => { ast::Instruction::Cvta(ast::CvtaDetails { to, from: ast::StateSpace::Generic, size: s }, a) } } CvtaStateSpace: ast::StateSpace = { ".const" => ast::StateSpace::Const, ".global" => ast::StateSpace::Global, ".local" => ast::StateSpace::Local, ".shared" => ast::StateSpace::Shared, } CvtaSize: ast::CvtaSize = { ".u32" => ast::CvtaSize::U32, ".u64" => ast::CvtaSize::U64, } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-call InstCall: ast::Instruction> = { "call" => { let (ret_params, func, param_list) = args; ast::Instruction::Call(ast::CallInst { uniform: u.is_some(), ret_params, func, param_list }) } }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-abs InstAbs: ast::Instruction> = { "abs" => { ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: None, typ: t }, a) }, "abs" ".f32" => { ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: Some(f.is_some()), typ: ast::ScalarType::F32 }, a) }, "abs" ".f64" => { ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: None, typ: ast::ScalarType::F64 }, a) }, "abs" ".f16" => { ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: Some(f.is_some()), typ: ast::ScalarType::F16 }, a) }, "abs" ".f16x2" => { ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: Some(f.is_some()), typ: ast::ScalarType::F16x2 }, a) }, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mad // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mad InstMad: ast::Instruction> = { "mad" => ast::Instruction::Mad(d, a), "mad" ".hi" ".sat" ".s32" => todo!(), }; // 
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-fma InstFma: ast::Instruction> = { "fma" => ast::Instruction::Fma(f, a), }; SignedIntType: ast::ScalarType = { ".s16" => ast::ScalarType::S16, ".s32" => ast::ScalarType::S32, ".s64" => ast::ScalarType::S64, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-or InstOr: ast::Instruction> = { "or" => ast::Instruction::Or(d, a), }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-and InstAnd: ast::Instruction> = { "and" => ast::Instruction::And(d, a), }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rcp InstRcp: ast::Instruction> = { "rcp" ".f32" => { let details = ast::RcpDetails { rounding, flush_to_zero: Some(ftz.is_some()), is_f64: false, }; ast::Instruction::Rcp(details, a) }, "rcp" ".f64" => { let details = ast::RcpDetails { rounding: Some(rn), flush_to_zero: None, is_f64: true, }; ast::Instruction::Rcp(details, a) } }; RcpRoundingMode: Option = { ".approx" => None, => Some(r) }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-sub // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sub // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-sub InstSub: ast::Instruction> = { "sub" => ast::Instruction::Sub(d, a), }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-min // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-min // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-min InstMin: ast::Instruction> = { "min" => ast::Instruction::Min(d, a), }; // 
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-max // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-max // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-max InstMax: ast::Instruction> = { "max" => ast::Instruction::Max(d, a), }; MinMaxDetails: ast::MinMaxDetails = { => ast::MinMaxDetails::Unsigned(t), => ast::MinMaxDetails::Signed(t), ".f32" => ast::MinMaxDetails::Float( ast::MinMaxFloat{ flush_to_zero: Some(ftz.is_some()), nan: nan.is_some(), typ: ast::ScalarType::F32 } ), ".f64" => ast::MinMaxDetails::Float( ast::MinMaxFloat{ flush_to_zero: None, nan: false, typ: ast::ScalarType::F64 } ), ".f16" => ast::MinMaxDetails::Float( ast::MinMaxFloat{ flush_to_zero: Some(ftz.is_some()), nan: nan.is_some(), typ: ast::ScalarType::F16 } ), ".f16x2" => ast::MinMaxDetails::Float( ast::MinMaxFloat{ flush_to_zero: Some(ftz.is_some()), nan: nan.is_some(), typ: ast::ScalarType::F16x2 } ) } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-selp InstSelp: ast::Instruction> = { "selp" => ast::Instruction::Selp(t, a), }; SelpType: ast::ScalarType = { ".b16" => ast::ScalarType::B16, ".b32" => ast::ScalarType::B32, ".b64" => ast::ScalarType::B64, ".u16" => ast::ScalarType::U16, ".u32" => ast::ScalarType::U32, ".u64" => ast::ScalarType::U64, ".s16" => ast::ScalarType::S16, ".s32" => ast::ScalarType::S32, ".s64" => ast::ScalarType::S64, ".f32" => ast::ScalarType::F32, ".f64" => ast::ScalarType::F64, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar InstBar: ast::Instruction> = { "bar" ".sync" => ast::Instruction::Bar(ast::BarDetails::SyncAligned, a), "barrier" ".sync" => ast::Instruction::Bar(ast::BarDetails::SyncAligned, a), "barrier" ".sync" ".aligned" => 
ast::Instruction::Bar(ast::BarDetails::SyncAligned, a), } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-atom // The documentation does not mention all supported operations: // * Operation .add requires .u32 or .s32 or .u64 or .f64 or .f16 or .f16x2 or .f32 // * Operation .inc requires .u32 type for instruction // * Operation .dec requires .u32 type for instruction // Otherwise as documented InstAtom: ast::Instruction> = { "atom" => { let details = ast::AtomDetails { semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), scope: scope.unwrap_or(ast::MemScope::Gpu), space: space.unwrap_or(ast::StateSpace::Generic), inner: ast::AtomInnerDetails::Bit { op, typ } }; ast::Instruction::Atom(details,a) }, "atom" ".inc" ".u32" => { let details = ast::AtomDetails { semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), scope: scope.unwrap_or(ast::MemScope::Gpu), space: space.unwrap_or(ast::StateSpace::Generic), inner: ast::AtomInnerDetails::Unsigned { op: ast::AtomUIntOp::Inc, typ: ast::ScalarType::U32 } }; ast::Instruction::Atom(details,a) }, "atom" ".dec" ".u32" => { let details = ast::AtomDetails { semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), scope: scope.unwrap_or(ast::MemScope::Gpu), space: space.unwrap_or(ast::StateSpace::Generic), inner: ast::AtomInnerDetails::Unsigned { op: ast::AtomUIntOp::Dec, typ: ast::ScalarType::U32 } }; ast::Instruction::Atom(details,a) }, "atom" ".add" => { let op = ast::AtomFloatOp::Add; let details = ast::AtomDetails { semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), scope: scope.unwrap_or(ast::MemScope::Gpu), space: space.unwrap_or(ast::StateSpace::Generic), inner: ast::AtomInnerDetails::Float { op, typ } }; ast::Instruction::Atom(details,a) }, "atom" => { let details = ast::AtomDetails { semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), scope: scope.unwrap_or(ast::MemScope::Gpu), space: space.unwrap_or(ast::StateSpace::Generic), inner: 
ast::AtomInnerDetails::Unsigned { op, typ } }; ast::Instruction::Atom(details,a) }, "atom" => { let details = ast::AtomDetails { semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), scope: scope.unwrap_or(ast::MemScope::Gpu), space: space.unwrap_or(ast::StateSpace::Generic), inner: ast::AtomInnerDetails::Signed { op, typ } }; ast::Instruction::Atom(details,a) } } InstAtomCas: ast::Instruction> = { "atom" ".cas" => { let details = ast::AtomCasDetails { semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), scope: scope.unwrap_or(ast::MemScope::Gpu), space: space.unwrap_or(ast::StateSpace::Generic), typ, }; ast::Instruction::AtomCas(details,a) }, } AtomSemantics: ast::AtomSemantics = { ".relaxed" => ast::AtomSemantics::Relaxed, ".acquire" => ast::AtomSemantics::Acquire, ".release" => ast::AtomSemantics::Release, ".acq_rel" => ast::AtomSemantics::AcquireRelease } AtomSpace: ast::StateSpace = { ".global" => ast::StateSpace::Global, ".shared" => ast::StateSpace::Shared } AtomBitOp: ast::AtomBitOp = { ".and" => ast::AtomBitOp::And, ".or" => ast::AtomBitOp::Or, ".xor" => ast::AtomBitOp::Xor, ".exch" => ast::AtomBitOp::Exchange, } AtomUIntOp: ast::AtomUIntOp = { ".add" => ast::AtomUIntOp::Add, ".min" => ast::AtomUIntOp::Min, ".max" => ast::AtomUIntOp::Max, } AtomSIntOp: ast::AtomSIntOp = { ".add" => ast::AtomSIntOp::Add, ".min" => ast::AtomSIntOp::Min, ".max" => ast::AtomSIntOp::Max, } BitType: ast::ScalarType = { ".b32" => ast::ScalarType::B32, ".b64" => ast::ScalarType::B64, } UIntType3264: ast::ScalarType = { ".u32" => ast::ScalarType::U32, ".u64" => ast::ScalarType::U64, } SIntType3264: ast::ScalarType = { ".s32" => ast::ScalarType::S32, ".s64" => ast::ScalarType::S64, } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-div // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-div InstDiv: ast::Instruction> = { "div" => ast::Instruction::Div(ast::DivDetails::Unsigned(t), 
a), "div" => ast::Instruction::Div(ast::DivDetails::Signed(t), a), "div" ".f32" => { let inner = ast::DivFloatDetails { typ: ast::ScalarType::F32, flush_to_zero: Some(ftz.is_some()), kind }; ast::Instruction::Div(ast::DivDetails::Float(inner), a) }, "div" ".f64" => { let inner = ast::DivFloatDetails { typ: ast::ScalarType::F64, flush_to_zero: None, kind: ast::DivFloatKind::Rounding(rnd) }; ast::Instruction::Div(ast::DivDetails::Float(inner), a) }, } DivFloatKind: ast::DivFloatKind = { ".approx" => ast::DivFloatKind::Approx, ".full" => ast::DivFloatKind::Full, => ast::DivFloatKind::Rounding(rnd), } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sqrt InstSqrt: ast::Instruction> = { "sqrt" ".approx" ".f32" => { let details = ast::SqrtDetails { typ: ast::ScalarType::F32, flush_to_zero: Some(ftz.is_some()), kind: ast::SqrtKind::Approx, }; ast::Instruction::Sqrt(details, a) }, "sqrt" ".f32" => { let details = ast::SqrtDetails { typ: ast::ScalarType::F32, flush_to_zero: Some(ftz.is_some()), kind: ast::SqrtKind::Rounding(rnd), }; ast::Instruction::Sqrt(details, a) }, "sqrt" ".f64" => { let details = ast::SqrtDetails { typ: ast::ScalarType::F64, flush_to_zero: None, kind: ast::SqrtKind::Rounding(rnd), }; ast::Instruction::Sqrt(details, a) } } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rsqrt // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rsqrt-approx-ftz-f64 InstRsqrt: ast::Instruction> = { "rsqrt" ".approx" ".f32" => { let details = ast::RsqrtDetails { typ: ast::ScalarType::F32, flush_to_zero: ftz.is_some(), }; ast::Instruction::Rsqrt(details, a) }, "rsqrt" ".approx" ".f64" => { let details = ast::RsqrtDetails { typ: ast::ScalarType::F64, flush_to_zero: ftz.is_some(), }; ast::Instruction::Rsqrt(details, a) }, } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-neg // 
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-neg // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-neg InstNeg: ast::Instruction> = { "neg" => { let details = ast::NegDetails { typ, flush_to_zero: Some(ftz.is_some()), }; ast::Instruction::Neg(details, a) }, "neg" => { let details = ast::NegDetails { typ, flush_to_zero: None, }; ast::Instruction::Neg(details, a) }, } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sin InstSin: ast::Instruction> = { "sin" ".approx" ".f32" => { ast::Instruction::Sin{ flush_to_zero: ftz.is_some(), arg } }, } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-cos InstCos: ast::Instruction> = { "cos" ".approx" ".f32" => { ast::Instruction::Cos{ flush_to_zero: ftz.is_some(), arg } }, } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-lg2 InstLg2: ast::Instruction> = { "lg2" ".approx" ".f32" => { ast::Instruction::Lg2{ flush_to_zero: ftz.is_some(), arg } }, } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-ex2 InstEx2: ast::Instruction> = { "ex2" ".approx" ".f32" => { ast::Instruction::Ex2{ flush_to_zero: ftz.is_some(), arg } }, } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-clz InstClz: ast::Instruction> = { "clz" => ast::Instruction::Clz{ <> } } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-brev InstBrev: ast::Instruction> = { "brev" => ast::Instruction::Brev{ <> } } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-popc InstPopc: ast::Instruction> = { "popc" => ast::Instruction::Popc{ <> } } // 
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-xor InstXor: ast::Instruction> = { "xor" => ast::Instruction::Xor{ <> } } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfe InstBfe: ast::Instruction> = { "bfe" => ast::Instruction::Bfe{ <> } } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfi InstBfi: ast::Instruction> = { "bfi" => ast::Instruction::Bfi{ <> } } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-prmt InstPrmt: ast::Instruction> = { "prmt" ".b32" "," => ast::Instruction::Prmt{ <> } } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-rem InstRem: ast::Instruction> = { "rem" => ast::Instruction::Rem{ <> } } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-activemask InstActivemask: ast::Instruction> = { "activemask" ".b32" => ast::Instruction::Activemask{ <> } } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar InstMembar: ast::Instruction> = { "membar" => ast::Instruction::Membar{ <> } } NegTypeFtz: ast::ScalarType = { ".f16" => ast::ScalarType::F16, ".f16x2" => ast::ScalarType::F16x2, ".f32" => ast::ScalarType::F32, } NegTypeNonFtz: ast::ScalarType = { ".s16" => ast::ScalarType::S16, ".s32" => ast::ScalarType::S32, ".s64" => ast::ScalarType::S64, ".f64" => ast::ScalarType::F64 } ArithDetails: ast::ArithDetails = { => ast::ArithDetails::Unsigned(t), => ast::ArithDetails::Signed(ast::ArithSInt { typ: t, saturate: false, }), ".sat" ".s32" => ast::ArithDetails::Signed(ast::ArithSInt { typ: ast::ScalarType::S32, saturate: true, }), => ast::ArithDetails::Float(f) } ArithFloat: ast::ArithFloat = { ".f32" => ast::ArithFloat { typ: 
ast::ScalarType::F32, rounding: rn, flush_to_zero: Some(ftz.is_some()), saturate: sat.is_some(), }, ".f64" => ast::ArithFloat { typ: ast::ScalarType::F64, rounding: rn, flush_to_zero: None, saturate: false, }, ".f16" => ast::ArithFloat { typ: ast::ScalarType::F16, rounding: rn.map(|_| ast::RoundingMode::NearestEven), flush_to_zero: Some(ftz.is_some()), saturate: sat.is_some(), }, ".f16x2" => ast::ArithFloat { typ: ast::ScalarType::F16x2, rounding: rn.map(|_| ast::RoundingMode::NearestEven), flush_to_zero: Some(ftz.is_some()), saturate: sat.is_some(), }, } ArithFloatMustRound: ast::ArithFloat = { ".f32" => ast::ArithFloat { typ: ast::ScalarType::F32, rounding: Some(rn), flush_to_zero: Some(ftz.is_some()), saturate: sat.is_some(), }, ".f64" => ast::ArithFloat { typ: ast::ScalarType::F64, rounding: Some(rn), flush_to_zero: None, saturate: false, }, ".rn" ".f16" => ast::ArithFloat { typ: ast::ScalarType::F16, rounding: Some(ast::RoundingMode::NearestEven), flush_to_zero: Some(ftz.is_some()), saturate: sat.is_some(), }, ".rn" ".f16x2" => ast::ArithFloat { typ: ast::ScalarType::F16x2, rounding: Some(ast::RoundingMode::NearestEven), flush_to_zero: Some(ftz.is_some()), saturate: sat.is_some(), }, } Operand: ast::Operand<&'input str> = { => ast::Operand::Reg(r), "+" => ast::Operand::RegOffset(r, offset), => ast::Operand::Imm(x) }; CallOperand: ast::Operand<&'input str> = { => ast::Operand::Reg(r), => ast::Operand::Imm(x) }; // TODO: start parsing whole constants sub-language: // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#constants ImmediateValue: ast::ImmediateValue = { // TODO: treat negation correctly => { let (num, radix, is_unsigned) = x; if neg.is_some() { match i64::from_str_radix(num, radix) { Ok(x) => ast::ImmediateValue::S64(-x), Err(err) => { errors.push(ParseError::User { error: ast::PtxError::from(err) }); ast::ImmediateValue::S64(0) } } } else if is_unsigned { match u64::from_str_radix(num, radix) { Ok(x) => ast::ImmediateValue::U64(x), 
Err(err) => { errors.push(ParseError::User { error: ast::PtxError::from(err) }); ast::ImmediateValue::U64(0) } } } else { match i64::from_str_radix(num, radix) { Ok(x) => ast::ImmediateValue::S64(x), Err(_) => { match u64::from_str_radix(num, radix) { Ok(x) => ast::ImmediateValue::U64(x), Err(err) => { errors.push(ParseError::User { error: ast::PtxError::from(err) }); ast::ImmediateValue::U64(0) } } } } } }, => { ast::ImmediateValue::F32(f) }, => { ast::ImmediateValue::F64(f) } } Arg1: ast::Arg1> = { => ast::Arg1{<>} }; Arg1Bar: ast::Arg1Bar> = { => ast::Arg1Bar{<>} }; Arg2: ast::Arg2> = { "," => ast::Arg2{<>} }; MemberOperand: (&'input str, u8) = { "." => { let suf_idx = match vector_index(suf) { Ok(x) => x, Err(err) => { errors.push(err); 0 } }; (pref, suf_idx) }, => { let suf_idx = match vector_index(&suf[1..]) { Ok(x) => x, Err(err) => { errors.push(err); 0 } }; (pref, suf_idx) } }; VectorExtract: Vec<&'input str> = { "{" "," "}" => { vec![r1, r2] }, "{" "," "," "," "}" => { vec![r1, r2, r3, r4] }, }; Arg3: ast::Arg3> = { "," "," => ast::Arg3{<>} }; Arg3Atom: ast::Arg3> = { "," "[" "]" "," => ast::Arg3{<>} }; Arg4: ast::Arg4> = { "," "," "," => ast::Arg4{<>} }; Arg4Atom: ast::Arg4> = { "," "[" "]" "," "," => ast::Arg4{<>} }; Arg4Setp: ast::Arg4Setp> = { "," "," => ast::Arg4Setp{<>} }; Arg5: ast::Arg5> = { "," "," "," "," => ast::Arg5{<>} }; // TODO: pass src3 negation somewhere Arg5Setp: ast::Arg5Setp> = { "," "," "," "!"? 
=> ast::Arg5Setp{<>} }; ArgCall: (Vec<&'input str>, &'input str, Vec>) = { "(" > ")" "," "," "(" > ")" => { (ret_params, func, param_list) }, "(" > ")" "," => { (ret_params, func, Vec::new()) }, "," "(" > ")" => (Vec::new(), func, param_list), => (Vec::new(), func, Vec::>::new()), }; OptionalDst: &'input str = { "|" => dst2 } SrcOperand: ast::Operand<&'input str> = { => ast::Operand::Reg(r), "+" => ast::Operand::RegOffset(r, offset), => ast::Operand::Imm(x), => { let (reg, idx) = mem_op; ast::Operand::VecMember(reg, idx) } } SrcOperandVec: ast::Operand<&'input str> = { => normal, => ast::Operand::VecPack(vec), } DstOperand: ast::Operand<&'input str> = { => ast::Operand::Reg(r), => { let (reg, idx) = mem_op; ast::Operand::VecMember(reg, idx) } } DstOperandVec: ast::Operand<&'input str> = { => normal, => ast::Operand::VecPack(vec), } VectorPrefix: u8 = { ".v2" => 2, ".v4" => 4 }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-file File = { ".file" U32Num String ("," U32Num "," U32Num)? 
}; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-section Section = { ".section" DotID "{" SectionDwarfLines* "}" }; SectionDwarfLines: () = { AnyBitType Comma, ".b32" SectionLabel, ".b64" SectionLabel, ".b32" SectionLabel "+" U32Num, ".b64" SectionLabel "+" U32Num, }; SectionLabel = { ID, DotID }; AnyBitType = { ".b8", ".b16", ".b32", ".b64" }; VariableScalar: (Option, T, &'input str) = { => { (align, v_type, name) } } VariableVector: (Option, u8, T, &'input str) = { => { (align, v_len, v_type, name) } } // empty dimensions [0] means it's a pointer VariableArrayOrPointer: (Option, T, &'input str, ast::ArrayOrPointer) = { => { let mut dims = dims; let array_init = match init { Some(init) => { let init_vec = match init.to_vec(typ, &mut dims) { Err(error) => { errors.push(ParseError::User { error }); Vec::new() } Ok(x) => x }; ast::ArrayOrPointer::Array { dimensions: dims, init: init_vec } } None => { if dims.len() > 1 && dims.contains(&0) { errors.push(ParseError::User { error: ast::PtxError::ZeroDimensionArray }); } match &*dims { [0] => ast::ArrayOrPointer::Pointer, _ => ast::ArrayOrPointer::Array { dimensions: dims, init: Vec::new() } } } }; (align, typ, name, array_init) } } // [0] and [] are treated the same ArrayDimensions: Vec = { ArrayEmptyDimension => vec![0u32], ArrayEmptyDimension => { let mut dims = dims; let mut result = vec![0u32]; result.append(&mut dims); result }, => dims } ArrayEmptyDimension = { "[" "]" } ArrayDimension: u32 = { "[" "]" => n, } ArrayInitializer: ast::NumsOrArrays<'input> = { "=" => nums } NumsOrArraysBracket: ast::NumsOrArrays<'input> = { "{" "}" => nums } NumsOrArrays: ast::NumsOrArrays<'input> = { > => ast::NumsOrArrays::Arrays(n), > => ast::NumsOrArrays::Nums(n.into_iter().map(|(x,radix,_)| (x, radix)).collect()), } Comma: Vec = { ",")*> => match e { None => v, Some(e) => { let mut v = v; v.push(e); v } } }; CommaNonEmpty: Vec = { ",")*> => { let mut v = v; v.push(e); v } }; #[inline] 
// Alternation helpers: `Or` accepts either of the alternatives `T1` / `T2`, and `Or3` accepts
// any of `T1` / `T2` / `T3`; both rules are declared with result type `T1`.
// NOTE(review): the parameter lists of these helpers are not visible in this text - confirm
// against the grammar source before relying on this description.
Or: T1 = { T1, T2 } #[inline] Or3: T1 = { T1, T2, T3 }