package interp // This file compiles the LLVM IR to a form that's easy to efficiently // interpret. import ( "strings" "tinygo.org/x/go-llvm" ) // A function is a compiled LLVM function, which means that interpreting it // avoids most CGo calls necessary. This is done in a separate step so the // result can be cached. // Functions are in SSA form, just like the LLVM version if it. The first block // (blocks[0]) is the entry block. type function struct { llvmFn llvm.Value name string // precalculated llvmFn.Name() params []llvm.Value // precalculated llvmFn.Params() blocks []*basicBlock locals map[llvm.Value]int } // basicBlock represents a LLVM basic block and contains a slice of // instructions. The last instruction must be a terminator instruction. type basicBlock struct { phiNodes []instruction instructions []instruction } // instruction is a precompiled LLVM IR instruction. The operands can be either // an already known value (such as literalValue or pointerValue) but can also be // the special localValue, which means that the value is a function parameter or // is produced by another instruction in the function. In that case, the // interpreter will replace the operand with that local value. type instruction struct { opcode llvm.Opcode localIndex int operands []value llvmInst llvm.Value name string } // String returns a nice human-readable version of this instruction. func (inst *instruction) String() string { operands := make([]string, len(inst.operands)) for i, op := range inst.operands { operands[i] = op.String() } name := instructionNameMap[inst.opcode] if name == "" { name = "" } return name + " " + strings.Join(operands, " ") } // compileFunction compiles a given LLVM function to an easier to interpret // version of the function. As far as possible, all operands are preprocessed so // that the interpreter doesn't have to call into LLVM. func (r *runner) compileFunction(llvmFn llvm.Value) *function { fn := &function{ llvmFn: llvmFn, name: llvmFn.Name(), params: llvmFn.Params(), locals: make(map[llvm.Value]int), } if llvmFn.IsDeclaration() { // Nothing to do. return fn } for i, param := range fn.params { fn.locals[param] = i } // Make a map of all the blocks, to quickly find the block number for a // given branch instruction. blockIndices := make(map[llvm.Value]int) for llvmBB := llvmFn.FirstBasicBlock(); !llvmBB.IsNil(); llvmBB = llvm.NextBasicBlock(llvmBB) { index := len(blockIndices) blockIndices[llvmBB.AsValue()] = index } // Compile every block. for llvmBB := llvmFn.FirstBasicBlock(); !llvmBB.IsNil(); llvmBB = llvm.NextBasicBlock(llvmBB) { bb := &basicBlock{} fn.blocks = append(fn.blocks, bb) // Compile every instruction in the block. for llvmInst := llvmBB.FirstInstruction(); !llvmInst.IsNil(); llvmInst = llvm.NextInstruction(llvmInst) { // Create instruction skeleton. opcode := llvmInst.InstructionOpcode() inst := instruction{ opcode: opcode, localIndex: len(fn.locals), llvmInst: llvmInst, } fn.locals[llvmInst] = len(fn.locals) // Add operands specific for this instruction. switch opcode { case llvm.Ret: // Return instruction, which can either be a `ret void` (no // return value) or return a value. numOperands := llvmInst.OperandsCount() if numOperands != 0 { inst.operands = []value{ r.getValue(llvmInst.Operand(0)), } } case llvm.Br: // Branch instruction. Can be either a conditional branch (with // 3 operands) or unconditional branch (with just one basic // block operand). numOperands := llvmInst.OperandsCount() switch numOperands { case 3: // Conditional jump to one of two blocks. Comparable to an // if/else in procedural languages. thenBB := llvmInst.Operand(2) elseBB := llvmInst.Operand(1) inst.operands = []value{ r.getValue(llvmInst.Operand(0)), literalValue{uint32(blockIndices[thenBB])}, literalValue{uint32(blockIndices[elseBB])}, } case 1: // Unconditional jump to a target basic block. Comparable to // a jump in C and Go. jumpBB := llvmInst.Operand(0) inst.operands = []value{ literalValue{uint32(blockIndices[jumpBB])}, } default: panic("unknown number of operands") } case llvm.Switch: // A switch is an array of (value, label) pairs, of which the // first one indicates the to-switch value and the default // label. numOperands := llvmInst.OperandsCount() for i := 0; i < numOperands; i += 2 { inst.operands = append(inst.operands, r.getValue(llvmInst.Operand(i))) inst.operands = append(inst.operands, literalValue{uint32(blockIndices[llvmInst.Operand(i+1)])}) } case llvm.PHI: inst.name = llvmInst.Name() incomingCount := inst.llvmInst.IncomingCount() for i := 0; i < incomingCount; i++ { incomingBB := inst.llvmInst.IncomingBlock(i) incomingValue := inst.llvmInst.IncomingValue(i) inst.operands = append(inst.operands, literalValue{uint32(blockIndices[incomingBB.AsValue()])}, r.getValue(incomingValue), ) } case llvm.Select: // Select is a special instruction that is much like a ternary // operator. It produces operand 1 or 2 based on the boolean // that is operand 0. inst.name = llvmInst.Name() inst.operands = []value{ r.getValue(llvmInst.Operand(0)), r.getValue(llvmInst.Operand(1)), r.getValue(llvmInst.Operand(2)), } case llvm.Call: // Call is a regular function call but could also be a runtime // intrinsic. Some runtime intrinsics are treated specially by // the interpreter, such as runtime.alloc. We don't // differentiate between them here because these calls may also // need to be run at runtime, in which case they should all be // created in the same way. llvmCalledValue := llvmInst.CalledValue() if !llvmCalledValue.IsAFunction().IsNil() { name := llvmCalledValue.Name() if name == "llvm.dbg.value" || strings.HasPrefix(name, "llvm.lifetime.") { // These intrinsics should not be interpreted, they are not // relevant to the execution of this function. continue } } inst.name = llvmInst.Name() numOperands := llvmInst.OperandsCount() inst.operands = append(inst.operands, r.getValue(llvmCalledValue)) for i := 0; i < numOperands-1; i++ { inst.operands = append(inst.operands, r.getValue(llvmInst.Operand(i))) } case llvm.Load: // Load instruction. The interpreter will load from the // appropriate memory view. // Also provide the memory size to be loaded, which is necessary // with a lack of type information. inst.name = llvmInst.Name() inst.operands = []value{ r.getValue(llvmInst.Operand(0)), literalValue{r.targetData.TypeAllocSize(llvmInst.Type())}, } case llvm.Store: // Store instruction. The interpreter will create a new object // in the memory view of the function invocation and store to // that, to make it possible to roll back this store. inst.operands = []value{ r.getValue(llvmInst.Operand(0)), r.getValue(llvmInst.Operand(1)), } case llvm.Alloca: // Alloca allocates stack space for local variables. numElements := r.getValue(inst.llvmInst.Operand(0)).(literalValue).value.(uint32) elementSize := r.targetData.TypeAllocSize(inst.llvmInst.AllocatedType()) inst.operands = []value{ literalValue{elementSize * uint64(numElements)}, } case llvm.GetElementPtr: // GetElementPtr does pointer arithmetic. inst.name = llvmInst.Name() ptr := llvmInst.Operand(0) n := llvmInst.OperandsCount() elementType := llvmInst.GEPSourceElementType() // gep: [source ptr, dest value size, pairs of indices...] inst.operands = []value{ r.getValue(ptr), r.getValue(llvmInst.Operand(1)), literalValue{r.targetData.TypeAllocSize(elementType)}, } for i := 2; i < n; i++ { operand := r.getValue(llvmInst.Operand(i)) switch elementType.TypeKind() { case llvm.StructTypeKind: index := operand.(literalValue).value.(uint32) elementOffset := r.targetData.ElementOffset(elementType, int(index)) // Encode operands in a special way. The elementOffset // is just the offset in bytes. The elementSize is a // negative number (when cast to a int64) by flipping // all the bits. This allows the interpreter to detect // this is a struct field and that it should not // multiply it with the elementOffset to get the offset. // It is important for the interpreter to know the // struct field index for when the GEP must be done at // runtime. inst.operands = append(inst.operands, literalValue{elementOffset}, literalValue{^uint64(index)}) elementType = elementType.StructElementTypes()[index] case llvm.ArrayTypeKind: elementType = elementType.ElementType() elementSize := r.targetData.TypeAllocSize(elementType) elementSizeOperand := literalValue{elementSize} // Add operand * elementSizeOperand bytes to the pointer. inst.operands = append(inst.operands, operand, elementSizeOperand) default: // This should be unreachable. panic("unknown type: " + elementType.String()) } } case llvm.BitCast, llvm.IntToPtr, llvm.PtrToInt: // Bitcasts are ususally used to cast a pointer from one type to // another leaving the pointer itself intact. inst.name = llvmInst.Name() inst.operands = []value{ r.getValue(llvmInst.Operand(0)), } case llvm.ExtractValue: inst.name = llvmInst.Name() agg := llvmInst.Operand(0) var offset uint64 indexingType := agg.Type() for _, index := range inst.llvmInst.Indices() { switch indexingType.TypeKind() { case llvm.StructTypeKind: offset += r.targetData.ElementOffset(indexingType, int(index)) indexingType = indexingType.StructElementTypes()[index] case llvm.ArrayTypeKind: indexingType = indexingType.ElementType() elementSize := r.targetData.TypeAllocSize(indexingType) offset += elementSize * uint64(index) default: panic("unknown type kind") // unreachable } } size := r.targetData.TypeAllocSize(inst.llvmInst.Type()) // extractvalue [agg, byteOffset, byteSize] inst.operands = []value{ r.getValue(agg), literalValue{offset}, literalValue{size}, } case llvm.InsertValue: inst.name = llvmInst.Name() agg := llvmInst.Operand(0) var offset uint64 indexingType := agg.Type() for _, index := range inst.llvmInst.Indices() { switch indexingType.TypeKind() { case llvm.StructTypeKind: offset += r.targetData.ElementOffset(indexingType, int(index)) indexingType = indexingType.StructElementTypes()[index] case llvm.ArrayTypeKind: indexingType = indexingType.ElementType() elementSize := r.targetData.TypeAllocSize(indexingType) offset += elementSize * uint64(index) default: panic("unknown type kind") // unreachable } } // insertvalue [agg, elt, byteOffset] inst.operands = []value{ r.getValue(agg), r.getValue(llvmInst.Operand(1)), literalValue{offset}, } case llvm.ICmp: inst.name = llvmInst.Name() inst.operands = []value{ r.getValue(llvmInst.Operand(0)), r.getValue(llvmInst.Operand(1)), literalValue{uint8(llvmInst.IntPredicate())}, } case llvm.FCmp: inst.name = llvmInst.Name() inst.operands = []value{ r.getValue(llvmInst.Operand(0)), r.getValue(llvmInst.Operand(1)), literalValue{uint8(llvmInst.FloatPredicate())}, } case llvm.Add, llvm.Sub, llvm.Mul, llvm.UDiv, llvm.SDiv, llvm.URem, llvm.SRem, llvm.Shl, llvm.LShr, llvm.AShr, llvm.And, llvm.Or, llvm.Xor: // Integer binary operations. inst.name = llvmInst.Name() inst.operands = []value{ r.getValue(llvmInst.Operand(0)), r.getValue(llvmInst.Operand(1)), } case llvm.SExt, llvm.ZExt, llvm.Trunc: // Extend or shrink an integer size. // No sign extension going on so easy to do. // zext: [value, bitwidth] // trunc: [value, bitwidth] inst.name = llvmInst.Name() inst.operands = []value{ r.getValue(llvmInst.Operand(0)), literalValue{uint64(llvmInst.Type().IntTypeWidth())}, } case llvm.SIToFP, llvm.UIToFP: // Convert an integer to a floating point instruction. // opcode: [value, bitwidth] inst.name = llvmInst.Name() inst.operands = []value{ r.getValue(llvmInst.Operand(0)), literalValue{uint64(r.targetData.TypeAllocSize(llvmInst.Type()) * 8)}, } default: // Unknown instruction, which is already set in inst.opcode so // is detectable. // This error is handled when actually trying to interpret this // instruction (to not trigger on code that won't be executed). } if inst.opcode == llvm.PHI { // PHI nodes need to be treated specially, see the comment in // interpreter.go for an explanation. bb.phiNodes = append(bb.phiNodes, inst) } else { bb.instructions = append(bb.instructions, inst) } } } return fn } // instructionNameMap maps from instruction opcodes to instruction names. This // can be useful for debug logging. var instructionNameMap = [...]string{ llvm.Ret: "ret", llvm.Br: "br", llvm.Switch: "switch", llvm.IndirectBr: "indirectbr", llvm.Invoke: "invoke", llvm.Unreachable: "unreachable", // Standard Binary Operators llvm.Add: "add", llvm.FAdd: "fadd", llvm.Sub: "sub", llvm.FSub: "fsub", llvm.Mul: "mul", llvm.FMul: "fmul", llvm.UDiv: "udiv", llvm.SDiv: "sdiv", llvm.FDiv: "fdiv", llvm.URem: "urem", llvm.SRem: "srem", llvm.FRem: "frem", // Logical Operators llvm.Shl: "shl", llvm.LShr: "lshr", llvm.AShr: "ashr", llvm.And: "and", llvm.Or: "or", llvm.Xor: "xor", // Memory Operators llvm.Alloca: "alloca", llvm.Load: "load", llvm.Store: "store", llvm.GetElementPtr: "getelementptr", // Cast Operators llvm.Trunc: "trunc", llvm.ZExt: "zext", llvm.SExt: "sext", llvm.FPToUI: "fptoui", llvm.FPToSI: "fptosi", llvm.UIToFP: "uitofp", llvm.SIToFP: "sitofp", llvm.FPTrunc: "fptrunc", llvm.FPExt: "fpext", llvm.PtrToInt: "ptrtoint", llvm.IntToPtr: "inttoptr", llvm.BitCast: "bitcast", // Other Operators llvm.ICmp: "icmp", llvm.FCmp: "fcmp", llvm.PHI: "phi", llvm.Call: "call", llvm.Select: "select", llvm.VAArg: "vaarg", llvm.ExtractElement: "extractelement", llvm.InsertElement: "insertelement", llvm.ShuffleVector: "shufflevector", llvm.ExtractValue: "extractvalue", llvm.InsertValue: "insertvalue", }