diff options
Diffstat (limited to 'stacksize')
-rw-r--r-- | stacksize/dwarf.go | 280 | ||||
-rw-r--r-- | stacksize/stacksize.go | 296 |
2 files changed, 576 insertions, 0 deletions
diff --git a/stacksize/dwarf.go b/stacksize/dwarf.go new file mode 100644 index 000000000..d913186f6 --- /dev/null +++ b/stacksize/dwarf.go @@ -0,0 +1,280 @@ +package stacksize + +// This file implements parsing DWARF call frame information and interpreting +// the CFI bytecode, or enough of it for most practical code. + +import ( + "bytes" + "debug/elf" + "encoding/binary" + "fmt" + "io" +) + +// dwarfCIE represents one DWARF Call Frame Information structure. +type dwarfCIE struct { + bytecode []byte + codeAlignmentFactor uint64 +} + +// parseFrames parses all call frame information from a .debug_frame section and +// provides the passed in symbols map with frame size information. +func parseFrames(f *elf.File, data []byte, symbols map[uint64]*CallNode) error { + if f.Class != elf.ELFCLASS32 { + // TODO: ELF64 + return fmt.Errorf("expected ELF32") + } + cies := make(map[uint32]*dwarfCIE) + + // Read each entity. + r := bytes.NewBuffer(data) + for { + start := len(data) - r.Len() + var length uint32 + err := binary.Read(r, binary.LittleEndian, &length) + if err == io.EOF { + return nil + } + if err != nil { + return err + } + var cie uint32 + err = binary.Read(r, binary.LittleEndian, &cie) + if err != nil { + return err + } + if cie == 0xffffffff { + // This is a CIE. + var fields struct { + Version uint8 + Augmentation uint8 + AddressSize uint8 + SegmentSize uint8 + } + err = binary.Read(r, binary.LittleEndian, &fields) + if err != nil { + return err + } + if fields.Version != 4 { + return fmt.Errorf("unimplemented: .debug_frame version %d", fields.Version) + } + if fields.Augmentation != 0 { + return fmt.Errorf("unimplemented: .debug_frame with augmentation") + } + if fields.SegmentSize != 0 { + return fmt.Errorf("unimplemented: .debug_frame with segment size") + } + codeAlignmentFactor, err := readULEB128(r) + if err != nil { + return err + } + _, err = readSLEB128(r) // data alignment factor + if err != nil { + return err + } + _, err = readULEB128(r) // return address register + if err != nil { + return err + } + rest := (start + int(length) + 4) - (len(data) - r.Len()) + bytecode := r.Next(rest) + cies[uint32(start)] = &dwarfCIE{ + codeAlignmentFactor: codeAlignmentFactor, + bytecode: bytecode, + } + } else { + // This is a FDE. + var fields struct { + InitialLocation uint32 + AddressRange uint32 + } + err = binary.Read(r, binary.LittleEndian, &fields) + if err != nil { + return err + } + if _, ok := cies[cie]; !ok { + return fmt.Errorf("could not find CIE 0x%x in .debug_frame section", cie) + } + frame := frameInfo{ + cie: cies[cie], + start: uint64(fields.InitialLocation), + loc: uint64(fields.InitialLocation), + length: uint64(fields.AddressRange), + } + rest := (start + int(length) + 4) - (len(data) - r.Len()) + bytecode := r.Next(rest) + + if frame.start == 0 { + // Not sure where these come from but they don't seem to be + // important. + continue + } + + _, err = frame.exec(frame.cie.bytecode) + if err != nil { + return err + } + entries, err := frame.exec(bytecode) + if err != nil { + return err + } + var maxFrameSize uint64 + for _, entry := range entries { + switch f.Machine { + case elf.EM_ARM: + if entry.cfaRegister != 13 { // r13 or sp + // something other than a stack pointer (on ARM) + return fmt.Errorf("%08x..%08x: unknown CFA register number %d", frame.start, frame.start+frame.length, entry.cfaRegister) + } + default: + return fmt.Errorf("unknown architecture: %s", f.Machine) + } + if entry.cfaOffset > maxFrameSize { + maxFrameSize = entry.cfaOffset + } + } + node := symbols[frame.start] + if node.Size != frame.length { + return fmt.Errorf("%s: symtab gives symbol length %d while DWARF gives symbol length %d", node, node.Size, frame.length) + } + node.FrameSize = maxFrameSize + node.FrameSizeType = Bounded + if debugPrint { + fmt.Printf("%08x..%08x: frame size %4d %s\n", frame.start, frame.start+frame.length, maxFrameSize, node) + } + } + } +} + +// frameInfo contains the state of executing call frame information bytecode. +type frameInfo struct { + cie *dwarfCIE + start uint64 + loc uint64 + length uint64 + cfaRegister uint64 + cfaOffset uint64 +} + +// frameInfoLine represents one line in the frame table (.debug_frame) at one +// point in the execution of the bytecode. +type frameInfoLine struct { + loc uint64 + cfaRegister uint64 + cfaOffset uint64 +} + +func (fi *frameInfo) newLine() frameInfoLine { + return frameInfoLine{ + loc: fi.loc, + cfaRegister: fi.cfaRegister, + cfaOffset: fi.cfaOffset, + } +} + +// exec executes the given bytecode in the CFI. Most CFI bytecode is actually +// very simple and provides a way to determine the maximum call frame size. +// +// The frame size often changes multiple times in a function, for example the +// frame size may be adjusted in the prologue and epilogue. Each frameInfoLine +// may contain such a change. +func (fi *frameInfo) exec(bytecode []byte) ([]frameInfoLine, error) { + var entries []frameInfoLine + r := bytes.NewBuffer(bytecode) + for { + op, err := r.ReadByte() + if err != nil { + if err == io.EOF { + entries = append(entries, fi.newLine()) + return entries, nil + } + return nil, err + } + highBits := op >> 6 // high order 2 bits + lowBits := op & 0x1f + switch highBits { + case 1: // DW_CFA_advance_loc + fi.loc += uint64(lowBits) * fi.cie.codeAlignmentFactor + entries = append(entries, fi.newLine()) + case 2: // DW_CFA_offset + // This indicates where a register is saved on the stack in the + // prologue. We can ignore that for our purposes. + _, err := readULEB128(r) + if err != nil { + return nil, err + } + case 0: + switch lowBits { + case 0: // DW_CFA_nop + // no operation + case 0x0c: // DW_CFA_def_cfa + register, err := readULEB128(r) + if err != nil { + return nil, err + } + offset, err := readULEB128(r) + if err != nil { + return nil, err + } + fi.cfaRegister = register + fi.cfaOffset = offset + case 0x0e: // DW_CFA_def_cfa_offset + offset, err := readULEB128(r) + if err != nil { + return nil, err + } + fi.cfaOffset = offset + default: + return nil, fmt.Errorf("could not decode .debug_frame bytecode op 0x%x", op) + } + default: + return nil, fmt.Errorf("could not decode .debug_frame bytecode op 0x%x", op) + } + } +} + +// Source: https://en.wikipedia.org/wiki/LEB128#Decode_unsigned_integer +func readULEB128(r *bytes.Buffer) (result uint64, err error) { + // TODO: guard against overflowing 64-bit integers. + var shift uint8 + for { + b, err := r.ReadByte() + if err != nil { + return 0, err + } + result |= uint64(b&0x7f) << shift + if b&0x80 == 0 { + break + } + shift += 7 + } + return +} + +// Source: https://en.wikipedia.org/wiki/LEB128#Decode_signed_integer +func readSLEB128(r *bytes.Buffer) (result int64, err error) { + var shift uint8 + + var b byte + var rawResult uint64 + for { + b, err = r.ReadByte() + if err != nil { + return 0, err + } + rawResult |= uint64(b&0x7f) << shift + shift += 7 + if b&0x80 == 0 { + break + } + } + + // sign bit of byte is second high order bit (0x40) + if shift < 64 && b&0x40 != 0 { + // sign extend + rawResult |= ^uint64(0) << shift + } + result = int64(rawResult) + + return +} diff --git a/stacksize/stacksize.go b/stacksize/stacksize.go new file mode 100644 index 000000000..da9143b20 --- /dev/null +++ b/stacksize/stacksize.go @@ -0,0 +1,296 @@ +// Package stacksize tries to determine the call graph for ELF binaries and +// tries to parse stack size information from DWARF call frame information. +package stacksize + +import ( + "debug/elf" + "encoding/binary" + "errors" + "fmt" + "os" + "sort" +) + +// set to true to print information useful for debugging +const debugPrint = false + +type sizeType uint8 + +// Results after trying to determine the stack size of a function in the call +// graph. The goal is to find a maximum (bounded) stack size, but sometimes this +// is not possible for some reasons such as recursion or indirect calls. +const ( + Undefined sizeType = iota // not yet calculated + Unknown // child has unknown stack size + Bounded // stack size is fixed at compile time (no recursion etc) + Recursive + IndirectCall +) + +// CallNode is a node in the call graph (that is, a function). Because this is +// determined after linking, there may be multiple names for a single function +// (due to aliases). It is also possible multiple functions have the same name +// (but are in fact different), for example for static functions in C. +type CallNode struct { + Names []string + Address uint64 // address at which the function is linked (without T bit on ARM) + Size uint64 // symbol size, in bytes + Children []*CallNode // functions this function calls + FrameSize uint64 // frame size, if FrameSizeType is Bounded + FrameSizeType sizeType // can be Undefined or Bounded + stackSize uint64 + stackSizeType sizeType + missingFrameInfo *CallNode // the child function that is the cause for not being able to determine the stack size +} + +func (n *CallNode) String() string { + if n == nil { + return "<nil>" + } + return n.Names[0] +} + +// CallGraph parses the ELF file and reads DWARF call frame information to +// determine frame sizes for each function, as far as that's possible. Because +// at this point it is not possible to determine indirect calls, a list of +// indirect function calling functions needs to be supplied separately. +// +// This function does not attempt to determine the stack size for functions. +// This is done by calling StackSize on a function in the call graph. +func CallGraph(f *elf.File, callsIndirectFunction []string) (map[string][]*CallNode, error) { + // Sanity check that there is exactly one symbol table. + // Multiple symbol tables are possible, but aren't yet supported below. + numSymbolTables := 0 + for _, section := range f.Sections { + if section.Type == elf.SHT_SYMTAB { + numSymbolTables++ + } + } + if numSymbolTables != 1 { + return nil, fmt.Errorf("expected exactly one symbol table, got %d", numSymbolTables) + } + + // Collect all symbols in the executable. + symbols := make(map[uint64]*CallNode) + symbolList := make([]*CallNode, 0) + symbolNames := make(map[string][]*CallNode) + elfSymbols, err := f.Symbols() + if err != nil { + return nil, err + } + for _, elfSymbol := range elfSymbols { + if elf.ST_TYPE(elfSymbol.Info) != elf.STT_FUNC { + continue + } + address := elfSymbol.Value + if f.Machine == elf.EM_ARM { + address = address &^ 1 + } + var node *CallNode + if n, ok := symbols[address]; ok { + // Existing symbol. + if n.Size != elfSymbol.Size { + return nil, fmt.Errorf("symbol at 0x%x has inconsistent size (%d for %s and %d for %s)", address, n.Size, n.Names[0], elfSymbol.Size, elfSymbol.Name) + } + node = n + node.Names = append(node.Names, elfSymbol.Name) + } else { + // New symbol. + node = &CallNode{ + Names: []string{elfSymbol.Name}, + Address: address, + Size: elfSymbol.Size, + } + symbols[address] = node + symbolList = append(symbolList, node) + } + symbolNames[elfSymbol.Name] = append(symbolNames[elfSymbol.Name], node) + } + + // Sort symbols by address, for binary searching. + sort.Slice(symbolList, func(i, j int) bool { + return symbolList[i].Address < symbolList[j].Address + }) + + // Load relocations and construct the call graph. + for _, section := range f.Sections { + if section.Type != elf.SHT_REL { + continue + } + if section.Entsize != 8 { + // Assume ELF32, this should be fixed. + return nil, fmt.Errorf("only ELF32 is supported at this time") + } + data, err := section.Data() + if err != nil { + return nil, err + } + for i := uint64(0); i < section.Size/section.Entsize; i++ { + offset := binary.LittleEndian.Uint32(data[i*section.Entsize:]) + info := binary.LittleEndian.Uint32(data[i*section.Entsize+4:]) + if elf.R_SYM32(info) == 0 { + continue + } + elfSymbol := elfSymbols[elf.R_SYM32(info)-1] + if elf.ST_TYPE(elfSymbol.Info) != elf.STT_FUNC { + continue + } + address := elfSymbol.Value + if f.Machine == elf.EM_ARM { + address = address &^ 1 + } + childSym := symbols[address] + switch f.Machine { + case elf.EM_ARM: + relocType := elf.R_ARM(elf.R_TYPE32(info)) + parentSym := findSymbol(symbolList, uint64(offset)) + if debugPrint { + fmt.Fprintf(os.Stderr, "found relocation %-24s at %s (0x%x) to %s (0x%x)\n", relocType, parentSym, offset, childSym, childSym.Address) + } + isCall := true + switch relocType { + case elf.R_ARM_THM_PC22: // actually R_ARM_THM_CALL + // used for bl calls + case elf.R_ARM_THM_JUMP24: + // used for b.w jumps + isCall = parentSym != childSym + case elf.R_ARM_THM_JUMP11: + // used for b.n jumps + isCall = parentSym != childSym + case elf.R_ARM_THM_MOVW_ABS_NC, elf.R_ARM_THM_MOVT_ABS: + // used for getting a function pointer + isCall = false + case elf.R_ARM_ABS32: + // used in the reset vector for pointers + isCall = false + default: + return nil, fmt.Errorf("unknown relocation: %s", relocType) + } + if isCall { + if parentSym != nil { + parentSym.Children = append(parentSym.Children, childSym) + } + } + default: + return nil, fmt.Errorf("unknown architecture: %s", f.Machine) + } + } + } + + // Set fixed frame size information, depending on the architecture. + switch f.Machine { + case elf.EM_ARM: + knownFrameSizes := map[string]uint64{ + // implemented in assembly in TinyGo + "tinygo_startTask": 0, // thunk + "tinygo_getSystemStackPointer": 0, // getter + "tinygo_switchToScheduler": 0, // calls tinygo_swapTask + "tinygo_switchToTask": 0, // calls tinygo_swapTask + "tinygo_swapTask": 9 * 4, // 9 registers saved + "tinygo_scanCurrentStack": 9 * 4, // 9 registers saved + + // implemented with assembly in compiler-rt + "__aeabi_uidivmod": 3 * 4, // 3 registers on thumb1 but 1 register on thumb2 + } + for name, size := range knownFrameSizes { + if sym, ok := symbolNames[name]; ok { + if len(sym) > 1 { + return nil, fmt.Errorf("expected zero or one occurence of the symbol %s, found %d", name, len(sym)) + } + sym[0].FrameSize = size + sym[0].FrameSizeType = Bounded + } + } + } + + // Mark functions that do indirect calls (which cannot be determined + // directly from ELF/DWARF information). + for _, name := range callsIndirectFunction { + for _, fn := range symbolNames[name] { + fn.stackSizeType = IndirectCall + fn.missingFrameInfo = fn + } + } + + // Read the .debug_frame section. + section := f.Section(".debug_frame") + if section == nil { + return nil, errors.New("no .debug_frame section present, binary was compiled without debug information") + } + data, err := section.Data() + if err != nil { + return nil, fmt.Errorf("could not read .debug_frame section: %w", err) + } + err = parseFrames(f, data, symbols) + if err != nil { + return nil, err + } + + return symbolNames, nil +} + +// findSymbol determines in which symbol the given address lies. +func findSymbol(symbolList []*CallNode, address uint64) *CallNode { + // TODO: binary search + for _, sym := range symbolList { + if address >= sym.Address && address < sym.Address+sym.Size { + return sym + } + } + return nil +} + +// StackSize tries to determine the stack size of the given call graph node. It +// returns the maximum stack size, whether this size can be known at compile +// time and the call node responsible for failing to determine the maximum stack +// usage. The stack size is only valid if sizeType is Bounded. +func (node *CallNode) StackSize() (uint64, sizeType, *CallNode) { + if node.stackSizeType == Undefined { + node.determineStackSize(make(map[*CallNode]struct{})) + } + return node.stackSize, node.stackSizeType, node.missingFrameInfo +} + +// determineStackSize tries to determine the maximum stack size for this +// function, recursively. +func (node *CallNode) determineStackSize(parents map[*CallNode]struct{}) { + if _, ok := parents[node]; ok { + // The function calls itself (directly or indirectly). + node.stackSizeType = Recursive + node.missingFrameInfo = node + return + } + parents[node] = struct{}{} + defer func() { + delete(parents, node) + }() + switch node.FrameSizeType { + case Bounded: + // Determine the stack size recursively. + childMaxStackSize := uint64(0) + for _, child := range node.Children { + if child.stackSizeType == Undefined { + child.determineStackSize(parents) + } + switch child.stackSizeType { + case Bounded: + if child.stackSize > childMaxStackSize { + childMaxStackSize = child.stackSize + } + case Unknown, Recursive, IndirectCall: + node.stackSizeType = child.stackSizeType + node.missingFrameInfo = child.missingFrameInfo + return + default: + panic("unknown child stack size type") + } + } + node.stackSize = node.FrameSize + childMaxStackSize + node.stackSizeType = Bounded + case Undefined: + node.stackSizeType = Unknown + node.missingFrameInfo = node + default: + panic("unknown frame size type") // unreachable + } +} |