aboutsummaryrefslogtreecommitdiffhomepage
path: root/stacksize
diff options
context:
space:
mode:
Diffstat (limited to 'stacksize')
-rw-r--r--stacksize/dwarf.go280
-rw-r--r--stacksize/stacksize.go296
2 files changed, 576 insertions, 0 deletions
diff --git a/stacksize/dwarf.go b/stacksize/dwarf.go
new file mode 100644
index 000000000..d913186f6
--- /dev/null
+++ b/stacksize/dwarf.go
@@ -0,0 +1,280 @@
+package stacksize
+
+// This file implements parsing DWARF call frame information and interpreting
+// the CFI bytecode, or enough of it for most practical code.
+
+import (
+ "bytes"
+ "debug/elf"
+ "encoding/binary"
+ "fmt"
+ "io"
+)
+
+// dwarfCIE represents one DWARF Call Frame Information structure.
+type dwarfCIE struct {
+ bytecode []byte
+ codeAlignmentFactor uint64
+}
+
+// parseFrames parses all call frame information from a .debug_frame section and
+// provides the passed in symbols map with frame size information.
+func parseFrames(f *elf.File, data []byte, symbols map[uint64]*CallNode) error {
+ if f.Class != elf.ELFCLASS32 {
+ // TODO: ELF64
+ return fmt.Errorf("expected ELF32")
+ }
+ cies := make(map[uint32]*dwarfCIE)
+
+ // Read each entity.
+ r := bytes.NewBuffer(data)
+ for {
+ start := len(data) - r.Len()
+ var length uint32
+ err := binary.Read(r, binary.LittleEndian, &length)
+ if err == io.EOF {
+ return nil
+ }
+ if err != nil {
+ return err
+ }
+ var cie uint32
+ err = binary.Read(r, binary.LittleEndian, &cie)
+ if err != nil {
+ return err
+ }
+ if cie == 0xffffffff {
+ // This is a CIE.
+ var fields struct {
+ Version uint8
+ Augmentation uint8
+ AddressSize uint8
+ SegmentSize uint8
+ }
+ err = binary.Read(r, binary.LittleEndian, &fields)
+ if err != nil {
+ return err
+ }
+ if fields.Version != 4 {
+ return fmt.Errorf("unimplemented: .debug_frame version %d", fields.Version)
+ }
+ if fields.Augmentation != 0 {
+ return fmt.Errorf("unimplemented: .debug_frame with augmentation")
+ }
+ if fields.SegmentSize != 0 {
+ return fmt.Errorf("unimplemented: .debug_frame with segment size")
+ }
+ codeAlignmentFactor, err := readULEB128(r)
+ if err != nil {
+ return err
+ }
+ _, err = readSLEB128(r) // data alignment factor
+ if err != nil {
+ return err
+ }
+ _, err = readULEB128(r) // return address register
+ if err != nil {
+ return err
+ }
+ rest := (start + int(length) + 4) - (len(data) - r.Len())
+ bytecode := r.Next(rest)
+ cies[uint32(start)] = &dwarfCIE{
+ codeAlignmentFactor: codeAlignmentFactor,
+ bytecode: bytecode,
+ }
+ } else {
+ // This is a FDE.
+ var fields struct {
+ InitialLocation uint32
+ AddressRange uint32
+ }
+ err = binary.Read(r, binary.LittleEndian, &fields)
+ if err != nil {
+ return err
+ }
+ if _, ok := cies[cie]; !ok {
+ return fmt.Errorf("could not find CIE 0x%x in .debug_frame section", cie)
+ }
+ frame := frameInfo{
+ cie: cies[cie],
+ start: uint64(fields.InitialLocation),
+ loc: uint64(fields.InitialLocation),
+ length: uint64(fields.AddressRange),
+ }
+ rest := (start + int(length) + 4) - (len(data) - r.Len())
+ bytecode := r.Next(rest)
+
+ if frame.start == 0 {
+ // Not sure where these come from but they don't seem to be
+ // important.
+ continue
+ }
+
+ _, err = frame.exec(frame.cie.bytecode)
+ if err != nil {
+ return err
+ }
+ entries, err := frame.exec(bytecode)
+ if err != nil {
+ return err
+ }
+ var maxFrameSize uint64
+ for _, entry := range entries {
+ switch f.Machine {
+ case elf.EM_ARM:
+ if entry.cfaRegister != 13 { // r13 or sp
+ // something other than a stack pointer (on ARM)
+ return fmt.Errorf("%08x..%08x: unknown CFA register number %d", frame.start, frame.start+frame.length, entry.cfaRegister)
+ }
+ default:
+ return fmt.Errorf("unknown architecture: %s", f.Machine)
+ }
+ if entry.cfaOffset > maxFrameSize {
+ maxFrameSize = entry.cfaOffset
+ }
+ }
+ node := symbols[frame.start]
+ if node.Size != frame.length {
+ return fmt.Errorf("%s: symtab gives symbol length %d while DWARF gives symbol length %d", node, node.Size, frame.length)
+ }
+ node.FrameSize = maxFrameSize
+ node.FrameSizeType = Bounded
+ if debugPrint {
+ fmt.Printf("%08x..%08x: frame size %4d %s\n", frame.start, frame.start+frame.length, maxFrameSize, node)
+ }
+ }
+ }
+}
+
+// frameInfo contains the state of executing call frame information bytecode.
+type frameInfo struct {
+ cie *dwarfCIE
+ start uint64
+ loc uint64
+ length uint64
+ cfaRegister uint64
+ cfaOffset uint64
+}
+
+// frameInfoLine represents one line in the frame table (.debug_frame) at one
+// point in the execution of the bytecode.
+type frameInfoLine struct {
+ loc uint64
+ cfaRegister uint64
+ cfaOffset uint64
+}
+
+func (fi *frameInfo) newLine() frameInfoLine {
+ return frameInfoLine{
+ loc: fi.loc,
+ cfaRegister: fi.cfaRegister,
+ cfaOffset: fi.cfaOffset,
+ }
+}
+
+// exec executes the given bytecode in the CFI. Most CFI bytecode is actually
+// very simple and provides a way to determine the maximum call frame size.
+//
+// The frame size often changes multiple times in a function, for example the
+// frame size may be adjusted in the prologue and epilogue. Each frameInfoLine
+// may contain such a change.
+func (fi *frameInfo) exec(bytecode []byte) ([]frameInfoLine, error) {
+ var entries []frameInfoLine
+ r := bytes.NewBuffer(bytecode)
+ for {
+ op, err := r.ReadByte()
+ if err != nil {
+ if err == io.EOF {
+ entries = append(entries, fi.newLine())
+ return entries, nil
+ }
+ return nil, err
+ }
+ highBits := op >> 6 // high order 2 bits
+ lowBits := op & 0x1f
+ switch highBits {
+ case 1: // DW_CFA_advance_loc
+ fi.loc += uint64(lowBits) * fi.cie.codeAlignmentFactor
+ entries = append(entries, fi.newLine())
+ case 2: // DW_CFA_offset
+ // This indicates where a register is saved on the stack in the
+ // prologue. We can ignore that for our purposes.
+ _, err := readULEB128(r)
+ if err != nil {
+ return nil, err
+ }
+ case 0:
+ switch lowBits {
+ case 0: // DW_CFA_nop
+ // no operation
+ case 0x0c: // DW_CFA_def_cfa
+ register, err := readULEB128(r)
+ if err != nil {
+ return nil, err
+ }
+ offset, err := readULEB128(r)
+ if err != nil {
+ return nil, err
+ }
+ fi.cfaRegister = register
+ fi.cfaOffset = offset
+ case 0x0e: // DW_CFA_def_cfa_offset
+ offset, err := readULEB128(r)
+ if err != nil {
+ return nil, err
+ }
+ fi.cfaOffset = offset
+ default:
+ return nil, fmt.Errorf("could not decode .debug_frame bytecode op 0x%x", op)
+ }
+ default:
+ return nil, fmt.Errorf("could not decode .debug_frame bytecode op 0x%x", op)
+ }
+ }
+}
+
+// Source: https://en.wikipedia.org/wiki/LEB128#Decode_unsigned_integer
+func readULEB128(r *bytes.Buffer) (result uint64, err error) {
+ // TODO: guard against overflowing 64-bit integers.
+ var shift uint8
+ for {
+ b, err := r.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ result |= uint64(b&0x7f) << shift
+ if b&0x80 == 0 {
+ break
+ }
+ shift += 7
+ }
+ return
+}
+
+// Source: https://en.wikipedia.org/wiki/LEB128#Decode_signed_integer
+func readSLEB128(r *bytes.Buffer) (result int64, err error) {
+ var shift uint8
+
+ var b byte
+ var rawResult uint64
+ for {
+ b, err = r.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ rawResult |= uint64(b&0x7f) << shift
+ shift += 7
+ if b&0x80 == 0 {
+ break
+ }
+ }
+
+ // sign bit of byte is second high order bit (0x40)
+ if shift < 64 && b&0x40 != 0 {
+ // sign extend
+ rawResult |= ^uint64(0) << shift
+ }
+ result = int64(rawResult)
+
+ return
+}
diff --git a/stacksize/stacksize.go b/stacksize/stacksize.go
new file mode 100644
index 000000000..da9143b20
--- /dev/null
+++ b/stacksize/stacksize.go
@@ -0,0 +1,296 @@
+// Package stacksize tries to determine the call graph for ELF binaries and
+// tries to parse stack size information from DWARF call frame information.
+package stacksize
+
+import (
+ "debug/elf"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "os"
+ "sort"
+)
+
+// set to true to print information useful for debugging
+const debugPrint = false
+
+type sizeType uint8
+
+// Results after trying to determine the stack size of a function in the call
+// graph. The goal is to find a maximum (bounded) stack size, but sometimes this
+// is not possible for some reasons such as recursion or indirect calls.
+const (
+ Undefined sizeType = iota // not yet calculated
+ Unknown // child has unknown stack size
+ Bounded // stack size is fixed at compile time (no recursion etc)
+ Recursive
+ IndirectCall
+)
+
+// CallNode is a node in the call graph (that is, a function). Because this is
+// determined after linking, there may be multiple names for a single function
+// (due to aliases). It is also possible multiple functions have the same name
+// (but are in fact different), for example for static functions in C.
+type CallNode struct {
+ Names []string
+ Address uint64 // address at which the function is linked (without T bit on ARM)
+ Size uint64 // symbol size, in bytes
+ Children []*CallNode // functions this function calls
+ FrameSize uint64 // frame size, if FrameSizeType is Bounded
+ FrameSizeType sizeType // can be Undefined or Bounded
+ stackSize uint64
+ stackSizeType sizeType
+ missingFrameInfo *CallNode // the child function that is the cause for not being able to determine the stack size
+}
+
+func (n *CallNode) String() string {
+ if n == nil {
+ return "<nil>"
+ }
+ return n.Names[0]
+}
+
+// CallGraph parses the ELF file and reads DWARF call frame information to
+// determine frame sizes for each function, as far as that's possible. Because
+// at this point it is not possible to determine indirect calls, a list of
+// indirect function calling functions needs to be supplied separately.
+//
+// This function does not attempt to determine the stack size for functions.
+// This is done by calling StackSize on a function in the call graph.
+func CallGraph(f *elf.File, callsIndirectFunction []string) (map[string][]*CallNode, error) {
+ // Sanity check that there is exactly one symbol table.
+ // Multiple symbol tables are possible, but aren't yet supported below.
+ numSymbolTables := 0
+ for _, section := range f.Sections {
+ if section.Type == elf.SHT_SYMTAB {
+ numSymbolTables++
+ }
+ }
+ if numSymbolTables != 1 {
+ return nil, fmt.Errorf("expected exactly one symbol table, got %d", numSymbolTables)
+ }
+
+ // Collect all symbols in the executable.
+ symbols := make(map[uint64]*CallNode)
+ symbolList := make([]*CallNode, 0)
+ symbolNames := make(map[string][]*CallNode)
+ elfSymbols, err := f.Symbols()
+ if err != nil {
+ return nil, err
+ }
+ for _, elfSymbol := range elfSymbols {
+ if elf.ST_TYPE(elfSymbol.Info) != elf.STT_FUNC {
+ continue
+ }
+ address := elfSymbol.Value
+ if f.Machine == elf.EM_ARM {
+ address = address &^ 1
+ }
+ var node *CallNode
+ if n, ok := symbols[address]; ok {
+ // Existing symbol.
+ if n.Size != elfSymbol.Size {
+ return nil, fmt.Errorf("symbol at 0x%x has inconsistent size (%d for %s and %d for %s)", address, n.Size, n.Names[0], elfSymbol.Size, elfSymbol.Name)
+ }
+ node = n
+ node.Names = append(node.Names, elfSymbol.Name)
+ } else {
+ // New symbol.
+ node = &CallNode{
+ Names: []string{elfSymbol.Name},
+ Address: address,
+ Size: elfSymbol.Size,
+ }
+ symbols[address] = node
+ symbolList = append(symbolList, node)
+ }
+ symbolNames[elfSymbol.Name] = append(symbolNames[elfSymbol.Name], node)
+ }
+
+ // Sort symbols by address, for binary searching.
+ sort.Slice(symbolList, func(i, j int) bool {
+ return symbolList[i].Address < symbolList[j].Address
+ })
+
+ // Load relocations and construct the call graph.
+ for _, section := range f.Sections {
+ if section.Type != elf.SHT_REL {
+ continue
+ }
+ if section.Entsize != 8 {
+ // Assume ELF32, this should be fixed.
+ return nil, fmt.Errorf("only ELF32 is supported at this time")
+ }
+ data, err := section.Data()
+ if err != nil {
+ return nil, err
+ }
+ for i := uint64(0); i < section.Size/section.Entsize; i++ {
+ offset := binary.LittleEndian.Uint32(data[i*section.Entsize:])
+ info := binary.LittleEndian.Uint32(data[i*section.Entsize+4:])
+ if elf.R_SYM32(info) == 0 {
+ continue
+ }
+ elfSymbol := elfSymbols[elf.R_SYM32(info)-1]
+ if elf.ST_TYPE(elfSymbol.Info) != elf.STT_FUNC {
+ continue
+ }
+ address := elfSymbol.Value
+ if f.Machine == elf.EM_ARM {
+ address = address &^ 1
+ }
+ childSym := symbols[address]
+ switch f.Machine {
+ case elf.EM_ARM:
+ relocType := elf.R_ARM(elf.R_TYPE32(info))
+ parentSym := findSymbol(symbolList, uint64(offset))
+ if debugPrint {
+ fmt.Fprintf(os.Stderr, "found relocation %-24s at %s (0x%x) to %s (0x%x)\n", relocType, parentSym, offset, childSym, childSym.Address)
+ }
+ isCall := true
+ switch relocType {
+ case elf.R_ARM_THM_PC22: // actually R_ARM_THM_CALL
+ // used for bl calls
+ case elf.R_ARM_THM_JUMP24:
+ // used for b.w jumps
+ isCall = parentSym != childSym
+ case elf.R_ARM_THM_JUMP11:
+ // used for b.n jumps
+ isCall = parentSym != childSym
+ case elf.R_ARM_THM_MOVW_ABS_NC, elf.R_ARM_THM_MOVT_ABS:
+ // used for getting a function pointer
+ isCall = false
+ case elf.R_ARM_ABS32:
+ // used in the reset vector for pointers
+ isCall = false
+ default:
+ return nil, fmt.Errorf("unknown relocation: %s", relocType)
+ }
+ if isCall {
+ if parentSym != nil {
+ parentSym.Children = append(parentSym.Children, childSym)
+ }
+ }
+ default:
+ return nil, fmt.Errorf("unknown architecture: %s", f.Machine)
+ }
+ }
+ }
+
+ // Set fixed frame size information, depending on the architecture.
+ switch f.Machine {
+ case elf.EM_ARM:
+ knownFrameSizes := map[string]uint64{
+ // implemented in assembly in TinyGo
+ "tinygo_startTask": 0, // thunk
+ "tinygo_getSystemStackPointer": 0, // getter
+ "tinygo_switchToScheduler": 0, // calls tinygo_swapTask
+ "tinygo_switchToTask": 0, // calls tinygo_swapTask
+ "tinygo_swapTask": 9 * 4, // 9 registers saved
+ "tinygo_scanCurrentStack": 9 * 4, // 9 registers saved
+
+ // implemented with assembly in compiler-rt
+ "__aeabi_uidivmod": 3 * 4, // 3 registers on thumb1 but 1 register on thumb2
+ }
+ for name, size := range knownFrameSizes {
+ if sym, ok := symbolNames[name]; ok {
+ if len(sym) > 1 {
+ return nil, fmt.Errorf("expected zero or one occurence of the symbol %s, found %d", name, len(sym))
+ }
+ sym[0].FrameSize = size
+ sym[0].FrameSizeType = Bounded
+ }
+ }
+ }
+
+ // Mark functions that do indirect calls (which cannot be determined
+ // directly from ELF/DWARF information).
+ for _, name := range callsIndirectFunction {
+ for _, fn := range symbolNames[name] {
+ fn.stackSizeType = IndirectCall
+ fn.missingFrameInfo = fn
+ }
+ }
+
+ // Read the .debug_frame section.
+ section := f.Section(".debug_frame")
+ if section == nil {
+ return nil, errors.New("no .debug_frame section present, binary was compiled without debug information")
+ }
+ data, err := section.Data()
+ if err != nil {
+ return nil, fmt.Errorf("could not read .debug_frame section: %w", err)
+ }
+ err = parseFrames(f, data, symbols)
+ if err != nil {
+ return nil, err
+ }
+
+ return symbolNames, nil
+}
+
+// findSymbol determines in which symbol the given address lies.
+func findSymbol(symbolList []*CallNode, address uint64) *CallNode {
+ // TODO: binary search
+ for _, sym := range symbolList {
+ if address >= sym.Address && address < sym.Address+sym.Size {
+ return sym
+ }
+ }
+ return nil
+}
+
+// StackSize tries to determine the stack size of the given call graph node. It
+// returns the maximum stack size, whether this size can be known at compile
+// time and the call node responsible for failing to determine the maximum stack
+// usage. The stack size is only valid if sizeType is Bounded.
+func (node *CallNode) StackSize() (uint64, sizeType, *CallNode) {
+ if node.stackSizeType == Undefined {
+ node.determineStackSize(make(map[*CallNode]struct{}))
+ }
+ return node.stackSize, node.stackSizeType, node.missingFrameInfo
+}
+
+// determineStackSize tries to determine the maximum stack size for this
+// function, recursively.
+func (node *CallNode) determineStackSize(parents map[*CallNode]struct{}) {
+ if _, ok := parents[node]; ok {
+ // The function calls itself (directly or indirectly).
+ node.stackSizeType = Recursive
+ node.missingFrameInfo = node
+ return
+ }
+ parents[node] = struct{}{}
+ defer func() {
+ delete(parents, node)
+ }()
+ switch node.FrameSizeType {
+ case Bounded:
+ // Determine the stack size recursively.
+ childMaxStackSize := uint64(0)
+ for _, child := range node.Children {
+ if child.stackSizeType == Undefined {
+ child.determineStackSize(parents)
+ }
+ switch child.stackSizeType {
+ case Bounded:
+ if child.stackSize > childMaxStackSize {
+ childMaxStackSize = child.stackSize
+ }
+ case Unknown, Recursive, IndirectCall:
+ node.stackSizeType = child.stackSizeType
+ node.missingFrameInfo = child.missingFrameInfo
+ return
+ default:
+ panic("unknown child stack size type")
+ }
+ }
+ node.stackSize = node.FrameSize + childMaxStackSize
+ node.stackSizeType = Bounded
+ case Undefined:
+ node.stackSizeType = Unknown
+ node.missingFrameInfo = node
+ default:
+ panic("unknown frame size type") // unreachable
+ }
+}