builder/sizes.go


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990

package builder

import (
	"bytes"
	"debug/dwarf"
	"debug/elf"
	"debug/macho"
	"debug/pe"
	"encoding/binary"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"regexp"
	"runtime"
	"sort"
	"strings"

	"github.com/aykevl/go-wasm"
	"github.com/tinygo-org/tinygo/goenv"
)

// sizesDebug controls the extra diagnostic output of the size calculation in
// this file. Set to true to print extra debug logs.
const sizesDebug = false

// programSize contains size statistics per package of a compiled program.
//
// The Code, ROData, Data, and BSS totals are filled in by loadProgramSize as
// the sum of the corresponding fields of every entry in Packages.
type programSize struct {
	// Packages holds the per-package sizes. Entries are created on demand by
	// getPackage and keyed by the path passed to it, which may also be a
	// pseudo-package name such as "C stack".
	Packages map[string]*packageSize
	Code     uint64
	ROData   uint64
	Data     uint64
	BSS      uint64
}

// sortedPackageNames collects the keys of programSize.Packages and returns
// them in ascending alphabetical order.
func (ps *programSize) sortedPackageNames() []string {
	result := make([]string, len(ps.Packages))
	next := 0
	for pkgName := range ps.Packages {
		result[next] = pkgName
		next++
	}
	sort.Strings(result)
	return result
}

// Flash returns the flash usage of the whole program on regular
// microcontrollers: the Code, ROData, and Data sizes added together.
func (ps *programSize) Flash() uint64 {
	total := ps.Code
	total += ps.ROData
	total += ps.Data
	return total
}

// RAM returns the static RAM usage of the whole program on regular
// microcontrollers: the Data and BSS sizes added together.
func (ps *programSize) RAM() uint64 {
	total := ps.Data
	total += ps.BSS
	return total
}

// getPackage looks up the size record for the given package path. When no
// record exists yet, an empty one (linked back to this program and with an
// empty per-file map) is registered under that path and returned.
func (ps *programSize) getPackage(path string) *packageSize {
	pkg, found := ps.Packages[path]
	if !found {
		pkg = &packageSize{
			Program: ps,
			Sub:     map[string]*packageSize{},
		}
		ps.Packages[path] = pkg
	}
	return pkg
}

// packageSize contains the size of a package, calculated from the linked object
// file.
//
// The same type is reused for the per-file breakdown stored in Sub; those
// entries are created by addSize without a Sub map of their own.
type packageSize struct {
	// Program points back to the program this package belongs to. It is used
	// by FlashPercent to relate the package size to the program total.
	Program *programSize
	Code    uint64
	ROData  uint64
	Data    uint64
	BSS     uint64
	// Sub holds the sizes per file inside this package, keyed by the filename
	// passed to addSize.
	Sub map[string]*packageSize
}

// Flash returns the flash usage of this package on regular microcontrollers:
// the Code, ROData, and Data sizes added together.
func (ps *packageSize) Flash() uint64 {
	total := ps.Code
	total += ps.ROData
	total += ps.Data
	return total
}

// RAM returns the static RAM usage of this package on regular
// microcontrollers: the Data and BSS sizes added together.
func (ps *packageSize) RAM() uint64 {
	total := ps.Data
	total += ps.BSS
	return total
}

// FlashPercent returns the flash usage of this package on regular
// microcontrollers, as a percentage of the total flash usage of the program.
//
// When the program's total flash usage is zero the result is 0 rather than
// NaN, so that the value can always be printed.
func (ps *packageSize) FlashPercent() float64 {
	total := ps.Program.Flash()
	if total == 0 {
		// Avoid 0/0, which would yield NaN.
		return 0
	}
	return float64(ps.Flash()) / float64(total) * 100
}

// addSize records one size data point for this package and for the given file
// inside it. The getField callback selects which counter (of the package or of
// the per-file entry) receives the size; isVariable is passed through to it.
// A size of zero is ignored entirely, so no per-file entry is created for it.
// This must only be called while calculating package size, not afterwards.
func (ps *packageSize) addSize(getField func(*packageSize, bool) *uint64, filename string, size uint64, isVariable bool) {
	if size == 0 {
		return
	}

	// Account for the bytes at the package level.
	pkgCounter := getField(ps, isVariable)
	*pkgCounter += size

	// Account for the same bytes at the file level, creating the file entry
	// the first time this filename is seen.
	fileEntry, exists := ps.Sub[filename]
	if !exists {
		fileEntry = &packageSize{Program: ps.Program}
		ps.Sub[filename] = fileEntry
	}
	fileCounter := getField(fileEntry, isVariable)
	*fileCounter += size
}

// A mapping of a single chunk of code or data to a file path.
// Chunks come from the DWARF line table, from DWARF variable entries, and from
// symbol tables (in which case File holds the symbol name rather than a path).
type addressLine struct {
	Address    uint64 // start address of this chunk
	Length     uint64 // length of this chunk
	Align      uint64 // (maximum) alignment of this line
	File       string // file path as stored in DWARF
	IsVariable bool   // true if this is a variable (or constant), false if it is code
}

// Sections defined in the input file. This struct defines them in a
// filetype-agnostic way but roughly follow the ELF types (.text, .data, .bss,
// etc).
type memorySection struct {
	Type    memoryType // kind of memory this section holds
	Address uint64     // start address of the section
	Size    uint64     // size of the section
	Align   uint64     // alignment as given by the file format; left zero for PE/COFF and WebAssembly
}

// memoryType identifies the kind of memory a memorySection holds. The zero
// value is not one of the named kinds and is printed as "-".
type memoryType int

const (
	memoryCode memoryType = iota + 1
	memoryData
	memoryROData
	memoryBSS
	memoryStack
)

// String returns a short lowercase name for the memory type. Values that are
// not one of the declared constants (or zero) are rendered as
// "memoryType(N)" instead of causing an out-of-range panic.
func (t memoryType) String() string {
	switch t {
	case 0:
		return "-"
	case memoryCode:
		return "code"
	case memoryData:
		return "data"
	case memoryROData:
		return "rodata"
	case memoryBSS:
		return "bss"
	case memoryStack:
		return "stack"
	default:
		return fmt.Sprintf("memoryType(%d)", int(t))
	}
}

// Regular expressions to match particular symbol names. These are not stored as
// DWARF variables because they have no mapping to source code global variables.
var (
	// Various globals that aren't a variable but nonetheless need to be stored
	// somewhere:
	//   alloc:  heap allocations during init interpretation
	//   pack:   data created when storing a constant in an interface for example
	//   string: buffer behind strings
	// The pattern matches symbol names ending in "$alloc", "$pack", or
	// "$string", optionally followed by a numeric suffix such as ".12".
	packageSymbolRegexp = regexp.MustCompile(`\$(alloc|pack|string)(\.[0-9]+)?$`)
)

// readProgramSizeFromDWARF reads the source location for each line of code and
// each variable in the program, as far as this is stored in the DWARF debug
// information.
//
// codeOffset is added to the address of every code chunk read from the line
// table, and codeAlignment is stored as the alignment of those chunks. When
// skipTombstone is true, line table sequences that start at address 0 are
// treated as removed code and skipped.
//
// Debug information that cannot be interpreted (a compile unit without a line
// table, a variable with a location list, an invalid file index, an unknown
// type size) is skipped. Errors reported by the DWARF reader itself are
// returned.
func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset, codeAlignment uint64, skipTombstone bool) ([]addressLine, error) {
	r := data.Reader()
	var lines []*dwarf.LineFile
	var addresses []addressLine
	for {
		e, err := r.Next()
		if err != nil {
			return nil, err
		}
		if e == nil {
			break
		}
		switch e.Tag {
		case dwarf.TagCompileUnit:
			// Found a compile unit.
			// We can read the .debug_line section using it, which contains a
			// mapping for most instructions to their file/line/column - even
			// for inlined functions!
			lr, err := data.LineReader(e)
			if err != nil {
				return nil, err
			}
			if lr == nil {
				// This compile unit has no line table. There is nothing to
				// read, and the file table of a previous compile unit must not
				// be used for the variables in this one.
				lines = nil
				continue
			}
			lines = lr.Files()
			lineEntry := dwarf.LineEntry{
				EndSequence: true,
			}

			// Line tables are organized as sequences of line entries until an
			// end sequence. A single line table can contain multiple such
			// sequences. The last line entry is an EndSequence to indicate the
			// end.
			for {
				// Read the next .debug_line entry.
				prevLineEntry := lineEntry
				err := lr.Next(&lineEntry)
				if err != nil {
					if err == io.EOF {
						break
					}
					return nil, err
				}

				if prevLineEntry.EndSequence && lineEntry.Address == 0 && skipTombstone {
					// Tombstone value. This symbol has been removed, for
					// example by the --gc-sections linker flag. It is still
					// here in the debug information because the linker can't
					// just remove this reference.
					// Read until the next EndSequence so that this sequence is
					// skipped.
					// For more details, see (among others):
					// https://reviews.llvm.org/D84825
					// The value 0 can however really occur in object files,
					// that typically start at address 0. So don't skip
					// tombstone values in object files (like when parsing MachO
					// files).
					for {
						err := lr.Next(&lineEntry)
						if err != nil {
							return nil, err
						}
						if lineEntry.EndSequence {
							break
						}
					}
				}

				if !prevLineEntry.EndSequence {
					// The chunk describes the code from prevLineEntry to
					// lineEntry.
					if prevLineEntry.File == nil {
						// The line program referred to a file that is not in
						// the file table, so this chunk can't be attributed.
						continue
					}
					path := prevLineEntry.File.Name
					if runtime.GOOS == "windows" {
						// Work around a Clang bug on Windows:
						// https://github.com/llvm/llvm-project/issues/117317
						path = strings.ReplaceAll(path, "\\\\", "\\")

						// wasi-libc likes to use forward slashes, but we
						// canonicalize everything to use backwards slashes as
						// is common on Windows.
						path = strings.ReplaceAll(path, "/", "\\")
					}
					line := addressLine{
						Address: prevLineEntry.Address + codeOffset,
						Length:  lineEntry.Address - prevLineEntry.Address,
						Align:   codeAlignment,
						File:    path,
					}
					if line.Length != 0 {
						addresses = append(addresses, line)
					}
				}
			}
		case dwarf.TagVariable:
			// Global variable (or constant). Most of these are not actually
			// stored in the binary, because they have been optimized out. Only
			// the ones with a location are still present.
			r.SkipChildren()

			file := e.AttrField(dwarf.AttrDeclFile)
			location := e.AttrField(dwarf.AttrLocation)
			globalType := e.AttrField(dwarf.AttrType)
			if file == nil || location == nil || globalType == nil {
				// Doesn't contain the requested information.
				continue
			}

			// Try to parse the location. While this could in theory be a very
			// complex expression, usually it's just a DW_OP_addr opcode
			// followed by an address. A location that isn't an expression
			// (such as a location list) is not supported and skipped.
			expr, ok := location.Val.([]byte)
			if !ok {
				continue
			}
			addr, err := readDWARFConstant(r.AddressSize(), expr)
			if err != nil {
				continue // ignore the error, we don't know what to do with it
			}

			// Parse the type of the global variable, which (importantly)
			// contains the variable size. We're not interested in the type,
			// only in the size.
			typeOffset, ok := globalType.Val.(dwarf.Offset)
			if !ok {
				continue
			}
			typ, err := data.Type(typeOffset)
			if err != nil {
				return nil, err
			}
			size := typ.Size()
			if size < 0 {
				// The size is unknown. Converting it to an unsigned length
				// would produce an absurdly large chunk.
				continue
			}

			// Look up the file in which the variable was declared. The index
			// may be out of range, or refer to an empty slot in the file
			// table (index 0 means "no file" before DWARF 5).
			fileIndex, ok := file.Val.(int64)
			if !ok || fileIndex < 0 || fileIndex >= int64(len(lines)) || lines[fileIndex] == nil {
				continue
			}

			// Read alignment, if it's stored as part of the debug information.
			var alignment uint64
			if attr := e.AttrField(dwarf.AttrAlignment); attr != nil {
				if value, ok := attr.Val.(int64); ok {
					alignment = uint64(value)
				}
			}

			addresses = append(addresses, addressLine{
				Address:    addr,
				Length:     uint64(size),
				Align:      alignment,
				File:       lines[fileIndex].Name,
				IsVariable: true,
			})
		default:
			r.SkipChildren()
		}
	}
	return addresses, nil
}

// readDWARFConstant evaluates a DWARF location expression that computes a
// constant address. For addresses, this is usually a very simple expression:
// only DW_OP_addr and DW_OP_plus_uconst are supported. Address operands are
// read as little-endian values of addressSize bytes (2, 4, or 8).
//
// An error is returned for an unknown opcode, an unsupported address size, or
// an expression that ends in the middle of an operand.
func readDWARFConstant(addressSize int, bytecode []byte) (uint64, error) {
	var addr uint64
	for len(bytecode) != 0 {
		op := bytecode[0]
		bytecode = bytecode[1:]
		switch op {
		case 0x03: // DW_OP_addr
			if addressSize != 2 && addressSize != 4 && addressSize != 8 {
				return 0, fmt.Errorf("unexpected address size: %d", addressSize)
			}
			if len(bytecode) < addressSize {
				return 0, fmt.Errorf("truncated DW_OP_addr operand: need %d bytes, have %d", addressSize, len(bytecode))
			}
			switch addressSize {
			case 2:
				addr = uint64(binary.LittleEndian.Uint16(bytecode))
			case 4:
				addr = uint64(binary.LittleEndian.Uint32(bytecode))
			case 8:
				addr = binary.LittleEndian.Uint64(bytecode)
			}
			bytecode = bytecode[addressSize:]
		case 0x23: // DW_OP_plus_uconst
			offset, n := readULEB128(bytecode)
			if n == 0 {
				return 0, fmt.Errorf("malformed DW_OP_plus_uconst operand")
			}
			addr += offset
			bytecode = bytecode[n:]
		default:
			return 0, fmt.Errorf("unknown DWARF opcode: 0x%x", op)
		}
	}
	return addr, nil
}

// readULEB128 decodes an unsigned LEB128 number from the start of buf and
// returns the value together with the number of bytes it occupied. If buf
// ends before the number is complete, or the number doesn't fit in 64 bits,
// it returns (0, 0).
//
// Source: https://en.wikipedia.org/wiki/LEB128#Decode_unsigned_integer
func readULEB128(buf []byte) (result uint64, n int) {
	var shift uint
	for i, b := range buf {
		if shift >= 64 {
			// More continuation bytes than a 64-bit value can hold.
			return 0, 0
		}
		result |= uint64(b&0x7f) << shift
		if b&0x80 == 0 {
			return result, i + 1
		}
		shift += 7
	}
	// Ran out of input before the terminating byte.
	return 0, 0
}

// readMachOSymbolAddresses reads a MachO object file and returns its line
// table, as produced by readProgramSizeFromDWARF with tombstone skipping
// disabled.
//
// It also returns an index from symbol name to the position in the line table
// of the chunk that starts at the symbol's address. The index is -1 for a
// symbol name that occurs more than once. Symbols whose address is not the
// start of exactly one chunk are left out.
//
// An error is returned if the file cannot be opened as MachO or if its DWARF
// data cannot be read.
func readMachOSymbolAddresses(path string) (map[string]int, []addressLine, error) {
	// Some constants from mach-o/nlist.h
	// See: https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/nlist.h.auto.html
	const (
		N_STAB = 0xe0
		N_TYPE = 0x0e // bitmask for N_TYPE field
		N_SECT = 0xe  // one of the possible type in the N_TYPE field
	)

	// Read DWARF from the given object file.
	file, err := macho.Open(path)
	if err != nil {
		return nil, nil, err
	}
	defer file.Close()
	dwarfData, err := file.DWARF()
	if err != nil {
		return nil, nil, err
	}
	lines, err := readProgramSizeFromDWARF(dwarfData, 0, 0, false)
	if err != nil {
		return nil, nil, err
	}

	// Make a map from start addresses to indices in the line table (because the
	// line table is a slice, not a map). Addresses at which more than one
	// chunk starts are ambiguous and marked with -1.
	addressToLine := make(map[uint64]int, len(lines))
	for i, line := range lines {
		if _, ok := addressToLine[line.Address]; ok {
			addressToLine[line.Address] = -1
			continue
		}
		addressToLine[line.Address] = i
	}

	// Make a map that for each symbol gives the start index in the line table.
	addresses := make(map[string]int, len(addressToLine))
	if file.Symtab == nil {
		// The file has no symbol table, so there are no symbols to index.
		return addresses, lines, nil
	}
	for _, symbol := range file.Symtab.Syms {
		if symbol.Type&N_STAB != 0 {
			continue // STABS entry, ignore
		}
		if symbol.Type&N_TYPE != N_SECT {
			continue // undefined symbol
		}
		if index, ok := addressToLine[symbol.Value]; ok && index >= 0 {
			if _, ok := addresses[symbol.Name]; ok {
				// There is a duplicate. Mark it as unavailable.
				addresses[symbol.Name] = -1
				continue
			}
			addresses[symbol.Name] = index
		}
	}

	return addresses, lines, nil
}

// loadProgramSize calculates a program/data size breakdown of each package for
// a given binary. The file at path may be in ELF, MachO, PE/COFF, or
// WebAssembly format; the formats are tried in that order.
//
// packagePathMap is passed on unchanged to readSection.
//
// If the file doesn't contain DWARF debug information, the returned program
// size will still have valid summaries but won't have complete size information
// per package.
//
// An error is returned when the file can't be opened or parsed in any of the
// supported formats, or when its debug information is present but malformed.
func loadProgramSize(path string, packagePathMap map[string]string) (*programSize, error) {
	// Open the binary file.
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// This stores all chunks of addresses found in the binary.
	var addresses []addressLine

	// Load the binary file, which could be in a number of file formats.
	var sections []memorySection
	if file, err := elf.NewFile(f); err == nil {
		var codeAlignment uint64
		switch file.Machine {
		case elf.EM_ARM:
			codeAlignment = 4 // usually 2, but can be 4
		}
		// Read DWARF information. The error is intentionally ignored.
		data, _ := file.DWARF()
		if data != nil {
			addresses, err = readProgramSizeFromDWARF(data, 0, codeAlignment, true)
			if err != nil {
				// However, _do_ report an error here. Something must have gone
				// wrong while trying to parse DWARF data.
				return nil, err
			}
		}

		// Read the ELF symbols for some more chunks of location information.
		// Some globals (such as strings) aren't stored in the DWARF debug
		// information and therefore need to be obtained in a different way.
		allSymbols, err := file.Symbols()
		if err != nil {
			return nil, err
		}
		for _, symbol := range allSymbols {
			symType := elf.ST_TYPE(symbol.Info)
			if symbol.Size == 0 {
				continue
			}
			if symType != elf.STT_FUNC && symType != elf.STT_OBJECT && symType != elf.STT_NOTYPE {
				continue
			}
			if symbol.Section >= elf.SHN_LORESERVE {
				// Not a regular section, so skip it.
				// One example is elf.SHN_ABS, which is used for symbols
				// declared with an absolute value such as the memset function
				// on the ESP32 which is defined in the mask ROM.
				continue
			}
			if int(symbol.Section) >= len(file.Sections) {
				// The symbol refers to a section that doesn't exist.
				continue
			}
			section := file.Sections[symbol.Section]
			if section.Flags&elf.SHF_ALLOC == 0 {
				continue
			}
			if packageSymbolRegexp.MatchString(symbol.Name) || symbol.Name == "__isr_vector" {
				addresses = append(addresses, addressLine{
					Address:    symbol.Value,
					Length:     symbol.Size,
					File:       symbol.Name,
					IsVariable: true,
				})
			}
		}

		// Load allocated sections.
		for _, section := range file.Sections {
			if section.Flags&elf.SHF_ALLOC == 0 {
				continue
			}
			var sectionType memoryType
			switch {
			case section.Type == elf.SHT_NOBITS && section.Name == ".stack":
				// TinyGo emits stack sections on microcontroller using the
				// ".stack" name.
				// This is a bit ugly, but I don't think there is a way to
				// mark the stack section in a linker script.
				sectionType = memoryStack
			case section.Type == elf.SHT_NOBITS:
				// Regular .bss section.
				sectionType = memoryBSS
			case section.Type != elf.SHT_PROGBITS:
				// Not a section that contributes to the program size.
				continue
			case section.Flags&elf.SHF_EXECINSTR != 0:
				// .text
				sectionType = memoryCode
			case section.Flags&elf.SHF_WRITE != 0:
				// .data
				sectionType = memoryData
			default:
				// .rodata
				sectionType = memoryROData
			}
			sections = append(sections, memorySection{
				Address: section.Addr,
				Size:    section.Size,
				Align:   section.Addralign,
				Type:    sectionType,
			})
		}
	} else if file, err := macho.NewFile(f); err == nil {
		// Read segments, for use while reading through sections.
		segments := map[string]*macho.Segment{}
		for _, load := range file.Loads {
			switch load := load.(type) {
			case *macho.Segment:
				segments[load.Name] = load
			}
		}

		// Read MachO sections.
		for _, section := range file.Sections {
			sectionType := section.Flags & 0xff
			sectionFlags := section.Flags >> 8
			// The segment may be missing if the section names a segment that
			// has no load command.
			segment := segments[section.Seg]
			// For the constants used here, see:
			// https://github.com/llvm/llvm-project/blob/release/14.x/llvm/include/llvm/BinaryFormat/MachO.h
			var memType memoryType
			switch {
			case sectionFlags&0x800000 != 0: // S_ATTR_PURE_INSTRUCTIONS
				// Section containing only instructions.
				memType = memoryCode
			case sectionType == 1: // S_ZEROFILL
				// Section filled with zeroes on demand.
				memType = memoryBSS
			case segment != nil && segment.Maxprot&0b011 == 0b001: // --r (read-only data)
				// Protection doesn't allow writes, so mark this section read-only.
				memType = memoryROData
			default:
				// The rest is assumed to be regular data.
				memType = memoryData
			}
			sections = append(sections, memorySection{
				Address: section.Addr,
				Size:    uint64(section.Size),
				Align:   uint64(section.Align),
				Type:    memType,
			})
		}

		// Read DWARF information.
		// The data isn't stored directly in the binary as in most executable
		// formats. Instead, it is left in the object files that were used as a
		// basis for linking. The executable does however contain STABS debug
		// information that points to the source object file and is used by
		// debuggers.
		// For more information:
		// http://wiki.dwarfstd.org/index.php?title=Apple%27s_%22Lazy%22_DWARF_Scheme
		var objSymbolNames map[string]int
		var objAddresses []addressLine
		var previousSymbol macho.Symbol
		var symbols []macho.Symbol
		if file.Symtab != nil {
			// A file without a symbol table simply has no debug information
			// to follow.
			symbols = file.Symtab.Syms
		}
		for _, symbol := range symbols {
			// STABS constants, from mach-o/stab.h:
			// https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/stab.h.auto.html
			const (
				N_GSYM  = 0x20
				N_FUN   = 0x24
				N_STSYM = 0x26
				N_SO    = 0x64
				N_OSO   = 0x66
			)
			if symbol.Type == N_OSO {
				// Found an object file. Now try to parse it.
				objSymbolNames, objAddresses, err = readMachOSymbolAddresses(symbol.Name)
				if err != nil && sizesDebug {
					// Errors are normally ignored. If there is an error, it's
					// simply treated as that the DWARF is not available.
					fmt.Fprintf(os.Stderr, "could not read DWARF from file %s: %s\n", symbol.Name, err)
				}
			} else if symbol.Type == N_FUN {
				// Found a function.
				// The way this is encoded is a bit weird. MachO symbols don't
				// have a length. What I've found is that the length is encoded
				// by first having a N_FUN symbol as usual, and then having a
				// symbol with a zero-length name that has the value not set to
				// the address of the symbol but to the length. So in order to
				// get both the address and the length, we look for a symbol
				// with a name followed by a symbol without a name.
				if symbol.Name == "" && previousSymbol.Type == N_FUN && previousSymbol.Name != "" {
					// Functions are encoded as many small chunks in the line
					// table (one or a few instructions per source line). But
					// the symbol length covers the whole symbols, over many
					// lines and possibly including inlined functions. So we
					// continue to iterate through the objAddresses slice until
					// we've found all the source lines that are part of this
					// symbol.
					address := previousSymbol.Value
					length := symbol.Value
					if index, ok := objSymbolNames[previousSymbol.Name]; ok && index >= 0 {
						// Also stop at the end of the line table, in case the
						// symbol claims to be longer than the lines that are
						// available.
						for length > 0 && index < len(objAddresses) {
							line := objAddresses[index]
							line.Address = address
							if line.Length > length {
								// Line extends beyond the end of the symbol?
								// Weird, shouldn't happen.
								break
							}
							addresses = append(addresses, line)
							index++
							length -= line.Length
							address += line.Length
						}
					}
				}
			} else if symbol.Type == N_GSYM || symbol.Type == N_STSYM {
				// Global variables. A negative index marks a symbol name that
				// was found more than once in the object file, which can't be
				// resolved.
				if index, ok := objSymbolNames[symbol.Name]; ok && index >= 0 {
					address := objAddresses[index]
					address.Address = symbol.Value
					addresses = append(addresses, address)
				}
			}
			previousSymbol = symbol
		}
	} else if file, err := pe.NewFile(f); err == nil {
		// Read DWARF information. The error is intentionally ignored.
		data, _ := file.DWARF()
		if data != nil {
			addresses, err = readProgramSizeFromDWARF(data, 0, 0, true)
			if err != nil {
				// However, _do_ report an error here. Something must have gone
				// wrong while trying to parse DWARF data.
				return nil, err
			}
		}

		// Determine the image base, which is stored in the optional header.
		// The header type depends on whether this is a 32-bit or a 64-bit
		// image.
		var imageBase uint64
		switch header := file.OptionalHeader.(type) {
		case *pe.OptionalHeader64:
			imageBase = header.ImageBase
		case *pe.OptionalHeader32:
			imageBase = uint64(header.ImageBase)
		default:
			return nil, fmt.Errorf("could not read PE optional header (got %T)", file.OptionalHeader)
		}

		// Read COFF sections.
		for _, section := range file.Sections {
			// For more information:
			// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_section_header
			const (
				IMAGE_SCN_CNT_CODE             = 0x00000020
				IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040
				IMAGE_SCN_MEM_DISCARDABLE      = 0x02000000
				IMAGE_SCN_MEM_READ             = 0x40000000
				IMAGE_SCN_MEM_WRITE            = 0x80000000
			)
			if section.Characteristics&IMAGE_SCN_MEM_DISCARDABLE != 0 {
				// Debug sections, etc.
				continue
			}
			address := uint64(section.VirtualAddress) + imageBase
			if section.Characteristics&IMAGE_SCN_CNT_CODE != 0 {
				// .text
				sections = append(sections, memorySection{
					Address: address,
					Size:    uint64(section.VirtualSize),
					Type:    memoryCode,
				})
			} else if section.Characteristics&IMAGE_SCN_CNT_INITIALIZED_DATA != 0 {
				if section.Characteristics&IMAGE_SCN_MEM_WRITE != 0 {
					// .data
					sections = append(sections, memorySection{
						Address: address,
						Size:    uint64(section.Size),
						Type:    memoryData,
					})
					if section.Size < section.VirtualSize {
						// Equivalent of a .bss section.
						// Note: because of how the PE/COFF format is
						// structured, not all zero-initialized data is marked
						// as such. A portion may be at the end of the .data
						// section and is thus marked as initialized data.
						sections = append(sections, memorySection{
							Address: address + uint64(section.Size),
							Size:    uint64(section.VirtualSize) - uint64(section.Size),
							Type:    memoryBSS,
						})
					}
				} else if section.Characteristics&IMAGE_SCN_MEM_READ != 0 {
					// .rdata, .buildid, .pdata
					sections = append(sections, memorySection{
						Address: address,
						Size:    uint64(section.VirtualSize),
						Type:    memoryROData,
					})
				}
			}
		}
	} else if file, err := wasm.Parse(f); err == nil {
		// File is in WebAssembly format.

		// Put code at a very high address, so that it won't conflict with the
		// data in the memory section.
		const codeOffset = 0x8000_0000_0000_0000

		// Read DWARF information. The error is intentionally ignored.
		data, _ := file.DWARF()
		if data != nil {
			addresses, err = readProgramSizeFromDWARF(data, codeOffset, 0, true)
			if err != nil {
				// However, _do_ report an error here. Something must have gone
				// wrong while trying to parse DWARF data.
				return nil, err
			}
		}

		var linearMemorySize uint64
		for _, section := range file.Sections {
			switch section := section.(type) {
			case *wasm.SectionCode:
				sections = append(sections, memorySection{
					Address: codeOffset,
					Size:    uint64(section.Size()),
					Type:    memoryCode,
				})
			case *wasm.SectionMemory:
				// This value is used when processing *wasm.SectionData (which
				// always comes after *wasm.SectionMemory).
				// A memory section without entries leaves the size at zero.
				if len(section.Entries) != 0 {
					linearMemorySize = uint64(section.Entries[0].Limits.Initial) * 64 * 1024
				}
			case *wasm.SectionData:
				// Data sections contain initial values for linear memory.
				// First load the list of data sections, and sort them by
				// address for easier processing.
				var dataSections []memorySection
				for _, entry := range section.Entries {
					address, err := wasm.Eval(bytes.NewBuffer(entry.Offset))
					if err != nil {
						return nil, fmt.Errorf("could not parse data section address: %w", err)
					}
					if len(address) == 0 {
						return nil, fmt.Errorf("could not parse data section address: expression has no result")
					}
					offset, ok := address[0].(int32)
					if !ok {
						return nil, fmt.Errorf("could not parse data section address: unexpected result type %T", address[0])
					}
					dataSections = append(dataSections, memorySection{
						// The offset is an unsigned 32-bit address that is
						// stored in a signed integer, so convert it without
						// sign extension.
						Address: uint64(uint32(offset)),
						Size:    uint64(len(entry.Data)),
						Type:    memoryData,
					})
				}
				sort.Slice(dataSections, func(i, j int) bool {
					return dataSections[i].Address < dataSections[j].Address
				})

				// And now add all data sections for linear memory.
				// Parts that are in the slice of data sections are added as
				// memoryData, and parts that are not are added as memoryBSS.
				addr := uint64(0)
				for _, dataSection := range dataSections {
					if addr < dataSection.Address {
						sections = append(sections, memorySection{
							Address: addr,
							Size:    dataSection.Address - addr,
							Type:    memoryBSS,
						})
					}
					if addr > dataSection.Address {
						// This might be allowed, I'm not sure.
						// It certainly doesn't make a lot of sense.
						return nil, fmt.Errorf("overlapping data section")
					}
					// addr == dataSection.Address
					sections = append(sections, dataSection)
					addr = dataSection.Address + dataSection.Size
				}
				if addr < linearMemorySize {
					sections = append(sections, memorySection{
						Address: addr,
						Size:    linearMemorySize - addr,
						Type:    memoryBSS,
					})
				}
			}
		}
	} else {
		return nil, fmt.Errorf("could not parse file: %w", err)
	}

	// Sort the slice of address chunks by address, so that we can iterate
	// through it to calculate section sizes.
	sort.Slice(addresses, func(i, j int) bool {
		if addresses[i].Address == addresses[j].Address {
			// Very rarely, there might be duplicate addresses.
			// If that happens, sort the largest chunks first.
			return addresses[i].Length > addresses[j].Length
		}
		return addresses[i].Address < addresses[j].Address
	})

	// Now finally determine the binary/RAM size usage per package by going
	// through each allocated section.
	sizes := make(map[string]*packageSize)
	program := &programSize{
		Packages: sizes,
	}
	for _, section := range sections {
		switch section.Type {
		case memoryCode:
			// Variables that live in a code section are counted as read-only
			// data instead of code.
			readSection(section, addresses, program, func(ps *packageSize, isVariable bool) *uint64 {
				if isVariable {
					return &ps.ROData
				}
				return &ps.Code
			}, packagePathMap)
		case memoryROData:
			readSection(section, addresses, program, func(ps *packageSize, isVariable bool) *uint64 {
				return &ps.ROData
			}, packagePathMap)
		case memoryData:
			readSection(section, addresses, program, func(ps *packageSize, isVariable bool) *uint64 {
				return &ps.Data
			}, packagePathMap)
		case memoryBSS:
			readSection(section, addresses, program, func(ps *packageSize, isVariable bool) *uint64 {
				return &ps.BSS
			}, packagePathMap)
		case memoryStack:
			// We store the C stack as a pseudo-package.
			program.getPackage("C stack").addSize(func(ps *packageSize, isVariable bool) *uint64 {
				return &ps.BSS
			}, "", section.Size, false)
		}
	}

	// ...and summarize the results.
	for _, pkg := range sizes {
		program.Code += pkg.Code
		program.ROData += pkg.ROData
		program.Data += pkg.Data
		program.BSS += pkg.BSS
	}
	return program, nil
}

// readSection walks over a single memory section and attributes each of its
// bytes to a package, recording the result in program.
//
// The addresses slice is scanned front to back while a cursor tracks how far
// into the section the accounting has progressed, so it is expected to be
// ordered by start address (the caller sorts it before calling). Bytes that
// are not covered by any entry are booked under the "(padding)" pseudo-package
// when the alignment of the following entry (or of the section itself, for a
// trailing gap) explains them, and under "(unknown)" otherwise.
//
// getField is handed to addSize unchanged; it selects which size counter of a
// packageSize is incremented. packagePathMap is forwarded to findPackagePath
// to translate the file of each entry into a package path.
func readSection(section memorySection, addresses []addressLine, program *programSize, getField func(*packageSize, bool) *uint64, packagePathMap map[string]string) {
	sectionStart := section.Address
	sectionEnd := sectionStart + section.Size
	if sizesDebug {
		fmt.Printf("%08x..%08x %5d: %s\n", sectionStart, sectionEnd, section.Size, section.Type)
	}

	// cursor is the first address of the section that has not been attributed
	// to anything yet.
	cursor := sectionStart
	for _, entry := range addresses {
		entryEnd := entry.Address + entry.Length
		if inside := entry.Address >= sectionStart && entryEnd <= sectionEnd; !inside {
			// Only entries that lie completely inside this section are
			// counted. Entries that straddle a section boundary are ignored.
			continue
		}

		if cursor < entry.Address {
			// Nothing claims the bytes between the cursor and this entry.
			// Decide whether rounding the cursor up to the alignment of this
			// entry is enough to explain the hole.
			gap := entry.Address - cursor
			nextAligned := (cursor + entry.Align - 1) &^ (entry.Align - 1)
			switch {
			case entry.Align > 1 && nextAligned >= entry.Address:
				program.getPackage("(padding)").addSize(getField, "", gap, true)
			default:
				program.getPackage("(unknown)").addSize(getField, "", gap, false)
				if sizesDebug {
					fmt.Printf("%08x..%08x %5d:  unknown (gap), alignment=%d\n", cursor, entry.Address, gap, entry.Align)
				}
			}
			cursor = entry.Address
		}

		if cursor > entryEnd {
			// An earlier entry already accounted for all of this one.
			continue
		}

		// The cursor is now somewhere inside the entry (usually at its very
		// start). Whatever lies between the cursor and the end of the entry is
		// the part that has not been counted by an earlier, overlapping entry.
		remaining := entryEnd - cursor
		pkgPath, file := findPackagePath(entry.File, packagePathMap)
		program.getPackage(pkgPath).addSize(getField, file, remaining, entry.IsVariable)
		cursor = entryEnd
	}

	if cursor >= sectionEnd {
		// The entries reached the end of the section; nothing is left over.
		return
	}

	// Some bytes at the end of the section are not covered by any entry.
	tail := sectionEnd - cursor
	nextAligned := (cursor + section.Align - 1) &^ (section.Align - 1)
	if section.Align > 1 && nextAligned >= sectionEnd {
		// The section alignment accounts for the leftover bytes, for example
		// when a .rodata section ends with a non-aligned string.
		program.getPackage("(padding)").addSize(getField, "", tail, true)
		return
	}
	program.getPackage("(unknown)").addSize(getField, "", tail, false)
	if sizesDebug {
		fmt.Printf("%08x..%08x %5d:  unknown (end), alignment=%d\n", cursor, sectionEnd, tail, section.Align)
	}
}

// findPackagePath returns the Go package (or a pseudo package) for the given
// path, together with the file name to report inside that package. It uses
// some heuristics, for example for some C libraries.
//
// The directory of path is first looked up in packagePathMap. When it is not a
// known Go package, the following forms are recognized, in this order:
//   - files below $TINYGOROOT/lib/<name>/, reported as package "C <name>" with
//     the remainder of the path as filename;
//   - files below $TINYGOROOT/llvm-project/compiler-rt/, reported as package
//     "C compiler-rt";
//   - names matched by packageSymbolRegexp (like main$alloc), reported under
//     the part before the last "$";
//   - a few fixed pseudo paths ("__isr_vector", "<Go type>",
//     "<Go interface assert>", "<Go interface method>", "<stdin>").
//
// Any other path is reported under its directory, with its base name as
// filename. For symbol names and pseudo paths the returned filename is empty.
func findPackagePath(path string, packagePathMap map[string]string) (packagePath, filename string) {
	// Check whether this path is part of one of the compiled packages.
	if pkg, ok := packagePathMap[filepath.Dir(path)]; ok {
		// Directory is known as a Go package.
		// Add the file itself as well.
		return pkg, filepath.Base(path)
	}

	separator := string(os.PathSeparator)
	root := goenv.Get("TINYGOROOT")
	// Both prefixes end in a path separator so that only entries really
	// located below these directories match. Without it, a sibling directory
	// such as "$TINYGOROOT/libfoo" would be mistaken for "$TINYGOROOT/lib".
	libPrefix := filepath.Join(root, "lib") + separator
	compilerRTPrefix := filepath.Join(root, "llvm-project", "compiler-rt") + separator

	switch {
	case strings.HasPrefix(path, libPrefix):
		// Emit C libraries (in the lib subdirectory of TinyGo) as a single
		// package, with a "C" prefix. For example: "C picolibc" for the
		// baremetal libc.
		parts := strings.SplitN(strings.TrimPrefix(path, libPrefix), separator, 2)
		packagePath = "C " + parts[0]
		if len(parts) > 1 {
			// A path with nothing below the first component (a file directly
			// inside lib) yields a single part; keep the filename empty in
			// that case instead of indexing out of range.
			filename = parts[1]
		}
	case strings.HasPrefix(path, compilerRTPrefix):
		packagePath = "C compiler-rt"
		filename = strings.TrimPrefix(path, compilerRTPrefix)
	case packageSymbolRegexp.MatchString(path):
		// Parse symbol names like main$alloc or runtime$string.
		// NOTE(review): this assumes packageSymbolRegexp only matches names
		// that contain a "$"; otherwise LastIndex returns -1 and the slice
		// expression panics. Confirm against the regexp definition.
		packagePath = path[:strings.LastIndex(path, "$")]
	case path == "__isr_vector":
		packagePath = "C interrupt vector"
	case path == "<Go type>":
		packagePath = "Go types"
	case path == "<Go interface assert>":
		// Interface type assert, generated by the interface lowering pass.
		packagePath = "Go interface assert"
	case path == "<Go interface method>":
		// Interface method wrapper (switch over all concrete types),
		// generated by the interface lowering pass.
		packagePath = "Go interface method"
	case path == "<stdin>":
		// This can happen when the source code (in Go) doesn't have a
		// source file and uses "-" as the location. Somewhere this is
		// converted to "<stdin>".
		// Convert this back to the "-" string. Eventually, this should be
		// fixed in the compiler.
		packagePath = "-"
	default:
		// This is some other path. Not sure what it is, so just emit its
		// directory as a fallback.
		packagePath = filepath.Dir(path)
		filename = filepath.Base(path)
	}
	return packagePath, filename
}