aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAyke van Laethem <[email protected]>2024-10-31 12:03:49 +0100
committerAyke <[email protected]>2024-11-14 09:28:25 +0100
commit860697257b8deb3bd2e7c8d9ee72420448e8861e (patch)
treecc0bd77aed1a7bd5c66212496039ec5c734b6d22
parent91563cff1f91efd58671f7b8b4d9880424071c4a (diff)
downloadtinygo-860697257b8deb3bd2e7c8d9ee72420448e8861e.tar.gz
tinygo-860697257b8deb3bd2e7c8d9ee72420448e8861e.zip
runtime: optimize findHead
This is similar to https://github.com/tinygo-org/tinygo/pull/3899, but smaller and hopefully just as efficient. Thanks to @HattoriHanzo031 for starting this work, benchmarking, and for improving the performance of the code even further.
-rw-r--r--builder/sizes_test.go6
-rw-r--r--src/runtime/gc_blocks.go40
2 files changed, 40 insertions, 6 deletions
diff --git a/builder/sizes_test.go b/builder/sizes_test.go
index dd89609e7..650a5fbb5 100644
--- a/builder/sizes_test.go
+++ b/builder/sizes_test.go
@@ -41,9 +41,9 @@ func TestBinarySize(t *testing.T) {
// This is a small number of very diverse targets that we want to test.
tests := []sizeTest{
// microcontrollers
- {"hifive1b", "examples/echo", 4580, 280, 0, 2268},
- {"microbit", "examples/serial", 2888, 388, 8, 2272},
- {"wioterminal", "examples/pininterrupt", 6124, 1484, 116, 6832},
+ {"hifive1b", "examples/echo", 4600, 280, 0, 2268},
+ {"microbit", "examples/serial", 2908, 388, 8, 2272},
+ {"wioterminal", "examples/pininterrupt", 6140, 1484, 116, 6832},
// TODO: also check wasm. Right now this is difficult, because
// wasm binaries are run through wasm-opt and therefore the
diff --git a/src/runtime/gc_blocks.go b/src/runtime/gc_blocks.go
index 3c3862dbb..b2d2fed7f 100644
--- a/src/runtime/gc_blocks.go
+++ b/src/runtime/gc_blocks.go
@@ -76,6 +76,13 @@ const (
blockStateMask blockState = 3 // 11
)
+// The byte value of a block where every block is a 'tail' block.
+const blockStateByteAllTails = 0 |
+ uint8(blockStateTail<<(stateBits*3)) |
+ uint8(blockStateTail<<(stateBits*2)) |
+ uint8(blockStateTail<<(stateBits*1)) |
+ uint8(blockStateTail<<(stateBits*0))
+
// String returns a human-readable version of the block state, for debugging.
func (s blockState) String() string {
switch s {
@@ -123,7 +130,25 @@ func (b gcBlock) address() uintptr {
// points to an allocated object. It returns the same block if this block
// already points to the head.
func (b gcBlock) findHead() gcBlock {
- for b.state() == blockStateTail {
+ for {
+ // Optimization: check whether the current block state byte (which
+ // contains the state of multiple blocks) is composed entirely of tail
+ // blocks. If so, we can skip back to the last block in the previous
+ // state byte.
+ // This optimization speeds up findHead for pointers that point into a
+ // large allocation.
+ stateByte := b.stateByte()
+ if stateByte == blockStateByteAllTails {
+ b -= (b % blocksPerStateByte) + 1
+ continue
+ }
+
+ // Check whether we've found a non-tail block, which means we found the
+ // head.
+ state := b.stateFromByte(stateByte)
+ if state != blockStateTail {
+ break
+ }
b--
}
if gcAsserts {
@@ -146,10 +171,19 @@ func (b gcBlock) findNext() gcBlock {
return b
}
+func (b gcBlock) stateByte() byte {
+ return *(*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
+}
+
+// Return the block state given a state byte. The state byte must have been
+// obtained using b.stateByte(), otherwise the result is incorrect.
+func (b gcBlock) stateFromByte(stateByte byte) blockState {
+ return blockState(stateByte>>((b%blocksPerStateByte)*stateBits)) & blockStateMask
+}
+
// State returns the current block state.
func (b gcBlock) state() blockState {
- stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
- return blockState(*stateBytePtr>>((b%blocksPerStateByte)*stateBits)) & blockStateMask
+ return b.stateFromByte(b.stateByte())
}
// setState sets the current block to the given state, which must contain more