diff options
author | Ayke van Laethem <[email protected]> | 2024-10-31 12:03:49 +0100 |
---|---|---|
committer | Ayke <[email protected]> | 2024-11-14 09:28:25 +0100 |
commit | 860697257b8deb3bd2e7c8d9ee72420448e8861e (patch) | |
tree | cc0bd77aed1a7bd5c66212496039ec5c734b6d22 | |
parent | 91563cff1f91efd58671f7b8b4d9880424071c4a (diff) | |
download | tinygo-860697257b8deb3bd2e7c8d9ee72420448e8861e.tar.gz tinygo-860697257b8deb3bd2e7c8d9ee72420448e8861e.zip |
runtime: optimize findHead
This is similar to https://github.com/tinygo-org/tinygo/pull/3899, but
smaller and hopefully just as efficient.
Thanks to @HattoriHanzo031 for starting this work, benchmarking, and for
improving the performance of the code even further.
-rw-r--r-- | builder/sizes_test.go | 6 | ||||
-rw-r--r-- | src/runtime/gc_blocks.go | 40 |
2 files changed, 40 insertions, 6 deletions
diff --git a/builder/sizes_test.go b/builder/sizes_test.go index dd89609e7..650a5fbb5 100644 --- a/builder/sizes_test.go +++ b/builder/sizes_test.go @@ -41,9 +41,9 @@ func TestBinarySize(t *testing.T) { // This is a small number of very diverse targets that we want to test. tests := []sizeTest{ // microcontrollers - {"hifive1b", "examples/echo", 4580, 280, 0, 2268}, - {"microbit", "examples/serial", 2888, 388, 8, 2272}, - {"wioterminal", "examples/pininterrupt", 6124, 1484, 116, 6832}, + {"hifive1b", "examples/echo", 4600, 280, 0, 2268}, + {"microbit", "examples/serial", 2908, 388, 8, 2272}, + {"wioterminal", "examples/pininterrupt", 6140, 1484, 116, 6832}, // TODO: also check wasm. Right now this is difficult, because // wasm binaries are run through wasm-opt and therefore the diff --git a/src/runtime/gc_blocks.go b/src/runtime/gc_blocks.go index 3c3862dbb..b2d2fed7f 100644 --- a/src/runtime/gc_blocks.go +++ b/src/runtime/gc_blocks.go @@ -76,6 +76,13 @@ const ( blockStateMask blockState = 3 // 11 ) +// The byte value of a block where every block is a 'tail' block. +const blockStateByteAllTails = 0 | + uint8(blockStateTail<<(stateBits*3)) | + uint8(blockStateTail<<(stateBits*2)) | + uint8(blockStateTail<<(stateBits*1)) | + uint8(blockStateTail<<(stateBits*0)) + // String returns a human-readable version of the block state, for debugging. func (s blockState) String() string { switch s { @@ -123,7 +130,25 @@ func (b gcBlock) address() uintptr { // points to an allocated object. It returns the same block if this block // already points to the head. func (b gcBlock) findHead() gcBlock { - for b.state() == blockStateTail { + for { + // Optimization: check whether the current block state byte (which + // contains the state of multiple blocks) is composed entirely of tail + // blocks. If so, we can skip back to the last block in the previous + // state byte. + // This optimization speeds up findHead for pointers that point into a + // large allocation. + stateByte := b.stateByte() + if stateByte == blockStateByteAllTails { + b -= (b % blocksPerStateByte) + 1 + continue + } + + // Check whether we've found a non-tail block, which means we found the + // head. + state := b.stateFromByte(stateByte) + if state != blockStateTail { + break + } b-- } if gcAsserts { @@ -146,10 +171,19 @@ func (b gcBlock) findNext() gcBlock { return b } +func (b gcBlock) stateByte() byte { + return *(*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) +} + +// Return the block state given a state byte. The state byte must have been +// obtained using b.stateByte(), otherwise the result is incorrect. +func (b gcBlock) stateFromByte(stateByte byte) blockState { + return blockState(stateByte>>((b%blocksPerStateByte)*stateBits)) & blockStateMask +} + // State returns the current block state. func (b gcBlock) state() blockState { - stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) - return blockState(*stateBytePtr>>((b%blocksPerStateByte)*stateBits)) & blockStateMask + return b.stateFromByte(b.stateByte()) } // setState sets the current block to the given state, which must contain more |