runtime: optimize findHead

This is similar to https://github.com/tinygo-org/tinygo/pull/3899, but smaller and hopefully just as efficient. Thanks to @HattoriHanzo031 for starting this work, benchmarking, and for improving the performance of the code even further.
author: Ayke van Laethem <[email protected]> 2024-10-31 12:03:49 +0100
committer: Ayke <[email protected]> 2024-11-14 09:28:25 +0100
commit: 860697257b8deb3bd2e7c8d9ee72420448e8861e (patch)
tree: cc0bd77aed1a7bd5c66212496039ec5c734b6d22
parent: 91563cff1f91efd58671f7b8b4d9880424071c4a (diff)
download: tinygo-860697257b8deb3bd2e7c8d9ee72420448e8861e.tar.gz
tinygo-860697257b8deb3bd2e7c8d9ee72420448e8861e.zip
2 files changed, 40 insertions, 6 deletions
diff --git a/builder/sizes_test.go b/builder/sizes_test.go
index dd89609e7..650a5fbb5 100644
--- a/builder/sizes_test.go
+++ b/builder/sizes_test.go
@@ -41,9 +41,9 @@ func TestBinarySize(t *testing.T) {
 	// This is a small number of very diverse targets that we want to test.
 	tests := []sizeTest{
 		// microcontrollers
-		{"hifive1b", "examples/echo", 4580, 280, 0, 2268},
-		{"microbit", "examples/serial", 2888, 388, 8, 2272},
-		{"wioterminal", "examples/pininterrupt", 6124, 1484, 116, 6832},
+		{"hifive1b", "examples/echo", 4600, 280, 0, 2268},
+		{"microbit", "examples/serial", 2908, 388, 8, 2272},
+		{"wioterminal", "examples/pininterrupt", 6140, 1484, 116, 6832},
 
 		// TODO: also check wasm. Right now this is difficult, because
 		// wasm binaries are run through wasm-opt and therefore the
diff --git a/src/runtime/gc_blocks.go b/src/runtime/gc_blocks.go
index 3c3862dbb..b2d2fed7f 100644
--- a/src/runtime/gc_blocks.go
+++ b/src/runtime/gc_blocks.go
@@ -76,6 +76,13 @@ const (
 	blockStateMask blockState = 3 // 11
 )
 
+// The byte value of a block where every block is a 'tail' block.
+const blockStateByteAllTails = 0 |
+	uint8(blockStateTail<<(stateBits*3)) |
+	uint8(blockStateTail<<(stateBits*2)) |
+	uint8(blockStateTail<<(stateBits*1)) |
+	uint8(blockStateTail<<(stateBits*0))
+
 // String returns a human-readable version of the block state, for debugging.
 func (s blockState) String() string {
 	switch s {
@@ -123,7 +130,25 @@ func (b gcBlock) address() uintptr {
 // points to an allocated object. It returns the same block if this block
 // already points to the head.
 func (b gcBlock) findHead() gcBlock {
-	for b.state() == blockStateTail {
+	for {
+		// Optimization: check whether the current block state byte (which
+		// contains the state of multiple blocks) is composed entirely of tail
+		// blocks. If so, we can skip back to the last block in the previous
+		// state byte.
+		// This optimization speeds up findHead for pointers that point into a
+		// large allocation.
+		stateByte := b.stateByte()
+		if stateByte == blockStateByteAllTails {
+			b -= (b % blocksPerStateByte) + 1
+			continue
+		}
+
+		// Check whether we've found a non-tail block, which means we found the
+		// head.
+		state := b.stateFromByte(stateByte)
+		if state != blockStateTail {
+			break
+		}
 		b--
 	}
 	if gcAsserts {
@@ -146,10 +171,19 @@ func (b gcBlock) findNext() gcBlock {
 	return b
 }
 
+func (b gcBlock) stateByte() byte {
+	return *(*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
+}
+
+// Return the block state given a state byte. The state byte must have been
+// obtained using b.stateByte(), otherwise the result is incorrect.
+func (b gcBlock) stateFromByte(stateByte byte) blockState {
+	return blockState(stateByte>>((b%blocksPerStateByte)*stateBits)) & blockStateMask
+}
+
 // State returns the current block state.
 func (b gcBlock) state() blockState {
-	stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
-	return blockState(*stateBytePtr>>((b%blocksPerStateByte)*stateBits)) & blockStateMask
+	return b.stateFromByte(b.stateByte())
 }
 
 // setState sets the current block to the given state, which must contain more
author	Ayke van Laethem <[email protected]>	2024-10-31 12:03:49 +0100
committer	Ayke <[email protected]>	2024-11-14 09:28:25 +0100
commit	860697257b8deb3bd2e7c8d9ee72420448e8861e (patch)
tree	cc0bd77aed1a7bd5c66212496039ec5c734b6d22
parent	91563cff1f91efd58671f7b8b4d9880424071c4a (diff)
download	tinygo-860697257b8deb3bd2e7c8d9ee72420448e8861e.tar.gz tinygo-860697257b8deb3bd2e7c8d9ee72420448e8861e.zip