author | Nia Waldvogel <[email protected]> | 2022-01-20 11:03:54 -0500
---|---|---
committer | Ron Evans <[email protected]> | 2022-06-02 18:13:56 +0200
commit | f2e576decf923f21605f5c499067a89d848ecb45 (patch) |
tree | e29e89b3fbf2c0e9e7293274495443e9f2546847 /interp |
parent | 2d61972475fff9f6d70bf0f5853bb4fee8374928 (diff) |
download | tinygo-f2e576decf923f21605f5c499067a89d848ecb45.tar.gz tinygo-f2e576decf923f21605f5c499067a89d848ecb45.zip |
interp: do not unroll loops
This change triggers a revert whenever a basic block runs instructions at runtime twice.
As a result, a loop body with runtime-only instructions will no longer be unrolled.
This should help in extreme cases where a loop could otherwise be expanded into hundreds or thousands of instructions.
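As a rough illustration of the mechanism (a simplified model, not TinyGo's actual interp types — everything here except errLoopUnrolled and the runtimeBlocks idea is made up for the example), the interpreter flags each block the first time it emits runtime instructions, and reverts instead of emitting a second copy of the same block:

package main

import (
	"errors"
	"fmt"
)

var errLoopUnrolled = errors.New("interp: loop unrolled")

// block is a toy basic block: how many of its instructions can only run
// at runtime, and the index of the successor block (-1 means return).
type block struct {
	runtimeInsts int
	next         int
}

// eval flags a block the first time it emits runtime instructions; if a
// flagged block is reached again, the loop would have to be unrolled, so
// evaluation stops with a recoverable error instead.
func eval(blocks []block) (emitted int, err error) {
	runtimeBlocks := make(map[int]struct{})
	for bb := 0; bb != -1; bb = blocks[bb].next {
		if blocks[bb].runtimeInsts == 0 {
			continue // fully evaluated at compile time
		}
		if _, flagged := runtimeBlocks[bb]; flagged {
			return 0, errLoopUnrolled // a second runtime pass would unroll: revert
		}
		runtimeBlocks[bb] = struct{}{}
		emitted += blocks[bb].runtimeInsts
	}
	return emitted, nil
}

func main() {
	fmt.Println(eval([]block{{runtimeInsts: 2, next: -1}})) // 2 <nil>: straight-line code still expands
	fmt.Println(eval([]block{{runtimeInsts: 1, next: 0}}))  // 0 interp: loop unrolled
}

The real change is finer-grained: it compares the runtime-instruction count before and after each block and attributes the revert to the previous block's terminator, but the flag-then-revert shape is the same.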
Diffstat (limited to 'interp')
-rw-r--r-- | interp/errors.go | 5
-rw-r--r-- | interp/interpreter.go | 24
-rw-r--r-- | interp/testdata/revert.ll | 45
-rw-r--r-- | interp/testdata/revert.out.ll | 33
4 files changed, 106 insertions(+), 1 deletion(-)
diff --git a/interp/errors.go b/interp/errors.go
index c90de3329..eaab781d5 100644
--- a/interp/errors.go
+++ b/interp/errors.go
@@ -18,6 +18,7 @@ var (
 	errUnsupportedInst        = errors.New("interp: unsupported instruction")
 	errUnsupportedRuntimeInst = errors.New("interp: unsupported instruction (to be emitted at runtime)")
 	errMapAlreadyCreated      = errors.New("interp: map already created")
+	errLoopUnrolled           = errors.New("interp: loop unrolled")
 )
 
 // This is one of the errors that can be returned from toLLVMValue when the
@@ -26,7 +27,9 @@ var (
 var errInvalidPtrToIntSize = errors.New("interp: ptrtoint integer size does not equal pointer size")
 
 func isRecoverableError(err error) bool {
-	return err == errIntegerAsPointer || err == errUnsupportedInst || err == errUnsupportedRuntimeInst || err == errMapAlreadyCreated
+	return err == errIntegerAsPointer || err == errUnsupportedInst ||
+		err == errUnsupportedRuntimeInst || err == errMapAlreadyCreated ||
+		err == errLoopUnrolled
 }
 
 // ErrorLine is one line in a traceback. The position may be missing.
diff --git a/interp/interpreter.go b/interp/interpreter.go
index 6ba4a5244..317bc6c03 100644
--- a/interp/interpreter.go
+++ b/interp/interpreter.go
@@ -24,15 +24,39 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
 		locals[i] = param
 	}
 
+	// Track what blocks have run instructions at runtime.
+	// This is used to prevent unrolling.
+	var runtimeBlocks map[int]struct{}
+
 	// Start with the first basic block and the first instruction.
 	// Branch instructions may modify both bb and instIndex when branching.
 	bb := fn.blocks[0]
 	currentBB := 0
 	lastBB := -1 // last basic block is undefined, only defined after a branch
 	var operands []value
+	startRTInsts := len(mem.instructions)
 	for instIndex := 0; instIndex < len(bb.instructions); instIndex++ {
 		if instIndex == 0 {
 			// This is the start of a new basic block.
+			if len(mem.instructions) != startRTInsts {
+				if _, ok := runtimeBlocks[lastBB]; ok {
+					// This loop has been unrolled.
+					// Avoid doing this, as it can result in a large amount of extra machine code.
+					// This currently uses the branch from the last block, as there is no available information to give a better location.
+					lastBBInsts := fn.blocks[lastBB].instructions
+					return nil, mem, r.errorAt(lastBBInsts[len(lastBBInsts)-1], errLoopUnrolled)
+				}
+
+				// Flag the last block as having run stuff at runtime.
+				if runtimeBlocks == nil {
+					runtimeBlocks = make(map[int]struct{})
+				}
+				runtimeBlocks[lastBB] = struct{}{}
+
+				// Reset the block-start runtime instructions counter.
+				startRTInsts = len(mem.instructions)
+			}
+
 			// There may be PHI nodes that need to be resolved. Resolve all PHI
 			// nodes before continuing with regular instructions.
 			// PHI nodes need to be treated specially because they can have a
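Because errLoopUnrolled is recoverable, the caller rolls the function back and keeps the original call for runtime rather than failing the build. A minimal sketch of that control flow, with hypothetical stand-ins tryEval and emit for TinyGo's real call sites:

package main

import (
	"errors"
	"fmt"
)

var errLoopUnrolled = errors.New("interp: loop unrolled")

// Only errLoopUnrolled is shown; the real list has more recoverable errors.
func isRecoverableError(err error) bool {
	return err == errLoopUnrolled
}

// evalOrRevert shows the shape of the control flow: a recoverable error
// means "undo the partial evaluation and emit the call at runtime",
// not "fail the build". tryEval and emit are hypothetical stand-ins.
func evalOrRevert(call string, tryEval func(string) error, emit func(string)) error {
	switch err := tryEval(call); {
	case err == nil:
		return nil // fully evaluated at compile time
	case isRecoverableError(err):
		emit(call) // revert: keep the call for runtime
		return nil
	default:
		return err // unrecoverable: surface as a build error
	}
}

func main() {
	err := evalOrRevert("z.init",
		func(string) error { return errLoopUnrolled },
		func(c string) { fmt.Println("emitted at runtime:", c) })
	fmt.Println("err:", err)
}

This is why revert.out.ll below still contains @z.setArr and @z.set: the reverted call is simply emitted for runtime.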
diff --git a/interp/testdata/revert.ll b/interp/testdata/revert.ll
index dc6ee5c2a..517c152ba 100644
--- a/interp/testdata/revert.ll
+++ b/interp/testdata/revert.ll
@@ -3,6 +3,8 @@ target triple = "x86_64--linux"
 
 declare void @externalCall(i64)
 
+declare i64 @ptrHash(i8* nocapture)
+
 @foo.knownAtRuntime = global i64 0
 @bar.knownAtRuntime = global i64 0
 @baz.someGlobal = external global [3 x {i64, i32}]
@@ -10,6 +12,8 @@ declare void @externalCall(i64)
 @x.atomicNum = global i32 0
 @x.volatileNum = global i32 0
 @y.ready = global i32 0
+@z.bloom = global i64 0
+@z.arr = global [32 x i8] zeroinitializer
 
 define void @runtime.initAll() unnamed_addr {
 entry:
@@ -19,6 +23,7 @@ entry:
     call void @main.init(i8* undef)
     call void @x.init(i8* undef)
     call void @y.init(i8* undef)
+    call void @z.init(i8* undef)
     ret void
 }
 
@@ -72,3 +77,43 @@ loop:
 end:
     ret void
 }
+
+define internal void @z.init(i8* %context) unnamed_addr {
+    %bloom = bitcast i64* @z.bloom to i8*
+
+    ; This can be safely expanded.
+    call void @z.setArr(i8* %bloom, i64 1, i8* %bloom)
+
+    ; This call should be reverted to prevent unrolling.
+    call void @z.setArr(i8* bitcast ([32 x i8]* @z.arr to i8*), i64 32, i8* %bloom)
+
+    ret void
+}
+
+define internal void @z.setArr(i8* %arr, i64 %n, i8* %context) unnamed_addr {
+entry:
+    br label %loop
+
+loop:
+    %prev = phi i64 [ %n, %entry ], [ %idx, %loop ]
+    %idx = sub i64 %prev, 1
+    %elem = getelementptr i8, i8* %arr, i64 %idx
+    call void @z.set(i8* %elem, i8* %context)
+    %done = icmp eq i64 %idx, 0
+    br i1 %done, label %end, label %loop
+
+end:
+    ret void
+}
+
+define internal void @z.set(i8* %ptr, i8* %context) unnamed_addr {
+    ; Insert the pointer into the Bloom filter.
+    %hash = call i64 @ptrHash(i8* %ptr)
+    %index = lshr i64 %hash, 58
+    %bit = shl i64 1, %index
+    %bloom = bitcast i8* %context to i64*
+    %old = load i64, i64* %bloom
+    %new = or i64 %old, %bit
+    store i64 %new, i64* %bloom
+    ret void
+}
diff --git a/interp/testdata/revert.out.ll b/interp/testdata/revert.out.ll
index 030734d8e..edf19b540 100644
--- a/interp/testdata/revert.out.ll
+++ b/interp/testdata/revert.out.ll
@@ -8,9 +8,13 @@ target triple = "x86_64--linux"
 @x.atomicNum = local_unnamed_addr global i32 0
 @x.volatileNum = global i32 0
 @y.ready = local_unnamed_addr global i32 0
+@z.bloom = global i64 0
+@z.arr = global [32 x i8] zeroinitializer
 
 declare void @externalCall(i64) local_unnamed_addr
 
+declare i64 @ptrHash(i8* nocapture) local_unnamed_addr
+
 define void @runtime.initAll() unnamed_addr {
 entry:
     call fastcc void @baz.init(i8* undef)
@@ -24,6 +28,8 @@ entry:
     %y = load volatile i32, i32* @x.volatileNum, align 4
     store volatile i32 %y, i32* @x.volatileNum, align 4
     call fastcc void @y.init(i8* undef)
+    call fastcc void @z.set(i8* bitcast (i64* @z.bloom to i8*), i8* bitcast (i64* @z.bloom to i8*))
+    call fastcc void @z.setArr(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @z.arr, i32 0, i32 0), i64 32, i8* bitcast (i64* @z.bloom to i8*))
     ret void
 }
 
@@ -48,3 +54,30 @@ loop:                                             ; preds = %loop, %entry
 end:                                              ; preds = %loop
     ret void
 }
+
+define internal fastcc void @z.setArr(i8* %arr, i64 %n, i8* %context) unnamed_addr {
+entry:
+    br label %loop
+
+loop:                                             ; preds = %loop, %entry
+    %prev = phi i64 [ %n, %entry ], [ %idx, %loop ]
+    %idx = sub i64 %prev, 1
+    %elem = getelementptr i8, i8* %arr, i64 %idx
+    call fastcc void @z.set(i8* %elem, i8* %context)
+    %done = icmp eq i64 %idx, 0
+    br i1 %done, label %end, label %loop
+
+end:                                              ; preds = %loop
+    ret void
+}
+
+define internal fastcc void @z.set(i8* %ptr, i8* %context) unnamed_addr {
+    %hash = call i64 @ptrHash(i8* %ptr)
+    %index = lshr i64 %hash, 58
+    %bit = shl i64 1, %index
+    %bloom = bitcast i8* %context to i64*
+    %old = load i64, i64* %bloom, align 8
+    %new = or i64 %old, %bit
+    store i64 %new, i64* %bloom, align 8
+    ret void
+}
\ No newline at end of file