all: refactor reflect package

This is a big commit that changes the way runtime type information is stored in the binary. Instead of compressing it and storing it in a number of side tables, it is now stored similarly to how the Go compiler toolchain stores it (but still more compactly).

This has a number of advantages:

  * It is much easier to add new features to reflect support. They can simply be added to the new type structs without requiring massive changes (especially in the reflect lowering pass).
  * It removes the reflect lowering pass, which was a large amount of code that was hard to understand and debug.
  * The reflect lowering pass also required merging all LLVM IR into one module, which is terrible for performance, especially when compiling large amounts of code. See issue 2870 for details.
  * It is (probably!) easier to reason about for the compiler.

The downside is that it increases code size a bit, especially when reflect is involved. I hope to fix some of that in later patches.
author: Ayke van Laethem <[email protected]> 2022-06-24 00:09:03 +0200
committer: Ron Evans <[email protected]> 2023-02-17 22:54:34 +0100
commit: 4e8453167f42976aad87099ffdb3746fc540d6a6 (patch)
tree: b3acee7dc97a19219fd1a84cabaf9b9d8eba1f3a /interp
parent: ebb410afd916047ee17f0e51dfba36ad3a6c002b (diff)
download: tinygo-4e8453167f42976aad87099ffdb3746fc540d6a6.tar.gz
tinygo-4e8453167f42976aad87099ffdb3746fc540d6a6.zip
3 files changed, 39 insertions, 66 deletions
diff --git a/interp/interpreter.go b/interp/interpreter.go
index c61ce7cf3..7c58a5d20 100644
--- a/interp/interpreter.go
+++ b/interp/interpreter.go
@@ -238,7 +238,7 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
 				// which case this call won't even get to this point but will
 				// already be emitted in initAll.
 				continue
-			case strings.HasPrefix(callFn.name, "runtime.print") || callFn.name == "runtime._panic" || callFn.name == "runtime.hashmapGet" ||
+			case strings.HasPrefix(callFn.name, "runtime.print") || callFn.name == "runtime._panic" || callFn.name == "runtime.hashmapGet" || callFn.name == "runtime.hashmapInterfaceHash" ||
 				callFn.name == "os.runtime_args" || callFn.name == "internal/task.start" || callFn.name == "internal/task.Current":
 				// These functions should be run at runtime. Specifically:
 				//   * Print and panic functions are best emitted directly without
@@ -378,42 +378,6 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
 				copy(dstBuf.buf[dst.offset():dst.offset()+nBytes], srcBuf.buf[src.offset():])
 				dstObj.buffer = dstBuf
 				mem.put(dst.index(), dstObj)
-			case callFn.name == "(reflect.rawType).elem":
-				if r.debug {
-					fmt.Fprintln(os.Stderr, indent+"call (reflect.rawType).elem:", operands[1:])
-				}
-				// Extract the type code global from the first parameter.
-				typecodeIDPtrToInt, err := operands[1].toLLVMValue(inst.llvmInst.Operand(0).Type(), &mem)
-				if err != nil {
-					return nil, mem, r.errorAt(inst, err)
-				}
-				typecodeID := typecodeIDPtrToInt.Operand(0)
-
-				// Get the type class.
-				// See also: getClassAndValueFromTypeCode in transform/reflect.go.
-				typecodeName := typecodeID.Name()
-				const prefix = "reflect/types.type:"
-				if !strings.HasPrefix(typecodeName, prefix) {
-					panic("unexpected typecode name: " + typecodeName)
-				}
-				id := typecodeName[len(prefix):]
-				class := id[:strings.IndexByte(id, ':')]
-				value := id[len(class)+1:]
-				if class == "named" {
-					// Get the underlying type.
-					class = value[:strings.IndexByte(value, ':')]
-					value = value[len(class)+1:]
-				}
-
-				// Elem() is only valid for certain type classes.
-				switch class {
-				case "chan", "pointer", "slice", "array":
-					elementType := r.builder.CreateExtractValue(typecodeID.Initializer(), 0, "")
-					uintptrType := r.mod.Context().IntType(int(mem.r.pointerSize) * 8)
-					locals[inst.localIndex] = r.getValue(llvm.ConstPtrToInt(elementType, uintptrType))
-				default:
-					return nil, mem, r.errorAt(inst, fmt.Errorf("(reflect.Type).Elem() called on %s type", class))
-				}
 			case callFn.name == "runtime.typeAssert":
 				// This function must be implemented manually as it is normally
 				// implemented by the interface lowering pass.
@@ -424,15 +388,22 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
 				if err != nil {
 					return nil, mem, r.errorAt(inst, err)
 				}
-				actualTypePtrToInt, err := operands[1].toLLVMValue(inst.llvmInst.Operand(0).Type(), &mem)
+				actualType, err := operands[1].toLLVMValue(inst.llvmInst.Operand(0).Type(), &mem)
 				if err != nil {
 					return nil, mem, r.errorAt(inst, err)
 				}
-				if !actualTypePtrToInt.IsAConstantInt().IsNil() && actualTypePtrToInt.ZExtValue() == 0 {
+				if !actualType.IsAConstantInt().IsNil() && actualType.ZExtValue() == 0 {
 					locals[inst.localIndex] = literalValue{uint8(0)}
 					break
 				}
-				actualType := actualTypePtrToInt.Operand(0)
+				// Strip pointer casts (bitcast, getelementptr).
+				for !actualType.IsAConstantExpr().IsNil() {
+					opcode := actualType.Opcode()
+					if opcode != llvm.GetElementPtr && opcode != llvm.BitCast {
+						break
+					}
+					actualType = actualType.Operand(0)
+				}
 				if strings.TrimPrefix(actualType.Name(), "reflect/types.type:") == strings.TrimPrefix(assertedType.Name(), "reflect/types.typeid:") {
 					locals[inst.localIndex] = literalValue{uint8(1)}
 				} else {
@@ -448,11 +419,12 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
 				if err != nil {
 					return nil, mem, r.errorAt(inst, err)
 				}
-				methodSetPtr, err := mem.load(typecodePtr.addOffset(r.pointerSize*2), r.pointerSize).asPointer(r)
+				methodSetPtr, err := mem.load(typecodePtr.addOffset(-int64(r.pointerSize)), r.pointerSize).asPointer(r)
 				if err != nil {
 					return nil, mem, r.errorAt(inst, err)
 				}
 				methodSet := mem.get(methodSetPtr.index()).llvmGlobal.Initializer()
+				numMethods := int(r.builder.CreateExtractValue(methodSet, 0, "").ZExtValue())
 				llvmFn := inst.llvmInst.CalledValue()
 				methodSetAttr := llvmFn.GetStringAttributeAtIndex(-1, "tinygo-methods")
 				methodSetString := methodSetAttr.GetStringValue()
@@ -460,9 +432,9 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
 				// Make a set of all the methods on the concrete type, for
 				// easier checking in the next step.
 				concreteTypeMethods := map[string]struct{}{}
-				for i := 0; i < methodSet.Type().ArrayLength(); i++ {
-					methodInfo := r.builder.CreateExtractValue(methodSet, i, "")
-					name := r.builder.CreateExtractValue(methodInfo, 0, "").Name()
+				for i := 0; i < numMethods; i++ {
+					methodInfo := r.builder.CreateExtractValue(methodSet, 1, "")
+					name := r.builder.CreateExtractValue(methodInfo, i, "").Name()
 					concreteTypeMethods[name] = struct{}{}
 				}
 
@@ -488,15 +460,16 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
 					fmt.Fprintln(os.Stderr, indent+"invoke method:", operands[1:])
 				}
 
-				// Load the type code of the interface value.
-				typecodeIDBitCast, err := operands[len(operands)-2].toLLVMValue(inst.llvmInst.Operand(len(operands)-3).Type(), &mem)
+				// Load the type code and method set of the interface value.
+				typecodePtr, err := operands[len(operands)-2].asPointer(r)
 				if err != nil {
 					return nil, mem, r.errorAt(inst, err)
 				}
-				typecodeID := typecodeIDBitCast.Operand(0).Initializer()
-
-				// Load the method set, which is part of the typecodeID object.
-				methodSet := stripPointerCasts(r.builder.CreateExtractValue(typecodeID, 2, "")).Initializer()
+				methodSetPtr, err := mem.load(typecodePtr.addOffset(-int64(r.pointerSize)), r.pointerSize).asPointer(r)
+				if err != nil {
+					return nil, mem, r.errorAt(inst, err)
+				}
+				methodSet := mem.get(methodSetPtr.index()).llvmGlobal.Initializer()
 
 				// We don't need to load the interface method set.
 
@@ -508,13 +481,14 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
 
 				// Iterate through all methods, looking for the one method that
 				// should be returned.
-				numMethods := methodSet.Type().ArrayLength()
+				numMethods := int(r.builder.CreateExtractValue(methodSet, 0, "").ZExtValue())
 				var method llvm.Value
 				for i := 0; i < numMethods; i++ {
-					methodSignatureAgg := r.builder.CreateExtractValue(methodSet, i, "")
-					methodSignature := r.builder.CreateExtractValue(methodSignatureAgg, 0, "")
+					methodSignatureAgg := r.builder.CreateExtractValue(methodSet, 1, "")
+					methodSignature := r.builder.CreateExtractValue(methodSignatureAgg, i, "")
 					if methodSignature == signature {
-						method = r.builder.CreateExtractValue(methodSignatureAgg, 1, "").Operand(0)
+						methodAgg := r.builder.CreateExtractValue(methodSet, 2, "")
+						method = r.builder.CreateExtractValue(methodAgg, i, "")
 					}
 				}
 				if method.IsNil() {
@@ -685,7 +659,7 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
 				}
 				continue
 			}
-			ptr = ptr.addOffset(uint32(offset))
+			ptr = ptr.addOffset(int64(offset))
 			locals[inst.localIndex] = ptr
 			if r.debug {
 				fmt.Fprintln(os.Stderr, indent+"gep:", operands, "->", ptr)
@@ -784,7 +758,7 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
 				case llvm.Add:
 					// This likely means this is part of a
 					// unsafe.Pointer(uintptr(ptr) + offset) pattern.
-					lhsPtr = lhsPtr.addOffset(uint32(rhs.Uint()))
+					lhsPtr = lhsPtr.addOffset(int64(rhs.Uint()))
 					locals[inst.localIndex] = lhsPtr
 					continue
 				case llvm.Xor:
diff --git a/interp/memory.go b/interp/memory.go
index 1f9ed99f3..9a28f1d49 100644
--- a/interp/memory.go
+++ b/interp/memory.go
@@ -501,7 +501,7 @@ func (v pointerValue) offset() uint32 {
 // addOffset essentially does a GEP operation (pointer arithmetic): it adds the
 // offset to the pointer. It also checks that the offset doesn't overflow the
 // maximum offset size (which is 4GB).
-func (v pointerValue) addOffset(offset uint32) pointerValue {
+func (v pointerValue) addOffset(offset int64) pointerValue {
 	result := pointerValue{v.pointer + uint64(offset)}
 	if checks && v.index() != result.index() {
 		panic("interp: offset out of range")
@@ -815,7 +815,7 @@ func (v rawValue) rawLLVMValue(mem *memoryView) (llvm.Value, error) {
 					// as a ptrtoint, so that they can be used in certain
 					// optimizations.
 					name := elementType.StructName()
-					if name == "runtime.typecodeID" || name == "runtime.funcValueWithSignature" {
+					if name == "runtime.funcValueWithSignature" {
 						uintptrType := ctx.IntType(int(mem.r.pointerSize) * 8)
 						field = llvm.ConstPtrToInt(field, uintptrType)
 					}
diff --git a/interp/testdata/interface.ll b/interp/testdata/interface.ll
index 6520efc5c..da27ad8a0 100644
--- a/interp/testdata/interface.ll
+++ b/interp/testdata/interface.ll
@@ -1,17 +1,16 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64--linux"
 
-%runtime.typecodeID = type { %runtime.typecodeID*, i64, %runtime.interfaceMethodInfo* }
-%runtime.interfaceMethodInfo = type { i8*, i64 }
-
 @main.v1 = global i1 0
 @main.v2 = global i1 0
-@"reflect/types.type:named:main.foo" = private constant %runtime.typecodeID { %runtime.typecodeID* @"reflect/types.type:basic:int", i64 0, %runtime.interfaceMethodInfo* null }
+@"reflect/types.type:named:main.foo" = private constant { i8, i8*, i8* } { i8 34, i8* getelementptr inbounds ({ i8, i8* }, { i8, i8* }* @"reflect/types.type:pointer:named:main.foo", i32 0, i32 0), i8* getelementptr inbounds ({ i8, i8* }, { i8, i8* }* @"reflect/types.type:basic:int", i32 0, i32 0) }, align 4
+@"reflect/types.type:pointer:named:main.foo" = external constant { i8, i8* }
 @"reflect/types.typeid:named:main.foo" = external constant i8
-@"reflect/types.type:basic:int" = external constant %runtime.typecodeID
+@"reflect/types.type:basic:int" = private constant { i8, i8* } { i8 2, i8* getelementptr inbounds ({ i8, i8* }, { i8, i8* }* @"reflect/types.type:pointer:basic:int", i32 0, i32 0) }, align 4
+@"reflect/types.type:pointer:basic:int" = external constant { i8, i8* }
 
 
-declare i1 @runtime.typeAssert(i64, i8*, i8*, i8*)
+declare i1 @runtime.typeAssert(i8*, i8*, i8*, i8*)
 
 define void @runtime.initAll() unnamed_addr {
 entry:
@@ -22,9 +21,9 @@ entry:
 define internal void @main.init() unnamed_addr {
 entry:
   ; Test type asserts.
-  %typecode = call i1 @runtime.typeAssert(i64 ptrtoint (%runtime.typecodeID* @"reflect/types.type:named:main.foo" to i64), i8* @"reflect/types.typeid:named:main.foo", i8* undef, i8* null)
+  %typecode = call i1 @runtime.typeAssert(i8* getelementptr inbounds ({ i8, i8*, i8* }, { i8, i8*, i8* }* @"reflect/types.type:named:main.foo", i32 0, i32 0), i8* @"reflect/types.typeid:named:main.foo", i8* undef, i8* null)
   store i1 %typecode, i1* @main.v1
-  %typecode2 = call i1 @runtime.typeAssert(i64 0, i8* @"reflect/types.typeid:named:main.foo", i8* undef, i8* null)
+  %typecode2 = call i1 @runtime.typeAssert(i8* null, i8* @"reflect/types.typeid:named:main.foo", i8* undef, i8* null)
   store i1 %typecode2, i1* @main.v2
   ret void
 }
author	Ayke van Laethem <[email protected]>	2022-06-24 00:09:03 +0200
committer	Ron Evans <[email protected]>	2023-02-17 22:54:34 +0100
commit	4e8453167f42976aad87099ffdb3746fc540d6a6 (patch)
tree	b3acee7dc97a19219fd1a84cabaf9b9d8eba1f3a /interp
parent	ebb410afd916047ee17f0e51dfba36ad3a6c002b (diff)
download	tinygo-4e8453167f42976aad87099ffdb3746fc540d6a6.tar.gz tinygo-4e8453167f42976aad87099ffdb3746fc540d6a6.zip