diff options
author | Ayke van Laethem <[email protected]> | 2019-05-10 19:48:33 +0200 |
---|---|---|
committer | Ron Evans <[email protected]> | 2019-05-12 10:49:15 +0200 |
commit | 11567c62d45418685161cd484b55c225f1d1f65d (patch) | |
tree | 60762f41abf7c25502dae481d6fc7ba5a6d7d920 /cgo | |
parent | 4619207f99839c388e8310bb5921749bcdcb7d3e (diff) | |
download | tinygo-11567c62d45418685161cd484b55c225f1d1f65d.tar.gz tinygo-11567c62d45418685161cd484b55c225f1d1f65d.zip |
cgo: refactor; support multiple cgo files in a single package
This is a big commit that does a few things:
* It moves CGo processing into a separate package. It never really
belonged in the loader package, and certainly not now that the
loader package may be refactored into a driver package.
* It adds support for multiple CGo files (files that import package
"C") in a single package. Previously, this led to multiple
definition errors in the Go typecheck phase because certain C
symbols were defined multiple times in all the files. Now it
generates a new fake AST that defines these, to avoid multiple
definition errors.
* It improves debug info in a few edge cases that are probably not
relevant outside of bugs in cgo itself.
Diffstat (limited to 'cgo')
-rw-r--r-- | cgo/cgo.go | 636 | ||||
-rw-r--r-- | cgo/libclang.go | 624 | ||||
-rw-r--r-- | cgo/libclang_config.go | 11 | ||||
-rw-r--r-- | cgo/libclang_stubs.c | 58 | ||||
-rw-r--r-- | cgo/sync.go | 46 |
5 files changed, 1375 insertions, 0 deletions
diff --git a/cgo/cgo.go b/cgo/cgo.go new file mode 100644 index 000000000..e7f44dee0 --- /dev/null +++ b/cgo/cgo.go @@ -0,0 +1,636 @@ +// Package cgo implements CGo by modifying a loaded AST. It does this by parsing +// the `import "C"` statements found in the source code with libclang and +// generating stub function and global declarations. +// +// There are a few advantages to modifying the AST directly instead of doing CGo +// as a preprocessing step, with the main advantage being that debug information +// is kept intact as much as possible. +package cgo + +// This file extracts the `import "C"` statement from the source and modifies +// the AST for CGo. It does not use libclang directly: see libclang.go for the C +// source file parsing. + +import ( + "go/ast" + "go/token" + "sort" + "strconv" + "strings" + + "golang.org/x/tools/go/ast/astutil" +) + +// cgoPackage holds all CGo-related information of a package. +type cgoPackage struct { + generated *ast.File + generatedPos token.Pos + errors []error + dir string + fset *token.FileSet + tokenFiles map[string]*token.File + missingSymbols map[string]struct{} + constants map[string]constantInfo + functions map[string]*functionInfo + globals map[string]globalInfo + typedefs map[string]*typedefInfo + elaboratedTypes map[string]*elaboratedTypeInfo +} + +// constantInfo stores some information about a CGo constant found by libclang +// and declared in the Go AST. +type constantInfo struct { + expr *ast.BasicLit + pos token.Pos +} + +// functionInfo stores some information about a CGo function found by libclang +// and declared in the AST. +type functionInfo struct { + args []paramInfo + results *ast.FieldList + pos token.Pos +} + +// paramInfo is a parameter of a CGo function (see functionInfo). +type paramInfo struct { + name string + typeExpr ast.Expr +} + +// typedefInfo contains information about a single typedef in C.
+type typedefInfo struct { + typeExpr ast.Expr + pos token.Pos +} + +// elaboratedTypeInfo contains some information about an elaborated type +// (struct, union) found in the C AST. +type elaboratedTypeInfo struct { + typeExpr ast.Expr + pos token.Pos +} + +// globalInfo contains information about a declared global variable in C. +type globalInfo struct { + typeExpr ast.Expr + pos token.Pos +} + +// cgoAliases list type aliases between Go and C, for types that are equivalent +// in both languages. See addTypeAliases. +var cgoAliases = map[string]string{ + "C.int8_t": "int8", + "C.int16_t": "int16", + "C.int32_t": "int32", + "C.int64_t": "int64", + "C.uint8_t": "uint8", + "C.uint16_t": "uint16", + "C.uint32_t": "uint32", + "C.uint64_t": "uint64", + "C.uintptr_t": "uintptr", +} + +// builtinAliases are handled specially because they only exist on the Go side +// of CGo, not on the CGo side (they're prefixed with "_Cgo_" there). +var builtinAliases = map[string]struct{}{ + "char": struct{}{}, + "schar": struct{}{}, + "uchar": struct{}{}, + "short": struct{}{}, + "ushort": struct{}{}, + "int": struct{}{}, + "uint": struct{}{}, + "long": struct{}{}, + "ulong": struct{}{}, + "longlong": struct{}{}, + "ulonglong": struct{}{}, +} + +// cgoTypes lists some C types with ambiguous sizes that must be retrieved +// somehow from C. This is done by adding some typedefs to get the size of each +// type. +const cgoTypes = ` +typedef char _Cgo_char; +typedef signed char _Cgo_schar; +typedef unsigned char _Cgo_uchar; +typedef short _Cgo_short; +typedef unsigned short _Cgo_ushort; +typedef int _Cgo_int; +typedef unsigned int _Cgo_uint; +typedef long _Cgo_long; +typedef unsigned long _Cgo_ulong; +typedef long long _Cgo_longlong; +typedef unsigned long long _Cgo_ulonglong; +` + +// Process extracts `import "C"` statements from the AST, parses the comment +// with libclang, and modifies the AST to use this information. 
It returns a +// newly created *ast.File that should be added to the list of to-be-parsed +// files. If there is one or more error, it returns these in the []error slice +// but still modifies the AST. +func Process(files []*ast.File, dir string, fset *token.FileSet, cflags []string) (*ast.File, []error) { + p := &cgoPackage{ + dir: dir, + fset: fset, + tokenFiles: map[string]*token.File{}, + missingSymbols: map[string]struct{}{}, + constants: map[string]constantInfo{}, + functions: map[string]*functionInfo{}, + globals: map[string]globalInfo{}, + typedefs: map[string]*typedefInfo{}, + elaboratedTypes: map[string]*elaboratedTypeInfo{}, + } + + // Add a new location for the following file. + generatedTokenPos := p.fset.AddFile(dir+"/!cgo.go", -1, 0) + generatedTokenPos.SetLines([]int{0}) + p.generatedPos = generatedTokenPos.Pos(0) + + // Construct a new in-memory AST for CGo declarations of this package. + unsafeImport := &ast.ImportSpec{ + Path: &ast.BasicLit{ + ValuePos: p.generatedPos, + Kind: token.STRING, + Value: "\"unsafe\"", + }, + EndPos: p.generatedPos, + } + p.generated = &ast.File{ + Package: p.generatedPos, + Name: &ast.Ident{ + NamePos: p.generatedPos, + Name: files[0].Name.Name, + }, + Decls: []ast.Decl{ + &ast.GenDecl{ + TokPos: p.generatedPos, + Tok: token.IMPORT, + Specs: []ast.Spec{ + unsafeImport, + }, + }, + }, + Imports: []*ast.ImportSpec{unsafeImport}, + } + + // Find all C.* symbols. + for _, f := range files { + astutil.Apply(f, p.findMissingCGoNames, nil) + } + for name := range builtinAliases { + p.missingSymbols["_Cgo_"+name] = struct{}{} + } + + // Find `import "C"` statements in the file. 
+ for _, f := range files { + for i := 0; i < len(f.Decls); i++ { + decl := f.Decls[i] + genDecl, ok := decl.(*ast.GenDecl) + if !ok { + continue + } + if len(genDecl.Specs) != 1 { + continue + } + spec, ok := genDecl.Specs[0].(*ast.ImportSpec) + if !ok { + continue + } + path, err := strconv.Unquote(spec.Path.Value) + if err != nil { + panic("could not parse import path: " + err.Error()) + } + if path != "C" { + continue + } + cgoComment := genDecl.Doc.Text() + + pos := genDecl.Pos() + if genDecl.Doc != nil { + pos = genDecl.Doc.Pos() + } + position := fset.PositionFor(pos, true) + p.parseFragment(cgoComment+cgoTypes, cflags, position.Filename, position.Line) + + // Remove this import declaration. + f.Decls = append(f.Decls[:i], f.Decls[i+1:]...) + i-- + } + + // Print the AST, for debugging. + //ast.Print(fset, f) + } + + // Declare functions found by libclang. + p.addFuncDecls() + + // Declare stub function pointer values found by libclang. + p.addFuncPtrDecls() + + // Declare globals found by libclang. + p.addConstDecls() + + // Declare globals found by libclang. + p.addVarDecls() + + // Forward C types to Go types (like C.uint32_t -> uint32). + p.addTypeAliases() + + // Add type declarations for C types, declared using typedef in C. + p.addTypedefs() + + // Add elaborated types for C structs and unions. + p.addElaboratedTypes() + + // Patch the AST to use the declared types and functions. + for _, f := range files { + astutil.Apply(f, p.walker, nil) + } + + // Print the newly generated in-memory AST, for debugging. + //ast.Print(fset, p.generated) + + return p.generated, p.errors +} + +// addFuncDecls adds the C function declarations found by libclang in the +// comment above the `import "C"` statement. 
+func (p *cgoPackage) addFuncDecls() { + names := make([]string, 0, len(p.functions)) + for name := range p.functions { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + fn := p.functions[name] + obj := &ast.Object{ + Kind: ast.Fun, + Name: "C." + name, + } + args := make([]*ast.Field, len(fn.args)) + decl := &ast.FuncDecl{ + Name: &ast.Ident{ + NamePos: fn.pos, + Name: "C." + name, + Obj: obj, + }, + Type: &ast.FuncType{ + Func: fn.pos, + Params: &ast.FieldList{ + Opening: fn.pos, + List: args, + Closing: fn.pos, + }, + Results: fn.results, + }, + } + obj.Decl = decl + for i, arg := range fn.args { + args[i] = &ast.Field{ + Names: []*ast.Ident{ + &ast.Ident{ + NamePos: fn.pos, + Name: arg.name, + Obj: &ast.Object{ + Kind: ast.Var, + Name: arg.name, + Decl: decl, + }, + }, + }, + Type: arg.typeExpr, + } + } + p.generated.Decls = append(p.generated.Decls, decl) + } +} + +// addFuncPtrDecls creates stub declarations of function pointer values. These +// values will later be replaced with the real values in the compiler. +// It adds code like the following to the AST: +// +// var ( +// C.add unsafe.Pointer +// C.mul unsafe.Pointer +// // ... +// ) +func (p *cgoPackage) addFuncPtrDecls() { + if len(p.functions) == 0 { + return + } + gen := &ast.GenDecl{ + TokPos: token.NoPos, + Tok: token.VAR, + Lparen: token.NoPos, + Rparen: token.NoPos, + } + names := make([]string, 0, len(p.functions)) + for name := range p.functions { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + fn := p.functions[name] + obj := &ast.Object{ + Kind: ast.Typ, + Name: "C." + name + "$funcaddr", + } + valueSpec := &ast.ValueSpec{ + Names: []*ast.Ident{&ast.Ident{ + NamePos: fn.pos, + Name: "C." 
+ name + "$funcaddr", + Obj: obj, + }}, + Type: &ast.SelectorExpr{ + X: &ast.Ident{ + NamePos: fn.pos, + Name: "unsafe", + }, + Sel: &ast.Ident{ + NamePos: fn.pos, + Name: "Pointer", + }, + }, + } + obj.Decl = valueSpec + gen.Specs = append(gen.Specs, valueSpec) + } + p.generated.Decls = append(p.generated.Decls, gen) +} + +// addConstDecls declares external C constants in the Go source. +// It adds code like the following to the AST: +// +// const ( +// C.CONST_INT = 5 +// C.CONST_FLOAT = 5.8 +// // ... +// ) +func (p *cgoPackage) addConstDecls() { + if len(p.constants) == 0 { + return + } + gen := &ast.GenDecl{ + TokPos: token.NoPos, + Tok: token.CONST, + Lparen: token.NoPos, + Rparen: token.NoPos, + } + names := make([]string, 0, len(p.constants)) + for name := range p.constants { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + constVal := p.constants[name] + obj := &ast.Object{ + Kind: ast.Con, + Name: "C." + name, + } + valueSpec := &ast.ValueSpec{ + Names: []*ast.Ident{&ast.Ident{ + NamePos: constVal.pos, + Name: "C." + name, + Obj: obj, + }}, + Values: []ast.Expr{constVal.expr}, + } + obj.Decl = valueSpec + gen.Specs = append(gen.Specs, valueSpec) + } + p.generated.Decls = append(p.generated.Decls, gen) +} + +// addVarDecls declares external C globals in the Go source. +// It adds code like the following to the AST: +// +// var ( +// C.globalInt int +// C.globalBool bool +// // ... +// ) +func (p *cgoPackage) addVarDecls() { + if len(p.globals) == 0 { + return + } + gen := &ast.GenDecl{ + TokPos: token.NoPos, + Tok: token.VAR, + Lparen: token.NoPos, + Rparen: token.NoPos, + } + names := make([]string, 0, len(p.globals)) + for name := range p.globals { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + global := p.globals[name] + obj := &ast.Object{ + Kind: ast.Var, + Name: "C." 
+ name, + } + valueSpec := &ast.ValueSpec{ + Names: []*ast.Ident{&ast.Ident{ + NamePos: global.pos, + Name: "C." + name, + Obj: obj, + }}, + Type: global.typeExpr, + } + obj.Decl = valueSpec + gen.Specs = append(gen.Specs, valueSpec) + } + p.generated.Decls = append(p.generated.Decls, gen) +} + +// addTypeAliases aliases some built-in Go types with their equivalent C types. +// It adds code like the following to the AST: +// +// type ( +// C.int8_t = int8 +// C.int16_t = int16 +// // ... +// ) +func (p *cgoPackage) addTypeAliases() { + aliasKeys := make([]string, 0, len(cgoAliases)) + for key := range cgoAliases { + aliasKeys = append(aliasKeys, key) + } + sort.Strings(aliasKeys) + gen := &ast.GenDecl{ + TokPos: token.NoPos, + Tok: token.TYPE, + Lparen: token.NoPos, + Rparen: token.NoPos, + } + for _, typeName := range aliasKeys { + goTypeName := cgoAliases[typeName] + obj := &ast.Object{ + Kind: ast.Typ, + Name: typeName, + } + typeSpec := &ast.TypeSpec{ + Name: &ast.Ident{ + NamePos: token.NoPos, + Name: typeName, + Obj: obj, + }, + Assign: p.generatedPos, + Type: &ast.Ident{ + NamePos: token.NoPos, + Name: goTypeName, + }, + } + obj.Decl = typeSpec + gen.Specs = append(gen.Specs, typeSpec) + } + p.generated.Decls = append(p.generated.Decls, gen) +} + +func (p *cgoPackage) addTypedefs() { + if len(p.typedefs) == 0 { + return + } + gen := &ast.GenDecl{ + TokPos: token.NoPos, + Tok: token.TYPE, + } + names := make([]string, 0, len(p.typedefs)) + for name := range p.typedefs { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + typedef := p.typedefs[name] + typeName := "C." + name + isAlias := true + if strings.HasPrefix(name, "_Cgo_") { + typeName = "C." + name[len("_Cgo_"):] + isAlias = false // C.short etc. should not be aliased to the equivalent Go type (not portable) + } + if _, ok := cgoAliases[typeName]; ok { + // This is a type that also exists in Go (defined in stdint.h). 
+ continue + } + obj := &ast.Object{ + Kind: ast.Typ, + Name: typeName, + } + typeSpec := &ast.TypeSpec{ + Name: &ast.Ident{ + NamePos: typedef.pos, + Name: typeName, + Obj: obj, + }, + Type: typedef.typeExpr, + } + if isAlias { + typeSpec.Assign = typedef.pos + } + obj.Decl = typeSpec + gen.Specs = append(gen.Specs, typeSpec) + } + p.generated.Decls = append(p.generated.Decls, gen) +} + +// addElaboratedTypes adds C elaborated types as aliases. These are the "struct +// foo" or "union foo" types, often used in a typedef. +// +// See also: +// https://en.cppreference.com/w/cpp/language/elaborated_type_specifier +func (p *cgoPackage) addElaboratedTypes() { + if len(p.elaboratedTypes) == 0 { + return + } + gen := &ast.GenDecl{ + TokPos: token.NoPos, + Tok: token.TYPE, + } + names := make([]string, 0, len(p.elaboratedTypes)) + for name := range p.elaboratedTypes { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + typ := p.elaboratedTypes[name] + typeName := "C." + name + obj := &ast.Object{ + Kind: ast.Typ, + Name: typeName, + } + typeSpec := &ast.TypeSpec{ + Name: &ast.Ident{ + NamePos: typ.pos, + Name: typeName, + Obj: obj, + }, + Type: typ.typeExpr, + } + obj.Decl = typeSpec + gen.Specs = append(gen.Specs, typeSpec) + } + p.generated.Decls = append(p.generated.Decls, gen) +} + +// findMissingCGoNames traverses the AST and finds all C.something names. Only +// these symbols are extracted from the parsed C AST and converted to the Go +// equivalent. +func (p *cgoPackage) findMissingCGoNames(cursor *astutil.Cursor) bool { + switch node := cursor.Node().(type) { + case *ast.SelectorExpr: + x, ok := node.X.(*ast.Ident) + if !ok { + return true + } + if x.Name == "C" { + name := node.Sel.Name + if _, ok := builtinAliases[name]; ok { + name = "_Cgo_" + name + } + p.missingSymbols[name] = struct{}{} + } + } + return true +} + +// walker replaces all "C".<something> expressions to literal "C.<something>" +// expressions. 
Such expressions are impossible to write in Go (a dot cannot be +// used in the middle of a name) so in practice all C identifiers live in a +// separate namespace (no _Cgo_ hacks like in gc). +func (p *cgoPackage) walker(cursor *astutil.Cursor) bool { + switch node := cursor.Node().(type) { + case *ast.CallExpr: + fun, ok := node.Fun.(*ast.SelectorExpr) + if !ok { + return true + } + x, ok := fun.X.(*ast.Ident) + if !ok { + return true + } + if _, ok := p.functions[fun.Sel.Name]; ok && x.Name == "C" { + node.Fun = &ast.Ident{ + NamePos: x.NamePos, + Name: "C." + fun.Sel.Name, + } + } + case *ast.SelectorExpr: + x, ok := node.X.(*ast.Ident) + if !ok { + return true + } + if x.Name == "C" { + name := "C." + node.Sel.Name + if _, ok := p.functions[node.Sel.Name]; ok { + name += "$funcaddr" + } + cursor.Replace(&ast.Ident{ + NamePos: x.NamePos, + Name: name, + }) + } + } + return true +} diff --git a/cgo/libclang.go b/cgo/libclang.go new file mode 100644 index 000000000..24dac8c3c --- /dev/null +++ b/cgo/libclang.go @@ -0,0 +1,624 @@ +package cgo + +// This file parses a fragment of C with libclang and stores the result for AST +// modification. It does not touch the AST itself. + +import ( + "fmt" + "go/ast" + "go/scanner" + "go/token" + "path/filepath" + "strconv" + "strings" + "unsafe" +) + +/* +#include <clang-c/Index.h> // if this fails, install libclang-8-dev +#include <stdlib.h> +#include <stdint.h> + +// This struct should be ABI-compatible on all platforms (uintptr_t has the same +// alignment etc. as void*) but does not include void* pointers that are not +// always real pointers. +// The Go garbage collector assumes that all non-nil pointer-typed integers are +// actually pointers. This is not always true, as data[1] often contains 0x1, +// which is clearly not a valid pointer. Usually the GC won't catch this issue, +// but occasionally it will leading to a crash with a vague error message. 
+typedef struct { + enum CXCursorKind kind; + int xdata; + uintptr_t data[3]; +} GoCXCursor; + +// Forwarding functions. They are implemented in libclang_stubs.c and forward to +// the real functions without doing anything else, thus they are entirely +// compatible with the versions without tinygo_ prefix. The only difference is +// the CXCursor type, which has been replaced with GoCXCursor. +GoCXCursor tinygo_clang_getTranslationUnitCursor(CXTranslationUnit tu); +unsigned tinygo_clang_visitChildren(GoCXCursor parent, CXCursorVisitor visitor, CXClientData client_data); +CXString tinygo_clang_getCursorSpelling(GoCXCursor c); +enum CXCursorKind tinygo_clang_getCursorKind(GoCXCursor c); +CXType tinygo_clang_getCursorType(GoCXCursor c); +GoCXCursor tinygo_clang_getTypeDeclaration(CXType t); +CXType tinygo_clang_getTypedefDeclUnderlyingType(GoCXCursor c); +CXType tinygo_clang_getCursorResultType(GoCXCursor c); +int tinygo_clang_Cursor_getNumArguments(GoCXCursor c); +GoCXCursor tinygo_clang_Cursor_getArgument(GoCXCursor c, unsigned i); +CXSourceLocation tinygo_clang_getCursorLocation(GoCXCursor c); +CXSourceRange tinygo_clang_getCursorExtent(GoCXCursor c); +CXTranslationUnit tinygo_clang_Cursor_getTranslationUnit(GoCXCursor c); + +int tinygo_clang_globals_visitor(GoCXCursor c, GoCXCursor parent, CXClientData client_data); +int tinygo_clang_struct_visitor(GoCXCursor c, GoCXCursor parent, CXClientData client_data); +*/ +import "C" + +// storedRefs stores references to types, used for clang_visitChildren. 
+var storedRefs refMap + +var diagnosticSeverity = [...]string{ + C.CXDiagnostic_Ignored: "ignored", + C.CXDiagnostic_Note: "note", + C.CXDiagnostic_Warning: "warning", + C.CXDiagnostic_Error: "error", + C.CXDiagnostic_Fatal: "fatal", +} + +func (p *cgoPackage) parseFragment(fragment string, cflags []string, posFilename string, posLine int) { + index := C.clang_createIndex(0, 0) + defer C.clang_disposeIndex(index) + + // pretend to be a .c file + filenameC := C.CString(posFilename + "!cgo.c") + defer C.free(unsafe.Pointer(filenameC)) + + // fix up error locations + fragment = fmt.Sprintf("# %d %#v\n", posLine+1, posFilename) + fragment + + fragmentC := C.CString(fragment) + defer C.free(unsafe.Pointer(fragmentC)) + + unsavedFile := C.struct_CXUnsavedFile{ + Filename: filenameC, + Length: C.ulong(len(fragment)), + Contents: fragmentC, + } + + // convert Go slice of strings to C array of strings. + cmdargsC := C.malloc(C.size_t(len(cflags)) * C.size_t(unsafe.Sizeof(uintptr(0)))) + defer C.free(cmdargsC) + cmdargs := (*[1 << 16]*C.char)(cmdargsC) + for i, cflag := range cflags { + s := C.CString(cflag) + cmdargs[i] = s + defer C.free(unsafe.Pointer(s)) + } + + var unit C.CXTranslationUnit + errCode := C.clang_parseTranslationUnit2( + index, + filenameC, + (**C.char)(cmdargsC), C.int(len(cflags)), // command line args + &unsavedFile, 1, // unsaved files + C.CXTranslationUnit_DetailedPreprocessingRecord, + &unit) + if errCode != 0 { + panic("loader: failed to parse source with libclang") + } + defer C.clang_disposeTranslationUnit(unit) + + if numDiagnostics := int(C.clang_getNumDiagnostics(unit)); numDiagnostics != 0 { + addDiagnostic := func(diagnostic C.CXDiagnostic) { + spelling := getString(C.clang_getDiagnosticSpelling(diagnostic)) + severity := diagnosticSeverity[C.clang_getDiagnosticSeverity(diagnostic)] + location := C.clang_getDiagnosticLocation(diagnostic) + var libclangFilename C.CXString + var line C.unsigned + var column C.unsigned + 
C.clang_getPresumedLocation(location, &libclangFilename, &line, &column) + filename := getString(libclangFilename) + if filepath.IsAbs(filename) { + // Relative paths for readability, like other Go parser errors. + relpath, err := filepath.Rel(p.dir, filename) + if err == nil { + filename = relpath + } + } + p.errors = append(p.errors, &scanner.Error{ + Pos: token.Position{ + Filename: filename, + Offset: 0, // not provided by clang_getPresumedLocation + Line: int(line), + Column: int(column), + }, + Msg: severity + ": " + spelling, + }) + } + for i := 0; i < numDiagnostics; i++ { + diagnostic := C.clang_getDiagnostic(unit, C.uint(i)) + addDiagnostic(diagnostic) + + // Child diagnostics (like notes on redefinitions). + diagnostics := C.clang_getChildDiagnostics(diagnostic) + for j := 0; j < int(C.clang_getNumDiagnosticsInSet(diagnostics)); j++ { + addDiagnostic(C.clang_getDiagnosticInSet(diagnostics, C.uint(j))) + } + } + return + } + + ref := storedRefs.Put(p) + defer storedRefs.Remove(ref) + cursor := C.tinygo_clang_getTranslationUnitCursor(unit) + C.tinygo_clang_visitChildren(cursor, C.CXCursorVisitor(C.tinygo_clang_globals_visitor), C.CXClientData(ref)) +} + +//export tinygo_clang_globals_visitor +func tinygo_clang_globals_visitor(c, parent C.GoCXCursor, client_data C.CXClientData) C.int { + p := storedRefs.Get(unsafe.Pointer(client_data)).(*cgoPackage) + kind := C.tinygo_clang_getCursorKind(c) + pos := p.getCursorPosition(c) + switch kind { + case C.CXCursor_FunctionDecl: + name := getString(C.tinygo_clang_getCursorSpelling(c)) + if _, required := p.missingSymbols[name]; !required { + return C.CXChildVisit_Continue + } + cursorType := C.tinygo_clang_getCursorType(c) + if C.clang_isFunctionTypeVariadic(cursorType) != 0 { + return C.CXChildVisit_Continue // not supported + } + numArgs := int(C.tinygo_clang_Cursor_getNumArguments(c)) + fn := &functionInfo{ + pos: pos, + } + p.functions[name] = fn + for i := 0; i < numArgs; i++ { + arg := 
C.tinygo_clang_Cursor_getArgument(c, C.uint(i)) + argName := getString(C.tinygo_clang_getCursorSpelling(arg)) + argType := C.clang_getArgType(cursorType, C.uint(i)) + if argName == "" { + argName = "$" + strconv.Itoa(i) + } + fn.args = append(fn.args, paramInfo{ + name: argName, + typeExpr: p.makeASTType(argType, pos), + }) + } + resultType := C.tinygo_clang_getCursorResultType(c) + if resultType.kind != C.CXType_Void { + fn.results = &ast.FieldList{ + List: []*ast.Field{ + &ast.Field{ + Type: p.makeASTType(resultType, pos), + }, + }, + } + } + case C.CXCursor_StructDecl: + typ := C.tinygo_clang_getCursorType(c) + name := getString(C.tinygo_clang_getCursorSpelling(c)) + if _, required := p.missingSymbols["struct_"+name]; !required { + return C.CXChildVisit_Continue + } + p.makeASTType(typ, pos) + case C.CXCursor_TypedefDecl: + typedefType := C.tinygo_clang_getCursorType(c) + name := getString(C.clang_getTypedefName(typedefType)) + if _, required := p.missingSymbols[name]; !required { + return C.CXChildVisit_Continue + } + p.makeASTType(typedefType, pos) + case C.CXCursor_VarDecl: + name := getString(C.tinygo_clang_getCursorSpelling(c)) + if _, required := p.missingSymbols[name]; !required { + return C.CXChildVisit_Continue + } + cursorType := C.tinygo_clang_getCursorType(c) + p.globals[name] = globalInfo{ + typeExpr: p.makeASTType(cursorType, pos), + pos: pos, + } + case C.CXCursor_MacroDefinition: + name := getString(C.tinygo_clang_getCursorSpelling(c)) + if _, required := p.missingSymbols[name]; !required { + return C.CXChildVisit_Continue + } + sourceRange := C.tinygo_clang_getCursorExtent(c) + start := C.clang_getRangeStart(sourceRange) + end := C.clang_getRangeEnd(sourceRange) + var file, endFile C.CXFile + var startOffset, endOffset C.unsigned + C.clang_getExpansionLocation(start, &file, nil, nil, &startOffset) + if file == nil { + panic("could not find file where macro is defined") + } + C.clang_getExpansionLocation(end, &endFile, nil, nil, &endOffset) + if 
file != endFile { + panic("expected start and end location of a #define to be in the same file") + } + if startOffset > endOffset { + panic("startOffset > endOffset") + } + + // read file contents and extract the relevant byte range + tu := C.tinygo_clang_Cursor_getTranslationUnit(c) + var size C.size_t + sourcePtr := C.clang_getFileContents(tu, file, &size) + if endOffset >= C.uint(size) { + panic("endOffset lies after end of file") + } + source := string(((*[1 << 28]byte)(unsafe.Pointer(sourcePtr)))[startOffset:endOffset:endOffset]) + if !strings.HasPrefix(source, name) { + panic(fmt.Sprintf("expected #define value to start with %#v, got %#v", name, source)) + } + value := strings.TrimSpace(source[len(name):]) + for len(value) != 0 && value[0] == '(' && value[len(value)-1] == ')' { + value = strings.TrimSpace(value[1 : len(value)-1]) + } + if len(value) == 0 { + // Pretend it doesn't exist at all. + return C.CXChildVisit_Continue + } + // For information about integer literals: + // https://en.cppreference.com/w/cpp/language/integer_literal + if value[0] == '"' { + // string constant + p.constants[name] = constantInfo{&ast.BasicLit{pos, token.STRING, value}, pos} + return C.CXChildVisit_Continue + } + if value[0] == '\'' { + // char constant + p.constants[name] = constantInfo{&ast.BasicLit{pos, token.CHAR, value}, pos} + return C.CXChildVisit_Continue + } + // assume it's a number (int or float) + value = strings.Replace(value, "'", "", -1) // remove ' chars + value = strings.TrimRight(value, "lu") // remove llu suffixes etc. 
+ // find the first non-number + nonnum := byte(0) + for i := 0; i < len(value); i++ { + if value[i] < '0' || value[i] > '9' { + nonnum = value[i] + break + } + } + // determine number type based on the first non-number + switch nonnum { + case 0: + // no non-number found, must be an integer + p.constants[name] = constantInfo{&ast.BasicLit{pos, token.INT, value}, pos} + case 'x', 'X': + // hex integer constant + // TODO: may also be a floating point number per C++17. + p.constants[name] = constantInfo{&ast.BasicLit{pos, token.INT, value}, pos} + case '.', 'e': + // float constant + value = strings.TrimRight(value, "fFlL") + p.constants[name] = constantInfo{&ast.BasicLit{pos, token.FLOAT, value}, pos} + default: + // unknown type, ignore + } + } + return C.CXChildVisit_Continue +} + +func getString(clangString C.CXString) (s string) { + rawString := C.clang_getCString(clangString) + s = C.GoString(rawString) + C.clang_disposeString(clangString) + return +} + +// getCursorPosition returns a usable token.Pos from a libclang cursor. If the +// file for this cursor has not been seen before, it is read from libclang +// (which already has the file in memory) and added to the token.FileSet. +func (p *cgoPackage) getCursorPosition(cursor C.GoCXCursor) token.Pos { + location := C.tinygo_clang_getCursorLocation(cursor) + var file C.CXFile + var line C.unsigned + var column C.unsigned + var offset C.unsigned + C.clang_getExpansionLocation(location, &file, &line, &column, &offset) + if line == 0 || file == nil { + // Invalid token. + return token.NoPos + } + filename := getString(C.clang_getFileName(file)) + if _, ok := p.tokenFiles[filename]; !ok { + // File has not been seen before in this package, add line information + // now by reading the file from libclang. 
+ tu := C.tinygo_clang_Cursor_getTranslationUnit(cursor) + var size C.size_t + sourcePtr := C.clang_getFileContents(tu, file, &size) + source := ((*[1 << 28]byte)(unsafe.Pointer(sourcePtr)))[:size:size] + lines := []int{0} + for i := 0; i < len(source)-1; i++ { + if source[i] == '\n' { + lines = append(lines, i+1) + } + } + f := p.fset.AddFile(filename, -1, int(size)) + f.SetLines(lines) + p.tokenFiles[filename] = f + } + return p.tokenFiles[filename].Pos(int(offset)) +} + +// makeASTType return the ast.Expr for the given libclang type. In other words, +// it converts a libclang type to a type in the Go AST. +func (p *cgoPackage) makeASTType(typ C.CXType, pos token.Pos) ast.Expr { + var typeName string + switch typ.kind { + case C.CXType_Char_S, C.CXType_Char_U: + typeName = "C.char" + case C.CXType_SChar: + typeName = "C.schar" + case C.CXType_UChar: + typeName = "C.uchar" + case C.CXType_Short: + typeName = "C.short" + case C.CXType_UShort: + typeName = "C.ushort" + case C.CXType_Int: + typeName = "C.int" + case C.CXType_UInt: + typeName = "C.uint" + case C.CXType_Long: + typeName = "C.long" + case C.CXType_ULong: + typeName = "C.ulong" + case C.CXType_LongLong: + typeName = "C.longlong" + case C.CXType_ULongLong: + typeName = "C.ulonglong" + case C.CXType_Bool: + typeName = "bool" + case C.CXType_Float, C.CXType_Double, C.CXType_LongDouble: + switch C.clang_Type_getSizeOf(typ) { + case 4: + typeName = "float32" + case 8: + typeName = "float64" + default: + // Don't do anything, rely on the fallback code to show a somewhat + // sensible error message like "undeclared name: C.long double". 
+ } + case C.CXType_Complex: + switch C.clang_Type_getSizeOf(typ) { + case 8: + typeName = "complex64" + case 16: + typeName = "complex128" + } + case C.CXType_Pointer: + pointeeType := C.clang_getPointeeType(typ) + if pointeeType.kind == C.CXType_Void { + // void* type is translated to Go as unsafe.Pointer + return &ast.SelectorExpr{ + X: &ast.Ident{ + NamePos: pos, + Name: "unsafe", + }, + Sel: &ast.Ident{ + NamePos: pos, + Name: "Pointer", + }, + } + } + return &ast.StarExpr{ + Star: pos, + X: p.makeASTType(pointeeType, pos), + } + case C.CXType_ConstantArray: + return &ast.ArrayType{ + Lbrack: pos, + Len: &ast.BasicLit{ + ValuePos: pos, + Kind: token.INT, + Value: strconv.FormatInt(int64(C.clang_getArraySize(typ)), 10), + }, + Elt: p.makeASTType(C.clang_getElementType(typ), pos), + } + case C.CXType_FunctionProto: + // Be compatible with gc, which uses the *[0]byte type for function + // pointer types. + // Return type [0]byte because this is a function type, not a pointer to + // this function type. + return &ast.ArrayType{ + Lbrack: pos, + Len: &ast.BasicLit{ + ValuePos: pos, + Kind: token.INT, + Value: "0", + }, + Elt: &ast.Ident{ + NamePos: pos, + Name: "byte", + }, + } + case C.CXType_Typedef: + name := getString(C.clang_getTypedefName(typ)) + if _, ok := p.typedefs[name]; !ok { + p.typedefs[name] = nil // don't recurse + c := C.tinygo_clang_getTypeDeclaration(typ) + underlyingType := C.tinygo_clang_getTypedefDeclUnderlyingType(c) + expr := p.makeASTType(underlyingType, pos) + if strings.HasPrefix(name, "_Cgo_") { + expr := expr.(*ast.Ident) + typeSize := C.clang_Type_getSizeOf(underlyingType) + switch expr.Name { + case "C.char": + if typeSize != 1 { + // This happens for some very special purpose architectures + // (DSPs etc.) that are not currently targeted. 
+ // https://www.embecosm.com/2017/04/18/non-8-bit-char-support-in-clang-and-llvm/ + panic("unknown char width") + } + switch underlyingType.kind { + case C.CXType_Char_S: + expr.Name = "int8" + case C.CXType_Char_U: + expr.Name = "uint8" + } + case "C.schar", "C.short", "C.int", "C.long", "C.longlong": + switch typeSize { + case 1: + expr.Name = "int8" + case 2: + expr.Name = "int16" + case 4: + expr.Name = "int32" + case 8: + expr.Name = "int64" + } + case "C.uchar", "C.ushort", "C.uint", "C.ulong", "C.ulonglong": + switch typeSize { + case 1: + expr.Name = "uint8" + case 2: + expr.Name = "uint16" + case 4: + expr.Name = "uint32" + case 8: + expr.Name = "uint64" + } + } + } + p.typedefs[name] = &typedefInfo{ + typeExpr: expr, + pos: pos, + } + } + return &ast.Ident{ + NamePos: pos, + Name: "C." + name, + } + case C.CXType_Elaborated: + underlying := C.clang_Type_getNamedType(typ) + switch underlying.kind { + case C.CXType_Record: + return p.makeASTType(underlying, pos) + default: + panic("unknown elaborated type") + } + case C.CXType_Record: + cursor := C.tinygo_clang_getTypeDeclaration(typ) + name := getString(C.tinygo_clang_getCursorSpelling(cursor)) + var cgoName string + switch C.tinygo_clang_getCursorKind(cursor) { + case C.CXCursor_StructDecl: + cgoName = "struct_" + name + case C.CXCursor_UnionDecl: + cgoName = "union_" + name + default: + panic("unknown record declaration") + } + if _, ok := p.elaboratedTypes[cgoName]; !ok { + p.elaboratedTypes[cgoName] = nil // predeclare (to avoid endless recursion) + fieldList := &ast.FieldList{ + Opening: pos, + Closing: pos, + } + ref := storedRefs.Put(struct { + fieldList *ast.FieldList + pkg *cgoPackage + }{fieldList, p}) + defer storedRefs.Remove(ref) + C.tinygo_clang_visitChildren(cursor, C.CXCursorVisitor(C.tinygo_clang_struct_visitor), C.CXClientData(ref)) + switch C.tinygo_clang_getCursorKind(cursor) { + case C.CXCursor_StructDecl: + p.elaboratedTypes[cgoName] = &elaboratedTypeInfo{ + typeExpr: 
&ast.StructType{ + Struct: pos, + Fields: fieldList, + }, + pos: pos, + } + case C.CXCursor_UnionDecl: + if len(fieldList.List) > 1 { + // Insert a special field at the front (of zero width) as a + // marker that this is struct is actually a union. This is done + // by giving the field a name that cannot be expressed directly + // in Go. + // Other parts of the compiler look at the first element in a + // struct (of size > 2) to know whether this is a union. + // Note that we don't have to insert it for single-element + // unions as they're basically equivalent to a struct. + unionMarker := &ast.Field{ + Type: &ast.StructType{ + Struct: pos, + }, + } + unionMarker.Names = []*ast.Ident{ + &ast.Ident{ + NamePos: pos, + Name: "C union", + Obj: &ast.Object{ + Kind: ast.Var, + Name: "C union", + Decl: unionMarker, + }, + }, + } + fieldList.List = append([]*ast.Field{unionMarker}, fieldList.List...) + } + p.elaboratedTypes[cgoName] = &elaboratedTypeInfo{ + typeExpr: &ast.StructType{ + Struct: pos, + Fields: fieldList, + }, + pos: pos, + } + default: + panic("unreachable") + } + } + return &ast.Ident{ + NamePos: pos, + Name: "C." + cgoName, + } + } + if typeName == "" { + // Fallback, probably incorrect but at least the error points to an odd + // type name. + typeName = "C." 
+ getString(C.clang_getTypeSpelling(typ)) + } + return &ast.Ident{ + NamePos: pos, + Name: typeName, + } +} + +//export tinygo_clang_struct_visitor +func tinygo_clang_struct_visitor(c, parent C.GoCXCursor, client_data C.CXClientData) C.int { + passed := storedRefs.Get(unsafe.Pointer(client_data)).(struct { + fieldList *ast.FieldList + pkg *cgoPackage + }) + fieldList := passed.fieldList + p := passed.pkg + if C.tinygo_clang_getCursorKind(c) != C.CXCursor_FieldDecl { + panic("expected field inside cursor") + } + name := getString(C.tinygo_clang_getCursorSpelling(c)) + typ := C.tinygo_clang_getCursorType(c) + field := &ast.Field{ + Type: p.makeASTType(typ, p.getCursorPosition(c)), + } + field.Names = []*ast.Ident{ + &ast.Ident{ + NamePos: p.getCursorPosition(c), + Name: name, + Obj: &ast.Object{ + Kind: ast.Var, + Name: name, + Decl: field, + }, + }, + } + fieldList.List = append(fieldList.List, field) + return C.CXChildVisit_Continue +} diff --git a/cgo/libclang_config.go b/cgo/libclang_config.go new file mode 100644 index 000000000..96b4f4bcf --- /dev/null +++ b/cgo/libclang_config.go @@ -0,0 +1,11 @@ +// +build !byollvm + +package cgo + +/* +#cgo linux CFLAGS: -I/usr/lib/llvm-8/include +#cgo darwin CFLAGS: -I/usr/local/opt/llvm/include +#cgo linux LDFLAGS: -L/usr/lib/llvm-8/lib -lclang +#cgo darwin LDFLAGS: -L/usr/local/opt/llvm/lib -lclang -lffi +*/ +import "C" diff --git a/cgo/libclang_stubs.c b/cgo/libclang_stubs.c new file mode 100644 index 000000000..c282540a4 --- /dev/null +++ b/cgo/libclang_stubs.c @@ -0,0 +1,58 @@ + +// This file implements some small trampoline functions. The signatures +// are slightly different from the ones defined in libclang.go, but they +// should be ABI compatible. 
+ +#include <clang-c/Index.h> // if this fails, install libclang-8-dev + +CXCursor tinygo_clang_getTranslationUnitCursor(CXTranslationUnit tu) { + return clang_getTranslationUnitCursor(tu); +} + +unsigned tinygo_clang_visitChildren(CXCursor parent, CXCursorVisitor visitor, CXClientData client_data) { + return clang_visitChildren(parent, visitor, client_data); +} + +CXString tinygo_clang_getCursorSpelling(CXCursor c) { + return clang_getCursorSpelling(c); +} + +enum CXCursorKind tinygo_clang_getCursorKind(CXCursor c) { + return clang_getCursorKind(c); +} + +CXType tinygo_clang_getCursorType(CXCursor c) { + return clang_getCursorType(c); +} + +CXCursor tinygo_clang_getTypeDeclaration(CXType t) { + return clang_getTypeDeclaration(t); +} + +CXType tinygo_clang_getTypedefDeclUnderlyingType(CXCursor c) { + return clang_getTypedefDeclUnderlyingType(c); +} + +CXType tinygo_clang_getCursorResultType(CXCursor c) { + return clang_getCursorResultType(c); +} + +int tinygo_clang_Cursor_getNumArguments(CXCursor c) { + return clang_Cursor_getNumArguments(c); +} + +CXCursor tinygo_clang_Cursor_getArgument(CXCursor c, unsigned i) { + return clang_Cursor_getArgument(c, i); +} + +CXSourceLocation tinygo_clang_getCursorLocation(CXCursor c) { + return clang_getCursorLocation(c); +} + +CXSourceRange tinygo_clang_getCursorExtent(CXCursor c) { + return clang_getCursorExtent(c); +} + +CXTranslationUnit tinygo_clang_Cursor_getTranslationUnit(CXCursor c) { + return clang_Cursor_getTranslationUnit(c); +} diff --git a/cgo/sync.go b/cgo/sync.go new file mode 100644 index 000000000..64eab30be --- /dev/null +++ b/cgo/sync.go @@ -0,0 +1,46 @@ +package cgo + +import ( + "sync" + "unsafe" +) + +// #include <stdlib.h> +import "C" + +// refMap is a convenient way to store opaque references that can be passed to +// C. It is useful if an API uses function pointers and you cannot pass a Go +// pointer but only a C pointer. 
+type refMap struct { + refs map[unsafe.Pointer]interface{} + lock sync.Mutex +} + +// Put stores a value in the map. It can later be retrieved using Get. It must +// be removed using Remove to avoid memory leaks. +func (m *refMap) Put(v interface{}) unsafe.Pointer { + m.lock.Lock() + defer m.lock.Unlock() + if m.refs == nil { + m.refs = make(map[unsafe.Pointer]interface{}, 1) + } + ref := C.malloc(1) + m.refs[ref] = v + return ref +} + +// Get returns a stored value previously inserted with Put. Use the same +// reference as you got from Put. +func (m *refMap) Get(ref unsafe.Pointer) interface{} { + m.lock.Lock() + defer m.lock.Unlock() + return m.refs[ref] +} + +// Remove deletes a single reference from the map. +func (m *refMap) Remove(ref unsafe.Pointer) { + m.lock.Lock() + defer m.lock.Unlock() + delete(m.refs, ref) + C.free(ref) +} |