path: root/cgo/libclang.go
diff options
Diffstat (limited to 'cgo/libclang.go')
1 files changed, 624 insertions, 0 deletions
diff --git a/cgo/libclang.go b/cgo/libclang.go
new file mode 100644
index 000000000..24dac8c3c
--- /dev/null
+++ b/cgo/libclang.go
@@ -0,0 +1,624 @@
+package cgo
+// This file parses a fragment of C with libclang and stores the result for AST
+// modification. It does not touch the AST itself.
+import (
+ "fmt"
+ "go/ast"
+ "go/scanner"
+ "go/token"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "unsafe"
+#include <clang-c/Index.h> // if this fails, install libclang-8-dev
+#include <stdlib.h>
+#include <stdint.h>
+// This struct should be ABI-compatible on all platforms (uintptr_t has the same
+// alignment etc. as void*) but does not include void* pointers that are not
+// always real pointers.
+// The Go garbage collector assumes that all non-nil pointer-typed integers are
+// actually pointers. This is not always true, as data[1] often contains 0x1,
+// which is clearly not a valid pointer. Usually the GC won't catch this issue,
+// but occasionally it will, leading to a crash with a vague error message.
+typedef struct {
+ enum CXCursorKind kind;
+ int xdata;
+ uintptr_t data[3];
+} GoCXCursor;
+// Forwarding functions. They are implemented in libclang_stubs.c and forward to
+// the real functions without doing anything else, thus they are entirely
+// compatible with the versions without tinygo_ prefix. The only difference is
+// the CXCursor type, which has been replaced with GoCXCursor.
+GoCXCursor tinygo_clang_getTranslationUnitCursor(CXTranslationUnit tu);
+unsigned tinygo_clang_visitChildren(GoCXCursor parent, CXCursorVisitor visitor, CXClientData client_data);
+CXString tinygo_clang_getCursorSpelling(GoCXCursor c);
+enum CXCursorKind tinygo_clang_getCursorKind(GoCXCursor c);
+CXType tinygo_clang_getCursorType(GoCXCursor c);
+GoCXCursor tinygo_clang_getTypeDeclaration(CXType t);
+CXType tinygo_clang_getTypedefDeclUnderlyingType(GoCXCursor c);
+CXType tinygo_clang_getCursorResultType(GoCXCursor c);
+int tinygo_clang_Cursor_getNumArguments(GoCXCursor c);
+GoCXCursor tinygo_clang_Cursor_getArgument(GoCXCursor c, unsigned i);
+CXSourceLocation tinygo_clang_getCursorLocation(GoCXCursor c);
+CXSourceRange tinygo_clang_getCursorExtent(GoCXCursor c);
+CXTranslationUnit tinygo_clang_Cursor_getTranslationUnit(GoCXCursor c);
+int tinygo_clang_globals_visitor(GoCXCursor c, GoCXCursor parent, CXClientData client_data);
+int tinygo_clang_struct_visitor(GoCXCursor c, GoCXCursor parent, CXClientData client_data);
+import "C"
+var (
+	// storedRefs hands out opaque references to Go values, so that they can
+	// be passed as client data through clang_visitChildren and be looked up
+	// again inside the visitor callbacks.
+	storedRefs refMap
+
+	// diagnosticSeverity holds the label that is put in front of an error
+	// message, indexed by the libclang diagnostic severity constant.
+	diagnosticSeverity = [...]string{
+		C.CXDiagnostic_Ignored: "ignored",
+		C.CXDiagnostic_Note:    "note",
+		C.CXDiagnostic_Warning: "warning",
+		C.CXDiagnostic_Error:   "error",
+		C.CXDiagnostic_Fatal:   "fatal",
+	}
+)
+// parseFragment parses a fragment of C code with libclang and stores what it
+// finds in p.
+//
+// cflags are handed to libclang as command line arguments. posFilename and
+// posLine are written into a line marker in front of the fragment, so that
+// the locations libclang reports refer to that file and line.
+//
+// If libclang emits any diagnostics, they are appended to p.errors (each
+// message prefixed with its severity) and no declarations are processed.
+// Otherwise every child of the translation unit cursor is passed to
+// tinygo_clang_globals_visitor. The function panics when
+// clang_parseTranslationUnit2 reports a failure.
+func (p *cgoPackage) parseFragment(fragment string, cflags []string, posFilename string, posLine int) {
+	index := C.clang_createIndex(0, 0)
+	defer C.clang_disposeIndex(index)
+
+	// pretend to be a .c file
+	filenameC := C.CString(posFilename + "!cgo.c")
+	defer C.free(unsafe.Pointer(filenameC))
+
+	// fix up error locations
+	fragment = fmt.Sprintf("# %d %#v\n", posLine+1, posFilename) + fragment
+
+	fragmentC := C.CString(fragment)
+	defer C.free(unsafe.Pointer(fragmentC))
+
+	unsavedFile := C.struct_CXUnsavedFile{
+		Filename: filenameC,
+		Length:   C.ulong(len(fragment)),
+		Contents: fragmentC,
+	}
+
+	// convert Go slice of strings to C array of strings.
+	cmdargsC := C.malloc(C.size_t(len(cflags)) * C.size_t(unsafe.Sizeof(uintptr(0))))
+	defer C.free(cmdargsC)
+	cmdargs := (*[1 << 16]*C.char)(cmdargsC)
+	for i, cflag := range cflags {
+		s := C.CString(cflag)
+		cmdargs[i] = s
+		// Deferred on purpose: the strings must stay alive until libclang is
+		// done with the argument array, which is at function exit.
+		defer C.free(unsafe.Pointer(s))
+	}
+
+	var unit C.CXTranslationUnit
+	errCode := C.clang_parseTranslationUnit2(
+		index,
+		filenameC,
+		(**C.char)(cmdargsC), C.int(len(cflags)), // command line args
+		&unsavedFile, 1, // unsaved files
+		C.CXTranslationUnit_DetailedPreprocessingRecord,
+		&unit)
+	if errCode != 0 {
+		panic("loader: failed to parse source with libclang")
+	}
+	defer C.clang_disposeTranslationUnit(unit)
+
+	if numDiagnostics := int(C.clang_getNumDiagnostics(unit)); numDiagnostics != 0 {
+		// addDiagnostic converts one libclang diagnostic to a scanner.Error
+		// and appends it to p.errors.
+		addDiagnostic := func(diagnostic C.CXDiagnostic) {
+			spelling := getString(C.clang_getDiagnosticSpelling(diagnostic))
+			severity := diagnosticSeverity[C.clang_getDiagnosticSeverity(diagnostic)]
+			location := C.clang_getDiagnosticLocation(diagnostic)
+			var libclangFilename C.CXString
+			var line C.unsigned
+			var column C.unsigned
+			C.clang_getPresumedLocation(location, &libclangFilename, &line, &column)
+			filename := getString(libclangFilename)
+			if filepath.IsAbs(filename) {
+				// Relative paths for readability, like other Go parser errors.
+				relpath, err := filepath.Rel(p.dir, filename)
+				if err == nil {
+					filename = relpath
+				}
+			}
+			p.errors = append(p.errors, &scanner.Error{
+				Pos: token.Position{
+					Filename: filename,
+					Offset:   0, // not provided by clang_getPresumedLocation
+					Line:     int(line),
+					Column:   int(column),
+				},
+				Msg: severity + ": " + spelling,
+			})
+		}
+		for i := 0; i < numDiagnostics; i++ {
+			diagnostic := C.clang_getDiagnostic(unit, C.uint(i))
+			addDiagnostic(diagnostic)
+			// Child diagnostics (like notes on redefinitions). The set
+			// itself is owned by the parent diagnostic.
+			children := C.clang_getChildDiagnostics(diagnostic)
+			numChildren := int(C.clang_getNumDiagnosticsInSet(children))
+			for j := 0; j < numChildren; j++ {
+				child := C.clang_getDiagnosticInSet(children, C.uint(j))
+				addDiagnostic(child)
+				C.clang_disposeDiagnostic(child)
+			}
+			// Release the diagnostic as the libclang API asks for, after
+			// its children have been read.
+			C.clang_disposeDiagnostic(diagnostic)
+		}
+		return
+	}
+
+	// Make p reachable from the visitor callback through an opaque reference.
+	ref := storedRefs.Put(p)
+	defer storedRefs.Remove(ref)
+
+	cursor := C.tinygo_clang_getTranslationUnitCursor(unit)
+	C.tinygo_clang_visitChildren(cursor, C.CXCursorVisitor(C.tinygo_clang_globals_visitor), C.CXClientData(ref))
+}
+// tinygo_clang_globals_visitor is the clang_visitChildren callback that
+// parseFragment installs on the translation unit cursor. client_data is a
+// storedRefs reference to the *cgoPackage being filled in.
+//
+// Only declarations that are listed in p.missingSymbols are processed:
+// functions are stored in p.functions, global variables in p.globals, simple
+// #define constants (string, char, integer and float literals) in
+// p.constants, and structs and typedefs are registered through makeASTType.
+// Variadic functions and macros that are not a single literal are skipped.
+// The visitor always returns CXChildVisit_Continue.
+//
+//export tinygo_clang_globals_visitor
+func tinygo_clang_globals_visitor(c, parent C.GoCXCursor, client_data C.CXClientData) C.int {
+	p := storedRefs.Get(unsafe.Pointer(client_data)).(*cgoPackage)
+	kind := C.tinygo_clang_getCursorKind(c)
+	pos := p.getCursorPosition(c)
+	switch kind {
+	case C.CXCursor_FunctionDecl:
+		name := getString(C.tinygo_clang_getCursorSpelling(c))
+		if _, required := p.missingSymbols[name]; !required {
+			return C.CXChildVisit_Continue
+		}
+		cursorType := C.tinygo_clang_getCursorType(c)
+		if C.clang_isFunctionTypeVariadic(cursorType) != 0 {
+			return C.CXChildVisit_Continue // not supported
+		}
+		numArgs := int(C.tinygo_clang_Cursor_getNumArguments(c))
+		fn := &functionInfo{
+			pos: pos,
+		}
+		p.functions[name] = fn
+		for i := 0; i < numArgs; i++ {
+			arg := C.tinygo_clang_Cursor_getArgument(c, C.uint(i))
+			argName := getString(C.tinygo_clang_getCursorSpelling(arg))
+			argType := C.clang_getArgType(cursorType, C.uint(i))
+			if argName == "" {
+				// Unnamed parameter: make up a name from its index.
+				argName = "$" + strconv.Itoa(i)
+			}
+			fn.args = append(fn.args, paramInfo{
+				name:     argName,
+				typeExpr: p.makeASTType(argType, pos),
+			})
+		}
+		resultType := C.tinygo_clang_getCursorResultType(c)
+		if resultType.kind != C.CXType_Void {
+			fn.results = &ast.FieldList{
+				List: []*ast.Field{
+					&ast.Field{
+						Type: p.makeASTType(resultType, pos),
+					},
+				},
+			}
+		}
+	case C.CXCursor_StructDecl:
+		typ := C.tinygo_clang_getCursorType(c)
+		name := getString(C.tinygo_clang_getCursorSpelling(c))
+		if _, required := p.missingSymbols["struct_"+name]; !required {
+			return C.CXChildVisit_Continue
+		}
+		// Called for its side effect of registering the struct type.
+		p.makeASTType(typ, pos)
+	case C.CXCursor_TypedefDecl:
+		typedefType := C.tinygo_clang_getCursorType(c)
+		name := getString(C.clang_getTypedefName(typedefType))
+		if _, required := p.missingSymbols[name]; !required {
+			return C.CXChildVisit_Continue
+		}
+		// Called for its side effect of registering the typedef.
+		p.makeASTType(typedefType, pos)
+	case C.CXCursor_VarDecl:
+		name := getString(C.tinygo_clang_getCursorSpelling(c))
+		if _, required := p.missingSymbols[name]; !required {
+			return C.CXChildVisit_Continue
+		}
+		cursorType := C.tinygo_clang_getCursorType(c)
+		p.globals[name] = globalInfo{
+			typeExpr: p.makeASTType(cursorType, pos),
+			pos:      pos,
+		}
+	case C.CXCursor_MacroDefinition:
+		name := getString(C.tinygo_clang_getCursorSpelling(c))
+		if _, required := p.missingSymbols[name]; !required {
+			return C.CXChildVisit_Continue
+		}
+		sourceRange := C.tinygo_clang_getCursorExtent(c)
+		start := C.clang_getRangeStart(sourceRange)
+		end := C.clang_getRangeEnd(sourceRange)
+		var file, endFile C.CXFile
+		var startOffset, endOffset C.unsigned
+		C.clang_getExpansionLocation(start, &file, nil, nil, &startOffset)
+		if file == nil {
+			panic("could not find file where macro is defined")
+		}
+		C.clang_getExpansionLocation(end, &endFile, nil, nil, &endOffset)
+		if file != endFile {
+			panic("expected start and end location of a #define to be in the same file")
+		}
+		if startOffset > endOffset {
+			panic("startOffset > endOffset")
+		}
+
+		// read file contents and extract the relevant byte range
+		tu := C.tinygo_clang_Cursor_getTranslationUnit(c)
+		var size C.size_t
+		sourcePtr := C.clang_getFileContents(tu, file, &size)
+		// endOffset is only used as the exclusive upper bound of the slice
+		// below, so a macro that ends exactly at the end of the file
+		// (endOffset == size) is still in range.
+		if endOffset > C.uint(size) {
+			panic("endOffset lies after end of file")
+		}
+		source := string(((*[1 << 28]byte)(unsafe.Pointer(sourcePtr)))[startOffset:endOffset:endOffset])
+		if !strings.HasPrefix(source, name) {
+			panic(fmt.Sprintf("expected #define value to start with %#v, got %#v", name, source))
+		}
+		value := strings.TrimSpace(source[len(name):])
+		// Strip parentheses that wrap the whole value.
+		for len(value) != 0 && value[0] == '(' && value[len(value)-1] == ')' {
+			value = strings.TrimSpace(value[1 : len(value)-1])
+		}
+		if len(value) == 0 {
+			// Pretend it doesn't exist at all.
+			return C.CXChildVisit_Continue
+		}
+
+		// setConstant records the macro as a basic literal of the given kind.
+		setConstant := func(tok token.Token, literal string) {
+			p.constants[name] = constantInfo{&ast.BasicLit{ValuePos: pos, Kind: tok, Value: literal}, pos}
+		}
+
+		// For information about integer literals:
+		// https://en.cppreference.com/w/cpp/language/integer_literal
+		if value[0] == '"' {
+			// string constant
+			setConstant(token.STRING, value)
+			return C.CXChildVisit_Continue
+		}
+		if value[0] == '\'' {
+			// char constant
+			setConstant(token.CHAR, value)
+			return C.CXChildVisit_Continue
+		}
+
+		// assume it's a number (int or float)
+		value = strings.Replace(value, "'", "", -1) // remove ' chars
+		value = strings.TrimRight(value, "lLuU")    // remove llu, UL suffixes etc.
+		if len(value) == 0 || (value[0] != '.' && (value[0] < '0' || value[0] > '9')) {
+			// A number starts with a digit or a dot. Anything else (such as
+			// an identifier) is not a literal that can be converted: ignore.
+			return C.CXChildVisit_Continue
+		}
+		// find the first non-number
+		nonnum := byte(0)
+		for i := 0; i < len(value); i++ {
+			if value[i] < '0' || value[i] > '9' {
+				nonnum = value[i]
+				break
+			}
+		}
+		// determine number type based on the first non-number
+		switch nonnum {
+		case 0:
+			// no non-number found, must be an integer
+			setConstant(token.INT, value)
+		case 'x', 'X':
+			// hex integer constant
+			// TODO: may also be a floating point number per C++17.
+			setConstant(token.INT, value)
+		case '.', 'e', 'E':
+			// float constant
+			value = strings.TrimRight(value, "fFlL")
+			setConstant(token.FLOAT, value)
+		default:
+			// unknown type, ignore
+		}
+	}
+	return C.CXChildVisit_Continue
+}
+// getString copies the contents of a libclang string into a Go string. The
+// CXString is disposed before returning, so the caller must not use it again.
+func getString(clangString C.CXString) (s string) {
+	defer C.clang_disposeString(clangString)
+	return C.GoString(C.clang_getCString(clangString))
+}
+// getCursorPosition converts the location of a libclang cursor to a
+// token.Pos. The first time a file is encountered in this package, its
+// contents are obtained from libclang to compute the line table, and the file
+// is registered in p.fset and cached in p.tokenFiles. token.NoPos is returned
+// when the cursor has no file or line.
+func (p *cgoPackage) getCursorPosition(cursor C.GoCXCursor) token.Pos {
+	var (
+		file   C.CXFile
+		line   C.unsigned
+		column C.unsigned
+		offset C.unsigned
+	)
+	location := C.tinygo_clang_getCursorLocation(cursor)
+	C.clang_getExpansionLocation(location, &file, &line, &column, &offset)
+	if file == nil || line == 0 {
+		// Invalid token.
+		return token.NoPos
+	}
+
+	filename := getString(C.clang_getFileName(file))
+	tokenFile, seen := p.tokenFiles[filename]
+	if !seen {
+		// First cursor from this file in this package: build the line table
+		// from the copy of the file that libclang holds.
+		tu := C.tinygo_clang_Cursor_getTranslationUnit(cursor)
+		var size C.size_t
+		contents := C.clang_getFileContents(tu, file, &size)
+		source := ((*[1 << 28]byte)(unsafe.Pointer(contents)))[:size:size]
+
+		// Offsets at which a line starts. A newline in the very last byte
+		// does not start another line.
+		lines := []int{0}
+		for i, b := range source {
+			if b == '\n' && i != len(source)-1 {
+				lines = append(lines, i+1)
+			}
+		}
+
+		tokenFile = p.fset.AddFile(filename, -1, int(size))
+		tokenFile.SetLines(lines)
+		p.tokenFiles[filename] = tokenFile
+	}
+	return tokenFile.Pos(int(offset))
+}
+// makeASTType returns the Go AST expression that corresponds to the given
+// libclang type. pos is used as the position of every node that is created.
+//
+// Typedefs, structs and unions that have not been seen before are recorded in
+// p.typedefs and p.elaboratedTypes respectively, and are referred to by name
+// (C.<name>, C.struct_<name>, C.union_<name>). Types that are not recognized
+// result in an identifier built from the libclang spelling of the type.
+func (p *cgoPackage) makeASTType(typ C.CXType, pos token.Pos) ast.Expr {
+	// Small constructors for the nodes that are needed over and over again.
+	newIdent := func(name string) *ast.Ident {
+		return &ast.Ident{NamePos: pos, Name: name}
+	}
+	intLit := func(value string) *ast.BasicLit {
+		return &ast.BasicLit{ValuePos: pos, Kind: token.INT, Value: value}
+	}
+
+	switch typ.kind {
+	case C.CXType_Char_S, C.CXType_Char_U:
+		return newIdent("C.char")
+	case C.CXType_SChar:
+		return newIdent("C.schar")
+	case C.CXType_UChar:
+		return newIdent("C.uchar")
+	case C.CXType_Short:
+		return newIdent("C.short")
+	case C.CXType_UShort:
+		return newIdent("C.ushort")
+	case C.CXType_Int:
+		return newIdent("C.int")
+	case C.CXType_UInt:
+		return newIdent("C.uint")
+	case C.CXType_Long:
+		return newIdent("C.long")
+	case C.CXType_ULong:
+		return newIdent("C.ulong")
+	case C.CXType_LongLong:
+		return newIdent("C.longlong")
+	case C.CXType_ULongLong:
+		return newIdent("C.ulonglong")
+	case C.CXType_Bool:
+		return newIdent("bool")
+	case C.CXType_Float, C.CXType_Double, C.CXType_LongDouble:
+		switch C.clang_Type_getSizeOf(typ) {
+		case 4:
+			return newIdent("float32")
+		case 8:
+			return newIdent("float64")
+		}
+		// Any other size ends up in the fallback code below, which shows a
+		// somewhat sensible error message like
+		// "undeclared name: C.long double".
+	case C.CXType_Complex:
+		switch C.clang_Type_getSizeOf(typ) {
+		case 8:
+			return newIdent("complex64")
+		case 16:
+			return newIdent("complex128")
+		}
+	case C.CXType_Pointer:
+		pointee := C.clang_getPointeeType(typ)
+		if pointee.kind == C.CXType_Void {
+			// void* type is translated to Go as unsafe.Pointer
+			return &ast.SelectorExpr{
+				X:   newIdent("unsafe"),
+				Sel: newIdent("Pointer"),
+			}
+		}
+		return &ast.StarExpr{
+			Star: pos,
+			X:    p.makeASTType(pointee, pos),
+		}
+	case C.CXType_ConstantArray:
+		return &ast.ArrayType{
+			Lbrack: pos,
+			Len:    intLit(strconv.FormatInt(int64(C.clang_getArraySize(typ)), 10)),
+			Elt:    p.makeASTType(C.clang_getElementType(typ), pos),
+		}
+	case C.CXType_FunctionProto:
+		// Be compatible with gc, which uses the *[0]byte type for function
+		// pointer types.
+		// Return type [0]byte because this is a function type, not a pointer
+		// to this function type.
+		return &ast.ArrayType{
+			Lbrack: pos,
+			Len:    intLit("0"),
+			Elt:    newIdent("byte"),
+		}
+	case C.CXType_Typedef:
+		name := getString(C.clang_getTypedefName(typ))
+		if _, known := p.typedefs[name]; !known {
+			p.typedefs[name] = nil // don't recurse
+			decl := C.tinygo_clang_getTypeDeclaration(typ)
+			underlying := C.tinygo_clang_getTypedefDeclUnderlyingType(decl)
+			expr := p.makeASTType(underlying, pos)
+			if strings.HasPrefix(name, "_Cgo_") {
+				// These typedefs are expected to resolve to a plain C
+				// integer type: replace the C name with the Go type that
+				// has the same size and signedness.
+				builtin := expr.(*ast.Ident)
+				size := C.clang_Type_getSizeOf(underlying)
+				switch builtin.Name {
+				case "C.char":
+					if size != 1 {
+						// This happens for some very special purpose
+						// architectures (DSPs etc.) that are not currently
+						// targeted.
+						// https://www.embecosm.com/2017/04/18/non-8-bit-char-support-in-clang-and-llvm/
+						panic("unknown char width")
+					}
+					switch underlying.kind {
+					case C.CXType_Char_S:
+						builtin.Name = "int8"
+					case C.CXType_Char_U:
+						builtin.Name = "uint8"
+					}
+				case "C.schar", "C.short", "C.int", "C.long", "C.longlong":
+					switch size {
+					case 1, 2, 4, 8:
+						builtin.Name = "int" + strconv.Itoa(8*int(size))
+					}
+				case "C.uchar", "C.ushort", "C.uint", "C.ulong", "C.ulonglong":
+					switch size {
+					case 1, 2, 4, 8:
+						builtin.Name = "uint" + strconv.Itoa(8*int(size))
+					}
+				}
+			}
+			p.typedefs[name] = &typedefInfo{
+				typeExpr: expr,
+				pos:      pos,
+			}
+		}
+		return newIdent("C." + name)
+	case C.CXType_Elaborated:
+		named := C.clang_Type_getNamedType(typ)
+		if named.kind != C.CXType_Record {
+			panic("unknown elaborated type")
+		}
+		return p.makeASTType(named, pos)
+	case C.CXType_Record:
+		decl := C.tinygo_clang_getTypeDeclaration(typ)
+		name := getString(C.tinygo_clang_getCursorSpelling(decl))
+		var cgoName string
+		isUnion := false
+		switch C.tinygo_clang_getCursorKind(decl) {
+		case C.CXCursor_StructDecl:
+			cgoName = "struct_" + name
+		case C.CXCursor_UnionDecl:
+			cgoName = "union_" + name
+			isUnion = true
+		default:
+			panic("unknown record declaration")
+		}
+		if _, known := p.elaboratedTypes[cgoName]; !known {
+			p.elaboratedTypes[cgoName] = nil // predeclare (to avoid endless recursion)
+			fieldList := &ast.FieldList{
+				Opening: pos,
+				Closing: pos,
+			}
+			// Let the visitor collect the fields into fieldList.
+			ref := storedRefs.Put(struct {
+				fieldList *ast.FieldList
+				pkg       *cgoPackage
+			}{fieldList, p})
+			defer storedRefs.Remove(ref)
+			C.tinygo_clang_visitChildren(decl, C.CXCursorVisitor(C.tinygo_clang_struct_visitor), C.CXClientData(ref))
+			if isUnion && len(fieldList.List) > 1 {
+				// Insert a special field at the front (of zero width) as a
+				// marker that this struct is actually a union. This is done
+				// by giving the field a name that cannot be expressed
+				// directly in Go.
+				// Other parts of the compiler look at the first element in
+				// a struct (of size > 2) to know whether this is a union.
+				// Note that we don't have to insert it for single-element
+				// unions as they're basically equivalent to a struct.
+				unionMarker := &ast.Field{
+					Type: &ast.StructType{
+						Struct: pos,
+					},
+				}
+				unionMarker.Names = []*ast.Ident{
+					{
+						NamePos: pos,
+						Name:    "C union",
+						Obj: &ast.Object{
+							Kind: ast.Var,
+							Name: "C union",
+							Decl: unionMarker,
+						},
+					},
+				}
+				fieldList.List = append([]*ast.Field{unionMarker}, fieldList.List...)
+			}
+			// Structs and unions are both stored as a Go struct type.
+			p.elaboratedTypes[cgoName] = &elaboratedTypeInfo{
+				typeExpr: &ast.StructType{
+					Struct: pos,
+					Fields: fieldList,
+				},
+				pos: pos,
+			}
+		}
+		return newIdent("C." + cgoName)
+	}
+
+	// Fallback, probably incorrect but at least the error points to an odd
+	// type name.
+	return newIdent("C." + getString(C.clang_getTypeSpelling(typ)))
+}
+// tinygo_clang_struct_visitor is the clang_visitChildren callback that
+// makeASTType installs on a struct or union declaration. client_data is a
+// storedRefs reference to a (fieldList, pkg) pair. Every visited field is
+// converted to an *ast.Field and appended to that field list. The visitor
+// panics when a child cursor is not a field declaration, and otherwise
+// returns CXChildVisit_Continue.
+//
+//export tinygo_clang_struct_visitor
+func tinygo_clang_struct_visitor(c, parent C.GoCXCursor, client_data C.CXClientData) C.int {
+	// The struct type must match the one stored by makeASTType exactly.
+	passed := storedRefs.Get(unsafe.Pointer(client_data)).(struct {
+		fieldList *ast.FieldList
+		pkg       *cgoPackage
+	})
+	fieldList := passed.fieldList
+	p := passed.pkg
+	if C.tinygo_clang_getCursorKind(c) != C.CXCursor_FieldDecl {
+		panic("expected field inside cursor")
+	}
+	name := getString(C.tinygo_clang_getCursorSpelling(c))
+	typ := C.tinygo_clang_getCursorType(c)
+	// Resolve the position once; it is shared by the type and the name.
+	pos := p.getCursorPosition(c)
+	field := &ast.Field{
+		Type: p.makeASTType(typ, pos),
+	}
+	field.Names = []*ast.Ident{
+		{
+			NamePos: pos,
+			Name:    name,
+			Obj: &ast.Object{
+				Kind: ast.Var,
+				Name: name,
+				Decl: field,
+			},
+		},
+	}
+	fieldList.List = append(fieldList.List, field)
+	return C.CXChildVisit_Continue
+}