diff options
Diffstat (limited to 'cgo/libclang.go')
-rw-r--r-- | cgo/libclang.go | 624 |
1 files changed, 624 insertions, 0 deletions
diff --git a/cgo/libclang.go b/cgo/libclang.go new file mode 100644 index 000000000..24dac8c3c --- /dev/null +++ b/cgo/libclang.go @@ -0,0 +1,624 @@ +package cgo + +// This file parses a fragment of C with libclang and stores the result for AST +// modification. It does not touch the AST itself. + +import ( + "fmt" + "go/ast" + "go/scanner" + "go/token" + "path/filepath" + "strconv" + "strings" + "unsafe" +) + +/* +#include <clang-c/Index.h> // if this fails, install libclang-8-dev +#include <stdlib.h> +#include <stdint.h> + +// This struct should be ABI-compatible on all platforms (uintptr_t has the same +// alignment etc. as void*) but does not include void* pointers that are not +// always real pointers. +// The Go garbage collector assumes that all non-nil pointer-typed integers are +// actually pointers. This is not always true, as data[1] often contains 0x1, +// which is clearly not a valid pointer. Usually the GC won't catch this issue, +// but occasionally it will leading to a crash with a vague error message. +typedef struct { + enum CXCursorKind kind; + int xdata; + uintptr_t data[3]; +} GoCXCursor; + +// Forwarding functions. They are implemented in libclang_stubs.c and forward to +// the real functions without doing anything else, thus they are entirely +// compatible with the versions without tinygo_ prefix. The only difference is +// the CXCursor type, which has been replaced with GoCXCursor. +GoCXCursor tinygo_clang_getTranslationUnitCursor(CXTranslationUnit tu); +unsigned tinygo_clang_visitChildren(GoCXCursor parent, CXCursorVisitor visitor, CXClientData client_data); +CXString tinygo_clang_getCursorSpelling(GoCXCursor c); +enum CXCursorKind tinygo_clang_getCursorKind(GoCXCursor c); +CXType tinygo_clang_getCursorType(GoCXCursor c); +GoCXCursor tinygo_clang_getTypeDeclaration(CXType t); +CXType tinygo_clang_getTypedefDeclUnderlyingType(GoCXCursor c); +CXType tinygo_clang_getCursorResultType(GoCXCursor c); +int tinygo_clang_Cursor_getNumArguments(GoCXCursor c); +GoCXCursor tinygo_clang_Cursor_getArgument(GoCXCursor c, unsigned i); +CXSourceLocation tinygo_clang_getCursorLocation(GoCXCursor c); +CXSourceRange tinygo_clang_getCursorExtent(GoCXCursor c); +CXTranslationUnit tinygo_clang_Cursor_getTranslationUnit(GoCXCursor c); + +int tinygo_clang_globals_visitor(GoCXCursor c, GoCXCursor parent, CXClientData client_data); +int tinygo_clang_struct_visitor(GoCXCursor c, GoCXCursor parent, CXClientData client_data); +*/ +import "C" + +// storedRefs stores references to types, used for clang_visitChildren. +var storedRefs refMap + +var diagnosticSeverity = [...]string{ + C.CXDiagnostic_Ignored: "ignored", + C.CXDiagnostic_Note: "note", + C.CXDiagnostic_Warning: "warning", + C.CXDiagnostic_Error: "error", + C.CXDiagnostic_Fatal: "fatal", +} + +func (p *cgoPackage) parseFragment(fragment string, cflags []string, posFilename string, posLine int) { + index := C.clang_createIndex(0, 0) + defer C.clang_disposeIndex(index) + + // pretend to be a .c file + filenameC := C.CString(posFilename + "!cgo.c") + defer C.free(unsafe.Pointer(filenameC)) + + // fix up error locations + fragment = fmt.Sprintf("# %d %#v\n", posLine+1, posFilename) + fragment + + fragmentC := C.CString(fragment) + defer C.free(unsafe.Pointer(fragmentC)) + + unsavedFile := C.struct_CXUnsavedFile{ + Filename: filenameC, + Length: C.ulong(len(fragment)), + Contents: fragmentC, + } + + // convert Go slice of strings to C array of strings. + cmdargsC := C.malloc(C.size_t(len(cflags)) * C.size_t(unsafe.Sizeof(uintptr(0)))) + defer C.free(cmdargsC) + cmdargs := (*[1 << 16]*C.char)(cmdargsC) + for i, cflag := range cflags { + s := C.CString(cflag) + cmdargs[i] = s + defer C.free(unsafe.Pointer(s)) + } + + var unit C.CXTranslationUnit + errCode := C.clang_parseTranslationUnit2( + index, + filenameC, + (**C.char)(cmdargsC), C.int(len(cflags)), // command line args + &unsavedFile, 1, // unsaved files + C.CXTranslationUnit_DetailedPreprocessingRecord, + &unit) + if errCode != 0 { + panic("loader: failed to parse source with libclang") + } + defer C.clang_disposeTranslationUnit(unit) + + if numDiagnostics := int(C.clang_getNumDiagnostics(unit)); numDiagnostics != 0 { + addDiagnostic := func(diagnostic C.CXDiagnostic) { + spelling := getString(C.clang_getDiagnosticSpelling(diagnostic)) + severity := diagnosticSeverity[C.clang_getDiagnosticSeverity(diagnostic)] + location := C.clang_getDiagnosticLocation(diagnostic) + var libclangFilename C.CXString + var line C.unsigned + var column C.unsigned + C.clang_getPresumedLocation(location, &libclangFilename, &line, &column) + filename := getString(libclangFilename) + if filepath.IsAbs(filename) { + // Relative paths for readability, like other Go parser errors. + relpath, err := filepath.Rel(p.dir, filename) + if err == nil { + filename = relpath + } + } + p.errors = append(p.errors, &scanner.Error{ + Pos: token.Position{ + Filename: filename, + Offset: 0, // not provided by clang_getPresumedLocation + Line: int(line), + Column: int(column), + }, + Msg: severity + ": " + spelling, + }) + } + for i := 0; i < numDiagnostics; i++ { + diagnostic := C.clang_getDiagnostic(unit, C.uint(i)) + addDiagnostic(diagnostic) + + // Child diagnostics (like notes on redefinitions). + diagnostics := C.clang_getChildDiagnostics(diagnostic) + for j := 0; j < int(C.clang_getNumDiagnosticsInSet(diagnostics)); j++ { + addDiagnostic(C.clang_getDiagnosticInSet(diagnostics, C.uint(j))) + } + } + return + } + + ref := storedRefs.Put(p) + defer storedRefs.Remove(ref) + cursor := C.tinygo_clang_getTranslationUnitCursor(unit) + C.tinygo_clang_visitChildren(cursor, C.CXCursorVisitor(C.tinygo_clang_globals_visitor), C.CXClientData(ref)) +} + +//export tinygo_clang_globals_visitor +func tinygo_clang_globals_visitor(c, parent C.GoCXCursor, client_data C.CXClientData) C.int { + p := storedRefs.Get(unsafe.Pointer(client_data)).(*cgoPackage) + kind := C.tinygo_clang_getCursorKind(c) + pos := p.getCursorPosition(c) + switch kind { + case C.CXCursor_FunctionDecl: + name := getString(C.tinygo_clang_getCursorSpelling(c)) + if _, required := p.missingSymbols[name]; !required { + return C.CXChildVisit_Continue + } + cursorType := C.tinygo_clang_getCursorType(c) + if C.clang_isFunctionTypeVariadic(cursorType) != 0 { + return C.CXChildVisit_Continue // not supported + } + numArgs := int(C.tinygo_clang_Cursor_getNumArguments(c)) + fn := &functionInfo{ + pos: pos, + } + p.functions[name] = fn + for i := 0; i < numArgs; i++ { + arg := C.tinygo_clang_Cursor_getArgument(c, C.uint(i)) + argName := getString(C.tinygo_clang_getCursorSpelling(arg)) + argType := C.clang_getArgType(cursorType, C.uint(i)) + if argName == "" { + argName = "$" + strconv.Itoa(i) + } + fn.args = append(fn.args, paramInfo{ + name: argName, + typeExpr: p.makeASTType(argType, pos), + }) + } + resultType := C.tinygo_clang_getCursorResultType(c) + if resultType.kind != C.CXType_Void { + fn.results = &ast.FieldList{ + List: []*ast.Field{ + &ast.Field{ + Type: p.makeASTType(resultType, pos), + }, + }, + } + } + case C.CXCursor_StructDecl: + typ := C.tinygo_clang_getCursorType(c) + name := getString(C.tinygo_clang_getCursorSpelling(c)) + if _, required := p.missingSymbols["struct_"+name]; !required { + return C.CXChildVisit_Continue + } + p.makeASTType(typ, pos) + case C.CXCursor_TypedefDecl: + typedefType := C.tinygo_clang_getCursorType(c) + name := getString(C.clang_getTypedefName(typedefType)) + if _, required := p.missingSymbols[name]; !required { + return C.CXChildVisit_Continue + } + p.makeASTType(typedefType, pos) + case C.CXCursor_VarDecl: + name := getString(C.tinygo_clang_getCursorSpelling(c)) + if _, required := p.missingSymbols[name]; !required { + return C.CXChildVisit_Continue + } + cursorType := C.tinygo_clang_getCursorType(c) + p.globals[name] = globalInfo{ + typeExpr: p.makeASTType(cursorType, pos), + pos: pos, + } + case C.CXCursor_MacroDefinition: + name := getString(C.tinygo_clang_getCursorSpelling(c)) + if _, required := p.missingSymbols[name]; !required { + return C.CXChildVisit_Continue + } + sourceRange := C.tinygo_clang_getCursorExtent(c) + start := C.clang_getRangeStart(sourceRange) + end := C.clang_getRangeEnd(sourceRange) + var file, endFile C.CXFile + var startOffset, endOffset C.unsigned + C.clang_getExpansionLocation(start, &file, nil, nil, &startOffset) + if file == nil { + panic("could not find file where macro is defined") + } + C.clang_getExpansionLocation(end, &endFile, nil, nil, &endOffset) + if file != endFile { + panic("expected start and end location of a #define to be in the same file") + } + if startOffset > endOffset { + panic("startOffset > endOffset") + } + + // read file contents and extract the relevant byte range + tu := C.tinygo_clang_Cursor_getTranslationUnit(c) + var size C.size_t + sourcePtr := C.clang_getFileContents(tu, file, &size) + if endOffset >= C.uint(size) { + panic("endOffset lies after end of file") + } + source := string(((*[1 << 28]byte)(unsafe.Pointer(sourcePtr)))[startOffset:endOffset:endOffset]) + if !strings.HasPrefix(source, name) { + panic(fmt.Sprintf("expected #define value to start with %#v, got %#v", name, source)) + } + value := strings.TrimSpace(source[len(name):]) + for len(value) != 0 && value[0] == '(' && value[len(value)-1] == ')' { + value = strings.TrimSpace(value[1 : len(value)-1]) + } + if len(value) == 0 { + // Pretend it doesn't exist at all. + return C.CXChildVisit_Continue + } + // For information about integer literals: + // https://en.cppreference.com/w/cpp/language/integer_literal + if value[0] == '"' { + // string constant + p.constants[name] = constantInfo{&ast.BasicLit{pos, token.STRING, value}, pos} + return C.CXChildVisit_Continue + } + if value[0] == '\'' { + // char constant + p.constants[name] = constantInfo{&ast.BasicLit{pos, token.CHAR, value}, pos} + return C.CXChildVisit_Continue + } + // assume it's a number (int or float) + value = strings.Replace(value, "'", "", -1) // remove ' chars + value = strings.TrimRight(value, "lu") // remove llu suffixes etc. + // find the first non-number + nonnum := byte(0) + for i := 0; i < len(value); i++ { + if value[i] < '0' || value[i] > '9' { + nonnum = value[i] + break + } + } + // determine number type based on the first non-number + switch nonnum { + case 0: + // no non-number found, must be an integer + p.constants[name] = constantInfo{&ast.BasicLit{pos, token.INT, value}, pos} + case 'x', 'X': + // hex integer constant + // TODO: may also be a floating point number per C++17. + p.constants[name] = constantInfo{&ast.BasicLit{pos, token.INT, value}, pos} + case '.', 'e': + // float constant + value = strings.TrimRight(value, "fFlL") + p.constants[name] = constantInfo{&ast.BasicLit{pos, token.FLOAT, value}, pos} + default: + // unknown type, ignore + } + } + return C.CXChildVisit_Continue +} + +func getString(clangString C.CXString) (s string) { + rawString := C.clang_getCString(clangString) + s = C.GoString(rawString) + C.clang_disposeString(clangString) + return +} + +// getCursorPosition returns a usable token.Pos from a libclang cursor. If the +// file for this cursor has not been seen before, it is read from libclang +// (which already has the file in memory) and added to the token.FileSet. +func (p *cgoPackage) getCursorPosition(cursor C.GoCXCursor) token.Pos { + location := C.tinygo_clang_getCursorLocation(cursor) + var file C.CXFile + var line C.unsigned + var column C.unsigned + var offset C.unsigned + C.clang_getExpansionLocation(location, &file, &line, &column, &offset) + if line == 0 || file == nil { + // Invalid token. + return token.NoPos + } + filename := getString(C.clang_getFileName(file)) + if _, ok := p.tokenFiles[filename]; !ok { + // File has not been seen before in this package, add line information + // now by reading the file from libclang. + tu := C.tinygo_clang_Cursor_getTranslationUnit(cursor) + var size C.size_t + sourcePtr := C.clang_getFileContents(tu, file, &size) + source := ((*[1 << 28]byte)(unsafe.Pointer(sourcePtr)))[:size:size] + lines := []int{0} + for i := 0; i < len(source)-1; i++ { + if source[i] == '\n' { + lines = append(lines, i+1) + } + } + f := p.fset.AddFile(filename, -1, int(size)) + f.SetLines(lines) + p.tokenFiles[filename] = f + } + return p.tokenFiles[filename].Pos(int(offset)) +} + +// makeASTType return the ast.Expr for the given libclang type. In other words, +// it converts a libclang type to a type in the Go AST. +func (p *cgoPackage) makeASTType(typ C.CXType, pos token.Pos) ast.Expr { + var typeName string + switch typ.kind { + case C.CXType_Char_S, C.CXType_Char_U: + typeName = "C.char" + case C.CXType_SChar: + typeName = "C.schar" + case C.CXType_UChar: + typeName = "C.uchar" + case C.CXType_Short: + typeName = "C.short" + case C.CXType_UShort: + typeName = "C.ushort" + case C.CXType_Int: + typeName = "C.int" + case C.CXType_UInt: + typeName = "C.uint" + case C.CXType_Long: + typeName = "C.long" + case C.CXType_ULong: + typeName = "C.ulong" + case C.CXType_LongLong: + typeName = "C.longlong" + case C.CXType_ULongLong: + typeName = "C.ulonglong" + case C.CXType_Bool: + typeName = "bool" + case C.CXType_Float, C.CXType_Double, C.CXType_LongDouble: + switch C.clang_Type_getSizeOf(typ) { + case 4: + typeName = "float32" + case 8: + typeName = "float64" + default: + // Don't do anything, rely on the fallback code to show a somewhat + // sensible error message like "undeclared name: C.long double". + } + case C.CXType_Complex: + switch C.clang_Type_getSizeOf(typ) { + case 8: + typeName = "complex64" + case 16: + typeName = "complex128" + } + case C.CXType_Pointer: + pointeeType := C.clang_getPointeeType(typ) + if pointeeType.kind == C.CXType_Void { + // void* type is translated to Go as unsafe.Pointer + return &ast.SelectorExpr{ + X: &ast.Ident{ + NamePos: pos, + Name: "unsafe", + }, + Sel: &ast.Ident{ + NamePos: pos, + Name: "Pointer", + }, + } + } + return &ast.StarExpr{ + Star: pos, + X: p.makeASTType(pointeeType, pos), + } + case C.CXType_ConstantArray: + return &ast.ArrayType{ + Lbrack: pos, + Len: &ast.BasicLit{ + ValuePos: pos, + Kind: token.INT, + Value: strconv.FormatInt(int64(C.clang_getArraySize(typ)), 10), + }, + Elt: p.makeASTType(C.clang_getElementType(typ), pos), + } + case C.CXType_FunctionProto: + // Be compatible with gc, which uses the *[0]byte type for function + // pointer types. + // Return type [0]byte because this is a function type, not a pointer to + // this function type. + return &ast.ArrayType{ + Lbrack: pos, + Len: &ast.BasicLit{ + ValuePos: pos, + Kind: token.INT, + Value: "0", + }, + Elt: &ast.Ident{ + NamePos: pos, + Name: "byte", + }, + } + case C.CXType_Typedef: + name := getString(C.clang_getTypedefName(typ)) + if _, ok := p.typedefs[name]; !ok { + p.typedefs[name] = nil // don't recurse + c := C.tinygo_clang_getTypeDeclaration(typ) + underlyingType := C.tinygo_clang_getTypedefDeclUnderlyingType(c) + expr := p.makeASTType(underlyingType, pos) + if strings.HasPrefix(name, "_Cgo_") { + expr := expr.(*ast.Ident) + typeSize := C.clang_Type_getSizeOf(underlyingType) + switch expr.Name { + case "C.char": + if typeSize != 1 { + // This happens for some very special purpose architectures + // (DSPs etc.) that are not currently targeted. + // https://www.embecosm.com/2017/04/18/non-8-bit-char-support-in-clang-and-llvm/ + panic("unknown char width") + } + switch underlyingType.kind { + case C.CXType_Char_S: + expr.Name = "int8" + case C.CXType_Char_U: + expr.Name = "uint8" + } + case "C.schar", "C.short", "C.int", "C.long", "C.longlong": + switch typeSize { + case 1: + expr.Name = "int8" + case 2: + expr.Name = "int16" + case 4: + expr.Name = "int32" + case 8: + expr.Name = "int64" + } + case "C.uchar", "C.ushort", "C.uint", "C.ulong", "C.ulonglong": + switch typeSize { + case 1: + expr.Name = "uint8" + case 2: + expr.Name = "uint16" + case 4: + expr.Name = "uint32" + case 8: + expr.Name = "uint64" + } + } + } + p.typedefs[name] = &typedefInfo{ + typeExpr: expr, + pos: pos, + } + } + return &ast.Ident{ + NamePos: pos, + Name: "C." + name, + } + case C.CXType_Elaborated: + underlying := C.clang_Type_getNamedType(typ) + switch underlying.kind { + case C.CXType_Record: + return p.makeASTType(underlying, pos) + default: + panic("unknown elaborated type") + } + case C.CXType_Record: + cursor := C.tinygo_clang_getTypeDeclaration(typ) + name := getString(C.tinygo_clang_getCursorSpelling(cursor)) + var cgoName string + switch C.tinygo_clang_getCursorKind(cursor) { + case C.CXCursor_StructDecl: + cgoName = "struct_" + name + case C.CXCursor_UnionDecl: + cgoName = "union_" + name + default: + panic("unknown record declaration") + } + if _, ok := p.elaboratedTypes[cgoName]; !ok { + p.elaboratedTypes[cgoName] = nil // predeclare (to avoid endless recursion) + fieldList := &ast.FieldList{ + Opening: pos, + Closing: pos, + } + ref := storedRefs.Put(struct { + fieldList *ast.FieldList + pkg *cgoPackage + }{fieldList, p}) + defer storedRefs.Remove(ref) + C.tinygo_clang_visitChildren(cursor, C.CXCursorVisitor(C.tinygo_clang_struct_visitor), C.CXClientData(ref)) + switch C.tinygo_clang_getCursorKind(cursor) { + case C.CXCursor_StructDecl: + p.elaboratedTypes[cgoName] = &elaboratedTypeInfo{ + typeExpr: &ast.StructType{ + Struct: pos, + Fields: fieldList, + }, + pos: pos, + } + case C.CXCursor_UnionDecl: + if len(fieldList.List) > 1 { + // Insert a special field at the front (of zero width) as a + // marker that this is struct is actually a union. This is done + // by giving the field a name that cannot be expressed directly + // in Go. + // Other parts of the compiler look at the first element in a + // struct (of size > 2) to know whether this is a union. + // Note that we don't have to insert it for single-element + // unions as they're basically equivalent to a struct. + unionMarker := &ast.Field{ + Type: &ast.StructType{ + Struct: pos, + }, + } + unionMarker.Names = []*ast.Ident{ + &ast.Ident{ + NamePos: pos, + Name: "C union", + Obj: &ast.Object{ + Kind: ast.Var, + Name: "C union", + Decl: unionMarker, + }, + }, + } + fieldList.List = append([]*ast.Field{unionMarker}, fieldList.List...) + } + p.elaboratedTypes[cgoName] = &elaboratedTypeInfo{ + typeExpr: &ast.StructType{ + Struct: pos, + Fields: fieldList, + }, + pos: pos, + } + default: + panic("unreachable") + } + } + return &ast.Ident{ + NamePos: pos, + Name: "C." + cgoName, + } + } + if typeName == "" { + // Fallback, probably incorrect but at least the error points to an odd + // type name. + typeName = "C." + getString(C.clang_getTypeSpelling(typ)) + } + return &ast.Ident{ + NamePos: pos, + Name: typeName, + } +} + +//export tinygo_clang_struct_visitor +func tinygo_clang_struct_visitor(c, parent C.GoCXCursor, client_data C.CXClientData) C.int { + passed := storedRefs.Get(unsafe.Pointer(client_data)).(struct { + fieldList *ast.FieldList + pkg *cgoPackage + }) + fieldList := passed.fieldList + p := passed.pkg + if C.tinygo_clang_getCursorKind(c) != C.CXCursor_FieldDecl { + panic("expected field inside cursor") + } + name := getString(C.tinygo_clang_getCursorSpelling(c)) + typ := C.tinygo_clang_getCursorType(c) + field := &ast.Field{ + Type: p.makeASTType(typ, p.getCursorPosition(c)), + } + field.Names = []*ast.Ident{ + &ast.Ident{ + NamePos: p.getCursorPosition(c), + Name: name, + Obj: &ast.Object{ + Kind: ast.Var, + Name: name, + Decl: field, + }, + }, + } + fieldList.List = append(fieldList.List, field) + return C.CXChildVisit_Continue +} |