From bedc2375958aa124c464405ec0b7557d06513f3b Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Fri, 12 Jul 2024 19:15:32 +0200 Subject: cgo: support preprocessor macros passed on the command line Go code might sometimes want to use preprocessor macros that were passed on the command line. This wasn't working before and resulted in the following error: internal error: could not find file where macro is defined This is now supported, though location information isn't available (which makes sense: the command line is not a file). I had to use the `clang_tokenize` API for this and reconstruct the original source location. Apparently this is the only way to do it: https://stackoverflow.com/a/19074846/559350 In the future we could consider replacing our own tokenization with the tokenizer that's built into Clang directly. This should reduce the possibility of bugs a bit. --- cgo/cgo_test.go | 9 +++++- cgo/const.go | 8 ++++-- cgo/libclang.go | 68 ++++++++++++++++++++++++---------------------- cgo/testdata/errors.go | 13 +++++++++ cgo/testdata/errors.out.go | 11 ++++++-- 5 files changed, 72 insertions(+), 37 deletions(-) diff --git a/cgo/cgo_test.go b/cgo/cgo_test.go index 60af3e6f2..dc79b21d5 100644 --- a/cgo/cgo_test.go +++ b/cgo/cgo_test.go @@ -7,6 +7,7 @@ import ( "go/ast" "go/format" "go/parser" + "go/scanner" "go/token" "go/types" "os" @@ -219,7 +220,13 @@ func (i simpleImporter) Import(path string) (*types.Package, error) { // formatDiagnostic formats the error message to be an indented comment. It // also fixes Windows path name issues (backward slashes). func formatDiagnostic(err error) string { - msg := err.Error() + var msg string + switch err := err.(type) { + case scanner.Error: + msg = err.Pos.String() + ": " + err.Msg + default: + msg = err.Error() + } if runtime.GOOS == "windows" { // Fix Windows path slashes. msg = strings.ReplaceAll(msg, "testdata\\", "testdata/") diff --git a/cgo/const.go b/cgo/const.go index 2d0e29e10..f4707c80a 100644 --- a/cgo/const.go +++ b/cgo/const.go @@ -195,7 +195,9 @@ func (t *tokenizer) Next() { t.curValue = t.peekValue // Parse the next peek token. - t.peekPos += token.Pos(len(t.curValue)) + if t.peekPos != token.NoPos { + t.peekPos += token.Pos(len(t.curValue)) + } for { if len(t.buf) == 0 { t.peekToken = token.EOF @@ -207,7 +209,9 @@ func (t *tokenizer) Next() { // Skip whitespace. // Based on this source, not sure whether it represents C whitespace: // https://en.cppreference.com/w/cpp/string/byte/isspace - t.peekPos++ + if t.peekPos != token.NoPos { + t.peekPos++ + } t.buf = t.buf[1:] case len(t.buf) >= 2 && (string(t.buf[:2]) == "||" || string(t.buf[:2]) == "&&" || string(t.buf[:2]) == "<<" || string(t.buf[:2]) == ">>"): // Two-character tokens. diff --git a/cgo/libclang.go b/cgo/libclang.go index 0860c6af4..ee77611e6 100644 --- a/cgo/libclang.go +++ b/cgo/libclang.go @@ -4,6 +4,7 @@ package cgo // modification. It does not touch the AST itself. import ( + "bytes" "crypto/sha256" "crypto/sha512" "encoding/hex" @@ -369,42 +370,45 @@ func (f *cgoFile) createASTNode(name string, c clangCursor) (ast.Node, any) { gen.Specs = append(gen.Specs, valueSpec) return gen, nil case C.CXCursor_MacroDefinition: + // Extract tokens from the Clang tokenizer. + // See: https://stackoverflow.com/a/19074846/559350 sourceRange := C.tinygo_clang_getCursorExtent(c) - start := C.clang_getRangeStart(sourceRange) - end := C.clang_getRangeEnd(sourceRange) - var file, endFile C.CXFile - var startOffset, endOffset C.unsigned - C.clang_getExpansionLocation(start, &file, nil, nil, &startOffset) - if file == nil { - f.addError(pos, "internal error: could not find file where macro is defined") - return nil, nil - } - C.clang_getExpansionLocation(end, &endFile, nil, nil, &endOffset) - if file != endFile { - f.addError(pos, "internal error: expected start and end location of a macro to be in the same file") - return nil, nil - } - if startOffset > endOffset { - f.addError(pos, "internal error: start offset of macro is after end offset") - return nil, nil - } - - // read file contents and extract the relevant byte range tu := C.tinygo_clang_Cursor_getTranslationUnit(c) - var size C.size_t - sourcePtr := C.clang_getFileContents(tu, file, &size) - if endOffset >= C.uint(size) { - f.addError(pos, "internal error: end offset of macro lies after end of file") - return nil, nil - } - source := string(((*[1 << 28]byte)(unsafe.Pointer(sourcePtr)))[startOffset:endOffset:endOffset]) - if !strings.HasPrefix(source, name) { - f.addError(pos, fmt.Sprintf("internal error: expected macro value to start with %#v, got %#v", name, source)) - return nil, nil + var rawTokens *C.CXToken + var numTokens C.unsigned + C.clang_tokenize(tu, sourceRange, &rawTokens, &numTokens) + tokens := unsafe.Slice(rawTokens, numTokens) + // Convert this range of tokens back to source text. + // Ugly, but it works well enough. + sourceBuf := &bytes.Buffer{} + var startOffset int + for i, token := range tokens { + spelling := getString(C.clang_getTokenSpelling(tu, token)) + location := C.clang_getTokenLocation(tu, token) + var tokenOffset C.unsigned + C.clang_getExpansionLocation(location, nil, nil, nil, &tokenOffset) + if i == 0 { + // The first token is the macro name itself. + // Skip it (after using its location). + startOffset = int(tokenOffset) + len(name) + } else { + // Later tokens are the macro contents. + for int(tokenOffset) > (startOffset + sourceBuf.Len()) { + // Pad the source text with whitespace (that must have been + // present in the original source as well). + sourceBuf.WriteByte(' ') + } + sourceBuf.WriteString(spelling) + } } - value := source[len(name):] + C.clang_disposeTokens(tu, rawTokens, numTokens) + value := sourceBuf.String() // Try to convert this #define into a Go constant expression. - expr, scannerError := parseConst(pos+token.Pos(len(name)), f.fset, value) + tokenPos := token.NoPos + if pos != token.NoPos { + tokenPos = pos + token.Pos(len(name)) + } + expr, scannerError := parseConst(tokenPos, f.fset, value) if scannerError != nil { f.errors = append(f.errors, *scannerError) return nil, nil diff --git a/cgo/testdata/errors.go b/cgo/testdata/errors.go index 7ca5b7960..e5e809881 100644 --- a/cgo/testdata/errors.go +++ b/cgo/testdata/errors.go @@ -13,10 +13,14 @@ typedef someType noType; // undefined type #define SOME_CONST_1 5) // invalid const syntax #define SOME_CONST_2 6) // const not used (so no error) #define SOME_CONST_3 1234 // const too large for byte +#define SOME_CONST_b 3 ) // const with lots of weird whitespace (to test error locations) +# define SOME_CONST_startspace 3) */ // // // #define SOME_CONST_4 8) // after some empty lines +// #cgo CFLAGS: -DSOME_PARAM_CONST_invalid=3/+3 +// #cgo CFLAGS: -DSOME_PARAM_CONST_valid=3+4 import "C" // #warning another warning @@ -24,6 +28,7 @@ import "C" // Make sure that errors for the following lines won't change with future // additions to the CGo preamble. +// //line errors.go:100 var ( // constant too large @@ -38,4 +43,12 @@ var ( _ byte = C.SOME_CONST_3 _ = C.SOME_CONST_4 + + _ = C.SOME_CONST_b + + _ = C.SOME_CONST_startspace + + // constants passed by a command line parameter + _ = C.SOME_PARAM_CONST_invalid + _ = C.SOME_PARAM_CONST_valid ) diff --git a/cgo/testdata/errors.out.go b/cgo/testdata/errors.out.go index b1646a2e0..d0e04320a 100644 --- a/cgo/testdata/errors.out.go +++ b/cgo/testdata/errors.out.go @@ -1,9 +1,12 @@ // CGo errors: // testdata/errors.go:4:2: warning: some warning // testdata/errors.go:11:9: error: unknown type name 'someType' -// testdata/errors.go:22:5: warning: another warning +// testdata/errors.go:26:5: warning: another warning // testdata/errors.go:13:23: unexpected token ), expected end of expression -// testdata/errors.go:19:26: unexpected token ), expected end of expression +// testdata/errors.go:21:26: unexpected token ), expected end of expression +// testdata/errors.go:16:33: unexpected token ), expected end of expression +// testdata/errors.go:17:34: unexpected token ), expected end of expression +// -: unexpected token INT, expected end of expression // Type checking errors after CGo processing: // testdata/errors.go:102: cannot use 2 << 10 (untyped int constant 2048) as C.char value in variable declaration (overflows) @@ -11,6 +14,9 @@ // testdata/errors.go:108: undefined: C.SOME_CONST_1 // testdata/errors.go:110: cannot use C.SOME_CONST_3 (untyped int constant 1234) as byte value in variable declaration (overflows) // testdata/errors.go:112: undefined: C.SOME_CONST_4 +// testdata/errors.go:114: undefined: C.SOME_CONST_b +// testdata/errors.go:116: undefined: C.SOME_CONST_startspace +// testdata/errors.go:119: undefined: C.SOME_PARAM_CONST_invalid package main @@ -58,3 +64,4 @@ type C.struct_point_t struct { type C.point_t = C.struct_point_t const C.SOME_CONST_3 = 1234 +const C.SOME_PARAM_CONST_valid = 3 + 4 -- cgit v1.2.3