diff options
author | Ayke van Laethem <[email protected]> | 2024-07-12 19:15:32 +0200 |
---|---|---|
committer | Ayke van Laethem <[email protected]> | 2024-07-12 20:09:17 +0200 |
commit | bedc2375958aa124c464405ec0b7557d06513f3b (patch) | |
tree | 899e86c94213802e32a682464b9c7f439a2ddf72 | |
parent | ac396708da17271ec48bffefb57d1185dd8cadb1 (diff) | |
download | tinygo-cgo-macro-via-cmdline.tar.gz tinygo-cgo-macro-via-cmdline.zip |
cgo: support preprocessor macros passed on the command line (branch: cgo-macro-via-cmdline)
Go code might sometimes want to use preprocessor macros that were passed
on the command line. This wasn't working before and resulted in the
following error:
    internal error: could not find file where macro is defined
This is now supported, though location information isn't available
(which makes sense: the command line is not a file).
I had to use the `clang_tokenize` API for this and reconstruct the
original source location. Apparently this is the only way to do it:
https://stackoverflow.com/a/19074846/559350
In the future we could consider replacing our own tokenization with the
tokenizer that's built into Clang directly. This should reduce the
possibility of bugs a bit.
-rw-r--r-- | cgo/cgo_test.go | 9 | ||||
-rw-r--r-- | cgo/const.go | 8 | ||||
-rw-r--r-- | cgo/libclang.go | 68 | ||||
-rw-r--r-- | cgo/testdata/errors.go | 13 | ||||
-rw-r--r-- | cgo/testdata/errors.out.go | 11 |
5 files changed, 72 insertions, 37 deletions
diff --git a/cgo/cgo_test.go b/cgo/cgo_test.go index 60af3e6f2..dc79b21d5 100644 --- a/cgo/cgo_test.go +++ b/cgo/cgo_test.go @@ -7,6 +7,7 @@ import ( "go/ast" "go/format" "go/parser" + "go/scanner" "go/token" "go/types" "os" @@ -219,7 +220,13 @@ func (i simpleImporter) Import(path string) (*types.Package, error) { // formatDiagnostic formats the error message to be an indented comment. It // also fixes Windows path name issues (backward slashes). func formatDiagnostic(err error) string { - msg := err.Error() + var msg string + switch err := err.(type) { + case scanner.Error: + msg = err.Pos.String() + ": " + err.Msg + default: + msg = err.Error() + } if runtime.GOOS == "windows" { // Fix Windows path slashes. msg = strings.ReplaceAll(msg, "testdata\\", "testdata/") diff --git a/cgo/const.go b/cgo/const.go index 2d0e29e10..f4707c80a 100644 --- a/cgo/const.go +++ b/cgo/const.go @@ -195,7 +195,9 @@ func (t *tokenizer) Next() { t.curValue = t.peekValue // Parse the next peek token. - t.peekPos += token.Pos(len(t.curValue)) + if t.peekPos != token.NoPos { + t.peekPos += token.Pos(len(t.curValue)) + } for { if len(t.buf) == 0 { t.peekToken = token.EOF @@ -207,7 +209,9 @@ func (t *tokenizer) Next() { // Skip whitespace. // Based on this source, not sure whether it represents C whitespace: // https://en.cppreference.com/w/cpp/string/byte/isspace - t.peekPos++ + if t.peekPos != token.NoPos { + t.peekPos++ + } t.buf = t.buf[1:] case len(t.buf) >= 2 && (string(t.buf[:2]) == "||" || string(t.buf[:2]) == "&&" || string(t.buf[:2]) == "<<" || string(t.buf[:2]) == ">>"): // Two-character tokens. diff --git a/cgo/libclang.go b/cgo/libclang.go index 0860c6af4..ee77611e6 100644 --- a/cgo/libclang.go +++ b/cgo/libclang.go @@ -4,6 +4,7 @@ package cgo // modification. It does not touch the AST itself. 
import ( + "bytes" "crypto/sha256" "crypto/sha512" "encoding/hex" @@ -369,42 +370,45 @@ func (f *cgoFile) createASTNode(name string, c clangCursor) (ast.Node, any) { gen.Specs = append(gen.Specs, valueSpec) return gen, nil case C.CXCursor_MacroDefinition: + // Extract tokens from the Clang tokenizer. + // See: https://stackoverflow.com/a/19074846/559350 sourceRange := C.tinygo_clang_getCursorExtent(c) - start := C.clang_getRangeStart(sourceRange) - end := C.clang_getRangeEnd(sourceRange) - var file, endFile C.CXFile - var startOffset, endOffset C.unsigned - C.clang_getExpansionLocation(start, &file, nil, nil, &startOffset) - if file == nil { - f.addError(pos, "internal error: could not find file where macro is defined") - return nil, nil - } - C.clang_getExpansionLocation(end, &endFile, nil, nil, &endOffset) - if file != endFile { - f.addError(pos, "internal error: expected start and end location of a macro to be in the same file") - return nil, nil - } - if startOffset > endOffset { - f.addError(pos, "internal error: start offset of macro is after end offset") - return nil, nil - } - - // read file contents and extract the relevant byte range tu := C.tinygo_clang_Cursor_getTranslationUnit(c) - var size C.size_t - sourcePtr := C.clang_getFileContents(tu, file, &size) - if endOffset >= C.uint(size) { - f.addError(pos, "internal error: end offset of macro lies after end of file") - return nil, nil - } - source := string(((*[1 << 28]byte)(unsafe.Pointer(sourcePtr)))[startOffset:endOffset:endOffset]) - if !strings.HasPrefix(source, name) { - f.addError(pos, fmt.Sprintf("internal error: expected macro value to start with %#v, got %#v", name, source)) - return nil, nil + var rawTokens *C.CXToken + var numTokens C.unsigned + C.clang_tokenize(tu, sourceRange, &rawTokens, &numTokens) + tokens := unsafe.Slice(rawTokens, numTokens) + // Convert this range of tokens back to source text. + // Ugly, but it works well enough. 
+ sourceBuf := &bytes.Buffer{} + var startOffset int + for i, token := range tokens { + spelling := getString(C.clang_getTokenSpelling(tu, token)) + location := C.clang_getTokenLocation(tu, token) + var tokenOffset C.unsigned + C.clang_getExpansionLocation(location, nil, nil, nil, &tokenOffset) + if i == 0 { + // The first token is the macro name itself. + // Skip it (after using its location). + startOffset = int(tokenOffset) + len(name) + } else { + // Later tokens are the macro contents. + for int(tokenOffset) > (startOffset + sourceBuf.Len()) { + // Pad the source text with whitespace (that must have been + // present in the original source as well). + sourceBuf.WriteByte(' ') + } + sourceBuf.WriteString(spelling) + } } - value := source[len(name):] + C.clang_disposeTokens(tu, rawTokens, numTokens) + value := sourceBuf.String() // Try to convert this #define into a Go constant expression. - expr, scannerError := parseConst(pos+token.Pos(len(name)), f.fset, value) + tokenPos := token.NoPos + if pos != token.NoPos { + tokenPos = pos + token.Pos(len(name)) + } + expr, scannerError := parseConst(tokenPos, f.fset, value) if scannerError != nil { f.errors = append(f.errors, *scannerError) return nil, nil diff --git a/cgo/testdata/errors.go b/cgo/testdata/errors.go index 7ca5b7960..e5e809881 100644 --- a/cgo/testdata/errors.go +++ b/cgo/testdata/errors.go @@ -13,10 +13,14 @@ typedef someType noType; // undefined type #define SOME_CONST_1 5) // invalid const syntax #define SOME_CONST_2 6) // const not used (so no error) #define SOME_CONST_3 1234 // const too large for byte +#define SOME_CONST_b 3 ) // const with lots of weird whitespace (to test error locations) +# define SOME_CONST_startspace 3) */ // // // #define SOME_CONST_4 8) // after some empty lines +// #cgo CFLAGS: -DSOME_PARAM_CONST_invalid=3/+3 +// #cgo CFLAGS: -DSOME_PARAM_CONST_valid=3+4 import "C" // #warning another warning @@ -24,6 +28,7 @@ import "C" // Make sure that errors for the following lines 
won't change with future // additions to the CGo preamble. +// //line errors.go:100 var ( // constant too large @@ -38,4 +43,12 @@ var ( _ byte = C.SOME_CONST_3 _ = C.SOME_CONST_4 + + _ = C.SOME_CONST_b + + _ = C.SOME_CONST_startspace + + // constants passed by a command line parameter + _ = C.SOME_PARAM_CONST_invalid + _ = C.SOME_PARAM_CONST_valid ) diff --git a/cgo/testdata/errors.out.go b/cgo/testdata/errors.out.go index b1646a2e0..d0e04320a 100644 --- a/cgo/testdata/errors.out.go +++ b/cgo/testdata/errors.out.go @@ -1,9 +1,12 @@ // CGo errors: // testdata/errors.go:4:2: warning: some warning // testdata/errors.go:11:9: error: unknown type name 'someType' -// testdata/errors.go:22:5: warning: another warning +// testdata/errors.go:26:5: warning: another warning // testdata/errors.go:13:23: unexpected token ), expected end of expression -// testdata/errors.go:19:26: unexpected token ), expected end of expression +// testdata/errors.go:21:26: unexpected token ), expected end of expression +// testdata/errors.go:16:33: unexpected token ), expected end of expression +// testdata/errors.go:17:34: unexpected token ), expected end of expression +// -: unexpected token INT, expected end of expression // Type checking errors after CGo processing: // testdata/errors.go:102: cannot use 2 << 10 (untyped int constant 2048) as C.char value in variable declaration (overflows) @@ -11,6 +14,9 @@ // testdata/errors.go:108: undefined: C.SOME_CONST_1 // testdata/errors.go:110: cannot use C.SOME_CONST_3 (untyped int constant 1234) as byte value in variable declaration (overflows) // testdata/errors.go:112: undefined: C.SOME_CONST_4 +// testdata/errors.go:114: undefined: C.SOME_CONST_b +// testdata/errors.go:116: undefined: C.SOME_CONST_startspace +// testdata/errors.go:119: undefined: C.SOME_PARAM_CONST_invalid package main @@ -58,3 +64,4 @@ type C.struct_point_t struct { type C.point_t = C.struct_point_t const C.SOME_CONST_3 = 1234 +const C.SOME_PARAM_CONST_valid = 3 + 4 |