diff options
-rw-r--r-- | cgo/cgo.go | 2 | ||||
-rw-r--r-- | cgo/const.go | 215 | ||||
-rw-r--r-- | cgo/const_test.go | 30 | ||||
-rw-r--r-- | cgo/libclang.go | 7 |
4 files changed, 203 insertions, 51 deletions
diff --git a/cgo/cgo.go b/cgo/cgo.go index d42034c1a..9867b875e 100644 --- a/cgo/cgo.go +++ b/cgo/cgo.go @@ -42,7 +42,7 @@ type cgoPackage struct { // constantInfo stores some information about a CGo constant found by libclang // and declared in the Go AST. type constantInfo struct { - expr *ast.BasicLit + expr ast.Expr pos token.Pos } diff --git a/cgo/const.go b/cgo/const.go index 831515e7c..97483a5cf 100644 --- a/cgo/const.go +++ b/cgo/const.go @@ -4,56 +4,191 @@ package cgo // parse common #define statements to Go constant expressions. import ( + "fmt" "go/ast" + "go/scanner" "go/token" "strings" ) // parseConst parses the given string as a C constant. -func parseConst(pos token.Pos, value string) *ast.BasicLit { - for len(value) != 0 && value[0] == '(' && value[len(value)-1] == ')' { - value = strings.TrimSpace(value[1 : len(value)-1]) +func parseConst(pos token.Pos, fset *token.FileSet, value string) (ast.Expr, *scanner.Error) { + t := newTokenizer(pos, fset, value) + expr, err := parseConstExpr(t) + if t.token != token.EOF { + return nil, &scanner.Error{ + Pos: t.fset.Position(t.pos), + Msg: "unexpected token " + t.token.String(), + } } - if len(value) == 0 { - // Pretend it doesn't exist at all. - return nil + return expr, err +} + +// parseConstExpr parses a stream of C tokens to a Go expression. +func parseConstExpr(t *tokenizer) (ast.Expr, *scanner.Error) { + switch t.token { + case token.LPAREN: + lparen := t.pos + t.Next() + x, err := parseConstExpr(t) + if err != nil { + return nil, err + } + if t.token != token.RPAREN { + return nil, unexpectedToken(t, token.RPAREN) + } + expr := &ast.ParenExpr{ + Lparen: lparen, + X: x, + Rparen: t.pos, + } + t.Next() + return expr, nil + case token.INT, token.FLOAT, token.STRING, token.CHAR: + expr := &ast.BasicLit{ + ValuePos: t.pos, + Kind: t.token, + Value: t.value, + } + t.Next() + return expr, nil + case token.EOF: + return nil, &scanner.Error{ + Pos: t.fset.Position(t.pos), + Msg: "empty constant", + } + default: + return nil, &scanner.Error{ + Pos: t.fset.Position(t.pos), + Msg: fmt.Sprintf("unexpected token %s", t.token), + } } - // For information about integer literals: - // https://en.cppreference.com/w/cpp/language/integer_literal - if value[0] == '"' { - // string constant - return &ast.BasicLit{ValuePos: pos, Kind: token.STRING, Value: value} +} + +// unexpectedToken returns an error of the form "unexpected token FOO, expected +// BAR". +func unexpectedToken(t *tokenizer, expected token.Token) *scanner.Error { + return &scanner.Error{ + Pos: t.fset.Position(t.pos), + Msg: fmt.Sprintf("unexpected token %s, expected %s", t.token, expected), } - if value[0] == '\'' { - // char constant - return &ast.BasicLit{ValuePos: pos, Kind: token.CHAR, Value: value} +} + +// tokenizer reads C source code and converts it to Go tokens. +type tokenizer struct { + pos token.Pos + fset *token.FileSet + token token.Token + value string + buf string +} + +// newTokenizer initializes a new tokenizer, positioned at the first token in +// the string. +func newTokenizer(start token.Pos, fset *token.FileSet, buf string) *tokenizer { + t := &tokenizer{ + pos: start, + fset: fset, + buf: buf, + token: token.ILLEGAL, } - // assume it's a number (int or float) - value = strings.Replace(value, "'", "", -1) // remove ' chars - value = strings.TrimRight(value, "lu") // remove llu suffixes etc. - // find the first non-number - nonnum := byte(0) - for i := 0; i < len(value); i++ { - if value[i] < '0' || value[i] > '9' { - nonnum = value[i] - break + t.Next() // Parse the first token. + return t +} + +// Next consumes the next token in the stream. There is no return value, read +// the next token from the pos, token and value properties. +func (t *tokenizer) Next() { + t.pos += token.Pos(len(t.value)) + for { + if len(t.buf) == 0 { + t.token = token.EOF + return + } + c := t.buf[0] + switch { + case c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v': + // Skip whitespace. + // Based on this source, not sure whether it represents C whitespace: + // https://en.cppreference.com/w/cpp/string/byte/isspace + t.pos++ + t.buf = t.buf[1:] + case c == '(' || c == ')': + // Single-character tokens. + switch c { + case '(': + t.token = token.LPAREN + case ')': + t.token = token.RPAREN + } + t.value = t.buf[:1] + t.buf = t.buf[1:] + return + case c >= '0' && c <= '9': + // Numeric constant (int, float, etc.). + // Find the last non-numeric character. + tokenLen := len(t.buf) + hasDot := false + for i, c := range t.buf { + if c == '.' { + hasDot = true + } + if (c >= '0' && c <= '9') || c == '.' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') { + tokenLen = i + 1 + } + } + t.value = t.buf[:tokenLen] + t.buf = t.buf[tokenLen:] + if hasDot { + // Integer constants are more complicated than this but this is + // a close approximation. + // https://en.cppreference.com/w/cpp/language/integer_literal + t.token = token.FLOAT + t.value = strings.TrimRight(t.value, "f") + } else { + t.token = token.INT + t.value = strings.TrimRight(t.value, "uUlL") + } + return + case c == '"': + // String constant. Find the first '"' character that is not + // preceded by a backslash. + escape := false + tokenLen := len(t.buf) + for i, c := range t.buf { + if i != 0 && c == '"' && !escape { + tokenLen = i + 1 + break + } + if !escape { + escape = c == '\\' + } + } + t.token = token.STRING + t.value = t.buf[:tokenLen] + t.buf = t.buf[tokenLen:] + return + case c == '\'': + // Char (rune) constant. Find the first '\'' character that is not + // preceded by a backslash. + escape := false + tokenLen := len(t.buf) + for i, c := range t.buf { + if i != 0 && c == '\'' && !escape { + tokenLen = i + 1 + break + } + if !escape { + escape = c == '\\' + } + } + t.token = token.CHAR + t.value = t.buf[:tokenLen] + t.buf = t.buf[tokenLen:] + return + default: + t.token = token.ILLEGAL + return } } - // determine number type based on the first non-number - switch nonnum { - case 0: - // no non-number found, must be an integer - return &ast.BasicLit{ValuePos: pos, Kind: token.INT, Value: value} - case 'x', 'X': - // hex integer constant - // TODO: may also be a floating point number per C++17. - return &ast.BasicLit{ValuePos: pos, Kind: token.INT, Value: value} - case '.', 'e': - // float constant - value = strings.TrimRight(value, "fFlL") - return &ast.BasicLit{ValuePos: pos, Kind: token.FLOAT, Value: value} - default: - // unknown type, ignore - } - return nil } diff --git a/cgo/const_test.go b/cgo/const_test.go index 27ed8b772..235a7117b 100644 --- a/cgo/const_test.go +++ b/cgo/const_test.go @@ -4,6 +4,7 @@ import ( "bytes" "go/format" "go/token" + "strings" "testing" ) @@ -14,20 +15,33 @@ func TestParseConst(t *testing.T) { Go string }{ {`5`, `5`}, - {`(5)`, `5`}, - {`(((5)))`, `5`}, + {`(5)`, `(5)`}, + {`(((5)))`, `(5)`}, + {`)`, `error: 1:1: unexpected token )`}, + {`5)`, `error: 1:2: unexpected token )`}, + {" \t)", `error: 1:4: unexpected token )`}, {`5.8f`, `5.8`}, - {`foo`, `<invalid>`}, // identifiers unimplemented - {``, `<invalid>`}, // empty constants not allowed in Go + {`foo`, `error: 1:1: unexpected token ILLEGAL`}, // identifiers unimplemented + {``, `error: 1:1: empty constant`}, // empty constants not allowed in Go {`"foo"`, `"foo"`}, + {`"a\\n"`, `"a\\n"`}, + {`"a\n"`, `"a\n"`}, + {`"a\""`, `"a\""`}, {`'a'`, `'a'`}, - {`0b10`, `<invalid>`}, // binary number literals unimplemented + {`0b10`, `0b10`}, + {`0x1234_5678`, `0x1234_5678`}, } { fset := token.NewFileSet() - startPos := fset.AddFile("test.c", -1, 1000).Pos(0) - expr := parseConst(startPos, tc.C) + startPos := fset.AddFile("", -1, 1000).Pos(0) + expr, err := parseConst(startPos, fset, tc.C) s := "<invalid>" - if expr != nil { + if err != nil { + if !strings.HasPrefix(tc.Go, "error: ") { + t.Errorf("expected value %#v for C constant %#v but got error %#v", tc.Go, tc.C, err.Error()) + continue + } + s = "error: " + err.Error() + } else if expr != nil { // Serialize the Go constant to a string, for more readable test // cases. buf := &bytes.Buffer{} diff --git a/cgo/libclang.go b/cgo/libclang.go index efefe370b..a4817a4f1 100644 --- a/cgo/libclang.go +++ b/cgo/libclang.go @@ -245,9 +245,12 @@ func tinygo_clang_globals_visitor(c, parent C.GoCXCursor, client_data C.CXClient p.addError(pos, fmt.Sprintf("internal error: expected macro value to start with %#v, got %#v", name, source)) break } - value := strings.TrimSpace(source[len(name):]) + value := source[len(name):] // Try to convert this #define into a Go constant expression. - expr := parseConst(pos, value) + expr, err := parseConst(pos+token.Pos(len(name)), p.fset, value) + if err != nil { + p.errors = append(p.errors, err) + } if expr != nil { // Parsing was successful. p.constants[name] = constantInfo{expr, pos} |