aboutsummaryrefslogtreecommitdiffhomepage
path: root/cgo
diff options
context:
space:
mode:
author: Ayke van Laethem <[email protected]> 2019-11-04 16:30:57 +0100
committer: Ron Evans <[email protected]> 2019-11-05 14:18:38 +0100
commit: cadb75a4aa1cdf9a447842013e5616c2b2e4b894 (patch)
tree: 1f66cfc66ae98f77716b587cd15ef7d2f6e72f52 /cgo
parent: 5987233b99c7b78f28a358ddf98f74853c27635b (diff)
download: tinygo-cadb75a4aa1cdf9a447842013e5616c2b2e4b894.tar.gz
tinygo-cadb75a4aa1cdf9a447842013e5616c2b2e4b894.zip
cgo: implement the constant parser as a real parser
Previously it was just a combination of heuristics that tried to fit a constant into an *ast.BasicLit. For more complex expressions, this is not enough. This change also introduces proper syntax errors with source locations when parsing a constant fails. For example, this will now print a real error message with a source location: #define FOO 5)
Diffstat (limited to 'cgo')
-rw-r--r-- cgo/cgo.go 2
-rw-r--r-- cgo/const.go 215
-rw-r--r-- cgo/const_test.go 30
-rw-r--r-- cgo/libclang.go 7
4 files changed, 203 insertions, 51 deletions
diff --git a/cgo/cgo.go b/cgo/cgo.go
index d42034c1a..9867b875e 100644
--- a/cgo/cgo.go
+++ b/cgo/cgo.go
@@ -42,7 +42,7 @@ type cgoPackage struct {
// constantInfo stores some information about a CGo constant found by libclang
// and declared in the Go AST.
type constantInfo struct {
- expr *ast.BasicLit
+ expr ast.Expr
pos token.Pos
}
diff --git a/cgo/const.go b/cgo/const.go
index 831515e7c..97483a5cf 100644
--- a/cgo/const.go
+++ b/cgo/const.go
@@ -4,56 +4,191 @@ package cgo
// parse common #define statements to Go constant expressions.
import (
+ "fmt"
"go/ast"
+ "go/scanner"
"go/token"
"strings"
)
// parseConst parses the given string as a C constant.
-func parseConst(pos token.Pos, value string) *ast.BasicLit {
- for len(value) != 0 && value[0] == '(' && value[len(value)-1] == ')' {
- value = strings.TrimSpace(value[1 : len(value)-1])
+func parseConst(pos token.Pos, fset *token.FileSet, value string) (ast.Expr, *scanner.Error) {
+ t := newTokenizer(pos, fset, value)
+ expr, err := parseConstExpr(t)
+ if t.token != token.EOF {
+ return nil, &scanner.Error{
+ Pos: t.fset.Position(t.pos),
+ Msg: "unexpected token " + t.token.String(),
+ }
}
- if len(value) == 0 {
- // Pretend it doesn't exist at all.
- return nil
+ return expr, err
+}
+
+// parseConstExpr parses a stream of C tokens to a Go expression.
+func parseConstExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
+ switch t.token {
+ case token.LPAREN:
+ lparen := t.pos
+ t.Next()
+ x, err := parseConstExpr(t)
+ if err != nil {
+ return nil, err
+ }
+ if t.token != token.RPAREN {
+ return nil, unexpectedToken(t, token.RPAREN)
+ }
+ expr := &ast.ParenExpr{
+ Lparen: lparen,
+ X: x,
+ Rparen: t.pos,
+ }
+ t.Next()
+ return expr, nil
+ case token.INT, token.FLOAT, token.STRING, token.CHAR:
+ expr := &ast.BasicLit{
+ ValuePos: t.pos,
+ Kind: t.token,
+ Value: t.value,
+ }
+ t.Next()
+ return expr, nil
+ case token.EOF:
+ return nil, &scanner.Error{
+ Pos: t.fset.Position(t.pos),
+ Msg: "empty constant",
+ }
+ default:
+ return nil, &scanner.Error{
+ Pos: t.fset.Position(t.pos),
+ Msg: fmt.Sprintf("unexpected token %s", t.token),
+ }
}
- // For information about integer literals:
- // https://en.cppreference.com/w/cpp/language/integer_literal
- if value[0] == '"' {
- // string constant
- return &ast.BasicLit{ValuePos: pos, Kind: token.STRING, Value: value}
+}
+
+// unexpectedToken returns an error of the form "unexpected token FOO, expected
+// BAR".
+func unexpectedToken(t *tokenizer, expected token.Token) *scanner.Error {
+ return &scanner.Error{
+ Pos: t.fset.Position(t.pos),
+ Msg: fmt.Sprintf("unexpected token %s, expected %s", t.token, expected),
}
- if value[0] == '\'' {
- // char constant
- return &ast.BasicLit{ValuePos: pos, Kind: token.CHAR, Value: value}
+}
+
+// tokenizer reads C source code and converts it to Go tokens.
+type tokenizer struct {
+ pos token.Pos
+ fset *token.FileSet
+ token token.Token
+ value string
+ buf string
+}
+
+// newTokenizer initializes a new tokenizer, positioned at the first token in
+// the string.
+func newTokenizer(start token.Pos, fset *token.FileSet, buf string) *tokenizer {
+ t := &tokenizer{
+ pos: start,
+ fset: fset,
+ buf: buf,
+ token: token.ILLEGAL,
}
- // assume it's a number (int or float)
- value = strings.Replace(value, "'", "", -1) // remove ' chars
- value = strings.TrimRight(value, "lu") // remove llu suffixes etc.
- // find the first non-number
- nonnum := byte(0)
- for i := 0; i < len(value); i++ {
- if value[i] < '0' || value[i] > '9' {
- nonnum = value[i]
- break
+ t.Next() // Parse the first token.
+ return t
+}
+
+// Next consumes the next token in the stream. There is no return value, read
+// the next token from the pos, token and value properties.
+func (t *tokenizer) Next() {
+ t.pos += token.Pos(len(t.value))
+ for {
+ if len(t.buf) == 0 {
+ t.token = token.EOF
+ return
+ }
+ c := t.buf[0]
+ switch {
+ case c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v':
+ // Skip whitespace.
+ // Based on this source, not sure whether it represents C whitespace:
+ // https://en.cppreference.com/w/cpp/string/byte/isspace
+ t.pos++
+ t.buf = t.buf[1:]
+ case c == '(' || c == ')':
+ // Single-character tokens.
+ switch c {
+ case '(':
+ t.token = token.LPAREN
+ case ')':
+ t.token = token.RPAREN
+ }
+ t.value = t.buf[:1]
+ t.buf = t.buf[1:]
+ return
+ case c >= '0' && c <= '9':
+ // Numeric constant (int, float, etc.).
+ // Find the end of the token: one past the last digit, letter or '.' in the buffer.
+ tokenLen := len(t.buf)
+ hasDot := false
+ for i, c := range t.buf {
+ if c == '.' {
+ hasDot = true
+ }
+ if (c >= '0' && c <= '9') || c == '.' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') {
+ tokenLen = i + 1
+ }
+ }
+ t.value = t.buf[:tokenLen]
+ t.buf = t.buf[tokenLen:]
+ if hasDot {
+ // Integer constants are more complicated than this but this is
+ // a close approximation.
+ // https://en.cppreference.com/w/cpp/language/integer_literal
+ t.token = token.FLOAT
+ t.value = strings.TrimRight(t.value, "f")
+ } else {
+ t.token = token.INT
+ t.value = strings.TrimRight(t.value, "uUlL")
+ }
+ return
+ case c == '"':
+ // String constant. Find the first '"' character that is not
+ // preceded by a backslash.
+ escape := false
+ tokenLen := len(t.buf)
+ for i, c := range t.buf {
+ if i != 0 && c == '"' && !escape {
+ tokenLen = i + 1
+ break
+ }
+ if !escape {
+ escape = c == '\\'
+ }
+ }
+ t.token = token.STRING
+ t.value = t.buf[:tokenLen]
+ t.buf = t.buf[tokenLen:]
+ return
+ case c == '\'':
+ // Char (rune) constant. Find the first '\'' character that is not
+ // preceded by a backslash.
+ escape := false
+ tokenLen := len(t.buf)
+ for i, c := range t.buf {
+ if i != 0 && c == '\'' && !escape {
+ tokenLen = i + 1
+ break
+ }
+ if !escape {
+ escape = c == '\\'
+ }
+ }
+ t.token = token.CHAR
+ t.value = t.buf[:tokenLen]
+ t.buf = t.buf[tokenLen:]
+ return
+ default:
+ t.token = token.ILLEGAL
+ return
}
}
- // determine number type based on the first non-number
- switch nonnum {
- case 0:
- // no non-number found, must be an integer
- return &ast.BasicLit{ValuePos: pos, Kind: token.INT, Value: value}
- case 'x', 'X':
- // hex integer constant
- // TODO: may also be a floating point number per C++17.
- return &ast.BasicLit{ValuePos: pos, Kind: token.INT, Value: value}
- case '.', 'e':
- // float constant
- value = strings.TrimRight(value, "fFlL")
- return &ast.BasicLit{ValuePos: pos, Kind: token.FLOAT, Value: value}
- default:
- // unknown type, ignore
- }
- return nil
}
diff --git a/cgo/const_test.go b/cgo/const_test.go
index 27ed8b772..235a7117b 100644
--- a/cgo/const_test.go
+++ b/cgo/const_test.go
@@ -4,6 +4,7 @@ import (
"bytes"
"go/format"
"go/token"
+ "strings"
"testing"
)
@@ -14,20 +15,33 @@ func TestParseConst(t *testing.T) {
Go string
}{
{`5`, `5`},
- {`(5)`, `5`},
- {`(((5)))`, `5`},
+ {`(5)`, `(5)`},
+ {`(((5)))`, `(5)`},
+ {`)`, `error: 1:1: unexpected token )`},
+ {`5)`, `error: 1:2: unexpected token )`},
+ {" \t)", `error: 1:4: unexpected token )`},
{`5.8f`, `5.8`},
- {`foo`, `<invalid>`}, // identifiers unimplemented
- {``, `<invalid>`}, // empty constants not allowed in Go
+ {`foo`, `error: 1:1: unexpected token ILLEGAL`}, // identifiers unimplemented
+ {``, `error: 1:1: empty constant`}, // empty constants not allowed in Go
{`"foo"`, `"foo"`},
+ {`"a\\n"`, `"a\\n"`},
+ {`"a\n"`, `"a\n"`},
+ {`"a\""`, `"a\""`},
{`'a'`, `'a'`},
- {`0b10`, `<invalid>`}, // binary number literals unimplemented
+ {`0b10`, `0b10`},
+ {`0x1234_5678`, `0x1234_5678`},
} {
fset := token.NewFileSet()
- startPos := fset.AddFile("test.c", -1, 1000).Pos(0)
- expr := parseConst(startPos, tc.C)
+ startPos := fset.AddFile("", -1, 1000).Pos(0)
+ expr, err := parseConst(startPos, fset, tc.C)
s := "<invalid>"
- if expr != nil {
+ if err != nil {
+ if !strings.HasPrefix(tc.Go, "error: ") {
+ t.Errorf("expected value %#v for C constant %#v but got error %#v", tc.Go, tc.C, err.Error())
+ continue
+ }
+ s = "error: " + err.Error()
+ } else if expr != nil {
// Serialize the Go constant to a string, for more readable test
// cases.
buf := &bytes.Buffer{}
diff --git a/cgo/libclang.go b/cgo/libclang.go
index efefe370b..a4817a4f1 100644
--- a/cgo/libclang.go
+++ b/cgo/libclang.go
@@ -245,9 +245,12 @@ func tinygo_clang_globals_visitor(c, parent C.GoCXCursor, client_data C.CXClient
p.addError(pos, fmt.Sprintf("internal error: expected macro value to start with %#v, got %#v", name, source))
break
}
- value := strings.TrimSpace(source[len(name):])
+ value := source[len(name):]
// Try to convert this #define into a Go constant expression.
- expr := parseConst(pos, value)
+ expr, err := parseConst(pos+token.Pos(len(name)), p.fset, value)
+ if err != nil {
+ p.errors = append(p.errors, err)
+ }
if expr != nil {
// Parsing was successful.
p.constants[name] = constantInfo{expr, pos}