author    Ayke van Laethem <[email protected]>  2021-05-20 16:11:10 +0200
committer Ron Evans <[email protected]>         2021-05-21 17:54:13 +0200
commit    70f8eeaca0288af63ad31229acad96933799900d (patch)
tree      fb3504f7aaa5751cd5327e9f93e6f11bfa615c0c /cgo
parent    3339d0f47e3083fba80a7335c3401eb0f7a93298 (diff)
cgo: parse binary operators
This follows the Pratt parser design.
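
For readers new to the approach: a Pratt parser parses a prefix expression first, then keeps folding binary operators into the left-hand side for as long as the upcoming operator binds more tightly than the current precedence level. The standalone Go sketch below shows that core loop reduced to a toy calculator over pre-split tokens; the names (lexer, parseExpr, prec) are illustrative only, not the ones used in cgo/const.go.

package main

import (
	"fmt"
	"strings"
)

// lexer is a toy token stream over pre-split tokens.
type lexer struct {
	toks []string
	i    int
}

func (l *lexer) cur() string {
	if l.i < len(l.toks) {
		return l.toks[l.i]
	}
	return "" // end of input
}

func (l *lexer) next() { l.i++ }

// prec assigns binding power; unknown tokens (including end of input) get 0.
var prec = map[string]int{"+": 1, "-": 1, "*": 2, "/": 2, "%": 2}

// parseExpr parses a prefix expression (here: just a number), then folds
// binary operators into it while the next operator binds tighter than minPrec.
func parseExpr(l *lexer, minPrec int) string {
	left := l.cur()
	l.next()
	for prec[l.cur()] > minPrec {
		op := l.cur()
		l.next()
		// Parse the right operand at the operator's own precedence so that a
		// following tighter operator attaches to the right operand instead.
		right := parseExpr(l, prec[op])
		left = fmt.Sprintf("(%s %s %s)", left, op, right)
	}
	return left
}

func main() {
	l := &lexer{toks: strings.Fields("1 - 2 * 3")}
	fmt.Println(parseExpr(l, 0)) // (1 - (2 * 3))
}

Because the right operand is parsed at the operator's own precedence, "1 - 2 * 3" groups as (1 - (2 * 3)), while "1 - 2 - 3" stays left-associative as ((1 - 2) - 3) — the same behavior the new tests below expect.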
Diffstat (limited to 'cgo')
-rw-r--r--  cgo/const.go                159
-rw-r--r--  cgo/const_test.go            13
-rw-r--r--  cgo/testdata/errors.out.go    2
3 files changed, 122 insertions(+), 52 deletions(-)
diff --git a/cgo/const.go b/cgo/const.go
index 6e7d7ade4..245bb899f 100644
--- a/cgo/const.go
+++ b/cgo/const.go
@@ -11,7 +11,23 @@ import (
"strings"
)
-var prefixParseFns map[token.Token]func(*tokenizer) (ast.Expr, *scanner.Error)
+var (
+ prefixParseFns map[token.Token]func(*tokenizer) (ast.Expr, *scanner.Error)
+ precedences = map[token.Token]int{
+ token.ADD: precedenceAdd,
+ token.SUB: precedenceAdd,
+ token.MUL: precedenceMul,
+ token.QUO: precedenceMul,
+ token.REM: precedenceMul,
+ }
+)
+
+const (
+ precedenceLowest = iota + 1
+ precedenceAdd
+ precedenceMul
+ precedencePrefix
+)
func init() {
// This must be done in an init function to avoid an initialization order
@@ -29,108 +45,138 @@ func init() {
// parseConst parses the given string as a C constant.
func parseConst(pos token.Pos, fset *token.FileSet, value string) (ast.Expr, *scanner.Error) {
t := newTokenizer(pos, fset, value)
- expr, err := parseConstExpr(t)
+ expr, err := parseConstExpr(t, precedenceLowest)
t.Next()
- if t.token != token.EOF {
+ if t.curToken != token.EOF {
return nil, &scanner.Error{
- Pos: t.fset.Position(t.pos),
- Msg: "unexpected token " + t.token.String() + ", expected end of expression",
+ Pos: t.fset.Position(t.curPos),
+ Msg: "unexpected token " + t.curToken.String() + ", expected end of expression",
}
}
return expr, err
}
// parseConstExpr parses a stream of C tokens to a Go expression.
-func parseConstExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
- if t.token == token.EOF {
+func parseConstExpr(t *tokenizer, precedence int) (ast.Expr, *scanner.Error) {
+ if t.curToken == token.EOF {
return nil, &scanner.Error{
- Pos: t.fset.Position(t.pos),
+ Pos: t.fset.Position(t.curPos),
Msg: "empty constant",
}
}
- prefix := prefixParseFns[t.token]
+ prefix := prefixParseFns[t.curToken]
if prefix == nil {
return nil, &scanner.Error{
- Pos: t.fset.Position(t.pos),
- Msg: fmt.Sprintf("unexpected token %s", t.token),
+ Pos: t.fset.Position(t.curPos),
+ Msg: fmt.Sprintf("unexpected token %s", t.curToken),
}
}
leftExpr, err := prefix(t)
+
+ for t.peekToken != token.EOF && precedence < precedences[t.peekToken] {
+ switch t.peekToken {
+ case token.ADD, token.SUB, token.MUL, token.QUO, token.REM:
+ t.Next()
+ leftExpr, err = parseBinaryExpr(t, leftExpr)
+ }
+ }
+
return leftExpr, err
}
func parseIdent(t *tokenizer) (ast.Expr, *scanner.Error) {
return &ast.Ident{
- NamePos: t.pos,
- Name: "C." + t.value,
+ NamePos: t.curPos,
+ Name: "C." + t.curValue,
}, nil
}
func parseBasicLit(t *tokenizer) (ast.Expr, *scanner.Error) {
return &ast.BasicLit{
- ValuePos: t.pos,
- Kind: t.token,
- Value: t.value,
+ ValuePos: t.curPos,
+ Kind: t.curToken,
+ Value: t.curValue,
}, nil
}
func parseParenExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
- lparen := t.pos
+ lparen := t.curPos
t.Next()
- x, err := parseConstExpr(t)
+ x, err := parseConstExpr(t, precedenceLowest)
if err != nil {
return nil, err
}
t.Next()
- if t.token != token.RPAREN {
+ if t.curToken != token.RPAREN {
return nil, unexpectedToken(t, token.RPAREN)
}
expr := &ast.ParenExpr{
Lparen: lparen,
X: x,
- Rparen: t.pos,
+ Rparen: t.curPos,
}
return expr, nil
}
+func parseBinaryExpr(t *tokenizer, left ast.Expr) (ast.Expr, *scanner.Error) {
+ expression := &ast.BinaryExpr{
+ X: left,
+ Op: t.curToken,
+ OpPos: t.curPos,
+ }
+ precedence := precedences[t.curToken]
+ t.Next()
+ right, err := parseConstExpr(t, precedence)
+ expression.Y = right
+ return expression, err
+}
+
// unexpectedToken returns an error of the form "unexpected token FOO, expected
// BAR".
func unexpectedToken(t *tokenizer, expected token.Token) *scanner.Error {
return &scanner.Error{
- Pos: t.fset.Position(t.pos),
- Msg: fmt.Sprintf("unexpected token %s, expected %s", t.token, expected),
+ Pos: t.fset.Position(t.curPos),
+ Msg: fmt.Sprintf("unexpected token %s, expected %s", t.curToken, expected),
}
}
// tokenizer reads C source code and converts it to Go tokens.
type tokenizer struct {
- pos token.Pos
- fset *token.FileSet
- token token.Token
- value string
- buf string
+ curPos, peekPos token.Pos
+ fset *token.FileSet
+ curToken, peekToken token.Token
+ curValue, peekValue string
+ buf string
}
// newTokenizer initializes a new tokenizer, positioned at the first token in
// the string.
func newTokenizer(start token.Pos, fset *token.FileSet, buf string) *tokenizer {
t := &tokenizer{
- pos: start,
- fset: fset,
- buf: buf,
- token: token.ILLEGAL,
+ peekPos: start,
+ fset: fset,
+ buf: buf,
+ peekToken: token.ILLEGAL,
}
- t.Next() // Parse the first token.
+ // Parse the first two tokens (cur and peek).
+ t.Next()
+ t.Next()
return t
}
-// Next consumes the next token in the stream. There is no return value, read
-// the next token from the pos, token and value properties.
+// Next consumes the next token in the stream. There is no return value; read
+// the next token from the curPos, curToken and curValue properties.
func (t *tokenizer) Next() {
- t.pos += token.Pos(len(t.value))
+ // The previous peek is now the current token.
+ t.curPos = t.peekPos
+ t.curToken = t.peekToken
+ t.curValue = t.peekValue
+
+ // Parse the next peek token.
+ t.peekPos += token.Pos(len(t.curValue))
for {
if len(t.buf) == 0 {
- t.token = token.EOF
+ t.peekToken = token.EOF
return
}
c := t.buf[0]
@@ -139,17 +185,28 @@ func (t *tokenizer) Next() {
// Skip whitespace.
// Based on this source, not sure whether it represents C whitespace:
// https://en.cppreference.com/w/cpp/string/byte/isspace
- t.pos++
+ t.peekPos++
t.buf = t.buf[1:]
- case c == '(' || c == ')':
+ case c == '(' || c == ')' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%':
// Single-character tokens.
+ // TODO: ++ (increment) and -- (decrement) operators.
switch c {
case '(':
- t.token = token.LPAREN
+ t.peekToken = token.LPAREN
case ')':
- t.token = token.RPAREN
+ t.peekToken = token.RPAREN
+ case '+':
+ t.peekToken = token.ADD
+ case '-':
+ t.peekToken = token.SUB
+ case '*':
+ t.peekToken = token.MUL
+ case '/':
+ t.peekToken = token.QUO
+ case '%':
+ t.peekToken = token.REM
}
- t.value = t.buf[:1]
+ t.peekValue = t.buf[:1]
t.buf = t.buf[1:]
return
case c >= '0' && c <= '9':
@@ -167,17 +224,17 @@ func (t *tokenizer) Next() {
break
}
}
- t.value = t.buf[:tokenLen]
+ t.peekValue = t.buf[:tokenLen]
t.buf = t.buf[tokenLen:]
if hasDot {
// Integer constants are more complicated than this but this is
// a close approximation.
// https://en.cppreference.com/w/cpp/language/integer_literal
- t.token = token.FLOAT
- t.value = strings.TrimRight(t.value, "f")
+ t.peekToken = token.FLOAT
+ t.peekValue = strings.TrimRight(t.peekValue, "f")
} else {
- t.token = token.INT
- t.value = strings.TrimRight(t.value, "uUlL")
+ t.peekToken = token.INT
+ t.peekValue = strings.TrimRight(t.peekValue, "uUlL")
}
return
case c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_':
@@ -191,9 +248,9 @@ func (t *tokenizer) Next() {
break
}
}
- t.value = t.buf[:tokenLen]
+ t.peekValue = t.buf[:tokenLen]
t.buf = t.buf[tokenLen:]
- t.token = token.IDENT
+ t.peekToken = token.IDENT
return
case c == '"':
// String constant. Find the first '"' character that is not
@@ -209,8 +266,8 @@ func (t *tokenizer) Next() {
escape = c == '\\'
}
}
- t.token = token.STRING
- t.value = t.buf[:tokenLen]
+ t.peekToken = token.STRING
+ t.peekValue = t.buf[:tokenLen]
t.buf = t.buf[tokenLen:]
return
case c == '\'':
@@ -227,12 +284,12 @@ func (t *tokenizer) Next() {
escape = c == '\\'
}
}
- t.token = token.CHAR
- t.value = t.buf[:tokenLen]
+ t.peekToken = token.CHAR
+ t.peekValue = t.buf[:tokenLen]
t.buf = t.buf[tokenLen:]
return
default:
- t.token = token.ILLEGAL
+ t.peekToken = token.ILLEGAL
return
}
}
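
One token of lookahead is what lets the loop in parseConstExpr inspect the next operator without consuming it, and it is why this patch widens the tokenizer's single pos/token/value into cur*/peek* pairs. Below is a minimal sketch of that buffering pattern; the names (twoTokenStream, scanOne) are hypothetical stand-ins for the tokenizer's real scanning switch.

package main

import "fmt"

// twoTokenStream mimics the cur/peek buffering added above.
type twoTokenStream struct {
	cur, peek string
	rest      []string // pretend input, already split into tokens
}

// scanOne stands in for the real scanning switch.
func (t *twoTokenStream) scanOne() string {
	if len(t.rest) == 0 {
		return "" // EOF
	}
	tok := t.rest[0]
	t.rest = t.rest[1:]
	return tok
}

// Next shifts peek into cur, then scans one token ahead, just like the
// patched tokenizer.Next.
func (t *twoTokenStream) Next() {
	t.cur = t.peek
	t.peek = t.scanOne()
}

func newTwoTokenStream(toks []string) *twoTokenStream {
	t := &twoTokenStream{rest: toks}
	t.Next() // fill peek
	t.Next() // shift into cur, refill peek
	return t
}

func main() {
	t := newTwoTokenStream([]string{"1", "+", "2"})
	fmt.Println(t.cur, t.peek) // 1 +
	t.Next()
	fmt.Println(t.cur, t.peek) // + 2
}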
diff --git a/cgo/const_test.go b/cgo/const_test.go
index 1402c18e2..c984ec6f6 100644
--- a/cgo/const_test.go
+++ b/cgo/const_test.go
@@ -31,6 +31,19 @@ func TestParseConst(t *testing.T) {
{`0b10`, `0b10`},
{`0x1234_5678`, `0x1234_5678`},
{`5 5`, `error: 1:3: unexpected token INT, expected end of expression`}, // test for a bugfix
+ // Binary operators.
+ {`5+5`, `5 + 5`},
+ {`5-5`, `5 - 5`},
+ {`5*5`, `5 * 5`},
+ {`5/5`, `5 / 5`},
+ {`5%5`, `5 % 5`},
+ {`(5/5)`, `(5 / 5)`},
+ {`1 - 2`, `1 - 2`},
+ {`1 - 2 + 3`, `1 - 2 + 3`},
+ {`1 - 2 * 3`, `1 - 2*3`},
+ {`(1 - 2) * 3`, `(1 - 2) * 3`},
+ {`1 * 2 - 3`, `1*2 - 3`},
+ {`1 * (2 - 3)`, `1 * (2 - 3)`},
} {
fset := token.NewFileSet()
startPos := fset.AddFile("", -1, 1000).Pos(0)
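
The uneven spacing in the expected strings is deliberate: assuming the test harness renders the parsed ast.Expr through Go's standard printer (go/format or go/printer), the printer drops the spaces around the tighter-binding operator, so the expected string itself encodes the grouping the parser produced. A quick way to observe the same behavior outside the test, using only the standard library:

package main

import (
	"fmt"
	"go/format"
)

func main() {
	// gofmt keeps spaces at the top precedence level and drops them around
	// the tighter-binding subexpression, so "1 - 2*3" shows that 2*3 was
	// grouped first.
	out, err := format.Source([]byte("var x = 1 - 2 * 3"))
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s", out) // var x = 1 - 2*3
}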
diff --git a/cgo/testdata/errors.out.go b/cgo/testdata/errors.out.go
index b5d697856..065af2d54 100644
--- a/cgo/testdata/errors.out.go
+++ b/cgo/testdata/errors.out.go
@@ -1,7 +1,7 @@
// CGo errors:
// testdata/errors.go:4:2: warning: some warning
// testdata/errors.go:11:9: error: unknown type name 'someType'
-// testdata/errors.go:13:23: unexpected token )
+// testdata/errors.go:13:23: unexpected token ), expected end of expression
// Type checking errors after CGo processing:
// testdata/errors.go:102: cannot use 2 << 10 (untyped int constant 2048) as uint8 value in variable declaration (overflows)