author     Bjørn Erik Pedersen <[email protected]>  2018-12-17 20:54:06 +0100
committer  Bjørn Erik Pedersen <[email protected]>  2018-12-20 20:08:01 +0100
commit     a8853f1c5ace30ae8d256ad374bdb280c95d4228 (patch)
tree       db4bdd65b5ae9bfe7894a4a9bdf687a5d0063381
parent     4d93aca27dfdebc9e06948ccf37a7922dac09d65 (diff)
download   hugo-a8853f1c5ace30ae8d256ad374bdb280c95d4228.tar.gz
           hugo-a8853f1c5ace30ae8d256ad374bdb280c95d4228.zip
parser/pageparser: Split the page lexer into some more files
See #5534
-rw-r--r--  parser/pageparser/pagelexer.go            | 489
-rw-r--r--  parser/pageparser/pagelexer_intro.go      | 202
-rw-r--r--  parser/pageparser/pagelexer_shortcode.go  | 322
3 files changed, 524 insertions(+), 489 deletions(-)
diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go
index 5802c318b..d11e88403 100644
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@@ -29,18 +29,6 @@ const eof = -1
// returns the next state in scanner.
type stateFunc func(*pageLexer) stateFunc
-type lexerShortcodeState struct {
- currLeftDelimItem ItemType
- currRightDelimItem ItemType
- isInline bool
- currShortcodeName string // is only set when a shortcode is in opened state
- closingState int // > 0 = on its way to be closed
- elementStepNum int // step number in element
- paramElements int // number of elements (name + value = 2) found first
- openShortcodes map[string]bool // set of shortcodes in open state
-
-}
-
type pageLexer struct {
input []byte
stateStart stateFunc
@@ -102,17 +90,6 @@ func (l *pageLexer) run() *pageLexer {
return l
}
-// Shortcode syntax
-var (
- leftDelimSc = []byte("{{")
- leftDelimScNoMarkup = []byte("{{<")
- rightDelimScNoMarkup = []byte(">}}")
- leftDelimScWithMarkup = []byte("{{%")
- rightDelimScWithMarkup = []byte("%}}")
- leftComment = []byte("/*") // comments in this context are used to mark shortcodes as "not really a shortcode"
- rightComment = []byte("*/")
-)
-
// Page syntax
var (
byteOrderMark = '\ufeff'
@@ -293,11 +270,6 @@ func lexMainSection(l *pageLexer) stateFunc {
}
-func (l *pageLexer) isShortCodeStart() bool {
- return l.hasPrefix(leftDelimScWithMarkup) || l.hasPrefix(leftDelimScNoMarkup)
-
-}
-
func (l *pageLexer) posFirstNonWhiteSpace() int {
f := func(c rune) bool {
return !unicode.IsSpace(c)
@@ -305,69 +277,6 @@ func (l *pageLexer) posFirstNonWhiteSpace() int {
return bytes.IndexFunc(l.input[l.pos:], f)
}
-func lexIntroSection(l *pageLexer) stateFunc {
- l.summaryDivider = summaryDivider
-
-LOOP:
- for {
- r := l.next()
- if r == eof {
- break
- }
-
- switch {
- case r == '+':
- return l.lexFrontMatterSection(TypeFrontMatterTOML, r, "TOML", delimTOML)
- case r == '-':
- return l.lexFrontMatterSection(TypeFrontMatterYAML, r, "YAML", delimYAML)
- case r == '{':
- return lexFrontMatterJSON
- case r == '#':
- return lexFrontMatterOrgMode
- case r == byteOrderMark:
- l.emit(TypeIgnore)
- case !isSpace(r) && !isEndOfLine(r):
- if r == '<' {
- l.backup()
- if l.hasPrefix(htmlCommentStart) {
- // This may be commented-out front matter, which should
- // still be read.
- l.consumeToNextLine()
- l.isInHTMLComment = true
- l.emit(TypeIgnore)
- continue LOOP
- } else {
- if l.pos > l.start {
- l.emit(tText)
- }
- l.next()
- // This is the start of a plain HTML document with no
- // front matter. It can still contain shortcodes, so we
- // have to keep looking.
- l.emit(TypeHTMLStart)
- }
- }
- break LOOP
- }
- }
-
- // Now move on to the shortcodes.
- return lexMainSection
-}
-
-func lexEndFromtMatterHTMLComment(l *pageLexer) stateFunc {
- l.isInHTMLComment = false
- right := l.index(htmlCommentEnd)
- if right == -1 {
- return l.errorf("starting HTML comment with no end")
- }
- l.pos += right + len(htmlCommentEnd)
- l.emit(TypeIgnore)
-
- // Now move on to the shortcodes.
- return lexMainSection
-}
-
func lexDone(l *pageLexer) stateFunc {
// Done!
@@ -378,385 +287,10 @@ func lexDone(l *pageLexer) stateFunc {
return nil
}
-func lexFrontMatterJSON(l *pageLexer) stateFunc {
- // Include the left delimiter
- l.backup()
-
- var (
- inQuote bool
- level int
- )
-
- for {
-
- r := l.next()
-
- switch {
- case r == eof:
- return l.errorf("unexpected EOF parsing JSON front matter")
- case r == '{':
- if !inQuote {
- level++
- }
- case r == '}':
- if !inQuote {
- level--
- }
- case r == '"':
- inQuote = !inQuote
- case r == '\\':
- // This may be an escaped quote. Make sure it's not marked as a
- // real one.
- l.next()
- }
-
- if level == 0 {
- break
- }
- }
-
- l.consumeCRLF()
- l.emit(TypeFrontMatterJSON)
-
- return lexMainSection
-}
-
-func lexFrontMatterOrgMode(l *pageLexer) stateFunc {
- /*
- #+TITLE: Test File For chaseadamsio/goorgeous
- #+AUTHOR: Chase Adams
- #+DESCRIPTION: Just another golang parser for org content!
- */
-
- l.summaryDivider = summaryDividerOrg
-
- l.backup()
-
- if !l.hasPrefix(delimOrg) {
- return lexMainSection
- }
-
- // Read lines until we no longer see a #+ prefix
-LOOP:
- for {
-
- r := l.next()
-
- switch {
- case r == '\n':
- if !l.hasPrefix(delimOrg) {
- break LOOP
- }
- case r == eof:
- break LOOP
-
- }
- }
-
- l.emit(TypeFrontMatterORG)
-
- return lexMainSection
-
-}
-
func (l *pageLexer) printCurrentInput() {
fmt.Printf("input[%d:]: %q", l.pos, string(l.input[l.pos:]))
}
-// Handle YAML or TOML front matter.
-func (l *pageLexer) lexFrontMatterSection(tp ItemType, delimr rune, name string, delim []byte) stateFunc {
-
- for i := 0; i < 2; i++ {
- if r := l.next(); r != delimr {
- return l.errorf("invalid %s delimiter", name)
- }
- }
-
- // Let front matter start at line 1
- wasEndOfLine := l.consumeCRLF()
- // We don't care about the delimiters.
- l.ignore()
-
- var r rune
-
- for {
- if !wasEndOfLine {
- r = l.next()
- if r == eof {
- return l.errorf("EOF looking for end %s front matter delimiter", name)
- }
- }
-
- if wasEndOfLine || isEndOfLine(r) {
- if l.hasPrefix(delim) {
- l.emit(tp)
- l.pos += 3
- l.consumeCRLF()
- l.ignore()
- break
- }
- }
-
- wasEndOfLine = false
- }
-
- return lexMainSection
-}
-
-func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
- l.pos += len(l.currentLeftShortcodeDelim())
- if l.hasPrefix(leftComment) {
- return lexShortcodeComment
- }
- l.emit(l.currentLeftShortcodeDelimItem())
- l.elementStepNum = 0
- l.paramElements = 0
- return lexInsideShortcode
-}
-
-func lexShortcodeComment(l *pageLexer) stateFunc {
- posRightComment := l.index(append(rightComment, l.currentRightShortcodeDelim()...))
- if posRightComment <= 1 {
- return l.errorf("comment must be closed")
- }
- // we emit all as text, except the comment markers
- l.emit(tText)
- l.pos += len(leftComment)
- l.ignore()
- l.pos += posRightComment - len(leftComment)
- l.emit(tText)
- l.pos += len(rightComment)
- l.ignore()
- l.pos += len(l.currentRightShortcodeDelim())
- l.emit(tText)
- return lexMainSection
-}
-
-func lexShortcodeRightDelim(l *pageLexer) stateFunc {
- l.closingState = 0
- l.pos += len(l.currentRightShortcodeDelim())
- l.emit(l.currentRightShortcodeDelimItem())
- return lexMainSection
-}
-
-// either:
-// 1. param
-// 2. "param" or "param\"
-// 3. param="123" or param="123\"
-// 4. param="Some \"escaped\" text"
-func lexShortcodeParam(l *pageLexer, escapedQuoteStart bool) stateFunc {
-
- first := true
- nextEq := false
-
- var r rune
-
- for {
- r = l.next()
- if first {
- if r == '"' {
- // a positional param with quotes
- if l.paramElements == 2 {
- return l.errorf("got quoted positional parameter. Cannot mix named and positional parameters")
- }
- l.paramElements = 1
- l.backup()
- return lexShortcodeQuotedParamVal(l, !escapedQuoteStart, tScParam)
- }
- first = false
- } else if r == '=' {
- // a named param
- l.backup()
- nextEq = true
- break
- }
-
- if !isAlphaNumericOrHyphen(r) {
- l.backup()
- break
- }
- }
-
- if l.paramElements == 0 {
- l.paramElements++
-
- if nextEq {
- l.paramElements++
- }
- } else {
- if nextEq && l.paramElements == 1 {
- return l.errorf("got named parameter '%s'. Cannot mix named and positional parameters", l.current())
- } else if !nextEq && l.paramElements == 2 {
- return l.errorf("got positional parameter '%s'. Cannot mix named and positional parameters", l.current())
- }
- }
-
- l.emit(tScParam)
- return lexInsideShortcode
-
-}
-
-func lexShortcodeQuotedParamVal(l *pageLexer, escapedQuotedValuesAllowed bool, typ ItemType) stateFunc {
- openQuoteFound := false
- escapedInnerQuoteFound := false
- escapedQuoteState := 0
-
-Loop:
- for {
- switch r := l.next(); {
- case r == '\\':
- if l.peek() == '"' {
- if openQuoteFound && !escapedQuotedValuesAllowed {
- l.backup()
- break Loop
- } else if openQuoteFound {
- // the coming quote is an escaped inner quote
- escapedInnerQuoteFound = true
- escapedQuoteState = 1
- }
- }
- case r == eof, r == '\n':
- return l.errorf("unterminated quoted string in shortcode parameter-argument: '%s'", l.current())
- case r == '"':
- if escapedQuoteState == 0 {
- if openQuoteFound {
- l.backup()
- break Loop
-
- } else {
- openQuoteFound = true
- l.ignore()
- }
- } else {
- escapedQuoteState = 0
- }
-
- }
- }
-
- if escapedInnerQuoteFound {
- l.ignoreEscapesAndEmit(typ)
- } else {
- l.emit(typ)
- }
-
- r := l.next()
-
- if r == '\\' {
- if l.peek() == '"' {
- // ignore the escaped closing quote
- l.ignore()
- l.next()
- l.ignore()
- }
- } else if r == '"' {
- // ignore closing quote
- l.ignore()
- } else {
- // handled by next state
- l.backup()
- }
-
- return lexInsideShortcode
-}
-
-// Inline shortcodes have the form {{< myshortcode.inline >}}
-var inlineIdentifier = []byte("inline ")
-
-// scans an alphanumeric identifier inside a shortcode
-func lexIdentifierInShortcode(l *pageLexer) stateFunc {
- lookForEnd := false
-Loop:
- for {
- switch r := l.next(); {
- case isAlphaNumericOrHyphen(r):
- // Allow forward slash inside names to make it possible to create namespaces.
- case r == '/':
- case r == '.':
- l.isInline = l.hasPrefix(inlineIdentifier)
- if !l.isInline {
- return l.errorf("period in shortcode name only allowed for inline identifiers")
- }
- default:
- l.backup()
- word := string(l.input[l.start:l.pos])
- if l.closingState > 0 && !l.openShortcodes[word] {
- return l.errorf("closing tag for shortcode '%s' does not match start tag", word)
- } else if l.closingState > 0 {
- l.openShortcodes[word] = false
- lookForEnd = true
- }
-
- l.closingState = 0
- l.currShortcodeName = word
- l.openShortcodes[word] = true
- l.elementStepNum++
- if l.isInline {
- l.emit(tScNameInline)
- } else {
- l.emit(tScName)
- }
- break Loop
- }
- }
-
- if lookForEnd {
- return lexEndOfShortcode
- }
- return lexInsideShortcode
-}
-
-func lexEndOfShortcode(l *pageLexer) stateFunc {
- l.isInline = false
- if l.hasPrefix(l.currentRightShortcodeDelim()) {
- return lexShortcodeRightDelim
- }
- switch r := l.next(); {
- case isSpace(r):
- l.ignore()
- default:
- return l.errorf("unclosed shortcode")
- }
- return lexEndOfShortcode
-}
-
-// scans the elements inside shortcode tags
-func lexInsideShortcode(l *pageLexer) stateFunc {
- if l.hasPrefix(l.currentRightShortcodeDelim()) {
- return lexShortcodeRightDelim
- }
- switch r := l.next(); {
- case r == eof:
- // eol is allowed inside shortcodes; this may go to end of document before it fails
- return l.errorf("unclosed shortcode action")
- case isSpace(r), isEndOfLine(r):
- l.ignore()
- case r == '=':
- l.ignore()
- return lexShortcodeQuotedParamVal(l, l.peek() != '\\', tScParamVal)
- case r == '/':
- if l.currShortcodeName == "" {
- return l.errorf("got closing shortcode, but none is open")
- }
- l.closingState++
- l.emit(tScClose)
- case r == '\\':
- l.ignore()
- if l.peek() == '"' {
- return lexShortcodeParam(l, true)
- }
- case l.elementStepNum > 0 && (isAlphaNumericOrHyphen(r) || r == '"'): // positional params can have quotes
- l.backup()
- return lexShortcodeParam(l, false)
- case isAlphaNumeric(r):
- l.backup()
- return lexIdentifierInShortcode
- default:
- return l.errorf("unrecognized character in shortcode action: %#U. Note: Parameters with non-alphanumeric args must be quoted", r)
- }
- return lexInsideShortcode
-}
-
// state helpers
func (l *pageLexer) index(sep []byte) int {
@@ -767,29 +301,6 @@ func (l *pageLexer) hasPrefix(prefix []byte) bool {
return bytes.HasPrefix(l.input[l.pos:], prefix)
}
-func (l *pageLexer) currentLeftShortcodeDelimItem() ItemType {
- return l.currLeftDelimItem
-}
-
-func (l *pageLexer) currentRightShortcodeDelimItem() ItemType {
- return l.currRightDelimItem
-}
-
-func (l *pageLexer) currentLeftShortcodeDelim() []byte {
- if l.currLeftDelimItem == tLeftDelimScWithMarkup {
- return leftDelimScWithMarkup
- }
- return leftDelimScNoMarkup
-
-}
-
-func (l *pageLexer) currentRightShortcodeDelim() []byte {
- if l.currRightDelimItem == tRightDelimScWithMarkup {
- return rightDelimScWithMarkup
- }
- return rightDelimScNoMarkup
-}
-
// helper functions
// returns the min index >= 0
diff --git a/parser/pageparser/pagelexer_intro.go b/parser/pageparser/pagelexer_intro.go
new file mode 100644
index 000000000..56dd4224d
--- /dev/null
+++ b/parser/pageparser/pagelexer_intro.go
@@ -0,0 +1,202 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package pageparser provides a parser for Hugo content files (Markdown, HTML etc.).
+// This implementation is highly inspired by the great talk given by Rob Pike called "Lexical Scanning in Go".
+// It's on YouTube; Google it.
+// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
+package pageparser
+
+func lexIntroSection(l *pageLexer) stateFunc {
+ l.summaryDivider = summaryDivider
+
+LOOP:
+ for {
+ r := l.next()
+ if r == eof {
+ break
+ }
+
+ switch {
+ case r == '+':
+ return l.lexFrontMatterSection(TypeFrontMatterTOML, r, "TOML", delimTOML)
+ case r == '-':
+ return l.lexFrontMatterSection(TypeFrontMatterYAML, r, "YAML", delimYAML)
+ case r == '{':
+ return lexFrontMatterJSON
+ case r == '#':
+ return lexFrontMatterOrgMode
+ case r == byteOrderMark:
+ l.emit(TypeIgnore)
+ case !isSpace(r) && !isEndOfLine(r):
+ if r == '<' {
+ l.backup()
+ if l.hasPrefix(htmlCommentStart) {
+ // This may be commented-out front matter, which should
+ // still be read.
+ l.consumeToNextLine()
+ l.isInHTMLComment = true
+ l.emit(TypeIgnore)
+ continue LOOP
+ } else {
+ if l.pos > l.start {
+ l.emit(tText)
+ }
+ l.next()
+ // This is the start of a plain HTML document with no
+ // front matter. It can still contain shortcodes, so we
+ // have to keep looking.
+ l.emit(TypeHTMLStart)
+ }
+ }
+ break LOOP
+ }
+ }
+
+ // Now move on to the shortcodes.
+ return lexMainSection
+}
+
+func lexEndFromtMatterHTMLComment(l *pageLexer) stateFunc {
+ l.isInHTMLComment = false
+ right := l.index(htmlCommentEnd)
+ if right == -1 {
+ return l.errorf("starting HTML comment with no end")
+ }
+ l.pos += right + len(htmlCommentEnd)
+ l.emit(TypeIgnore)
+
+ // Now move on to the shortcodes.
+ return lexMainSection
+}
+
+func lexFrontMatterJSON(l *pageLexer) stateFunc {
+ // Include the left delimiter
+ l.backup()
+
+ var (
+ inQuote bool
+ level int
+ )
+
+ for {
+
+ r := l.next()
+
+ switch {
+ case r == eof:
+ return l.errorf("unexpected EOF parsing JSON front matter")
+ case r == '{':
+ if !inQuote {
+ level++
+ }
+ case r == '}':
+ if !inQuote {
+ level--
+ }
+ case r == '"':
+ inQuote = !inQuote
+ case r == '\\':
+ // This may be an escaped quote. Make sure it's not marked as a
+ // real one.
+ l.next()
+ }
+
+ if level == 0 {
+ break
+ }
+ }
+
+ l.consumeCRLF()
+ l.emit(TypeFrontMatterJSON)
+
+ return lexMainSection
+}
+
+func lexFrontMatterOrgMode(l *pageLexer) stateFunc {
+ /*
+ #+TITLE: Test File For chaseadamsio/goorgeous
+ #+AUTHOR: Chase Adams
+ #+DESCRIPTION: Just another golang parser for org content!
+ */
+
+ l.summaryDivider = summaryDividerOrg
+
+ l.backup()
+
+ if !l.hasPrefix(delimOrg) {
+ return lexMainSection
+ }
+
+ // Read lines until we no longer see a #+ prefix
+LOOP:
+ for {
+
+ r := l.next()
+
+ switch {
+ case r == '\n':
+ if !l.hasPrefix(delimOrg) {
+ break LOOP
+ }
+ case r == eof:
+ break LOOP
+
+ }
+ }
+
+ l.emit(TypeFrontMatterORG)
+
+ return lexMainSection
+
+}
+
+// Handle YAML or TOML front matter.
+func (l *pageLexer) lexFrontMatterSection(tp ItemType, delimr rune, name string, delim []byte) stateFunc {
+
+ for i := 0; i < 2; i++ {
+ if r := l.next(); r != delimr {
+ return l.errorf("invalid %s delimiter", name)
+ }
+ }
+
+ // Let front matter start at line 1
+ wasEndOfLine := l.consumeCRLF()
+ // We don't care about the delimiters.
+ l.ignore()
+
+ var r rune
+
+ for {
+ if !wasEndOfLine {
+ r = l.next()
+ if r == eof {
+ return l.errorf("EOF looking for end %s front matter delimiter", name)
+ }
+ }
+
+ if wasEndOfLine || isEndOfLine(r) {
+ if l.hasPrefix(delim) {
+ l.emit(tp)
+ l.pos += 3
+ l.consumeCRLF()
+ l.ignore()
+ break
+ }
+ }
+
+ wasEndOfLine = false
+ }
+
+ return lexMainSection
+}
diff --git a/parser/pageparser/pagelexer_shortcode.go b/parser/pageparser/pagelexer_shortcode.go
new file mode 100644
index 000000000..fe182459a
--- /dev/null
+++ b/parser/pageparser/pagelexer_shortcode.go
@@ -0,0 +1,322 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package pageparser provides a parser for Hugo content files (Markdown, HTML etc.).
+// This implementation is highly inspired by the great talk given by Rob Pike called "Lexical Scanning in Go".
+// It's on YouTube; Google it.
+// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
+package pageparser
+
+type lexerShortcodeState struct {
+ currLeftDelimItem ItemType
+ currRightDelimItem ItemType
+ isInline bool
+ currShortcodeName string // is only set when a shortcode is in opened state
+ closingState int // > 0 = on its way to be closed
+ elementStepNum int // step number in element
+ paramElements int // number of elements (name + value = 2) found first
+ openShortcodes map[string]bool // set of shortcodes in open state
+
+}
+
+// Shortcode syntax
+var (
+ leftDelimSc = []byte("{{")
+ leftDelimScNoMarkup = []byte("{{<")
+ rightDelimScNoMarkup = []byte(">}}")
+ leftDelimScWithMarkup = []byte("{{%")
+ rightDelimScWithMarkup = []byte("%}}")
+ leftComment = []byte("/*") // comments in this context are used to mark shortcodes as "not really a shortcode"
+ rightComment = []byte("*/")
+)
+
+func (l *pageLexer) isShortCodeStart() bool {
+ return l.hasPrefix(leftDelimScWithMarkup) || l.hasPrefix(leftDelimScNoMarkup)
+}
+
+func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
+ l.pos += len(l.currentLeftShortcodeDelim())
+ if l.hasPrefix(leftComment) {
+ return lexShortcodeComment
+ }
+ l.emit(l.currentLeftShortcodeDelimItem())
+ l.elementStepNum = 0
+ l.paramElements = 0
+ return lexInsideShortcode
+}
+
+func lexShortcodeComment(l *pageLexer) stateFunc {
+ posRightComment := l.index(append(rightComment, l.currentRightShortcodeDelim()...))
+ if posRightComment <= 1 {
+ return l.errorf("comment must be closed")
+ }
+ // we emit all as text, except the comment markers
+ l.emit(tText)
+ l.pos += len(leftComment)
+ l.ignore()
+ l.pos += posRightComment - len(leftComment)
+ l.emit(tText)
+ l.pos += len(rightComment)
+ l.ignore()
+ l.pos += len(l.currentRightShortcodeDelim())
+ l.emit(tText)
+ return lexMainSection
+}
+
+func lexShortcodeRightDelim(l *pageLexer) stateFunc {
+ l.closingState = 0
+ l.pos += len(l.currentRightShortcodeDelim())
+ l.emit(l.currentRightShortcodeDelimItem())
+ return lexMainSection
+}
+
+// either:
+// 1. param
+// 2. "param" or "param\"
+// 3. param="123" or param="123\"
+// 4. param="Some \"escaped\" text"
+func lexShortcodeParam(l *pageLexer, escapedQuoteStart bool) stateFunc {
+
+ first := true
+ nextEq := false
+
+ var r rune
+
+ for {
+ r = l.next()
+ if first {
+ if r == '"' {
+ // a positional param with quotes
+ if l.paramElements == 2 {
+ return l.errorf("got quoted positional parameter. Cannot mix named and positional parameters")
+ }
+ l.paramElements = 1
+ l.backup()
+ return lexShortcodeQuotedParamVal(l, !escapedQuoteStart, tScParam)
+ }
+ first = false
+ } else if r == '=' {
+ // a named param
+ l.backup()
+ nextEq = true
+ break
+ }
+
+ if !isAlphaNumericOrHyphen(r) {
+ l.backup()
+ break
+ }
+ }
+
+ if l.paramElements == 0 {
+ l.paramElements++
+
+ if nextEq {
+ l.paramElements++
+ }
+ } else {
+ if nextEq && l.paramElements == 1 {
+ return l.errorf("got named parameter '%s'. Cannot mix named and positional parameters", l.current())
+ } else if !nextEq && l.paramElements == 2 {
+ return l.errorf("got positional parameter '%s'. Cannot mix named and positional parameters", l.current())
+ }
+ }
+
+ l.emit(tScParam)
+ return lexInsideShortcode
+
+}
+
+func lexShortcodeQuotedParamVal(l *pageLexer, escapedQuotedValuesAllowed bool, typ ItemType) stateFunc {
+ openQuoteFound := false
+ escapedInnerQuoteFound := false
+ escapedQuoteState := 0
+
+Loop:
+ for {
+ switch r := l.next(); {
+ case r == '\\':
+ if l.peek() == '"' {
+ if openQuoteFound && !escapedQuotedValuesAllowed {
+ l.backup()
+ break Loop
+ } else if openQuoteFound {
+ // the coming quote is an escaped inner quote
+ escapedInnerQuoteFound = true
+ escapedQuoteState = 1
+ }
+ }
+ case r == eof, r == '\n':
+ return l.errorf("unterminated quoted string in shortcode parameter-argument: '%s'", l.current())
+ case r == '"':
+ if escapedQuoteState == 0 {
+ if openQuoteFound {
+ l.backup()
+ break Loop
+
+ } else {
+ openQuoteFound = true
+ l.ignore()
+ }
+ } else {
+ escapedQuoteState = 0
+ }
+
+ }
+ }
+
+ if escapedInnerQuoteFound {
+ l.ignoreEscapesAndEmit(typ)
+ } else {
+ l.emit(typ)
+ }
+
+ r := l.next()
+
+ if r == '\\' {
+ if l.peek() == '"' {
+ // ignore the escaped closing quote
+ l.ignore()
+ l.next()
+ l.ignore()
+ }
+ } else if r == '"' {
+ // ignore closing quote
+ l.ignore()
+ } else {
+ // handled by next state
+ l.backup()
+ }
+
+ return lexInsideShortcode
+}
+
+// Inline shortcodes have the form {{< myshortcode.inline >}}
+var inlineIdentifier = []byte("inline ")
+
+// scans an alphanumeric identifier inside a shortcode
+func lexIdentifierInShortcode(l *pageLexer) stateFunc {
+ lookForEnd := false
+Loop:
+ for {
+ switch r := l.next(); {
+ case isAlphaNumericOrHyphen(r):
+ // Allow forward slash inside names to make it possible to create namespaces.
+ case r == '/':
+ case r == '.':
+ l.isInline = l.hasPrefix(inlineIdentifier)
+ if !l.isInline {
+ return l.errorf("period in shortcode name only allowed for inline identifiers")
+ }
+ default:
+ l.backup()
+ word := string(l.input[l.start:l.pos])
+ if l.closingState > 0 && !l.openShortcodes[word] {
+ return l.errorf("closing tag for shortcode '%s' does not match start tag", word)
+ } else if l.closingState > 0 {
+ l.openShortcodes[word] = false
+ lookForEnd = true
+ }
+
+ l.closingState = 0
+ l.currShortcodeName = word
+ l.openShortcodes[word] = true
+ l.elementStepNum++
+ if l.isInline {
+ l.emit(tScNameInline)
+ } else {
+ l.emit(tScName)
+ }
+ break Loop
+ }
+ }
+
+ if lookForEnd {
+ return lexEndOfShortcode
+ }
+ return lexInsideShortcode
+}
+
+func lexEndOfShortcode(l *pageLexer) stateFunc {
+ l.isInline = false
+ if l.hasPrefix(l.currentRightShortcodeDelim()) {
+ return lexShortcodeRightDelim
+ }
+ switch r := l.next(); {
+ case isSpace(r):
+ l.ignore()
+ default:
+ return l.errorf("unclosed shortcode")
+ }
+ return lexEndOfShortcode
+}
+
+// scans the elements inside shortcode tags
+func lexInsideShortcode(l *pageLexer) stateFunc {
+ if l.hasPrefix(l.currentRightShortcodeDelim()) {
+ return lexShortcodeRightDelim
+ }
+ switch r := l.next(); {
+ case r == eof:
+ // eol is allowed inside shortcodes; this may go to end of document before it fails
+ return l.errorf("unclosed shortcode action")
+ case isSpace(r), isEndOfLine(r):
+ l.ignore()
+ case r == '=':
+ l.ignore()
+ return lexShortcodeQuotedParamVal(l, l.peek() != '\\', tScParamVal)
+ case r == '/':
+ if l.currShortcodeName == "" {
+ return l.errorf("got closing shortcode, but none is open")
+ }
+ l.closingState++
+ l.emit(tScClose)
+ case r == '\\':
+ l.ignore()
+ if l.peek() == '"' {
+ return lexShortcodeParam(l, true)
+ }
+ case l.elementStepNum > 0 && (isAlphaNumericOrHyphen(r) || r == '"'): // positional params can have quotes
+ l.backup()
+ return lexShortcodeParam(l, false)
+ case isAlphaNumeric(r):
+ l.backup()
+ return lexIdentifierInShortcode
+ default:
+ return l.errorf("unrecognized character in shortcode action: %#U. Note: Parameters with non-alphanumeric args must be quoted", r)
+ }
+ return lexInsideShortcode
+}
+
+func (l *pageLexer) currentLeftShortcodeDelimItem() ItemType {
+ return l.currLeftDelimItem
+}
+
+func (l *pageLexer) currentRightShortcodeDelimItem() ItemType {
+ return l.currRightDelimItem
+}
+
+func (l *pageLexer) currentLeftShortcodeDelim() []byte {
+ if l.currLeftDelimItem == tLeftDelimScWithMarkup {
+ return leftDelimScWithMarkup
+ }
+ return leftDelimScNoMarkup
+
+}
+
+func (l *pageLexer) currentRightShortcodeDelim() []byte {
+ if l.currRightDelimItem == tRightDelimScWithMarkup {
+ return rightDelimScWithMarkup
+ }
+ return rightDelimScNoMarkup
+}