diff options
author | Bjørn Erik Pedersen <[email protected]> | 2018-12-17 20:54:06 +0100 |
---|---|---|
committer | Bjørn Erik Pedersen <[email protected]> | 2018-12-20 20:08:01 +0100 |
commit | a8853f1c5ace30ae8d256ad374bdb280c95d4228 (patch) | |
tree | db4bdd65b5ae9bfe7894a4a9bdf687a5d0063381 | |
parent | 4d93aca27dfdebc9e06948ccf37a7922dac09d65 (diff) | |
download | hugo-a8853f1c5ace30ae8d256ad374bdb280c95d4228.tar.gz hugo-a8853f1c5ace30ae8d256ad374bdb280c95d4228.zip |
parser/pageparser: Split the page lexer into some more files
See #5534
-rw-r--r-- | parser/pageparser/pagelexer.go | 489 | ||||
-rw-r--r-- | parser/pageparser/pagelexer_intro.go | 202 | ||||
-rw-r--r-- | parser/pageparser/pagelexer_shortcode.go | 322 |
3 files changed, 524 insertions, 489 deletions
diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go index 5802c318b..d11e88403 100644 --- a/parser/pageparser/pagelexer.go +++ b/parser/pageparser/pagelexer.go @@ -29,18 +29,6 @@ const eof = -1 // returns the next state in scanner. type stateFunc func(*pageLexer) stateFunc -type lexerShortcodeState struct { - currLeftDelimItem ItemType - currRightDelimItem ItemType - isInline bool - currShortcodeName string // is only set when a shortcode is in opened state - closingState int // > 0 = on its way to be closed - elementStepNum int // step number in element - paramElements int // number of elements (name + value = 2) found first - openShortcodes map[string]bool // set of shortcodes in open state - -} - type pageLexer struct { input []byte stateStart stateFunc @@ -102,17 +90,6 @@ func (l *pageLexer) run() *pageLexer { return l } -// Shortcode syntax -var ( - leftDelimSc = []byte("{{") - leftDelimScNoMarkup = []byte("{{<") - rightDelimScNoMarkup = []byte(">}}") - leftDelimScWithMarkup = []byte("{{%") - rightDelimScWithMarkup = []byte("%}}") - leftComment = []byte("/*") // comments in this context us used to to mark shortcodes as "not really a shortcode" - rightComment = []byte("*/") -) - // Page syntax var ( byteOrderMark = '\ufeff' @@ -293,11 +270,6 @@ func lexMainSection(l *pageLexer) stateFunc { } -func (l *pageLexer) isShortCodeStart() bool { - return l.hasPrefix(leftDelimScWithMarkup) || l.hasPrefix(leftDelimScNoMarkup) - -} - func (l *pageLexer) posFirstNonWhiteSpace() int { f := func(c rune) bool { return !unicode.IsSpace(c) @@ -305,69 +277,6 @@ func (l *pageLexer) posFirstNonWhiteSpace() int { return bytes.IndexFunc(l.input[l.pos:], f) } -func lexIntroSection(l *pageLexer) stateFunc { - l.summaryDivider = summaryDivider - -LOOP: - for { - r := l.next() - if r == eof { - break - } - - switch { - case r == '+': - return l.lexFrontMatterSection(TypeFrontMatterTOML, r, "TOML", delimTOML) - case r == '-': - return l.lexFrontMatterSection(TypeFrontMatterYAML, r, "YAML", delimYAML) - case r == '{': - return lexFrontMatterJSON - case r == '#': - return lexFrontMatterOrgMode - case r == byteOrderMark: - l.emit(TypeIgnore) - case !isSpace(r) && !isEndOfLine(r): - if r == '<' { - l.backup() - if l.hasPrefix(htmlCommentStart) { - // This may be commented out front mattter, which should - // still be read. - l.consumeToNextLine() - l.isInHTMLComment = true - l.emit(TypeIgnore) - continue LOOP - } else { - if l.pos > l.start { - l.emit(tText) - } - l.next() - // This is the start of a plain HTML document with no - // front matter. I still can contain shortcodes, so we - // have to keep looking. - l.emit(TypeHTMLStart) - } - } - break LOOP - } - } - - // Now move on to the shortcodes. - return lexMainSection -} - -func lexEndFromtMatterHTMLComment(l *pageLexer) stateFunc { - l.isInHTMLComment = false - right := l.index(htmlCommentEnd) - if right == -1 { - return l.errorf("starting HTML comment with no end") - } - l.pos += right + len(htmlCommentEnd) - l.emit(TypeIgnore) - - // Now move on to the shortcodes. - return lexMainSection -} - func lexDone(l *pageLexer) stateFunc { // Done! @@ -378,385 +287,10 @@ func lexDone(l *pageLexer) stateFunc { return nil } -func lexFrontMatterJSON(l *pageLexer) stateFunc { - // Include the left delimiter - l.backup() - - var ( - inQuote bool - level int - ) - - for { - - r := l.next() - - switch { - case r == eof: - return l.errorf("unexpected EOF parsing JSON front matter") - case r == '{': - if !inQuote { - level++ - } - case r == '}': - if !inQuote { - level-- - } - case r == '"': - inQuote = !inQuote - case r == '\\': - // This may be an escaped quote. Make sure it's not marked as a - // real one. - l.next() - } - - if level == 0 { - break - } - } - - l.consumeCRLF() - l.emit(TypeFrontMatterJSON) - - return lexMainSection -} - -func lexFrontMatterOrgMode(l *pageLexer) stateFunc { - /* - #+TITLE: Test File For chaseadamsio/goorgeous - #+AUTHOR: Chase Adams - #+DESCRIPTION: Just another golang parser for org content! - */ - - l.summaryDivider = summaryDividerOrg - - l.backup() - - if !l.hasPrefix(delimOrg) { - return lexMainSection - } - - // Read lines until we no longer see a #+ prefix -LOOP: - for { - - r := l.next() - - switch { - case r == '\n': - if !l.hasPrefix(delimOrg) { - break LOOP - } - case r == eof: - break LOOP - - } - } - - l.emit(TypeFrontMatterORG) - - return lexMainSection - -} - func (l *pageLexer) printCurrentInput() { fmt.Printf("input[%d:]: %q", l.pos, string(l.input[l.pos:])) } -// Handle YAML or TOML front matter. -func (l *pageLexer) lexFrontMatterSection(tp ItemType, delimr rune, name string, delim []byte) stateFunc { - - for i := 0; i < 2; i++ { - if r := l.next(); r != delimr { - return l.errorf("invalid %s delimiter", name) - } - } - - // Let front matter start at line 1 - wasEndOfLine := l.consumeCRLF() - // We don't care about the delimiters. - l.ignore() - - var r rune - - for { - if !wasEndOfLine { - r = l.next() - if r == eof { - return l.errorf("EOF looking for end %s front matter delimiter", name) - } - } - - if wasEndOfLine || isEndOfLine(r) { - if l.hasPrefix(delim) { - l.emit(tp) - l.pos += 3 - l.consumeCRLF() - l.ignore() - break - } - } - - wasEndOfLine = false - } - - return lexMainSection -} - -func lexShortcodeLeftDelim(l *pageLexer) stateFunc { - l.pos += len(l.currentLeftShortcodeDelim()) - if l.hasPrefix(leftComment) { - return lexShortcodeComment - } - l.emit(l.currentLeftShortcodeDelimItem()) - l.elementStepNum = 0 - l.paramElements = 0 - return lexInsideShortcode -} - -func lexShortcodeComment(l *pageLexer) stateFunc { - posRightComment := l.index(append(rightComment, l.currentRightShortcodeDelim()...)) - if posRightComment <= 1 { - return l.errorf("comment must be closed") - } - // we emit all as text, except the comment markers - l.emit(tText) - l.pos += len(leftComment) - l.ignore() - l.pos += posRightComment - len(leftComment) - l.emit(tText) - l.pos += len(rightComment) - l.ignore() - l.pos += len(l.currentRightShortcodeDelim()) - l.emit(tText) - return lexMainSection -} - -func lexShortcodeRightDelim(l *pageLexer) stateFunc { - l.closingState = 0 - l.pos += len(l.currentRightShortcodeDelim()) - l.emit(l.currentRightShortcodeDelimItem()) - return lexMainSection -} - -// either: -// 1. param -// 2. "param" or "param\" -// 3. param="123" or param="123\" -// 4. param="Some \"escaped\" text" -func lexShortcodeParam(l *pageLexer, escapedQuoteStart bool) stateFunc { - - first := true - nextEq := false - - var r rune - - for { - r = l.next() - if first { - if r == '"' { - // a positional param with quotes - if l.paramElements == 2 { - return l.errorf("got quoted positional parameter. Cannot mix named and positional parameters") - } - l.paramElements = 1 - l.backup() - return lexShortcodeQuotedParamVal(l, !escapedQuoteStart, tScParam) - } - first = false - } else if r == '=' { - // a named param - l.backup() - nextEq = true - break - } - - if !isAlphaNumericOrHyphen(r) { - l.backup() - break - } - } - - if l.paramElements == 0 { - l.paramElements++ - - if nextEq { - l.paramElements++ - } - } else { - if nextEq && l.paramElements == 1 { - return l.errorf("got named parameter '%s'. Cannot mix named and positional parameters", l.current()) - } else if !nextEq && l.paramElements == 2 { - return l.errorf("got positional parameter '%s'. Cannot mix named and positional parameters", l.current()) - } - } - - l.emit(tScParam) - return lexInsideShortcode - -} - -func lexShortcodeQuotedParamVal(l *pageLexer, escapedQuotedValuesAllowed bool, typ ItemType) stateFunc { - openQuoteFound := false - escapedInnerQuoteFound := false - escapedQuoteState := 0 - -Loop: - for { - switch r := l.next(); { - case r == '\\': - if l.peek() == '"' { - if openQuoteFound && !escapedQuotedValuesAllowed { - l.backup() - break Loop - } else if openQuoteFound { - // the coming quoute is inside - escapedInnerQuoteFound = true - escapedQuoteState = 1 - } - } - case r == eof, r == '\n': - return l.errorf("unterminated quoted string in shortcode parameter-argument: '%s'", l.current()) - case r == '"': - if escapedQuoteState == 0 { - if openQuoteFound { - l.backup() - break Loop - - } else { - openQuoteFound = true - l.ignore() - } - } else { - escapedQuoteState = 0 - } - - } - } - - if escapedInnerQuoteFound { - l.ignoreEscapesAndEmit(typ) - } else { - l.emit(typ) - } - - r := l.next() - - if r == '\\' { - if l.peek() == '"' { - // ignore the escaped closing quote - l.ignore() - l.next() - l.ignore() - } - } else if r == '"' { - // ignore closing quote - l.ignore() - } else { - // handled by next state - l.backup() - } - - return lexInsideShortcode -} - -// Inline shortcodes has the form {{< myshortcode.inline >}} -var inlineIdentifier = []byte("inline ") - -// scans an alphanumeric inside shortcode -func lexIdentifierInShortcode(l *pageLexer) stateFunc { - lookForEnd := false -Loop: - for { - switch r := l.next(); { - case isAlphaNumericOrHyphen(r): - // Allow forward slash inside names to make it possible to create namespaces. - case r == '/': - case r == '.': - l.isInline = l.hasPrefix(inlineIdentifier) - if !l.isInline { - return l.errorf("period in shortcode name only allowed for inline identifiers") - } - default: - l.backup() - word := string(l.input[l.start:l.pos]) - if l.closingState > 0 && !l.openShortcodes[word] { - return l.errorf("closing tag for shortcode '%s' does not match start tag", word) - } else if l.closingState > 0 { - l.openShortcodes[word] = false - lookForEnd = true - } - - l.closingState = 0 - l.currShortcodeName = word - l.openShortcodes[word] = true - l.elementStepNum++ - if l.isInline { - l.emit(tScNameInline) - } else { - l.emit(tScName) - } - break Loop - } - } - - if lookForEnd { - return lexEndOfShortcode - } - return lexInsideShortcode -} - -func lexEndOfShortcode(l *pageLexer) stateFunc { - l.isInline = false - if l.hasPrefix(l.currentRightShortcodeDelim()) { - return lexShortcodeRightDelim - } - switch r := l.next(); { - case isSpace(r): - l.ignore() - default: - return l.errorf("unclosed shortcode") - } - return lexEndOfShortcode -} - -// scans the elements inside shortcode tags -func lexInsideShortcode(l *pageLexer) stateFunc { - if l.hasPrefix(l.currentRightShortcodeDelim()) { - return lexShortcodeRightDelim - } - switch r := l.next(); { - case r == eof: - // eol is allowed inside shortcodes; this may go to end of document before it fails - return l.errorf("unclosed shortcode action") - case isSpace(r), isEndOfLine(r): - l.ignore() - case r == '=': - l.ignore() - return lexShortcodeQuotedParamVal(l, l.peek() != '\\', tScParamVal) - case r == '/': - if l.currShortcodeName == "" { - return l.errorf("got closing shortcode, but none is open") - } - l.closingState++ - l.emit(tScClose) - case r == '\\': - l.ignore() - if l.peek() == '"' { - return lexShortcodeParam(l, true) - } - case l.elementStepNum > 0 && (isAlphaNumericOrHyphen(r) || r == '"'): // positional params can have quotes - l.backup() - return lexShortcodeParam(l, false) - case isAlphaNumeric(r): - l.backup() - return lexIdentifierInShortcode - default: - return l.errorf("unrecognized character in shortcode action: %#U. Note: Parameters with non-alphanumeric args must be quoted", r) - } - return lexInsideShortcode -} - // state helpers func (l *pageLexer) index(sep []byte) int { @@ -767,29 +301,6 @@ func (l *pageLexer) hasPrefix(prefix []byte) bool { return bytes.HasPrefix(l.input[l.pos:], prefix) } -func (l *pageLexer) currentLeftShortcodeDelimItem() ItemType { - return l.currLeftDelimItem -} - -func (l *pageLexer) currentRightShortcodeDelimItem() ItemType { - return l.currRightDelimItem -} - -func (l *pageLexer) currentLeftShortcodeDelim() []byte { - if l.currLeftDelimItem == tLeftDelimScWithMarkup { - return leftDelimScWithMarkup - } - return leftDelimScNoMarkup - -} - -func (l *pageLexer) currentRightShortcodeDelim() []byte { - if l.currRightDelimItem == tRightDelimScWithMarkup { - return rightDelimScWithMarkup - } - return rightDelimScNoMarkup -} - // helper functions // returns the min index >= 0 diff --git a/parser/pageparser/pagelexer_intro.go b/parser/pageparser/pagelexer_intro.go new file mode 100644 index 000000000..56dd4224d --- /dev/null +++ b/parser/pageparser/pagelexer_intro.go @@ -0,0 +1,202 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package pageparser provides a parser for Hugo content files (Markdown, HTML etc.) in Hugo. +// This implementation is highly inspired by the great talk given by Rob Pike called "Lexical Scanning in Go" +// It's on YouTube, Google it!. +// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html +package pageparser + +func lexIntroSection(l *pageLexer) stateFunc { + l.summaryDivider = summaryDivider + +LOOP: + for { + r := l.next() + if r == eof { + break + } + + switch { + case r == '+': + return l.lexFrontMatterSection(TypeFrontMatterTOML, r, "TOML", delimTOML) + case r == '-': + return l.lexFrontMatterSection(TypeFrontMatterYAML, r, "YAML", delimYAML) + case r == '{': + return lexFrontMatterJSON + case r == '#': + return lexFrontMatterOrgMode + case r == byteOrderMark: + l.emit(TypeIgnore) + case !isSpace(r) && !isEndOfLine(r): + if r == '<' { + l.backup() + if l.hasPrefix(htmlCommentStart) { + // This may be commented out front mattter, which should + // still be read. + l.consumeToNextLine() + l.isInHTMLComment = true + l.emit(TypeIgnore) + continue LOOP + } else { + if l.pos > l.start { + l.emit(tText) + } + l.next() + // This is the start of a plain HTML document with no + // front matter. I still can contain shortcodes, so we + // have to keep looking. + l.emit(TypeHTMLStart) + } + } + break LOOP + } + } + + // Now move on to the shortcodes. + return lexMainSection +} + +func lexEndFromtMatterHTMLComment(l *pageLexer) stateFunc { + l.isInHTMLComment = false + right := l.index(htmlCommentEnd) + if right == -1 { + return l.errorf("starting HTML comment with no end") + } + l.pos += right + len(htmlCommentEnd) + l.emit(TypeIgnore) + + // Now move on to the shortcodes. + return lexMainSection +} + +func lexFrontMatterJSON(l *pageLexer) stateFunc { + // Include the left delimiter + l.backup() + + var ( + inQuote bool + level int + ) + + for { + + r := l.next() + + switch { + case r == eof: + return l.errorf("unexpected EOF parsing JSON front matter") + case r == '{': + if !inQuote { + level++ + } + case r == '}': + if !inQuote { + level-- + } + case r == '"': + inQuote = !inQuote + case r == '\\': + // This may be an escaped quote. Make sure it's not marked as a + // real one. + l.next() + } + + if level == 0 { + break + } + } + + l.consumeCRLF() + l.emit(TypeFrontMatterJSON) + + return lexMainSection +} + +func lexFrontMatterOrgMode(l *pageLexer) stateFunc { + /* + #+TITLE: Test File For chaseadamsio/goorgeous + #+AUTHOR: Chase Adams + #+DESCRIPTION: Just another golang parser for org content! + */ + + l.summaryDivider = summaryDividerOrg + + l.backup() + + if !l.hasPrefix(delimOrg) { + return lexMainSection + } + + // Read lines until we no longer see a #+ prefix +LOOP: + for { + + r := l.next() + + switch { + case r == '\n': + if !l.hasPrefix(delimOrg) { + break LOOP + } + case r == eof: + break LOOP + + } + } + + l.emit(TypeFrontMatterORG) + + return lexMainSection + +} + +// Handle YAML or TOML front matter. +func (l *pageLexer) lexFrontMatterSection(tp ItemType, delimr rune, name string, delim []byte) stateFunc { + + for i := 0; i < 2; i++ { + if r := l.next(); r != delimr { + return l.errorf("invalid %s delimiter", name) + } + } + + // Let front matter start at line 1 + wasEndOfLine := l.consumeCRLF() + // We don't care about the delimiters. + l.ignore() + + var r rune + + for { + if !wasEndOfLine { + r = l.next() + if r == eof { + return l.errorf("EOF looking for end %s front matter delimiter", name) + } + } + + if wasEndOfLine || isEndOfLine(r) { + if l.hasPrefix(delim) { + l.emit(tp) + l.pos += 3 + l.consumeCRLF() + l.ignore() + break + } + } + + wasEndOfLine = false + } + + return lexMainSection +} diff --git a/parser/pageparser/pagelexer_shortcode.go b/parser/pageparser/pagelexer_shortcode.go new file mode 100644 index 000000000..fe182459a --- /dev/null +++ b/parser/pageparser/pagelexer_shortcode.go @@ -0,0 +1,322 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package pageparser provides a parser for Hugo content files (Markdown, HTML etc.) in Hugo. +// This implementation is highly inspired by the great talk given by Rob Pike called "Lexical Scanning in Go" +// It's on YouTube, Google it!. +// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html +package pageparser + +type lexerShortcodeState struct { + currLeftDelimItem ItemType + currRightDelimItem ItemType + isInline bool + currShortcodeName string // is only set when a shortcode is in opened state + closingState int // > 0 = on its way to be closed + elementStepNum int // step number in element + paramElements int // number of elements (name + value = 2) found first + openShortcodes map[string]bool // set of shortcodes in open state + +} + +// Shortcode syntax +var ( + leftDelimSc = []byte("{{") + leftDelimScNoMarkup = []byte("{{<") + rightDelimScNoMarkup = []byte(">}}") + leftDelimScWithMarkup = []byte("{{%") + rightDelimScWithMarkup = []byte("%}}") + leftComment = []byte("/*") // comments in this context us used to to mark shortcodes as "not really a shortcode" + rightComment = []byte("*/") +) + +func (l *pageLexer) isShortCodeStart() bool { + return l.hasPrefix(leftDelimScWithMarkup) || l.hasPrefix(leftDelimScNoMarkup) +} + +func lexShortcodeLeftDelim(l *pageLexer) stateFunc { + l.pos += len(l.currentLeftShortcodeDelim()) + if l.hasPrefix(leftComment) { + return lexShortcodeComment + } + l.emit(l.currentLeftShortcodeDelimItem()) + l.elementStepNum = 0 + l.paramElements = 0 + return lexInsideShortcode +} + +func lexShortcodeComment(l *pageLexer) stateFunc { + posRightComment := l.index(append(rightComment, l.currentRightShortcodeDelim()...)) + if posRightComment <= 1 { + return l.errorf("comment must be closed") + } + // we emit all as text, except the comment markers + l.emit(tText) + l.pos += len(leftComment) + l.ignore() + l.pos += posRightComment - len(leftComment) + l.emit(tText) + l.pos += len(rightComment) + l.ignore() + l.pos += len(l.currentRightShortcodeDelim()) + l.emit(tText) + return lexMainSection +} + +func lexShortcodeRightDelim(l *pageLexer) stateFunc { + l.closingState = 0 + l.pos += len(l.currentRightShortcodeDelim()) + l.emit(l.currentRightShortcodeDelimItem()) + return lexMainSection +} + +// either: +// 1. param +// 2. "param" or "param\" +// 3. param="123" or param="123\" +// 4. param="Some \"escaped\" text" +func lexShortcodeParam(l *pageLexer, escapedQuoteStart bool) stateFunc { + + first := true + nextEq := false + + var r rune + + for { + r = l.next() + if first { + if r == '"' { + // a positional param with quotes + if l.paramElements == 2 { + return l.errorf("got quoted positional parameter. Cannot mix named and positional parameters") + } + l.paramElements = 1 + l.backup() + return lexShortcodeQuotedParamVal(l, !escapedQuoteStart, tScParam) + } + first = false + } else if r == '=' { + // a named param + l.backup() + nextEq = true + break + } + + if !isAlphaNumericOrHyphen(r) { + l.backup() + break + } + } + + if l.paramElements == 0 { + l.paramElements++ + + if nextEq { + l.paramElements++ + } + } else { + if nextEq && l.paramElements == 1 { + return l.errorf("got named parameter '%s'. Cannot mix named and positional parameters", l.current()) + } else if !nextEq && l.paramElements == 2 { + return l.errorf("got positional parameter '%s'. Cannot mix named and positional parameters", l.current()) + } + } + + l.emit(tScParam) + return lexInsideShortcode + +} + +func lexShortcodeQuotedParamVal(l *pageLexer, escapedQuotedValuesAllowed bool, typ ItemType) stateFunc { + openQuoteFound := false + escapedInnerQuoteFound := false + escapedQuoteState := 0 + +Loop: + for { + switch r := l.next(); { + case r == '\\': + if l.peek() == '"' { + if openQuoteFound && !escapedQuotedValuesAllowed { + l.backup() + break Loop + } else if openQuoteFound { + // the coming quoute is inside + escapedInnerQuoteFound = true + escapedQuoteState = 1 + } + } + case r == eof, r == '\n': + return l.errorf("unterminated quoted string in shortcode parameter-argument: '%s'", l.current()) + case r == '"': + if escapedQuoteState == 0 { + if openQuoteFound { + l.backup() + break Loop + + } else { + openQuoteFound = true + l.ignore() + } + } else { + escapedQuoteState = 0 + } + + } + } + + if escapedInnerQuoteFound { + l.ignoreEscapesAndEmit(typ) + } else { + l.emit(typ) + } + + r := l.next() + + if r == '\\' { + if l.peek() == '"' { + // ignore the escaped closing quote + l.ignore() + l.next() + l.ignore() + } + } else if r == '"' { + // ignore closing quote + l.ignore() + } else { + // handled by next state + l.backup() + } + + return lexInsideShortcode +} + +// Inline shortcodes has the form {{< myshortcode.inline >}} +var inlineIdentifier = []byte("inline ") + +// scans an alphanumeric inside shortcode +func lexIdentifierInShortcode(l *pageLexer) stateFunc { + lookForEnd := false +Loop: + for { + switch r := l.next(); { + case isAlphaNumericOrHyphen(r): + // Allow forward slash inside names to make it possible to create namespaces. + case r == '/': + case r == '.': + l.isInline = l.hasPrefix(inlineIdentifier) + if !l.isInline { + return l.errorf("period in shortcode name only allowed for inline identifiers") + } + default: + l.backup() + word := string(l.input[l.start:l.pos]) + if l.closingState > 0 && !l.openShortcodes[word] { + return l.errorf("closing tag for shortcode '%s' does not match start tag", word) + } else if l.closingState > 0 { + l.openShortcodes[word] = false + lookForEnd = true + } + + l.closingState = 0 + l.currShortcodeName = word + l.openShortcodes[word] = true + l.elementStepNum++ + if l.isInline { + l.emit(tScNameInline) + } else { + l.emit(tScName) + } + break Loop + } + } + + if lookForEnd { + return lexEndOfShortcode + } + return lexInsideShortcode +} + +func lexEndOfShortcode(l *pageLexer) stateFunc { + l.isInline = false + if l.hasPrefix(l.currentRightShortcodeDelim()) { + return lexShortcodeRightDelim + } + switch r := l.next(); { + case isSpace(r): + l.ignore() + default: + return l.errorf("unclosed shortcode") + } + return lexEndOfShortcode +} + +// scans the elements inside shortcode tags +func lexInsideShortcode(l *pageLexer) stateFunc { + if l.hasPrefix(l.currentRightShortcodeDelim()) { + return lexShortcodeRightDelim + } + switch r := l.next(); { + case r == eof: + // eol is allowed inside shortcodes; this may go to end of document before it fails + return l.errorf("unclosed shortcode action") + case isSpace(r), isEndOfLine(r): + l.ignore() + case r == '=': + l.ignore() + return lexShortcodeQuotedParamVal(l, l.peek() != '\\', tScParamVal) + case r == '/': + if l.currShortcodeName == "" { + return l.errorf("got closing shortcode, but none is open") + } + l.closingState++ + l.emit(tScClose) + case r == '\\': + l.ignore() + if l.peek() == '"' { + return lexShortcodeParam(l, true) + } + case l.elementStepNum > 0 && (isAlphaNumericOrHyphen(r) || r == '"'): // positional params can have quotes + l.backup() + return lexShortcodeParam(l, false) + case isAlphaNumeric(r): + l.backup() + return lexIdentifierInShortcode + default: + return l.errorf("unrecognized character in shortcode action: %#U. Note: Parameters with non-alphanumeric args must be quoted", r) + } + return lexInsideShortcode +} + +func (l *pageLexer) currentLeftShortcodeDelimItem() ItemType { + return l.currLeftDelimItem +} + +func (l *pageLexer) currentRightShortcodeDelimItem() ItemType { + return l.currRightDelimItem +} + +func (l *pageLexer) currentLeftShortcodeDelim() []byte { + if l.currLeftDelimItem == tLeftDelimScWithMarkup { + return leftDelimScWithMarkup + } + return leftDelimScNoMarkup + +} + +func (l *pageLexer) currentRightShortcodeDelim() []byte { + if l.currRightDelimItem == tRightDelimScWithMarkup { + return rightDelimScWithMarkup + } + return rightDelimScNoMarkup +} |