diff options
author | Bjørn Erik Pedersen <[email protected]> | 2018-10-23 14:37:09 +0200 |
---|---|---|
committer | Bjørn Erik Pedersen <[email protected]> | 2018-10-23 19:41:22 +0200 |
commit | 6636cf1bea77d20ef2a72a45fae59ac402fb133b (patch) | |
tree | 91c9435acd1a2139f8816abcd9b0d978ff2fa300 /parser | |
parent | f669ef6bec25155d015b6ab231c53caef4fa5cdc (diff) | |
download | hugo-6636cf1bea77d20ef2a72a45fae59ac402fb133b.tar.gz hugo-6636cf1bea77d20ef2a72a45fae59ac402fb133b.zip |
Resolve error handling/parser related TODOs
See #5324
Diffstat (limited to 'parser')
-rw-r--r-- | parser/pageparser/item.go | 5 | ||||
-rw-r--r-- | parser/pageparser/pagelexer.go | 56 | ||||
-rw-r--r-- | parser/pageparser/pageparser.go | 2 | ||||
-rw-r--r-- | parser/pageparser/pageparser_intro_test.go | 8 |
4 files changed, 35 insertions, 36 deletions
diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go index 049db584a..0567bd8b9 100644 --- a/parser/pageparser/item.go +++ b/parser/pageparser/item.go @@ -103,10 +103,9 @@ const ( tEOF // page items - TypeHTMLDocument // document starting with < as first non-whitespace + TypeHTMLStart // document starting with < as first non-whitespace TypeHTMLComment // We ignore leading comments - TypeLeadSummaryDivider // <!--more--> - TypeSummaryDividerOrg // # more + TypeLeadSummaryDivider // <!--more-->, # more TypeFrontMatterYAML TypeFrontMatterTOML TypeFrontMatterJSON diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go index e02475d42..ddf109b3d 100644 --- a/parser/pageparser/pagelexer.go +++ b/parser/pageparser/pagelexer.go @@ -48,6 +48,8 @@ type pageLexer struct { start int // item start position width int // width of last element + // The summary divider to look for. + summaryDivider []byte // Set when we have parsed any summary divider summaryDividerChecked bool @@ -69,7 +71,6 @@ func (l *pageLexer) Input() []byte { // note: the input position here is normally 0 (start), but // can be set if position of first shortcode is known -// TODO(bep) 2errors byte func newPageLexer(input []byte, inputPosition int, stateStart stateFunc) *pageLexer { lexer := &pageLexer{ input: input, @@ -117,7 +118,7 @@ var ( delimTOML = []byte("+++") delimYAML = []byte("---") delimOrg = []byte("#+") - htmlCOmmentStart = []byte("<!--") + htmlCommentStart = []byte("<!--") htmlCOmmentEnd = []byte("-->") ) @@ -195,17 +196,18 @@ func (l *pageLexer) consumeCRLF() bool { func lexMainSection(l *pageLexer) stateFunc { // Fast forward as far as possible. - var l1, l2, l3 int - if !l.summaryDividerChecked { - // TODO(bep) 2errors make the summary divider per type - l1 = l.index(summaryDivider) - l2 = l.index(summaryDividerOrg) - if l1 == -1 && l2 == -1 { + var l1, l2 int + + if !l.summaryDividerChecked && l.summaryDivider != nil { + l1 = l.index(l.summaryDivider) + if l1 == -1 { l.summaryDividerChecked = true } } - l3 = l.index(leftDelimSc) - skip := minPositiveIndex(l1, l2, l3) + + l2 = l.index(leftDelimSc) + skip := minPositiveIndex(l1, l2) + if skip > 0 { l.pos += skip } @@ -225,23 +227,14 @@ func lexMainSection(l *pageLexer) stateFunc { return lexShortcodeLeftDelim } - if !l.summaryDividerChecked { - if l.hasPrefix(summaryDivider) { + if !l.summaryDividerChecked && l.summaryDivider != nil { + if l.hasPrefix(l.summaryDivider) { if l.pos > l.start { l.emit(tText) } l.summaryDividerChecked = true - l.pos += len(summaryDivider) - //l.consumeCRLF() + l.pos += len(l.summaryDivider) l.emit(TypeLeadSummaryDivider) - } else if l.hasPrefix(summaryDividerOrg) { - if l.pos > l.start { - l.emit(tText) - } - l.summaryDividerChecked = true - l.pos += len(summaryDividerOrg) - //l.consumeCRLF() - l.emit(TypeSummaryDividerOrg) } } @@ -261,6 +254,8 @@ func (l *pageLexer) isShortCodeStart() bool { } func lexIntroSection(l *pageLexer) stateFunc { + l.summaryDivider = summaryDivider + LOOP: for { r := l.next() @@ -283,7 +278,7 @@ LOOP: // No front matter. if r == '<' { l.backup() - if l.hasPrefix(htmlCOmmentStart) { + if l.hasPrefix(htmlCommentStart) { right := l.index(htmlCOmmentEnd) if right == -1 { return l.errorf("starting HTML comment with no end") @@ -291,10 +286,14 @@ LOOP: l.pos += right + len(htmlCOmmentEnd) l.emit(TypeHTMLComment) } else { - // Not need to look further. Hugo treats this as plain HTML, - // no front matter, no shortcodes, no nothing. - l.pos = len(l.input) - l.emit(TypeHTMLDocument) + if l.pos > l.start { + l.emit(tText) + } + l.next() + // This is the start of a plain HTML document with no + // front matter. I still can contain shortcodes, so we + // have to keep looking. + l.emit(TypeHTMLStart) } } break LOOP @@ -365,10 +364,11 @@ func lexFrontMatterOrgMode(l *pageLexer) stateFunc { #+DESCRIPTION: Just another golang parser for org content! */ + l.summaryDivider = summaryDividerOrg + l.backup() if !l.hasPrefix(delimOrg) { - // TODO(bep) consider error return lexMainSection } diff --git a/parser/pageparser/pageparser.go b/parser/pageparser/pageparser.go index 6e75f195a..754397121 100644 --- a/parser/pageparser/pageparser.go +++ b/parser/pageparser/pageparser.go @@ -48,7 +48,7 @@ func Parse(r io.Reader) (Result, error) { } func parseMainSection(input []byte, from int) Result { - lexer := newPageLexer(input, from, lexMainSection) // TODO(bep) 2errors + lexer := newPageLexer(input, from, lexMainSection) lexer.run() return lexer } diff --git a/parser/pageparser/pageparser_intro_test.go b/parser/pageparser/pageparser_intro_test.go index 32de6dc44..ba4a2c84b 100644 --- a/parser/pageparser/pageparser_intro_test.go +++ b/parser/pageparser/pageparser_intro_test.go @@ -38,7 +38,7 @@ var ( tstFrontMatterJSON = nti(TypeFrontMatterJSON, tstJSON+"\r\n") tstSomeText = nti(tText, "\nSome text.\n") tstSummaryDivider = nti(TypeLeadSummaryDivider, "<!--more-->") - tstSummaryDividerOrg = nti(TypeSummaryDividerOrg, "# more") + tstHtmlStart = nti(TypeHTMLStart, "<") tstORG = ` #+TITLE: T1 @@ -54,8 +54,8 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$") var frontMatterTests = []lexerTest{ {"empty", "", []Item{tstEOF}}, {"Byte order mark", "\ufeff\nSome text.\n", []Item{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}}, - {"HTML Document", ` <html> `, []Item{nti(TypeHTMLDocument, " <html> "), tstEOF}}, - {"HTML Document 2", `<html><h1>Hugo Rocks</h1></html>`, []Item{nti(TypeHTMLDocument, "<html><h1>Hugo Rocks</h1></html>"), tstEOF}}, + {"HTML Document", ` <html> `, []Item{nti(tText, " "), tstHtmlStart, nti(tText, "html> "), tstEOF}}, + {"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, []Item{tstHtmlStart, nti(tText, "html>"), tstLeftNoMD, tstSC1, tstRightNoMD, nti(tText, "</html>"), tstEOF}}, {"No front matter", "\nSome text.\n", []Item{tstSomeText, tstEOF}}, {"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}}, {"YAML empty front matter", "---\n---\n\nSome text.\n", []Item{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}}, @@ -65,7 +65,7 @@ var frontMatterTests = []lexerTest{ {"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstEOF}}, {"JSON front matter", tstJSON + "\r\n\nSome text.\n", []Item{tstFrontMatterJSON, tstSomeText, tstEOF}}, {"ORG front matter", tstORG + "\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstEOF}}, - {"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstSummaryDividerOrg, tstSomeText, tstEOF}}, + {"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, nti(TypeLeadSummaryDivider, "# more"), tstSomeText, tstEOF}}, {"Summary divider", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstSummaryDivider, tstSomeText, tstEOF}}, } |