diff options
-rw-r--r-- | .circleci/config.yml | 4 | ||||
-rw-r--r-- | scripts/fork_go_templates/main.go | 5 | ||||
-rw-r--r-- | tpl/internal/go_templates/htmltemplate/context.go | 20 | ||||
-rw-r--r-- | tpl/internal/go_templates/htmltemplate/escape.go | 33 | ||||
-rw-r--r-- | tpl/internal/go_templates/htmltemplate/escape_test.go | 57 | ||||
-rw-r--r-- | tpl/internal/go_templates/htmltemplate/state_string.go | 26 | ||||
-rw-r--r-- | tpl/internal/go_templates/htmltemplate/transition.go | 95 |
7 files changed, 191 insertions, 49 deletions
diff --git a/.circleci/config.yml b/.circleci/config.yml index 4702a8457..3d1bee219 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -4,7 +4,7 @@ parameters: defaults: &defaults resource_class: large docker: - - image: bepsays/ci-hugoreleaser:1.22000.20100 + - image: bepsays/ci-hugoreleaser:1.22100.20100 environment: &buildenv GOMODCACHE: /root/project/gomodcache version: 2 @@ -60,7 +60,7 @@ jobs: environment: <<: [*buildenv] docker: - - image: bepsays/ci-hugoreleaser-linux-arm64:1.22000.20100 + - image: bepsays/ci-hugoreleaser-linux-arm64:1.22100.20100 steps: - *restore-cache - &attach-workspace diff --git a/scripts/fork_go_templates/main.go b/scripts/fork_go_templates/main.go index 30c61e585..4ab89a547 100644 --- a/scripts/fork_go_templates/main.go +++ b/scripts/fork_go_templates/main.go @@ -16,7 +16,7 @@ import ( ) func main() { - // The current is built with c19c4c566c HEAD, tag: go1.21.0. + // The current is built with 2c1e5b05fe39fc5e6c730dd60e82946b8e67c6ba, tag: go1.21.1. fmt.Println("Forking ...") defer fmt.Println("Done ...") @@ -162,7 +162,8 @@ func copyGoPackage(dst, src string) { func doWithGoFiles(dir string, rewrite func(name string), - transform func(name, in string) string) { + transform func(name, in string) string, +) { if rewrite == nil && transform == nil { return } diff --git a/tpl/internal/go_templates/htmltemplate/context.go b/tpl/internal/go_templates/htmltemplate/context.go index 9f592b57f..061f17ddb 100644 --- a/tpl/internal/go_templates/htmltemplate/context.go +++ b/tpl/internal/go_templates/htmltemplate/context.go @@ -129,6 +129,10 @@ const ( stateJSBlockCmt // stateJSLineCmt occurs inside a JavaScript // line comment. stateJSLineCmt + // stateJSHTMLOpenCmt occurs inside a JavaScript <!-- HTML-like comment. + stateJSHTMLOpenCmt + // stateJSHTMLCloseCmt occurs inside a JavaScript --> HTML-like comment. + stateJSHTMLCloseCmt // stateCSS occurs inside a <style> element or style attribute. 
stateCSS // stateCSSDqStr occurs inside a CSS double quoted string. @@ -156,7 +160,7 @@ const ( // authors & maintainers, not for end-users or machines. func isComment(s state) bool { switch s { - case stateHTMLCmt, stateJSBlockCmt, stateJSLineCmt, stateCSSBlockCmt, stateCSSLineCmt: + case stateHTMLCmt, stateJSBlockCmt, stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt, stateCSSBlockCmt, stateCSSLineCmt: return true } return false @@ -171,6 +175,20 @@ func isInTag(s state) bool { return false } +// isInScriptLiteral returns true if s is one of the literal states within a +// <script> tag, and as such occurrences of "<!--", "<script", and "</script" +// need to be treated specially. +func isInScriptLiteral(s state) bool { + // Ignore the comment states (stateJSBlockCmt, stateJSLineCmt, + // stateJSHTMLOpenCmt, stateJSHTMLCloseCmt) because their content is already + // omitted from the output. + switch s { + case stateJSDqStr, stateJSSqStr, stateJSBqStr, stateJSRegexp: + return true + } + return false +} + // delim is the delimiter that will end the current HTML attribute. 
type delim uint8 diff --git a/tpl/internal/go_templates/htmltemplate/escape.go b/tpl/internal/go_templates/htmltemplate/escape.go index f6b2e4b34..5d47a83d3 100644 --- a/tpl/internal/go_templates/htmltemplate/escape.go +++ b/tpl/internal/go_templates/htmltemplate/escape.go @@ -11,6 +11,7 @@ import ( //"internal/godebug" "io" + "regexp" template "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate" "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse" @@ -231,7 +232,7 @@ func (e *escaper) escapeAction(c context, n *parse.ActionNode) context { s = append(s, "_html_template_jsstrescaper") case stateJSBqStr: if SecurityAllowActionJSTmpl.Load() { - //debugAllowActionJSTmpl.IncNonDefault() + // debugAllowActionJSTmpl.IncNonDefault() s = append(s, "_html_template_jsstrescaper") } else { return context{ @@ -732,6 +733,26 @@ var delimEnds = [...]string{ delimSpaceOrTagEnd: " \t\n\f\r>", } +var ( + // Per WHATWG HTML specification, section 4.12.1.3, there are extremely + // complicated rules for how to handle the set of opening tags <!--, + // <script, and </script when they appear in JS literals (i.e. strings, + // regexs, and comments). The specification suggests a simple solution, + // rather than implementing the arcane ABNF, which involves simply escaping + // the opening bracket with \x3C. We use the below regex for this, since it + // makes doing the case-insensitive find-replace much simpler. + specialScriptTagRE = regexp.MustCompile("(?i)<(script|/script|!--)") + specialScriptTagReplacement = []byte("\\x3C$1") +) + +func containsSpecialScriptTag(s []byte) bool { + return specialScriptTagRE.Match(s) +} + +func escapeSpecialScriptTags(s []byte) []byte { + return specialScriptTagRE.ReplaceAll(s, specialScriptTagReplacement) +} + var doctypeBytes = []byte("<!DOCTYPE") // escapeText escapes a text template node. 
@@ -780,13 +801,21 @@ func (e *escaper) escapeText(c context, n *parse.TextNode) context { if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone { // Preserve the portion between written and the comment start. cs := i1 - 2 - if c1.state == stateHTMLCmt { + if c1.state == stateHTMLCmt || c1.state == stateJSHTMLOpenCmt { // "<!--" instead of "/*" or "//" cs -= 2 + } else if c1.state == stateJSHTMLCloseCmt { + // "-->" instead of "/*" or "//" + cs -= 1 } b.Write(s[written:cs]) written = i1 } + if isInScriptLiteral(c.state) && containsSpecialScriptTag(s[i:i1]) { + b.Write(s[written:i]) + b.Write(escapeSpecialScriptTags(s[i:i1])) + written = i1 + } if i == i1 && c.state == c1.state { panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:])) } diff --git a/tpl/internal/go_templates/htmltemplate/escape_test.go b/tpl/internal/go_templates/htmltemplate/escape_test.go index 680ba6fa7..4ad5316fb 100644 --- a/tpl/internal/go_templates/htmltemplate/escape_test.go +++ b/tpl/internal/go_templates/htmltemplate/escape_test.go @@ -509,6 +509,31 @@ func TestEscape(t *testing.T) { "<script>var a \nd</script>", }, { + "JS HTML-like comments", + "<script>before <!-- beep\nbetween\nbefore-->boop\n</script>", + "<script>before \nbetween\nbefore\n</script>", + }, + { + "JS hashbang comment", + "<script>#! 
beep\n</script>", + "<script>\n</script>", + }, + { + "Special tags in <script> string literals", + `<script>var a = "asd < 123 <!-- 456 < fgh <script jkl < 789 </script"</script>`, + `<script>var a = "asd < 123 \x3C!-- 456 < fgh \x3Cscript jkl < 789 \x3C/script"</script>`, + }, + { + "Special tags in <script> string literals (mixed case)", + `<script>var a = "<!-- <ScripT </ScripT"</script>`, + `<script>var a = "\x3C!-- \x3CScripT \x3C/ScripT"</script>`, + }, + { + "Special tags in <script> regex literals (mixed case)", + `<script>var a = /<!-- <ScripT </ScripT/</script>`, + `<script>var a = /\x3C!-- \x3CScripT \x3C/ScripT/</script>`, + }, + { "CSS comments", "<style>p// paragraph\n" + `{border: 1px/* color */{{"#00f"}}}</style>`, @@ -1528,8 +1553,38 @@ func TestEscapeText(t *testing.T) { context{state: stateJS, element: elementScript}, }, { + // <script and </script tags are escaped, so </script> should not + // cause us to exit the JS state. `<script>document.write("<script>alert(1)</script>");`, - context{state: stateText}, + context{state: stateJS, element: elementScript}, + }, + { + `<script>document.write("<script>`, + context{state: stateJSDqStr, element: elementScript}, + }, + { + `<script>document.write("<script>alert(1)</script>`, + context{state: stateJSDqStr, element: elementScript}, + }, + { + `<script>document.write("<script>alert(1)<!--`, + context{state: stateJSDqStr, element: elementScript}, + }, + { + `<script>document.write("<script>alert(1)</Script>");`, + context{state: stateJS, element: elementScript}, + }, + { + `<script>document.write("<!--");`, + context{state: stateJS, element: elementScript}, + }, + { + `<script>let a = /</script`, + context{state: stateJSRegexp, element: elementScript}, + }, + { + `<script>let a = /</script/`, + context{state: stateJS, element: elementScript, jsCtx: jsCtxDivOp}, }, { `<script type="text/template">`, diff --git a/tpl/internal/go_templates/htmltemplate/state_string.go 
b/tpl/internal/go_templates/htmltemplate/state_string.go index 6fb1a6eeb..be7a92051 100644 --- a/tpl/internal/go_templates/htmltemplate/state_string.go +++ b/tpl/internal/go_templates/htmltemplate/state_string.go @@ -25,21 +25,23 @@ func _() { _ = x[stateJSRegexp-14] _ = x[stateJSBlockCmt-15] _ = x[stateJSLineCmt-16] - _ = x[stateCSS-17] - _ = x[stateCSSDqStr-18] - _ = x[stateCSSSqStr-19] - _ = x[stateCSSDqURL-20] - _ = x[stateCSSSqURL-21] - _ = x[stateCSSURL-22] - _ = x[stateCSSBlockCmt-23] - _ = x[stateCSSLineCmt-24] - _ = x[stateError-25] - _ = x[stateDead-26] + _ = x[stateJSHTMLOpenCmt-17] + _ = x[stateJSHTMLCloseCmt-18] + _ = x[stateCSS-19] + _ = x[stateCSSDqStr-20] + _ = x[stateCSSSqStr-21] + _ = x[stateCSSDqURL-22] + _ = x[stateCSSSqURL-23] + _ = x[stateCSSURL-24] + _ = x[stateCSSBlockCmt-25] + _ = x[stateCSSLineCmt-26] + _ = x[stateError-27] + _ = x[stateDead-28] } -const _state_name = "stateTextstateTagstateAttrNamestateAfterNamestateBeforeValuestateHTMLCmtstateRCDATAstateAttrstateURLstateSrcsetstateJSstateJSDqStrstateJSSqStrstateJSBqStrstateJSRegexpstateJSBlockCmtstateJSLineCmtstateCSSstateCSSDqStrstateCSSSqStrstateCSSDqURLstateCSSSqURLstateCSSURLstateCSSBlockCmtstateCSSLineCmtstateErrorstateDead" +const _state_name = "stateTextstateTagstateAttrNamestateAfterNamestateBeforeValuestateHTMLCmtstateRCDATAstateAttrstateURLstateSrcsetstateJSstateJSDqStrstateJSSqStrstateJSBqStrstateJSRegexpstateJSBlockCmtstateJSLineCmtstateJSHTMLOpenCmtstateJSHTMLCloseCmtstateCSSstateCSSDqStrstateCSSSqStrstateCSSDqURLstateCSSSqURLstateCSSURLstateCSSBlockCmtstateCSSLineCmtstateErrorstateDead" -var _state_index = [...]uint16{0, 9, 17, 30, 44, 60, 72, 83, 92, 100, 111, 118, 130, 142, 154, 167, 182, 196, 204, 217, 230, 243, 256, 267, 283, 298, 308, 317} +var _state_index = [...]uint16{0, 9, 17, 30, 44, 60, 72, 83, 92, 100, 111, 118, 130, 142, 154, 167, 182, 196, 214, 233, 241, 254, 267, 280, 293, 304, 320, 335, 345, 354} func (i state) String() string { if i >= 
state(len(_state_index)-1) { diff --git a/tpl/internal/go_templates/htmltemplate/transition.go b/tpl/internal/go_templates/htmltemplate/transition.go index 3b9fbfb68..432c365d3 100644 --- a/tpl/internal/go_templates/htmltemplate/transition.go +++ b/tpl/internal/go_templates/htmltemplate/transition.go @@ -14,32 +14,34 @@ import ( // the updated context and the number of bytes consumed from the front of the // input. var transitionFunc = [...]func(context, []byte) (context, int){ - stateText: tText, - stateTag: tTag, - stateAttrName: tAttrName, - stateAfterName: tAfterName, - stateBeforeValue: tBeforeValue, - stateHTMLCmt: tHTMLCmt, - stateRCDATA: tSpecialTagEnd, - stateAttr: tAttr, - stateURL: tURL, - stateSrcset: tURL, - stateJS: tJS, - stateJSDqStr: tJSDelimited, - stateJSSqStr: tJSDelimited, - stateJSBqStr: tJSDelimited, - stateJSRegexp: tJSDelimited, - stateJSBlockCmt: tBlockCmt, - stateJSLineCmt: tLineCmt, - stateCSS: tCSS, - stateCSSDqStr: tCSSStr, - stateCSSSqStr: tCSSStr, - stateCSSDqURL: tCSSStr, - stateCSSSqURL: tCSSStr, - stateCSSURL: tCSSStr, - stateCSSBlockCmt: tBlockCmt, - stateCSSLineCmt: tLineCmt, - stateError: tError, + stateText: tText, + stateTag: tTag, + stateAttrName: tAttrName, + stateAfterName: tAfterName, + stateBeforeValue: tBeforeValue, + stateHTMLCmt: tHTMLCmt, + stateRCDATA: tSpecialTagEnd, + stateAttr: tAttr, + stateURL: tURL, + stateSrcset: tURL, + stateJS: tJS, + stateJSDqStr: tJSDelimited, + stateJSSqStr: tJSDelimited, + stateJSBqStr: tJSDelimited, + stateJSRegexp: tJSDelimited, + stateJSBlockCmt: tBlockCmt, + stateJSLineCmt: tLineCmt, + stateJSHTMLOpenCmt: tLineCmt, + stateJSHTMLCloseCmt: tLineCmt, + stateCSS: tCSS, + stateCSSDqStr: tCSSStr, + stateCSSSqStr: tCSSStr, + stateCSSDqURL: tCSSStr, + stateCSSSqURL: tCSSStr, + stateCSSURL: tCSSStr, + stateCSSBlockCmt: tBlockCmt, + stateCSSLineCmt: tLineCmt, + stateError: tError, } var commentStart = []byte("<!--") @@ -212,6 +214,11 @@ var ( // element states. 
func tSpecialTagEnd(c context, s []byte) (context, int) { if c.element != elementNone { + // script end tags ("</script") within script literals are ignored, so that + // we can properly escape them. + if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) { + return c, len(s) + } if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 { return context{}, i } @@ -263,7 +270,7 @@ func tURL(c context, s []byte) (context, int) { // tJS is the context transition function for the JS state. func tJS(c context, s []byte) (context, int) { - i := bytes.IndexAny(s, "\"`'/") + i := bytes.IndexAny(s, "\"`'/<-#") if i == -1 { // Entire input is non string, comment, regexp tokens. c.jsCtx = nextJSCtx(s, c.jsCtx) @@ -293,6 +300,26 @@ func tJS(c context, s []byte) (context, int) { err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]), }, len(s) } + // ECMAScript supports HTML style comments for legacy reasons, see Appendix + // B.1.1 "HTML-like Comments". The handling of these comments is somewhat + // confusing. Multi-line comments are not supported, i.e. anything on lines + // between the opening and closing tokens is not considered a comment, but + // anything following the opening or closing token, on the same line, is + // ignored. As such we simply treat any line prefixed with "<!--" or "-->" + // as if it were actually prefixed with "//" and move on. + case '<': + if i+3 < len(s) && bytes.Equal(commentStart, s[i:i+4]) { + c.state, i = stateJSHTMLOpenCmt, i+3 + } + case '-': + if i+2 < len(s) && bytes.Equal(commentEnd, s[i:i+3]) { + c.state, i = stateJSHTMLCloseCmt, i+2 + } + // ECMAScript also supports "hashbang" comment lines, see Section 12.5. + case '#': + if i+1 < len(s) && s[i+1] == '!' 
{ + c.state, i = stateJSLineCmt, i+1 + } default: panic("unreachable") } @@ -331,6 +358,16 @@ func tJSDelimited(c context, s []byte) (context, int) { inCharset = true case ']': inCharset = false + case '/': + // If "</script" appears in a regex literal, the '/' should not + // close the regex literal, and it will later be escaped to + // "\x3C/script" in escapeText. + if i > 0 && i+7 <= len(s) && bytes.Compare(bytes.ToLower(s[i-1:i+7]), []byte("</script")) == 0 { + i++ + } else if !inCharset { + c.state, c.jsCtx = stateJS, jsCtxDivOp + return c, i + 1 + } default: // end delimiter if !inCharset { @@ -372,12 +409,12 @@ func tBlockCmt(c context, s []byte) (context, int) { return c, i + 2 } -// tLineCmt is the context transition function for //comment states. +// tLineCmt is the context transition function for //comment states, and the JS HTML-like comment state. func tLineCmt(c context, s []byte) (context, int) { var lineTerminators string var endState state switch c.state { - case stateJSLineCmt: + case stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt: lineTerminators, endState = "\n\r\u2028\u2029", stateJS case stateCSSLineCmt: lineTerminators, endState = "\n\f\r", stateCSS |