diff options
author | Bjørn Erik Pedersen <[email protected]> | 2021-04-06 18:19:25 +0200 |
---|---|---|
committer | Bjørn Erik Pedersen <[email protected]> | 2021-04-07 00:26:02 +0200 |
commit | 8a308944e46f8c2aa054005d5aed89f2711f9c1d (patch) | |
tree | 7f993ed28e021396c31d94aa87f49617ac6c4195 /publisher | |
parent | 7b4ade56dd50d89a91760fc5ef8e2f151874de96 (diff) | |
download | hugo-8a308944e46f8c2aa054005d5aed89f2711f9c1d.tar.gz hugo-8a308944e46f8c2aa054005d5aed89f2711f9c1d.zip |
publisher: Skip script, pre and textarea content when looking for HTML elements
Updates #7567
Diffstat (limited to 'publisher')
-rw-r--r-- | publisher/htmlElementsCollector.go | 87 | ||||
-rw-r--r-- | publisher/htmlElementsCollector_test.go | 6 |
2 files changed, 60 insertions, 33 deletions
```diff
diff --git a/publisher/htmlElementsCollector.go b/publisher/htmlElementsCollector.go
index 1823a8327..d9479aafa 100644
--- a/publisher/htmlElementsCollector.go
+++ b/publisher/htmlElementsCollector.go
@@ -64,7 +64,7 @@ type cssClassCollectorWriter struct {
 	buff bytes.Buffer

 	isCollecting bool
-	dropValue bool
+	inPreTag string

 	inQuote bool
 	quoteValue byte
@@ -90,49 +90,58 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
 				b := p[i]
 				w.toggleIfQuote(b)
 				if !w.inQuote && b == '>' {
-					w.endCollecting(false)
+					w.endCollecting()
 					break
 				}
 				w.buff.WriteByte(b)
 			}

 			if !w.isCollecting {
-				if w.dropValue {
-					w.buff.Reset()
-				} else {
-					// First check if we have processed this element before.
-					w.collector.mu.RLock()
-
-					// See https://github.com/dominikh/go-tools/issues/723
-					//lint:ignore S1030 This construct avoids memory allocation for the string.
-					seen := w.collector.elementSet[string(w.buff.Bytes())]
-					w.collector.mu.RUnlock()
-					if seen {
-						w.buff.Reset()
-						continue
+				if w.inPreTag != "" {
+					s := w.buff.String()
+					if tagName, isEnd := w.parseEndTag(s); isEnd && w.inPreTag == tagName {
+						w.inPreTag = ""
 					}
+					w.buff.Reset()
+					continue
+				}

-					s := w.buff.String()
+				// First check if we have processed this element before.
+				w.collector.mu.RLock()

+				// See https://github.com/dominikh/go-tools/issues/723
+				//lint:ignore S1030 This construct avoids memory allocation for the string.
+				seen := w.collector.elementSet[string(w.buff.Bytes())]
+				w.collector.mu.RUnlock()
+				if seen {
 					w.buff.Reset()
+					continue
+				}

-					if strings.HasPrefix(s, "</") {
-						continue
-					}
+				s := w.buff.String()

-					key := s
+				w.buff.Reset()

-					s, tagName := w.insertStandinHTMLElement(s)
-					el := parseHTMLElement(s)
-					el.Tag = tagName
+				if strings.HasPrefix(s, "</") {
+					continue
+				}

-					w.collector.mu.Lock()
-					w.collector.elementSet[key] = true
-					if el.Tag != "" {
-						w.collector.elements = append(w.collector.elements, el)
-					}
-					w.collector.mu.Unlock()
+				key := s
+
+				s, tagName := w.insertStandinHTMLElement(s)
+				el := parseHTMLElement(s)
+				el.Tag = tagName
+				if w.isPreFormatted(tagName) {
+					w.inPreTag = tagName
 				}
+
+				w.collector.mu.Lock()
+				w.collector.elementSet[key] = true
+				if el.Tag != "" {
+					w.collector.elements = append(w.collector.elements, el)
+				}
+				w.collector.mu.Unlock()
+
 			}
 		}
 	}
@@ -140,6 +149,11 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
 	return
 }

+// No need to look inside these for HTML elements.
+func (c *cssClassCollectorWriter) isPreFormatted(s string) bool {
+	return s == "pre" || s == "textarea" || s == "script"
+}
+
 // The net/html parser does not handle single table elements as input, e.g. tbody.
 // We only care about the element/class/ids, so just store away the original tag name
 // and pretend it's a <div>.
@@ -154,15 +168,24 @@ func (c *cssClassCollectorWriter) insertStandinHTMLElement(el string) (string, s
 	return newv, strings.ToLower(tag)
 }

-func (c *cssClassCollectorWriter) endCollecting(drop bool) {
+func (c *cssClassCollectorWriter) parseEndTag(s string) (string, bool) {
+	if !strings.HasPrefix(s, "</") {
+		return "", false
+	}
+	s = strings.TrimPrefix(s, "</")
+	s = strings.TrimSuffix(s, ">")
+	return strings.ToLower(strings.TrimSpace(s)), true
+}
+
+func (c *cssClassCollectorWriter) endCollecting() {
 	c.isCollecting = false
 	c.inQuote = false
-	c.dropValue = drop
+
 }

 func (c *cssClassCollectorWriter) startCollecting() {
 	c.isCollecting = true
-	c.dropValue = false
+
 }

 func (c *cssClassCollectorWriter) toggleIfQuote(b byte) {
diff --git a/publisher/htmlElementsCollector_test.go b/publisher/htmlElementsCollector_test.go
index 2c2fd3733..5a1802234 100644
--- a/publisher/htmlElementsCollector_test.go
+++ b/publisher/htmlElementsCollector_test.go
@@ -89,8 +89,12 @@ func TestClassCollector(t *testing.T) {

 		{"Alpine transition 1", `<div x-transition:enter-start="opacity-0 transform mobile:-translate-x-8 sm:-translate-y-8">`, f("div", "mobile:-translate-x-8 opacity-0 sm:-translate-y-8 transform", "")},
 		{"Vue bind", `<div v-bind:class="{ active: isActive }"></div>`, f("div", "active", "")},
-		// https://github.com/gohugoio/hugo/issues/7746
+		// Issue #7746
 		{"Apostrophe inside attribute value", `<a class="missingclass" title="Plus d'information">my text</a><div></div>`, f("a div", "missingclass", "")},
+		// Issue #7567
+		{"Script tags content should be skipped", `<script><span>foo</span><span>bar</span></script><div class="foo"></div>`, f("div script", "foo", "")},
+		{"Pre tags content should be skipped", `<pre class="preclass"><span>foo</span><span>bar</span></pre><div class="foo"></div>`, f("div pre", "foo preclass", "")},
+		{"Textare tags content should be skipped", `<textarea class="textareaclass"><span>foo</span><span>bar</span></textarea><div class="foo"></div>`, f("div textarea", "foo textareaclass", "")},
 	} {
 		c.Run(test.name, func(c *qt.C) {
 			w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
```