diff options
author | Bjørn Erik Pedersen <[email protected]> | 2021-04-20 16:50:03 +0200 |
---|---|---|
committer | Bjørn Erik Pedersen <[email protected]> | 2021-04-20 17:24:17 +0200 |
commit | ef34dd8f0e94e52ba6f1d5d607e4ac3ae98a7abb (patch) | |
tree | 6cccbecf2bf0a899bb61c87eb52e981198355ad9 /publisher | |
parent | bc80022e033a5462d1a9ce541f40a050994011cc (diff) | |
download | hugo-ef34dd8f0e94e52ba6f1d5d607e4ac3ae98a7abb.tar.gz hugo-ef34dd8f0e94e52ba6f1d5d607e4ac3ae98a7abb.zip |
publisher: Some performance tweaks for the HTML elements collector
Diffstat (limited to 'publisher')
-rw-r--r-- | publisher/htmlElementsCollector.go | 80 | ||||
-rw-r--r-- | publisher/htmlElementsCollector_test.go | 70 |
2 files changed, 49 insertions, 101 deletions
diff --git a/publisher/htmlElementsCollector.go b/publisher/htmlElementsCollector.go index 9f4be1ff5..13387a7ee 100644 --- a/publisher/htmlElementsCollector.go +++ b/publisher/htmlElementsCollector.go @@ -108,13 +108,13 @@ func newHTMLElementsCollectorWriter(collector *htmlElementsCollector) *htmlEleme } } -// Write splits the incoming stream into single html element and writes these into elementSet +// Write splits the incoming stream into single html element. func (w *htmlElementsCollectorWriter) Write(p []byte) (n int, err error) { n = len(p) i := 0 for i < len(p) { - // if is not collecting, cycle through byte stream until start bracket "<" is found + // If we are not collecting, cycle through byte stream until start bracket "<" is found. if !w.isCollecting { for ; i < len(p); i++ { b := p[i] @@ -126,9 +126,9 @@ func (w *htmlElementsCollectorWriter) Write(p []byte) (n int, err error) { } if w.isCollecting { - // if is collecting, cycle through byte stream until end bracket ">" is found - // disregard any ">" if within a quote - // write bytes until found to buffer + // If we are collecting, cycle through byte stream until end bracket ">" is found, + // disregard any ">" if within a quote, + // write bytes until found to buffer. for ; i < len(p); i++ { b := p[i] w.toggleIfQuote(b) @@ -141,54 +141,69 @@ func (w *htmlElementsCollectorWriter) Write(p []byte) (n int, err error) { } } - // if no end bracket ">" is found while collecting, but the stream ended + // If no end bracket ">" is found while collecting, but the stream ended // this could mean we received chunks of a stream from e.g. the minify functionality - // next if loop will be skipped + // next if loop will be skipped. - // at this point we have collected an element line between angle brackets "<" and ">" + // At this point we have collected an element line between angle brackets "<" and ">". if !w.isCollecting { - s := w.buff.String() - w.buff.Reset() - - // filter out unwanted tags - // empty string, just in case - // if within preformatted code blocks <pre>, <textarea>, <script>, <style> - // comments and doctype tags - // end tags - switch { - case s == "": // empty string + if w.buff.Len() == 0 { continue - case w.inPreTag != "": // within preformatted code block + } + + if w.inPreTag != "" { // within preformatted code block + s := w.buff.String() + w.buff.Reset() if tagName, isEnd := parseEndTag(s); isEnd && w.inPreTag == tagName { w.inPreTag = "" } continue - case strings.HasPrefix(s, "<!"): // comment or doctype tag - continue - case strings.HasPrefix(s, "</"): // end tag - continue } - // check if we have processed this element before. + // First check if we have processed this element before. w.collector.mu.RLock() - seen := w.collector.elementSet[s] + + // Work with the bytes slice as long as it's practical, + // to save memory allocations. + b := w.buff.Bytes() + + // See https://github.com/dominikh/go-tools/issues/723 + //lint:ignore S1030 This construct avoids memory allocation for the string. + seen := w.collector.elementSet[string(b)] w.collector.mu.RUnlock() if seen { + w.buff.Reset() continue } - // check if a preformatted code block started + // Filter out unwanted tags + // if within preformatted code blocks <pre>, <textarea>, <script>, <style> + // comments and doctype tags + // end tags. + switch { + case bytes.HasPrefix(b, []byte("<!")): // comment or doctype tag + w.buff.Reset() + continue + case bytes.HasPrefix(b, []byte("</")): // end tag + w.buff.Reset() + continue + } + + s := w.buff.String() + w.buff.Reset() + + // Check if a preformatted code block started. if tagName, isStart := parseStartTag(s); isStart && isPreFormatted(tagName) { w.inPreTag = tagName } - // parse each collected element + // Parse each collected element. el, err := parseHTMLElement(s) if err != nil { return n, err } - // write this tag to the element set + // Write this tag to the element set. w.collector.mu.Lock() w.collector.elementSet[s] = true w.collector.elements = append(w.collector.elements, el) @@ -265,17 +280,18 @@ var ( htmlJsonFixer = strings.NewReplacer(", ", "\n") jsonAttrRe = regexp.MustCompile(`'?(.*?)'?:.*`) classAttrRe = regexp.MustCompile(`(?i)^class$|transition`) -) -func parseHTMLElement(elStr string) (el htmlElement, err error) { - var tagBuffer string = "" - exceptionList := map[string]bool{ + exceptionList = map[string]bool{ "thead": true, "tbody": true, "tfoot": true, "td": true, "tr": true, } +) + +func parseHTMLElement(elStr string) (el htmlElement, err error) { + var tagBuffer string = "" tagName, ok := parseStartTag(elStr) if !ok { diff --git a/publisher/htmlElementsCollector_test.go b/publisher/htmlElementsCollector_test.go index 1ada27c18..0c8b2b65b 100644 --- a/publisher/htmlElementsCollector_test.go +++ b/publisher/htmlElementsCollector_test.go @@ -14,7 +14,6 @@ package publisher import ( - "bytes" "fmt" "strings" "testing" @@ -129,33 +128,8 @@ func TestClassCollector(t *testing.T) { } } -func BenchmarkClassCollectorWriter(b *testing.B) { +func BenchmarkElementsCollectorWriter(b *testing.B) { const benchHTML = ` -<html> -<body id="i1" class="a b c d"> -<a class="c d e"></a> -<br> -<a class="c d e"></a> -<a class="c d e"></a> -<br> -<a id="i2" class="c d e f"></a> -<a id="i3" class="c d e"></a> -<a class="c d e"></a> -<br> -<a class="c d e"></a> -<a class="c d e"></a> -<a class="c d e"></a> -<a class="c d e"></a> -</body> -</html> -` - for i := 0; i < b.N; i++ { - w := newHTMLElementsCollectorWriter(newHTMLElementsCollector()) - fmt.Fprint(w, benchHTML) - } -} - -const benchHTML = ` <!DOCTYPE html> <html> <head> @@ -207,51 +181,9 @@ const benchHTML = ` </body> </html> ` - -func BenchmarkElementsCollectorWriter(b *testing.B) { - b.ReportAllocs() for i := 0; i < b.N; i++ { w := newHTMLElementsCollectorWriter(newHTMLElementsCollector()) fmt.Fprint(w, benchHTML) - } -} - -func BenchmarkElementsCollectorWriterMinified(b *testing.B) { - b.ReportAllocs() - v := viper.New() - m, _ := minifiers.New(media.DefaultTypes, output.DefaultFormats, v) - var buf bytes.Buffer - m.Minify(media.HTMLType, &buf, strings.NewReader(benchHTML)) - b.ResetTimer() - - for i := 0; i < b.N; i++ { - w := newHTMLElementsCollectorWriter(newHTMLElementsCollector()) - fmt.Fprint(w, buf.String()) - } -} - -func BenchmarkElementsCollectorWriterWithMinifyStream(b *testing.B) { - b.ReportAllocs() - v := viper.New() - m, _ := minifiers.New(media.DefaultTypes, output.DefaultFormats, v) - b.ResetTimer() - - for i := 0; i < b.N; i++ { - w := newHTMLElementsCollectorWriter(newHTMLElementsCollector()) - m.Minify(media.HTMLType, w, strings.NewReader(benchHTML)) - } -} - -func BenchmarkElementsCollectorWriterWithMinifyString(b *testing.B) { - b.ReportAllocs() - v := viper.New() - m, _ := minifiers.New(media.DefaultTypes, output.DefaultFormats, v) - b.ResetTimer() - for i := 0; i < b.N; i++ { - var buf bytes.Buffer - m.Minify(media.HTMLType, &buf, strings.NewReader(benchHTML)) - w := newHTMLElementsCollectorWriter(newHTMLElementsCollector()) - fmt.Fprint(w, buf.String()) } } |