diff options
author | Bjørn Erik Pedersen <[email protected]> | 2022-05-25 10:56:14 +0200 |
---|---|---|
committer | Bjørn Erik Pedersen <[email protected]> | 2022-05-25 17:55:23 +0200 |
commit | 3854a6fa6c323d1c09aa71a0626c9eef62709294 (patch) | |
tree | ea3727c14f73fb73aef89d43795dd6d6f75f1220 /helpers | |
parent | cd0112a05a9ddb7043c9808284f93d8099c48473 (diff) | |
download | hugo-3854a6fa6c323d1c09aa71a0626c9eef62709294.tar.gz hugo-3854a6fa6c323d1c09aa71a0626c9eef62709294.zip |
Fix Plainify edge cases
This commit replaces the main part of `helpers.StripHTML` with Go's implementation in its html/template package.
It's a little slower, but correctness is more important:
```bash
BenchmarkStripHTMLOld-10 680316 1764 ns/op 728 B/op 4 allocs/op
BenchmarkStripHTMLNew-10 384520 3099 ns/op 2089 B/op 10 allocs/op
```
Fixes #9199
Fixes #9909
Closes #9410
Diffstat (limited to 'helpers')
-rw-r--r-- | helpers/content.go | 40 | ||||
-rw-r--r-- | helpers/content_test.go | 38 |
2 files changed, 0 insertions, 78 deletions
```diff
diff --git a/helpers/content.go b/helpers/content.go
index 835663b76..d04e34a07 100644
--- a/helpers/content.go
+++ b/helpers/content.go
@@ -34,7 +34,6 @@ import (
 
 	"github.com/gohugoio/hugo/markup"
 
-	bp "github.com/gohugoio/hugo/bufferpool"
 	"github.com/gohugoio/hugo/config"
 )
 
@@ -104,45 +103,6 @@ func NewContentSpec(cfg config.Provider, logger loggers.Logger, contentFs afero.
 	return spec, nil
 }
 
-var stripHTMLReplacer = strings.NewReplacer("\n", " ", "</p>", "\n", "<br>", "\n", "<br />", "\n")
-
-// StripHTML accepts a string, strips out all HTML tags and returns it.
-func StripHTML(s string) string {
-	// Shortcut strings with no tags in them
-	if !strings.ContainsAny(s, "<>") {
-		return s
-	}
-	s = stripHTMLReplacer.Replace(s)
-
-	// Walk through the string removing all tags
-	b := bp.GetBuffer()
-	defer bp.PutBuffer(b)
-	var inTag, isSpace, wasSpace bool
-	for _, r := range s {
-		if !inTag {
-			isSpace = false
-		}
-
-		switch {
-		case r == '<':
-			inTag = true
-		case r == '>':
-			inTag = false
-		case unicode.IsSpace(r):
-			isSpace = true
-			fallthrough
-		default:
-			if !inTag && (!isSpace || (isSpace && !wasSpace)) {
-				b.WriteRune(r)
-			}
-		}
-
-		wasSpace = isSpace
-
-	}
-	return b.String()
-}
-
 // stripEmptyNav strips out empty <nav> tags from content.
 func stripEmptyNav(in []byte) []byte {
 	return bytes.Replace(in, []byte("<nav>\n</nav>\n\n"), []byte(``), -1)
diff --git a/helpers/content_test.go b/helpers/content_test.go
index 4b67b44f0..54b7ef3f9 100644
--- a/helpers/content_test.go
+++ b/helpers/content_test.go
@@ -52,44 +52,6 @@ func TestTrimShortHTML(t *testing.T) {
 	}
 }
 
-func TestStripHTML(t *testing.T) {
-	type test struct {
-		input, expected string
-	}
-	data := []test{
-		{"<h1>strip h1 tag <h1>", "strip h1 tag "},
-		{"<p> strip p tag </p>", " strip p tag "},
-		{"</br> strip br<br>", " strip br\n"},
-		{"</br> strip br2<br />", " strip br2\n"},
-		{"This <strong>is</strong> a\nnewline", "This is a newline"},
-		{"No Tags", "No Tags"},
-		{`<p>Summary Next Line. 
-<figure >
-
-        <img src="/not/real" />
-
-
-</figure>
-.
-More text here.</p>
-
-<p>Some more text</p>`, "Summary Next Line. . More text here.\nSome more text\n"},
-	}
-	for i, d := range data {
-		output := StripHTML(d.input)
-		if d.expected != output {
-			t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output)
-		}
-	}
-}
-
-func BenchmarkStripHTML(b *testing.B) {
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		StripHTML(tstHTMLContent)
-	}
-}
-
 func TestStripEmptyNav(t *testing.T) {
 	c := qt.New(t)
 	cleaned := stripEmptyNav([]byte("do<nav>\n</nav>\n\nbedobedo"))
```