aboutsummaryrefslogtreecommitdiffhomepage
path: root/tpl
diff options
context:
space:
mode:
authorBjørn Erik Pedersen <[email protected]>2022-05-25 10:56:14 +0200
committerBjørn Erik Pedersen <[email protected]>2022-05-25 17:55:23 +0200
commit3854a6fa6c323d1c09aa71a0626c9eef62709294 (patch)
treeea3727c14f73fb73aef89d43795dd6d6f75f1220 /tpl
parentcd0112a05a9ddb7043c9808284f93d8099c48473 (diff)
downloadhugo-3854a6fa6c323d1c09aa71a0626c9eef62709294.tar.gz
hugo-3854a6fa6c323d1c09aa71a0626c9eef62709294.zip
Fix Plainify edge cases
This commit replaces the main part of `helpers.StripHTML` with Go's implementation in its html/template package. It's a little slower, but correctness is more important: ```bash BenchmarkStripHTMLOld-10 680316 1764 ns/op 728 B/op 4 allocs/op BenchmarkStripHTMLNew-10 384520 3099 ns/op 2089 B/op 10 allocs/op ``` Fixes #9199 Fixes #9909 Closes #9410
Diffstat (limited to 'tpl')
-rw-r--r--tpl/internal/go_templates/htmltemplate/hugo_template.go5
-rw-r--r--tpl/strings/strings.go7
-rw-r--r--tpl/template.go46
-rw-r--r--tpl/template_test.go41
-rw-r--r--tpl/transform/transform.go3
-rw-r--r--tpl/transform/transform_test.go1
6 files changed, 99 insertions, 4 deletions
diff --git a/tpl/internal/go_templates/htmltemplate/hugo_template.go b/tpl/internal/go_templates/htmltemplate/hugo_template.go
index eba54fbbf..99edf8f68 100644
--- a/tpl/internal/go_templates/htmltemplate/hugo_template.go
+++ b/tpl/internal/go_templates/htmltemplate/hugo_template.go
@@ -34,3 +34,8 @@ func (t *Template) Prepare() (*template.Template, error) {
}
return t.text, nil
}
+
+// See https://github.com/golang/go/issues/5884
+func StripTags(html string) string {
+ return stripTags(html)
+}
diff --git a/tpl/strings/strings.go b/tpl/strings/strings.go
index 482a8a837..a49451483 100644
--- a/tpl/strings/strings.go
+++ b/tpl/strings/strings.go
@@ -25,6 +25,7 @@ import (
"github.com/gohugoio/hugo/common/text"
"github.com/gohugoio/hugo/deps"
"github.com/gohugoio/hugo/helpers"
+ "github.com/gohugoio/hugo/tpl"
"github.com/spf13/cast"
)
@@ -52,7 +53,7 @@ func (ns *Namespace) CountRunes(s any) (int, error) {
}
counter := 0
- for _, r := range helpers.StripHTML(ss) {
+ for _, r := range tpl.StripHTML(ss) {
if !helpers.IsWhitespace(r) {
counter++
}
@@ -83,11 +84,11 @@ func (ns *Namespace) CountWords(s any) (int, error) {
}
if !isCJKLanguage {
- return len(strings.Fields(helpers.StripHTML((ss)))), nil
+ return len(strings.Fields(tpl.StripHTML(ss))), nil
}
counter := 0
- for _, word := range strings.Fields(helpers.StripHTML(ss)) {
+ for _, word := range strings.Fields(tpl.StripHTML(ss)) {
runeCount := utf8.RuneCountInString(word)
if len(word) == runeCount {
counter++
diff --git a/tpl/template.go b/tpl/template.go
index 299b7208d..738750de7 100644
--- a/tpl/template.go
+++ b/tpl/template.go
@@ -18,9 +18,14 @@ import (
"io"
"reflect"
"regexp"
+ "strings"
+ "unicode"
+
+ bp "github.com/gohugoio/hugo/bufferpool"
"github.com/gohugoio/hugo/output"
+ htmltemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/htmltemplate"
texttemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate"
)
@@ -163,3 +168,44 @@ func GetHasLockFromContext(ctx context.Context) bool {
func SetHasLockInContext(ctx context.Context, hasLock bool) context.Context {
return context.WithValue(ctx, texttemplate.HasLockContextKey, hasLock)
}
+
+const hugoNewLinePlaceholder = "___hugonl_"
+
+var (
+ stripHTMLReplacerPre = strings.NewReplacer("\n", " ", "</p>", hugoNewLinePlaceholder, "<br>", hugoNewLinePlaceholder, "<br />", hugoNewLinePlaceholder)
+ whitespaceRe = regexp.MustCompile(`\s+`)
+)
+
+// StripHTML strips out all HTML tags in s.
+func StripHTML(s string) string {
+ // Shortcut strings with no tags in them
+ if !strings.ContainsAny(s, "<>") {
+ return s
+ }
+
+ pre := stripHTMLReplacerPre.Replace(s)
+ preReplaced := pre != s
+
+ s = htmltemplate.StripTags(pre)
+
+ if preReplaced {
+ s = strings.ReplaceAll(s, hugoNewLinePlaceholder, "\n")
+ }
+
+ var wasSpace bool
+ b := bp.GetBuffer()
+ defer bp.PutBuffer(b)
+ for _, r := range s {
+ isSpace := unicode.IsSpace(r)
+ if !(isSpace && wasSpace) {
+ b.WriteRune(r)
+ }
+ wasSpace = isSpace
+ }
+
+ if b.Len() > 0 {
+ s = b.String()
+ }
+
+ return s
+}
diff --git a/tpl/template_test.go b/tpl/template_test.go
index afd3c4b00..d989b7158 100644
--- a/tpl/template_test.go
+++ b/tpl/template_test.go
@@ -28,3 +28,44 @@ func TestExtractBaseof(t *testing.T) {
c.Assert(extractBaseOf("not baseof for you"), qt.Equals, "")
c.Assert(extractBaseOf("template: blog/baseof.html:23:11:"), qt.Equals, "blog/baseof.html")
}
+
+func TestStripHTML(t *testing.T) {
+ type test struct {
+ input, expected string
+ }
+ data := []test{
+ {"<h1>strip h1 tag <h1>", "strip h1 tag "},
+ {"<p> strip p tag </p>", " strip p tag "},
+ {"</br> strip br<br>", " strip br\n"},
+ {"</br> strip br2<br />", " strip br2\n"},
+ {"This <strong>is</strong> a\nnewline", "This is a newline"},
+ {"No Tags", "No Tags"},
+ {`<p>Summary Next Line.
+<figure >
+
+ <img src="/not/real" />
+
+
+</figure>
+.
+More text here.</p>
+
+<p>Some more text</p>`, "Summary Next Line. . More text here.\nSome more text\n"},
+
+ // Issue 9199
+ {"<div data-action='click->my-controller#doThing'>qwe</div>", "qwe"},
+ {"Hello, World!", "Hello, World!"},
+ {"foo&amp;bar", "foo&amp;bar"},
+ {`Hello <a href="www.example.com/">World</a>!`, "Hello World!"},
+ {"Foo <textarea>Bar</textarea> Baz", "Foo Bar Baz"},
+ {"Foo <!-- Bar --> Baz", "Foo Baz"},
+ }
+ for i, d := range data {
+ output := StripHTML(d.input)
+ if d.expected != output {
+ t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output)
+ }
+ }
+}
+
+const tstHTMLContent = "<!DOCTYPE html><html><head><script src=\"http://two/foobar.js\"></script></head><body><nav><ul><li hugo-nav=\"section_0\"></li><li hugo-nav=\"section_1\"></li></ul></nav><article>content <a href=\"http://two/foobar\">foobar</a>. Follow up</article><p>This is some text.<br>And some more.</p></body></html>"
diff --git a/tpl/transform/transform.go b/tpl/transform/transform.go
index 7c0914378..69ab98a9a 100644
--- a/tpl/transform/transform.go
+++ b/tpl/transform/transform.go
@@ -22,6 +22,7 @@ import (
"github.com/gohugoio/hugo/cache/namedmemcache"
"github.com/gohugoio/hugo/markup/converter/hooks"
"github.com/gohugoio/hugo/markup/highlight"
+ "github.com/gohugoio/hugo/tpl"
"github.com/gohugoio/hugo/deps"
"github.com/gohugoio/hugo/helpers"
@@ -141,7 +142,7 @@ func (ns *Namespace) Plainify(s any) (string, error) {
return "", err
}
- return helpers.StripHTML(ss), nil
+ return tpl.StripHTML(ss), nil
}
// For internal use.
diff --git a/tpl/transform/transform_test.go b/tpl/transform/transform_test.go
index ab2fd3b9e..edef4e1bd 100644
--- a/tpl/transform/transform_test.go
+++ b/tpl/transform/transform_test.go
@@ -237,6 +237,7 @@ func TestPlainify(t *testing.T) {
expect any
}{
{"<em>Note:</em> blah <b>blah</b>", "Note: blah blah"},
+ {"<div data-action='click->my-controller#doThing'>qwe</div>", "qwe"},
// errors
{tstNoStringer{}, false},
} {