From 3854a6fa6c323d1c09aa71a0626c9eef62709294 Mon Sep 17 00:00:00 2001
From: Bjørn Erik Pedersen
Date: Wed, 25 May 2022 10:56:14 +0200
Subject: Fix Plainify edge cases
This commit replaces the main part of `helpers.StripHTML` with Go's implementation in its html/template package.
It's a little slower, but correctness is more important:
```bash
BenchmarkStripHTMLOld-10 680316 1764 ns/op 728 B/op 4 allocs/op
BenchmarkStripHTMLNew-10 384520 3099 ns/op 2089 B/op 10 allocs/op
```
Fixes #9199
Fixes #9909
Closes #9410
---
helpers/content.go | 40 ----------------------------------------
helpers/content_test.go | 38 --------------------------------------
2 files changed, 78 deletions(-)
(limited to 'helpers')
diff --git a/helpers/content.go b/helpers/content.go
index 835663b76..d04e34a07 100644
--- a/helpers/content.go
+++ b/helpers/content.go
@@ -34,7 +34,6 @@ import (
"github.com/gohugoio/hugo/markup"
- bp "github.com/gohugoio/hugo/bufferpool"
"github.com/gohugoio/hugo/config"
)
@@ -104,45 +103,6 @@ func NewContentSpec(cfg config.Provider, logger loggers.Logger, contentFs afero.
return spec, nil
}
-var stripHTMLReplacer = strings.NewReplacer("\n", " ", "</p>", "\n", "<br>", "\n", "<br />", "\n")
-
-// StripHTML accepts a string, strips out all HTML tags and returns it.
-func StripHTML(s string) string {
- // Shortcut strings with no tags in them
- if !strings.ContainsAny(s, "<>") {
- return s
- }
- s = stripHTMLReplacer.Replace(s)
-
- // Walk through the string removing all tags
- b := bp.GetBuffer()
- defer bp.PutBuffer(b)
- var inTag, isSpace, wasSpace bool
- for _, r := range s {
- if !inTag {
- isSpace = false
- }
-
- switch {
- case r == '<':
- inTag = true
- case r == '>':
- inTag = false
- case unicode.IsSpace(r):
- isSpace = true
- fallthrough
- default:
- if !inTag && (!isSpace || (isSpace && !wasSpace)) {
- b.WriteRune(r)
- }
- }
-
- wasSpace = isSpace
-
- }
- return b.String()
-}
-
// stripEmptyNav strips out empty