about | summary | refs | log | tree | commit | diff | homepage
path: root/transform
diff options
context:
space:
mode:
author: Bjørn Erik Pedersen <[email protected]> 2018-12-17 14:25:00 +0100
committer: Bjørn Erik Pedersen <[email protected]> 2018-12-17 19:17:56 +0100
commit: efe0b4e5c0292c1e5e27b0c32fbc368062fde3e8 (patch)
tree: f1b9ff30aa124a49c8ea29f977136e2c02db4d9b /transform
parent: d5a0b6bbbc83a3e274c62ed397a293f04ee8d241 (diff)
download: hugo-efe0b4e5c0292c1e5e27b0c32fbc368062fde3e8.tar.gz
          hugo-efe0b4e5c0292c1e5e27b0c32fbc368062fde3e8.zip
transform/urlreplacers: Support unquoted URLs in canonifyURLs replacer
Fixes #5529
Diffstat (limited to 'transform')
-rw-r--r-- transform/urlreplacers/absurlreplacer.go 197
-rw-r--r-- transform/urlreplacers/absurlreplacer_test.go 17
2 files changed, 120 insertions, 94 deletions
diff --git a/transform/urlreplacers/absurlreplacer.go b/transform/urlreplacers/absurlreplacer.go
index 1de6b0ca7..45b98f821 100644
--- a/transform/urlreplacers/absurlreplacer.go
+++ b/transform/urlreplacers/absurlreplacer.go
@@ -16,6 +16,7 @@ package urlreplacers
import (
"bytes"
"io"
+ "unicode"
"unicode/utf8"
"github.com/gohugoio/hugo/transform"
@@ -43,7 +44,7 @@ type absurllexer struct {
start int // item start position
width int // width of last element
- matchers []absURLMatcher
+ quotes [][]byte
ms matchState
matches [3]bool // track matches of the 3 prefixes
@@ -140,84 +141,115 @@ func (l *absurllexer) emit() {
l.start = l.pos
}
-// handle URLs in src and href.
-func checkCandidateBase(l *absurllexer) {
- for _, m := range l.matchers {
- if !bytes.HasPrefix(l.content[l.pos:], m.match) {
- continue
- }
- // check for schemaless URLs
- posAfter := l.pos + len(m.match)
- if posAfter >= len(l.content) {
- return
- }
- r, _ := utf8.DecodeRune(l.content[posAfter:])
- if r == '/' {
- // schemaless: skip
- return
- }
- if l.pos > l.start {
+var (
+ relURLPrefix = []byte("/")
+ relURLPrefixLen = len(relURLPrefix)
+)
+
+func (l *absurllexer) consumeQuote() []byte {
+ for _, q := range l.quotes {
+ if bytes.HasPrefix(l.content[l.pos:], q) {
+ l.pos += len(q)
l.emit()
+ return q
}
- l.pos += len(m.match)
- l.w.Write(m.quote)
- l.w.Write(l.path)
- l.start = l.pos
}
+ return nil
+}
+
+// handle URLs in src and href.
+func checkCandidateBase(l *absurllexer) {
+ l.consumeQuote()
+
+ if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) {
+ return
+ }
+
+ // check for schemaless URLs
+ posAfter := l.pos + relURLPrefixLen
+ if posAfter >= len(l.content) {
+ return
+ }
+ r, _ := utf8.DecodeRune(l.content[posAfter:])
+ if r == '/' {
+ // schemaless: skip
+ return
+ }
+ if l.pos > l.start {
+ l.emit()
+ }
+ l.pos += relURLPrefixLen
+ l.w.Write(l.path)
+ l.start = l.pos
+}
+
+func (l *absurllexer) posAfterURL(q []byte) int {
+ if len(q) > 0 {
+ // look for end quote
+ return bytes.Index(l.content[l.pos:], q)
+ }
+
+ return bytes.IndexFunc(l.content[l.pos:], func(r rune) bool {
+ return r == '>' || unicode.IsSpace(r)
+ })
+
}
// handle URLs in srcset.
func checkCandidateSrcset(l *absurllexer) {
- // special case, not frequent (me think)
- for _, m := range l.matchers {
- if !bytes.HasPrefix(l.content[l.pos:], m.match) {
- continue
- }
+ q := l.consumeQuote()
+ if q == nil {
+ // srcset needs to be quoted.
+ return
+ }
- // check for schemaless URLs
- posAfter := l.pos + len(m.match)
- if posAfter >= len(l.content) {
- return
- }
- r, _ := utf8.DecodeRune(l.content[posAfter:])
- if r == '/' {
- // schemaless: skip
- continue
- }
+ // special case, not frequent (me think)
+ if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) {
+ return
+ }
- posLastQuote := bytes.Index(l.content[l.pos+1:], m.quote)
+ // check for schemaless URLs
+ posAfter := l.pos + relURLPrefixLen
+ if posAfter >= len(l.content) {
+ return
+ }
+ r, _ := utf8.DecodeRune(l.content[posAfter:])
+ if r == '/' {
+ // schemaless: skip
+ return
+ }
- // safe guard
- if posLastQuote < 0 || posLastQuote > 2000 {
- return
- }
+ posEnd := l.posAfterURL(q)
- if l.pos > l.start {
- l.emit()
- }
+ // safe guard
+ if posEnd < 0 || posEnd > 2000 {
+ return
+ }
- section := l.content[l.pos+len(m.quote) : l.pos+posLastQuote+1]
+ if l.pos > l.start {
+ l.emit()
+ }
- fields := bytes.Fields(section)
- l.w.Write(m.quote)
- for i, f := range fields {
- if f[0] == '/' {
- l.w.Write(l.path)
- l.w.Write(f[1:])
+ section := l.content[l.pos : l.pos+posEnd+1]
- } else {
- l.w.Write(f)
- }
+ fields := bytes.Fields(section)
+ for i, f := range fields {
+ if f[0] == '/' {
+ l.w.Write(l.path)
+ l.w.Write(f[1:])
- if i < len(fields)-1 {
- l.w.Write([]byte(" "))
- }
+ } else {
+ l.w.Write(f)
}
- l.w.Write(m.quote)
- l.pos += len(section) + (len(m.quote) * 2)
- l.start = l.pos
+ if i < len(fields)-1 {
+ l.w.Write([]byte(" "))
+ }
}
+
+ l.pos += len(section)
+ l.start = l.pos
+
}
// main loop
@@ -262,53 +294,32 @@ func (l *absurllexer) replace() {
}
}
-func doReplace(path string, ct transform.FromTo, matchers []absURLMatcher) {
+func doReplace(path string, ct transform.FromTo, quotes [][]byte) {
lexer := &absurllexer{
- content: ct.From().Bytes(),
- w: ct.To(),
- path: []byte(path),
- matchers: matchers}
+ content: ct.From().Bytes(),
+ w: ct.To(),
+ path: []byte(path),
+ quotes: quotes}
lexer.replace()
}
type absURLReplacer struct {
- htmlMatchers []absURLMatcher
- xmlMatchers []absURLMatcher
+ htmlQuotes [][]byte
+ xmlQuotes [][]byte
}
func newAbsURLReplacer() *absURLReplacer {
-
- // HTML
- dqHTMLMatch := []byte("\"/")
- sqHTMLMatch := []byte("'/")
-
- // XML
- dqXMLMatch := []byte("&#34;/")
- sqXMLMatch := []byte("&#39;/")
-
- dqHTML := []byte("\"")
- sqHTML := []byte("'")
-
- dqXML := []byte("&#34;")
- sqXML := []byte("&#39;")
-
return &absURLReplacer{
- htmlMatchers: []absURLMatcher{
- {dqHTMLMatch, dqHTML},
- {sqHTMLMatch, sqHTML},
- },
- xmlMatchers: []absURLMatcher{
- {dqXMLMatch, dqXML},
- {sqXMLMatch, sqXML},
- }}
+ htmlQuotes: [][]byte{[]byte("\""), []byte("'")},
+ xmlQuotes: [][]byte{[]byte("&#34;"), []byte("&#39;")}}
}
func (au *absURLReplacer) replaceInHTML(path string, ct transform.FromTo) {
- doReplace(path, ct, au.htmlMatchers)
+ doReplace(path, ct, au.htmlQuotes)
}
func (au *absURLReplacer) replaceInXML(path string, ct transform.FromTo) {
- doReplace(path, ct, au.xmlMatchers)
+ doReplace(path, ct, au.xmlQuotes)
}
diff --git a/transform/urlreplacers/absurlreplacer_test.go b/transform/urlreplacers/absurlreplacer_test.go
index 7a530862b..be6b91929 100644
--- a/transform/urlreplacers/absurlreplacer_test.go
+++ b/transform/urlreplacers/absurlreplacer_test.go
@@ -156,6 +156,21 @@ func TestAbsURL(t *testing.T) {
}
+func TestAbsURLUnqoted(t *testing.T) {
+ tr := transform.New(NewAbsURLTransformer(testBaseURL))
+
+ apply(t.Errorf, tr, []test{
+ test{
+ content: `Link: <a href=/asdf>ASDF</a>`,
+ expected: `Link: <a href=http://base/asdf>ASDF</a>`,
+ },
+ test{
+ content: `Link: <a href=/asdf >ASDF</a>`,
+ expected: `Link: <a href=http://base/asdf >ASDF</a>`,
+ },
+ })
+}
+
func TestRelativeURL(t *testing.T) {
tr := transform.New(NewAbsURLTransformer(helpers.GetDottedRelativePath(filepath.FromSlash("/post/sub/"))))
@@ -176,7 +191,7 @@ func TestAbsXMLURLSrcSet(t *testing.T) {
}
func BenchmarkXMLAbsURL(b *testing.B) {
- tr := transform.New(NewAbsURLInXMLTransformer(""))
+ tr := transform.New(NewAbsURLInXMLTransformer(testBaseURL))
b.ResetTimer()
for i := 0; i < b.N; i++ {