diff options
author | Bjørn Erik Pedersen <[email protected]> | 2018-12-17 14:25:00 +0100 |
---|---|---|
committer | Bjørn Erik Pedersen <[email protected]> | 2018-12-17 19:17:56 +0100 |
commit | efe0b4e5c0292c1e5e27b0c32fbc368062fde3e8 (patch) | |
tree | f1b9ff30aa124a49c8ea29f977136e2c02db4d9b /transform | |
parent | d5a0b6bbbc83a3e274c62ed397a293f04ee8d241 (diff) | |
download | hugo-efe0b4e5c0292c1e5e27b0c32fbc368062fde3e8.tar.gz hugo-efe0b4e5c0292c1e5e27b0c32fbc368062fde3e8.zip |
transform/urlreplacers: Support unquoted URLs in canonifyURLs replacer
Fixes #5529
Diffstat (limited to 'transform')
-rw-r--r-- | transform/urlreplacers/absurlreplacer.go | 197 | ||||
-rw-r--r-- | transform/urlreplacers/absurlreplacer_test.go | 17 |
2 files changed, 120 insertions, 94 deletions
diff --git a/transform/urlreplacers/absurlreplacer.go b/transform/urlreplacers/absurlreplacer.go index 1de6b0ca7..45b98f821 100644 --- a/transform/urlreplacers/absurlreplacer.go +++ b/transform/urlreplacers/absurlreplacer.go @@ -16,6 +16,7 @@ package urlreplacers import ( "bytes" "io" + "unicode" "unicode/utf8" "github.com/gohugoio/hugo/transform" @@ -43,7 +44,7 @@ type absurllexer struct { start int // item start position width int // width of last element - matchers []absURLMatcher + quotes [][]byte ms matchState matches [3]bool // track matches of the 3 prefixes @@ -140,84 +141,115 @@ func (l *absurllexer) emit() { l.start = l.pos } -// handle URLs in src and href. -func checkCandidateBase(l *absurllexer) { - for _, m := range l.matchers { - if !bytes.HasPrefix(l.content[l.pos:], m.match) { - continue - } - // check for schemaless URLs - posAfter := l.pos + len(m.match) - if posAfter >= len(l.content) { - return - } - r, _ := utf8.DecodeRune(l.content[posAfter:]) - if r == '/' { - // schemaless: skip - return - } - if l.pos > l.start { +var ( + relURLPrefix = []byte("/") + relURLPrefixLen = len(relURLPrefix) +) + +func (l *absurllexer) consumeQuote() []byte { + for _, q := range l.quotes { + if bytes.HasPrefix(l.content[l.pos:], q) { + l.pos += len(q) l.emit() + return q } - l.pos += len(m.match) - l.w.Write(m.quote) - l.w.Write(l.path) - l.start = l.pos } + return nil +} + +// handle URLs in src and href. 
+func checkCandidateBase(l *absurllexer) { + l.consumeQuote() + + if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) { + return + } + + // check for schemaless URLs + posAfter := l.pos + relURLPrefixLen + if posAfter >= len(l.content) { + return + } + r, _ := utf8.DecodeRune(l.content[posAfter:]) + if r == '/' { + // schemaless: skip + return + } + if l.pos > l.start { + l.emit() + } + l.pos += relURLPrefixLen + l.w.Write(l.path) + l.start = l.pos +} + +func (l *absurllexer) posAfterURL(q []byte) int { + if len(q) > 0 { + // look for end quote + return bytes.Index(l.content[l.pos:], q) + } + + return bytes.IndexFunc(l.content[l.pos:], func(r rune) bool { + return r == '>' || unicode.IsSpace(r) + }) + } // handle URLs in srcset. func checkCandidateSrcset(l *absurllexer) { - // special case, not frequent (me think) - for _, m := range l.matchers { - if !bytes.HasPrefix(l.content[l.pos:], m.match) { - continue - } + q := l.consumeQuote() + if q == nil { + // srcset needs to be quoted. + return + } - // check for schemaless URLs - posAfter := l.pos + len(m.match) - if posAfter >= len(l.content) { - return - } - r, _ := utf8.DecodeRune(l.content[posAfter:]) - if r == '/' { - // schemaless: skip - continue - } + // special case, not frequent (me think) + if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) { + return + } - posLastQuote := bytes.Index(l.content[l.pos+1:], m.quote) + // check for schemaless URLs + posAfter := l.pos + relURLPrefixLen + if posAfter >= len(l.content) { + return + } + r, _ := utf8.DecodeRune(l.content[posAfter:]) + if r == '/' { + // schemaless: skip + return + } - // safe guard - if posLastQuote < 0 || posLastQuote > 2000 { - return - } + posEnd := l.posAfterURL(q) - if l.pos > l.start { - l.emit() - } + // safe guard + if posEnd < 0 || posEnd > 2000 { + return + } - section := l.content[l.pos+len(m.quote) : l.pos+posLastQuote+1] + if l.pos > l.start { + l.emit() + } - fields := bytes.Fields(section) - l.w.Write(m.quote) - for i, f := range 
fields { - if f[0] == '/' { - l.w.Write(l.path) - l.w.Write(f[1:]) + section := l.content[l.pos : l.pos+posEnd+1] - } else { - l.w.Write(f) - } + fields := bytes.Fields(section) + for i, f := range fields { + if f[0] == '/' { + l.w.Write(l.path) + l.w.Write(f[1:]) - if i < len(fields)-1 { - l.w.Write([]byte(" ")) - } + } else { + l.w.Write(f) } - l.w.Write(m.quote) - l.pos += len(section) + (len(m.quote) * 2) - l.start = l.pos + if i < len(fields)-1 { + l.w.Write([]byte(" ")) + } } + + l.pos += len(section) + l.start = l.pos + } // main loop @@ -262,53 +294,32 @@ func (l *absurllexer) replace() { } } -func doReplace(path string, ct transform.FromTo, matchers []absURLMatcher) { +func doReplace(path string, ct transform.FromTo, quotes [][]byte) { lexer := &absurllexer{ - content: ct.From().Bytes(), - w: ct.To(), - path: []byte(path), - matchers: matchers} + content: ct.From().Bytes(), + w: ct.To(), + path: []byte(path), + quotes: quotes} lexer.replace() } type absURLReplacer struct { - htmlMatchers []absURLMatcher - xmlMatchers []absURLMatcher + htmlQuotes [][]byte + xmlQuotes [][]byte } func newAbsURLReplacer() *absURLReplacer { - - // HTML - dqHTMLMatch := []byte("\"/") - sqHTMLMatch := []byte("'/") - - // XML - dqXMLMatch := []byte("&#34;/") - sqXMLMatch := []byte("&#39;/") - - dqHTML := []byte("\"") - sqHTML := []byte("'") - - dqXML := []byte("&#34;") - sqXML := []byte("&#39;") - return &absURLReplacer{ - htmlMatchers: []absURLMatcher{ - {dqHTMLMatch, dqHTML}, - {sqHTMLMatch, sqHTML}, - }, - xmlMatchers: []absURLMatcher{ - {dqXMLMatch, dqXML}, - {sqXMLMatch, sqXML}, - }} + htmlQuotes: [][]byte{[]byte("\""), []byte("'")}, + xmlQuotes: [][]byte{[]byte("&#34;"), []byte("&#39;")}} } func (au *absURLReplacer) replaceInHTML(path string, ct transform.FromTo) { - doReplace(path, ct, au.htmlMatchers) + doReplace(path, ct, au.htmlQuotes) } func (au *absURLReplacer) replaceInXML(path string, ct transform.FromTo) { - doReplace(path, ct, au.xmlMatchers) + doReplace(path, ct, au.xmlQuotes) } diff 
--git a/transform/urlreplacers/absurlreplacer_test.go b/transform/urlreplacers/absurlreplacer_test.go index 7a530862b..be6b91929 100644 --- a/transform/urlreplacers/absurlreplacer_test.go +++ b/transform/urlreplacers/absurlreplacer_test.go @@ -156,6 +156,21 @@ func TestAbsURL(t *testing.T) { } +func TestAbsURLUnqoted(t *testing.T) { + tr := transform.New(NewAbsURLTransformer(testBaseURL)) + + apply(t.Errorf, tr, []test{ + test{ + content: `Link: <a href=/asdf>ASDF</a>`, + expected: `Link: <a href=http://base/asdf>ASDF</a>`, + }, + test{ + content: `Link: <a href=/asdf >ASDF</a>`, + expected: `Link: <a href=http://base/asdf >ASDF</a>`, + }, + }) +} + func TestRelativeURL(t *testing.T) { tr := transform.New(NewAbsURLTransformer(helpers.GetDottedRelativePath(filepath.FromSlash("/post/sub/")))) @@ -176,7 +191,7 @@ func TestAbsXMLURLSrcSet(t *testing.T) { } func BenchmarkXMLAbsURL(b *testing.B) { - tr := transform.New(NewAbsURLInXMLTransformer("")) + tr := transform.New(NewAbsURLInXMLTransformer(testBaseURL)) b.ResetTimer() for i := 0; i < b.N; i++ { |