aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--config/commonConfig.go5
-rw-r--r--go.mod2
-rw-r--r--hugolib/hugo_sites.go6
-rw-r--r--hugolib/hugo_sites_build.go62
-rw-r--r--hugolib/site_test.go44
-rw-r--r--publisher/htmlElementsCollector.go268
-rw-r--r--publisher/htmlElementsCollector_test.go81
-rw-r--r--publisher/publisher.go47
-rw-r--r--publisher/publisher_test.go14
10 files changed, 501 insertions, 29 deletions
diff --git a/.gitignore b/.gitignore
index 75d85e8d0..d3ef01991 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,7 @@ dock.sh
GoBuilds
dist
+hugolib/hugo_stats.json
resources/sunset.jpg
vendor
diff --git a/config/commonConfig.go b/config/commonConfig.go
index 17d5619bb..ba99260a5 100644
--- a/config/commonConfig.go
+++ b/config/commonConfig.go
@@ -29,11 +29,16 @@ import (
var DefaultBuild = Build{
UseResourceCacheWhen: "fallback",
+ WriteStats: false,
}
// Build holds some build related configuration.
type Build struct {
UseResourceCacheWhen string // never, fallback, always. Default is fallback
+
+ // When enabled, will collect and write a hugo_stats.json with some build
+ // related aggregated data (e.g. CSS class names).
+ WriteStats bool
}
func (b Build) UseResourceCache(err error) bool {
diff --git a/go.mod b/go.mod
index 4b75840b0..c12caa8f0 100644
--- a/go.mod
+++ b/go.mod
@@ -55,7 +55,7 @@ require (
go.opencensus.io v0.22.0 // indirect
gocloud.dev v0.15.0
golang.org/x/image v0.0.0-20191214001246-9130b4cfad52
- golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553 // indirect
+ golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553
golang.org/x/oauth2 v0.0.0-20190523182746-aaccbc9213b0 // indirect
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e
golang.org/x/sys v0.0.0-20200107144601-ef85f5a75ddf // indirect
diff --git a/hugolib/hugo_sites.go b/hugolib/hugo_sites.go
index dca9e4968..9ff4d36cd 100644
--- a/hugolib/hugo_sites.go
+++ b/hugolib/hugo_sites.go
@@ -408,7 +408,11 @@ func applyDeps(cfg deps.DepsCfg, sites ...*Site) error {
s.Deps = d
// Set up the main publishing chain.
- pub, err := publisher.NewDestinationPublisher(d.PathSpec.BaseFs.PublishFs, s.outputFormatsConfig, s.mediaTypesConfig, cfg.Cfg)
+ pub, err := publisher.NewDestinationPublisher(
+ d.ResourceSpec,
+ s.outputFormatsConfig,
+ s.mediaTypesConfig,
+ )
if err != nil {
return err
diff --git a/hugolib/hugo_sites_build.go b/hugolib/hugo_sites_build.go
index 6a65605fc..fac20e883 100644
--- a/hugolib/hugo_sites_build.go
+++ b/hugolib/hugo_sites_build.go
@@ -16,11 +16,17 @@ package hugolib
import (
"bytes"
"context"
+ "encoding/json"
"fmt"
"os"
+ "path/filepath"
"runtime/trace"
"strings"
+ "github.com/gohugoio/hugo/publisher"
+
+ "github.com/gohugoio/hugo/hugofs"
+
"github.com/gohugoio/hugo/common/para"
"github.com/gohugoio/hugo/config"
"github.com/gohugoio/hugo/resources/postpub"
@@ -146,10 +152,10 @@ func (h *HugoSites) Build(config BuildCfg, events ...fsnotify.Event) error {
if err != nil {
h.SendError(err)
}
- }
- if err := h.postProcess(); err != nil {
- h.SendError(err)
+ if err = h.postProcess(); err != nil {
+ h.SendError(err)
+ }
}
if h.Metrics != nil {
@@ -337,6 +343,12 @@ func (h *HugoSites) render(config *BuildCfg) error {
}
func (h *HugoSites) postProcess() error {
+ // Make sure to write any build stats to disk first so it's available
+ // to the post processors.
+ if err := h.writeBuildStats(); err != nil {
+ return err
+ }
+
var toPostProcess []resource.OriginProvider
for _, s := range h.Sites {
for _, v := range s.ResourceSpec.PostProcessResources {
@@ -422,3 +434,47 @@ func (h *HugoSites) postProcess() error {
return g.Wait()
}
+
+// publishStats is a JSON-serializable holder for a single string of CSS classes.
+// NOTE(review): nothing visible in this change references this type
+// (writeBuildStats marshals publisher.PublishStats instead) — confirm whether
+// it is still needed.
+type publishStats struct {
+ CSSClasses string `json:"cssClasses"`
+}
+
+// writeBuildStats merges the HTML element statistics reported by every site's
+// publisher and writes them as indented JSON to hugo_stats.json in the working
+// directory. It is a no-op unless the build config has WriteStats enabled.
+func (h *HugoSites) writeBuildStats() error {
+	if !h.ResourceSpec.BuildConfig.WriteStats {
+		return nil
+	}
+
+	var merged publisher.HTMLElements
+	for _, site := range h.Sites {
+		merged.Merge(site.publisher.PublishStats().HTMLElements)
+	}
+	merged.Sort()
+
+	data, err := json.MarshalIndent(publisher.PublishStats{HTMLElements: merged}, "", " ")
+	if err != nil {
+		return err
+	}
+
+	statsFile := filepath.Join(h.WorkingDir, "hugo_stats.json")
+
+	// The stats file must always end up on the OS file system.
+	if err := afero.WriteFile(hugofs.Os, statsFile, data, 0666); err != nil {
+		return err
+	}
+
+	if h.Fs.Source == hugofs.Os {
+		return nil
+	}
+
+	// The source is not the OS fs (e.g. a mem fs is in play), so mirror the
+	// file to the destination fs as well.
+	return afero.WriteFile(h.Fs.Destination, statsFile, data, 0666)
+}
diff --git a/hugolib/site_test.go b/hugolib/site_test.go
index 0b05aac12..e404d80a4 100644
--- a/hugolib/site_test.go
+++ b/hugolib/site_test.go
@@ -980,3 +980,47 @@ func TestRefIssues(t *testing.T) {
b.AssertFileContent("public/post/nested-a/content-a/index.html", `Content: http://example.com/post/nested-b/content-b/`)
}
+
+// TestClassCollector builds a small site with build.writeStats enabled and
+// asserts that hugo_stats.json lists the tags, classes and ids used by the
+// rendered template.
+func TestClassCollector(t *testing.T) {
+ b := newTestSitesBuilder(t)
+ b.WithConfigFile("toml", `
+
+[build]
+ writeStats = true
+
+`)
+
+ // Class "c" appears on both elements; it is expected only once in the stats below.
+ b.WithTemplates("index.html", `
+
+<div id="el1" class="a b c">Foo</div>
+
+Some text.
+
+<div class="c d e" id="el2">Foo</div>
+`)
+
+ b.WithContent("p1.md", "")
+
+ b.Build(BuildCfg{})
+
+ b.AssertFileContent("hugo_stats.json", `
+{
+ "htmlElements": {
+ "tags": [
+ "div"
+ ],
+ "classes": [
+ "a",
+ "b",
+ "c",
+ "d",
+ "e"
+ ],
+ "ids": [
+ "el1",
+ "el2"
+ ]
+ }
+ }
+`)
+}
diff --git a/publisher/htmlElementsCollector.go b/publisher/htmlElementsCollector.go
new file mode 100644
index 000000000..c6e0d3f0f
--- /dev/null
+++ b/publisher/htmlElementsCollector.go
@@ -0,0 +1,268 @@
+// Copyright 2020 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package publisher
+
+import (
+ "github.com/gohugoio/hugo/helpers"
+ "golang.org/x/net/html"
+ yaml "gopkg.in/yaml.v2"
+
+ "bytes"
+ "sort"
+ "strings"
+ "sync"
+)
+
+// newHTMLElementsCollector returns an empty collector with its set of already
+// seen raw elements initialized and ready for writes.
+func newHTMLElementsCollector() *htmlElementsCollector {
+	collector := new(htmlElementsCollector)
+	collector.elementSet = map[string]bool{}
+	return collector
+}
+
+// newHTMLElementsCollectorWriter returns a writer that reports the HTML
+// elements it sees to the given collector.
+func newHTMLElementsCollectorWriter(collector *htmlElementsCollector) *cssClassCollectorWriter {
+	w := new(cssClassCollectorWriter)
+	w.collector = collector
+	return w
+}
+
+// HTMLElements holds lists of tags and attribute values for classes and id.
+// It is serialized to JSON under the keys "tags", "classes" and "ids".
+type HTMLElements struct {
+ // Tags holds HTML element (tag) names.
+ Tags []string `json:"tags"`
+ // Classes holds values collected from class attributes.
+ Classes []string `json:"classes"`
+ // IDs holds values collected from id attributes.
+ IDs []string `json:"ids"`
+}
+
+// Merge appends the tags, classes and IDs of other to h and then runs each of
+// h's lists through helpers.UniqueStringsReuse to drop duplicates.
+func (h *HTMLElements) Merge(other HTMLElements) {
+	mergeUnique := func(dst, src []string) []string {
+		return helpers.UniqueStringsReuse(append(dst, src...))
+	}
+
+	h.Tags = mergeUnique(h.Tags, other.Tags)
+	h.Classes = mergeUnique(h.Classes, other.Classes)
+	h.IDs = mergeUnique(h.IDs, other.IDs)
+}
+
+// Sort sorts the tag, class and ID lists in place in increasing order.
+func (h *HTMLElements) Sort() {
+	for _, list := range [][]string{h.Tags, h.Classes, h.IDs} {
+		sort.Strings(list)
+	}
+}
+
+// cssClassCollectorWriter is an io.Writer that scans the HTML written to it
+// for start tags and registers every distinct one with the shared collector.
+// The scanner state lives on the struct, so a tag may span several Write calls.
+type cssClassCollectorWriter struct {
+	collector *htmlElementsCollector
+	buff      bytes.Buffer
+
+	isCollecting bool
+	dropValue    bool
+	inQuote      bool
+	// quoteValue is the quote character that opened the current quoted
+	// attribute value. It is only meaningful while inQuote is true.
+	quoteValue byte
+}
+
+// Write implements io.Writer. It buffers everything between a '<' and the
+// next unquoted '>', parses each not yet seen start tag and hands the result
+// to the collector. End tags are discarded. It always consumes all of p and
+// never returns an error.
+func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
+	n = len(p)
+	i := 0
+
+	for i < len(p) {
+		if !w.isCollecting {
+			for ; i < len(p); i++ {
+				if p[i] == '<' {
+					w.startCollecting()
+					break
+				}
+			}
+		}
+
+		if w.isCollecting {
+			for ; i < len(p); i++ {
+				b := p[i]
+				if !w.inQuote && b == '/' && w.lastBufferedByte() == '<' {
+					// End element ("</div>"), we don't care about those.
+					// A '/' anywhere else in the tag ("<br/>", href=/foo)
+					// belongs to a start tag and must not discard it.
+					w.endCollecting(true)
+					break
+				}
+				w.toggleIfQuote(b)
+				if !w.inQuote && b == '>' {
+					w.endCollecting(false)
+					break
+				}
+				w.buff.WriteByte(b)
+			}
+
+			if !w.isCollecting {
+				if w.dropValue {
+					w.buff.Reset()
+				} else {
+					// First check if we have processed this element before.
+					w.collector.mu.RLock()
+
+					// See https://github.com/dominikh/go-tools/issues/723
+					//lint:ignore S1030 This construct avoids memory allocation for the string.
+					seen := w.collector.elementSet[string(w.buff.Bytes())]
+					w.collector.mu.RUnlock()
+					if seen {
+						w.buff.Reset()
+						continue
+					}
+
+					s := w.buff.String()
+
+					w.buff.Reset()
+
+					el := parseHTMLElement(s)
+
+					w.collector.mu.Lock()
+					w.collector.elementSet[s] = true
+					if el.Tag != "" {
+						w.collector.elements = append(w.collector.elements, el)
+					}
+					w.collector.mu.Unlock()
+				}
+			}
+		}
+	}
+
+	return n, nil
+}
+
+// lastBufferedByte returns the most recently buffered byte of the tag being
+// collected, or 0 if nothing has been buffered yet.
+func (w *cssClassCollectorWriter) lastBufferedByte() byte {
+	buffered := w.buff.Bytes()
+	if len(buffered) == 0 {
+		return 0
+	}
+	return buffered[len(buffered)-1]
+}
+
+// endCollecting leaves collecting mode and resets the quote state. When drop
+// is true the buffered tag is to be discarded by the caller.
+func (w *cssClassCollectorWriter) endCollecting(drop bool) {
+	w.isCollecting = false
+	w.inQuote = false
+	w.quoteValue = 0
+	w.dropValue = drop
+}
+
+// startCollecting enters collecting mode for a new tag.
+func (w *cssClassCollectorWriter) startCollecting() {
+	w.isCollecting = true
+	w.dropValue = false
+}
+
+// toggleIfQuote tracks whether the scanner is inside a quoted attribute
+// value. Only the quote character that opened the value can close it, so the
+// other quote character may appear inside the value (e.g. title="it's").
+func (w *cssClassCollectorWriter) toggleIfQuote(b byte) {
+	if !isQuote(b) {
+		return
+	}
+	switch {
+	case !w.inQuote:
+		w.inQuote = true
+		w.quoteValue = b
+	case b == w.quoteValue:
+		w.inQuote = false
+	}
+}
+
+// htmlElement holds the tag name and the class and id values parsed from a
+// single HTML start tag.
+type htmlElement struct {
+ Tag string
+ Classes []string
+ IDs []string
+}
+
+// htmlElementsCollector accumulates the HTML elements reported by the
+// collector writers that share it.
+type htmlElementsCollector struct {
+ // Contains the raw HTML string. We will get the same element
+ // several times, and want to avoid costly reparsing when this
+ // is used for aggregated data only.
+ elementSet map[string]bool
+
+ // elements holds the parsed elements that had a non-empty tag name.
+ elements []htmlElement
+
+ // mu guards elementSet and elements while writers update them.
+ mu sync.RWMutex
+}
+
+// getHTMLElements aggregates the tags, classes and IDs of all elements
+// collected so far and returns each list after passing it through
+// helpers.UniqueStringsSorted.
+func (c *htmlElementsCollector) getHTMLElements() HTMLElements {
+	var (
+		classes []string
+		ids     []string
+		tags    []string
+	)
+
+	// Writers append to c.elements under c.mu, so take the read lock while
+	// iterating to keep this snapshot free of data races.
+	c.mu.RLock()
+	for _, el := range c.elements {
+		classes = append(classes, el.Classes...)
+		ids = append(ids, el.IDs...)
+		tags = append(tags, el.Tag)
+	}
+	c.mu.RUnlock()
+
+	return HTMLElements{
+		Classes: helpers.UniqueStringsSorted(classes),
+		IDs:     helpers.UniqueStringsSorted(ids),
+		Tags:    helpers.UniqueStringsSorted(tags),
+	}
+}
+
+// isQuote reports whether b is a double or a single quote character.
+func isQuote(b byte) bool {
+	switch b {
+	case '"', '\'':
+		return true
+	}
+	return false
+}
+
+// htmlJsonFixer replaces every ", " with a newline. parseHTMLElement applies
+// it to class binding values so that each entry of the object literal ends up
+// on its own line before the value is handed to the YAML parser.
+var htmlJsonFixer = strings.NewReplacer(", ", "\n")
+
+// parseHTMLElement parses the raw text of a single start tag (a missing
+// closing ">" is added) and returns its tag name together with the values of
+// its id and class attributes. Attributes whose name contains "class" and
+// whose value starts with "{" are treated as class bindings. A zero
+// htmlElement is returned if the text cannot be parsed.
+func parseHTMLElement(elStr string) (el htmlElement) {
+	elStr = strings.TrimSpace(elStr)
+	if !strings.HasSuffix(elStr, ">") {
+		elStr += ">"
+	}
+
+	doc, err := html.Parse(strings.NewReader(elStr))
+	if err != nil {
+		return
+	}
+
+	var visit func(*html.Node)
+	visit = func(node *html.Node) {
+		// Only consider element nodes whose name occurs in the input text.
+		if node.Type == html.ElementNode && strings.Contains(elStr, node.Data) {
+			el.Tag = node.Data
+
+			for _, attr := range node.Attr {
+				switch {
+				case strings.EqualFold(attr.Key, "id"):
+					// There should be only one, but one never knows...
+					el.IDs = append(el.IDs, attr.Val)
+				case strings.EqualFold(attr.Key, "class"):
+					el.Classes = append(el.Classes, strings.Fields(attr.Val)...)
+				default:
+					el.Classes = append(el.Classes, classesFromClassBinding(attr.Key, attr.Val)...)
+				}
+			}
+		}
+
+		for child := node.FirstChild; child != nil; child = child.NextSibling {
+			visit(child)
+		}
+	}
+
+	visit(doc)
+
+	return
+}
+
+// classesFromClassBinding extracts class names from an attribute that looks
+// like a Vue or AlpineJS class binding, i.e. its name contains "class" and its
+// value starts with "{". It returns nil for any other attribute.
+func classesFromClassBinding(attrKey, attrVal string) []string {
+	key := strings.ToLower(attrKey)
+	val := strings.TrimSpace(attrVal)
+	if !strings.Contains(key, "class") || !strings.HasPrefix(val, "{") {
+		return nil
+	}
+
+	// Try to unmarshal it as YAML and pull the keys.
+	// This may look odd, as the source is (probably) JS (JSON), but the YAML
+	// parser is much more lenient with simple JS input, it seems.
+	val = htmlJsonFixer.Replace(strings.Trim(val, "{}"))
+
+	// Remove leading space to make it look like YAML.
+	lines := strings.Split(val, "\n")
+	for i := range lines {
+		lines[i] = strings.TrimSpace(lines[i])
+	}
+	val = strings.Join(lines, "\n")
+
+	m := make(map[string]interface{})
+	if err := yaml.Unmarshal([]byte(val), &m); err != nil {
+		// Just insert the raw values. This is used for CSS class pruning
+		// so, it's important not to leave out values that may be a CSS class.
+		return strings.Fields(val)
+	}
+
+	var classes []string
+	for k := range m {
+		classes = append(classes, strings.Fields(k)...)
+	}
+	return classes
+}
diff --git a/publisher/htmlElementsCollector_test.go b/publisher/htmlElementsCollector_test.go
new file mode 100644
index 000000000..3ef159d8b
--- /dev/null
+++ b/publisher/htmlElementsCollector_test.go
@@ -0,0 +1,81 @@
+// Copyright 2020 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package publisher
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ qt "github.com/frankban/quicktest"
+)
+
+// TestClassCollector writes small HTML snippets to a collector writer and
+// checks the tags, classes and ids reported by the collector.
+func TestClassCollector(t *testing.T) {
+ c := qt.New((t))
+
+ // f builds the expected HTMLElements from space separated lists.
+ // An empty string leaves the corresponding slice nil.
+ f := func(tags, classes, ids string) HTMLElements {
+ var tagss, classess, idss []string
+ if tags != "" {
+ tagss = strings.Split(tags, " ")
+ }
+ if classes != "" {
+ classess = strings.Split(classes, " ")
+ }
+ if ids != "" {
+ idss = strings.Split(ids, " ")
+ }
+ return HTMLElements{
+ Tags: tagss,
+ Classes: classess,
+ IDs: idss,
+ }
+ }
+
+ for _, test := range []struct {
+ name string
+ html string
+ expect HTMLElements
+ }{
+ {"basic", `<body class="b a"></body>`, f("body", "a b", "")},
+ {"duplicates", `<div class="b a b"></div>`, f("div", "a b", "")},
+ {"single quote", `<body class='b a'></body>`, f("body", "a b", "")},
+ {"no quote", `<body class=b id=myelement></body>`, f("body", "b", "myelement")},
+
+ {"AlpineJS bind 1", `<body>
+ <div x-bind:class="{
+ 'class1': data.open,
+ 'class2 class3': data.foo == 'bar'
+ }">
+ </div>
+ </body>`, f("body div", "class1 class2 class3", "")},
+
+ {"Alpine bind 2", `<div x-bind:class="{ 'bg-black': filter.checked }"
+ class="inline-block mr-1 mb-2 rounded bg-gray-300 px-2 py-2">FOO</div>`,
+ f("div", "bg-black bg-gray-300 inline-block mb-2 mr-1 px-2 py-2 rounded", "")},
+
+ {"Alpine bind 3", `<div x-bind:class="{ 'text-gray-800': !checked, 'text-white': checked }"></div>`, f("div", "text-gray-800 text-white", "")},
+ {"Alpine bind 4", `<div x-bind:class="{ 'text-gray-800': !checked,
+ 'text-white': checked }"></div>`, f("div", "text-gray-800 text-white", "")},
+
+ {"Vue bind", `<div v-bind:class="{ active: isActive }"></div>`, f("div", "active", "")},
+ } {
+ c.Run(test.name, func(c *qt.C) {
+ // Each case gets a fresh collector so results do not leak between cases.
+ w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
+ fmt.Fprint(w, test.html)
+ got := w.collector.getHTMLElements()
+ c.Assert(got, qt.DeepEquals, test.expect)
+ })
+ }
+
+}
diff --git a/publisher/publisher.go b/publisher/publisher.go
index f30073c08..8b8d2fa63 100644
--- a/publisher/publisher.go
+++ b/publisher/publisher.go
@@ -1,4 +1,4 @@
-// Copyright 2019 The Hugo Authors. All rights reserved.
+// Copyright 2020 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -18,7 +18,8 @@ import (
"io"
"sync/atomic"
- "github.com/gohugoio/hugo/config"
+ "github.com/gohugoio/hugo/resources"
+
"github.com/gohugoio/hugo/media"
"github.com/gohugoio/hugo/minifiers"
@@ -68,17 +69,21 @@ type Descriptor struct {
// DestinationPublisher is the default and currently only publisher in Hugo. This
// publisher prepares and publishes an item to the defined destination, e.g. /public.
type DestinationPublisher struct {
- fs afero.Fs
- min minifiers.Client
+ fs afero.Fs
+ min minifiers.Client
+ htmlElementsCollector *htmlElementsCollector
}
// NewDestinationPublisher creates a new DestinationPublisher.
-func NewDestinationPublisher(fs afero.Fs, outputFormats output.Formats, mediaTypes media.Types, cfg config.Provider) (pub DestinationPublisher, err error) {
- pub = DestinationPublisher{fs: fs}
- pub.min, err = minifiers.New(mediaTypes, outputFormats, cfg)
- if err != nil {
- return
+func NewDestinationPublisher(rs *resources.Spec, outputFormats output.Formats, mediaTypes media.Types) (pub DestinationPublisher, err error) {
+ fs := rs.BaseFs.PublishFs
+ cfg := rs.Cfg
+ var classCollector *htmlElementsCollector
+ if rs.BuildConfig.WriteStats {
+ classCollector = newHTMLElementsCollector()
}
+ pub = DestinationPublisher{fs: fs, htmlElementsCollector: classCollector}
+ pub.min, err = minifiers.New(mediaTypes, outputFormats, cfg)
return
}
@@ -111,16 +116,38 @@ func (p DestinationPublisher) Publish(d Descriptor) error {
}
defer f.Close()
- _, err = io.Copy(f, src)
+ var w io.Writer = f
+
+ if p.htmlElementsCollector != nil && d.OutputFormat.IsHTML {
+ w = io.MultiWriter(w, newHTMLElementsCollectorWriter(p.htmlElementsCollector))
+ }
+
+ _, err = io.Copy(w, src)
if err == nil && d.StatCounter != nil {
atomic.AddUint64(d.StatCounter, uint64(1))
}
+
return err
}
+// PublishStats returns the HTML element statistics gathered while publishing.
+// The result is the zero value when no collector is configured.
+func (p DestinationPublisher) PublishStats() PublishStats {
+	var stats PublishStats
+	if collector := p.htmlElementsCollector; collector != nil {
+		stats.HTMLElements = collector.getHTMLElements()
+	}
+	return stats
+}
+
+// PublishStats holds statistics collected while publishing. It is serialized
+// to JSON with the HTML elements under the "htmlElements" key.
+type PublishStats struct {
+ HTMLElements HTMLElements `json:"htmlElements"`
+}
+
// Publisher publishes a result file.
type Publisher interface {
Publish(d Descriptor) error
+ PublishStats() PublishStats
}
// XML transformer := transform.New(urlreplacers.NewAbsURLInXMLTransformer(path))
diff --git a/publisher/publisher_test.go b/publisher/publisher_test.go
deleted file mode 100644
index 200accc8b..000000000
--- a/publisher/publisher_test.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2018 The Hugo Authors. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package publisher