metrics: Detect partialCached candidates

This commit adds a "cache potential" column when running `hugo --templateMetrics --templateMetricsHints`. This is only calculated when `--templateMetricsHints` is set, as these calculations have a negative effect on the other timings. The value is reported for partials only, and is a number between 0 and 100 that indicates whether `partial` can be replaced with `partialCached`. 100 means that all executions of the same partial resulted in the same output. You should do some manual research before going "all cache".
author: Bjørn Erik Pedersen <[email protected]> 2017-10-04 22:12:51 +0200
committer: Bjørn Erik Pedersen <[email protected]> 2017-10-07 18:00:07 +0200
commit: 5800a20a258378440e203a6c4a4343f5077755df (patch)
tree: dca1bd5030dccd56b4e9ad2dc1e037955aa4e20d /metrics
parent: e2e8bcbec34702a27047b91b6b007a15f1fc0797 (diff)
download: hugo-5800a20a258378440e203a6c4a4343f5077755df.tar.gz
hugo-5800a20a258378440e203a6c4a4343f5077755df.zip
2 files changed, 179 insertions, 16 deletions
diff --git a/metrics/metrics.go b/metrics/metrics.go
index 9f8a158ad..c83610a92 100644
--- a/metrics/metrics.go
+++ b/metrics/metrics.go
@@ -17,7 +17,10 @@ package metrics
 import (
 	"fmt"
 	"io"
+	"math"
 	"sort"
+	"strconv"
+	"strings"
 	"sync"
 	"time"
 )
@@ -31,21 +34,48 @@ type Provider interface {
 	// WriteMetrics will write a summary of the metrics to w.
 	WriteMetrics(w io.Writer)
 
+	// TrackValue tracks the value for diff calculations etc.
+	TrackValue(key, value string)
+
 	// Reset clears the metric store.
 	Reset()
 }
 
+type diff struct {
+	baseline string
+	count    int
+	simSum   int
+}
+
+func (d *diff) add(v string) *diff {
+	if d.baseline == "" {
+		d.baseline = v
+		d.count = 1
+		d.simSum = 100 // If we get only one it is very cache friendly.
+		return d
+	}
+
+	d.simSum += howSimilar(v, d.baseline)
+	d.count++
+
+	return d
+}
+
 // Store provides storage for a set of metrics.
 type Store struct {
-	metrics map[string][]time.Duration
-	mu      *sync.Mutex
+	calculateHints bool
+	metrics        map[string][]time.Duration
+	mu             sync.Mutex
+	diffs          map[string]*diff
+	diffmu         sync.Mutex
 }
 
 // NewProvider returns a new instance of a metric store.
-func NewProvider() Provider {
+func NewProvider(calculateHints bool) Provider {
 	return &Store{
-		metrics: make(map[string][]time.Duration),
-		mu:      &sync.Mutex{},
+		calculateHints: calculateHints,
+		metrics:        make(map[string][]time.Duration),
+		diffs:          make(map[string]*diff),
 	}
 }
 
@@ -54,6 +84,32 @@ func (s *Store) Reset() {
 	s.mu.Lock()
 	s.metrics = make(map[string][]time.Duration)
 	s.mu.Unlock()
+	s.diffmu.Lock()
+	s.diffs = make(map[string]*diff)
+	s.diffmu.Unlock()
+}
+
+// TrackValue tracks the value for diff calculations etc.
+func (s *Store) TrackValue(key, value string) {
+	if !s.calculateHints {
+		return
+	}
+
+	s.diffmu.Lock()
+	var (
+		d     *diff
+		found bool
+	)
+
+	d, found = s.diffs[key]
+
+	if !found {
+		d = &diff{}
+		s.diffs[key] = d
+	}
+
+	d.add(value)
+	s.diffmu.Unlock()
 }
 
 // MeasureSince adds a measurement for key to the metric store.
@@ -74,6 +130,12 @@ func (s *Store) WriteMetrics(w io.Writer) {
 		var sum time.Duration
 		var max time.Duration
 
+		diff, found := s.diffs[k]
+		cacheFactor := 0
+		if found {
+			cacheFactor = int(math.Floor(float64(diff.simSum) / float64(diff.count)))
+		}
+
 		for _, d := range v {
 			sum += d
 			if d > max {
@@ -83,31 +145,42 @@ func (s *Store) WriteMetrics(w io.Writer) {
 
 		avg := time.Duration(int(sum) / len(v))
 
-		results[i] = result{key: k, count: len(v), max: max, sum: sum, avg: avg}
+		results[i] = result{key: k, count: len(v), max: max, sum: sum, avg: avg, cacheFactor: cacheFactor}
 		i++
 	}
 
 	s.mu.Unlock()
 
-	// sort and print results
-	fmt.Fprintf(w, "  %13s  %12s  %12s  %5s  %s\n", "cumulative", "average", "maximum", "", "")
-	fmt.Fprintf(w, "  %13s  %12s  %12s  %5s  %s\n", "duration", "duration", "duration", "count", "template")
-	fmt.Fprintf(w, "  %13s  %12s  %12s  %5s  %s\n", "----------", "--------", "--------", "-----", "--------")
+	if s.calculateHints {
+		fmt.Fprintf(w, "  %9s  %13s  %12s  %12s  %5s  %s\n", "cache", "cumulative", "average", "maximum", "", "")
+		fmt.Fprintf(w, "  %9s  %13s  %12s  %12s  %5s  %s\n", "potential", "duration", "duration", "duration", "count", "template")
+		fmt.Fprintf(w, "  %9s  %13s  %12s  %12s  %5s  %s\n", "-----", "----------", "--------", "--------", "-----", "--------")
+	} else {
+		fmt.Fprintf(w, "  %13s  %12s  %12s  %5s  %s\n", "cumulative", "average", "maximum", "", "")
+		fmt.Fprintf(w, "  %13s  %12s  %12s  %5s  %s\n", "duration", "duration", "duration", "count", "template")
+		fmt.Fprintf(w, "  %13s  %12s  %12s  %5s  %s\n", "----------", "--------", "--------", "-----", "--------")
+
+	}
 
 	sort.Sort(bySum(results))
 	for _, v := range results {
-		fmt.Fprintf(w, "  %13s  %12s  %12s  %5d  %s\n", v.sum, v.avg, v.max, v.count, v.key)
+		if s.calculateHints {
+			fmt.Fprintf(w, "  %9d %13s  %12s  %12s  %5d  %s\n", v.cacheFactor, v.sum, v.avg, v.max, v.count, v.key)
+		} else {
+			fmt.Fprintf(w, "  %13s  %12s  %12s  %5d  %s\n", v.sum, v.avg, v.max, v.count, v.key)
+		}
 	}
 
 }
 
 // A result represents the calculated results for a given metric.
 type result struct {
-	key   string
-	count int
-	sum   time.Duration
-	max   time.Duration
-	avg   time.Duration
+	key         string
+	count       int
+	cacheFactor int
+	sum         time.Duration
+	max         time.Duration
+	avg         time.Duration
 }
 
 type bySum []result
@@ -115,3 +188,43 @@ type bySum []result
 func (b bySum) Len() int           { return len(b) }
 func (b bySum) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
 func (b bySum) Less(i, j int) bool { return b[i].sum > b[j].sum }
+
+// howSimilar is a naive diff implementation that returns
+// a number between 0-100 indicating how similar a and b are.
+// 100 is when all words in a also exists in b.
+func howSimilar(a, b string) int {
+
+	if a == b {
+		return 100
+	}
+
+	// Give some weight to the word positions.
+	const partitionSize = 4
+
+	af, bf := strings.Fields(a), strings.Fields(b)
+	if len(bf) > len(af) {
+		af, bf = bf, af
+	}
+
+	m1 := make(map[string]bool)
+	for i, x := range bf {
+		partition := partition(i, partitionSize)
+		key := x + "/" + strconv.Itoa(partition)
+		m1[key] = true
+	}
+
+	common := 0
+	for i, x := range af {
+		partition := partition(i, partitionSize)
+		key := x + "/" + strconv.Itoa(partition)
+		if m1[key] {
+			common++
+		}
+	}
+
+	return int(math.Floor((float64(common) / float64(len(af)) * 100)))
+}
+
+func partition(d, scale int) int {
+	return int(math.Floor((float64(d) / float64(scale)))) * scale
+}
diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go
new file mode 100644
index 000000000..5a5553ecd
--- /dev/null
+++ b/metrics/metrics_test.go
@@ -0,0 +1,50 @@
+// Copyright 2017 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package metrics
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestSimilarPercentage(t *testing.T) {
+	assert := require.New(t)
+
+	sentence := "this is some words about nothing, Hugo!"
+	words := strings.Fields(sentence)
+	for i, j := 0, len(words)-1; i < j; i, j = i+1, j-1 {
+		words[i], words[j] = words[j], words[i]
+	}
+	sentenceReversed := strings.Join(words, " ")
+
+	assert.Equal(100, howSimilar("Hugo Rules", "Hugo Rules"))
+	assert.Equal(50, howSimilar("Hugo Rules", "Hugo Rocks"))
+	assert.Equal(66, howSimilar("The Hugo Rules", "The Hugo Rocks"))
+	assert.Equal(66, howSimilar("The Hugo Rules", "The Hugo"))
+	assert.Equal(66, howSimilar("The Hugo", "The Hugo Rules"))
+	assert.Equal(0, howSimilar("Totally different", "Not Same"))
+	assert.Equal(14, howSimilar(sentence, sentenceReversed))
+
+}
+
+func BenchmarkHowSimilar(b *testing.B) {
+	s1 := "Hugo is cool and " + strings.Repeat("fun ", 10) + "!"
+	s2 := "Hugo is cool and " + strings.Repeat("cool ", 10) + "!"
+
+	for i := 0; i < b.N; i++ {
+		howSimilar(s1, s2)
+	}
+}
author	Bjørn Erik Pedersen <[email protected]>	2017-10-04 22:12:51 +0200
committer	Bjørn Erik Pedersen <[email protected]>	2017-10-07 18:00:07 +0200
commit	5800a20a258378440e203a6c4a4343f5077755df (patch)
tree	dca1bd5030dccd56b4e9ad2dc1e037955aa4e20d /metrics
parent	e2e8bcbec34702a27047b91b6b007a15f1fc0797 (diff)
download	hugo-5800a20a258378440e203a6c4a4343f5077755df.tar.gz hugo-5800a20a258378440e203a6c4a4343f5077755df.zip