aboutsummaryrefslogtreecommitdiffhomepage
path: root/common
diff options
context:
space:
mode:
authorBjørn Erik Pedersen <[email protected]>2024-07-30 12:52:54 +0200
committerBjørn Erik Pedersen <[email protected]>2024-07-31 16:44:06 +0200
commitd5eda13cb2e57998210b66e080dc96e95b38e5f0 (patch)
treeb52bc254cf9d6dcb768a491d73c725d7104dd9cd /common
parent8b5d796989cf0798ee61003159ba8b332675bdf2 (diff)
downloadhugo-d5eda13cb2e57998210b66e080dc96e95b38e5f0.tar.gz
hugo-d5eda13cb2e57998210b66e080dc96e95b38e5f0.zip
Replace the MD5 hashing of images with xxHash
Note that we only use this for change detection. The previous implementation invoked `MD5FromReaderFast` that created an MD5 hash from eight 64-byte chunks in the file, which is obviously very fast. The new implementation creates the hash from the entire file and ... seems to be even more effective: ``` name old time/op new time/op delta HashImage-10 9.45µs ±21% 10.89µs ± 1% ~ (p=0.343 n=4+4) name old alloc/op new alloc/op delta HashImage-10 144B ± 0% 8B ± 0% -94.44% (p=0.029 n=4+4) name old allocs/op new allocs/op delta HashImage-10 4.00 ± 0% 1.00 ± 0% -75.00% (p=0.029 n=4+4) ```
Diffstat (limited to 'common')
-rw-r--r--common/hashing/hashing.go86
-rw-r--r--common/hashing/hashing_test.go79
2 files changed, 165 insertions, 0 deletions
diff --git a/common/hashing/hashing.go b/common/hashing/hashing.go
new file mode 100644
index 000000000..abf8e6b14
--- /dev/null
+++ b/common/hashing/hashing.go
@@ -0,0 +1,86 @@
+// Copyright 2024 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package hashing provides common hashing utilities.
+package hashing
+
+import (
+ "encoding/hex"
+ "io"
+ "sync"
+
+ "github.com/cespare/xxhash/v2"
+)
+
+// XXHashFromReader copies everything readable from r into a pooled xxHash
+// digest and returns the 64-bit hash along with the byte count reported by
+// io.Copy. If the copy fails, it returns zero values and the copy error.
+func XXHashFromReader(r io.ReadSeeker) (uint64, int64, error) {
+	hasher := getXxHashReadFrom()
+	// Hand the hasher back to the pool once the result has been computed.
+	defer putXxHashReadFrom(hasher)
+
+	n, copyErr := io.Copy(hasher, r)
+	if copyErr == nil {
+		return hasher.Sum64(), n, nil
+	}
+	return 0, 0, copyErr
+}
+
+// XXHashFromString calculates the 64-bit xxHash (zero seed) of s.
+//
+// The returned error is always nil; it is kept in the signature so that
+// existing callers continue to compile.
+func XXHashFromString(s string) (uint64, error) {
+	// The one-shot helper hashes the string directly, so there is no
+	// intermediate Digest to construct and no WriteString result to ignore.
+	return xxhash.Sum64String(s), nil
+}
+
+// XxHashFromStringHexEncoded hashes f with xxHash and returns the digest
+// bytes produced by Sum as a lowercase hex string.
+func XxHashFromStringHexEncoded(f string) string {
+	digest := xxhash.New()
+	digest.WriteString(f)
+	// Sum(nil) appends the digest to an empty slice, i.e. returns just the hash bytes.
+	return hex.EncodeToString(digest.Sum(nil))
+}
+
+// xxhashReadFrom couples an xxHash digest with a reusable scratch buffer so
+// that its ReadFrom method can feed a reader into the digest chunk by chunk.
+type xxhashReadFrom struct {
+	// Digest is embedded so its methods (Write, Sum64, Reset, ...) are promoted.
+	*xxhash.Digest
+	// buff is the scratch space each Read call fills before hashing.
+	buff []byte
+}
+
+func (x *xxhashReadFrom) ReadFrom(r io.Reader) (int64, error) {
+ for {
+ n, err := r.Read(x.buff)
+ if n > 0 {
+ x.Digest.Write(x.buff[:n])
+ }
+ if err != nil {
+ if err == io.EOF {
+ err = nil
+ }
+ return int64(n), err
+ }
+ }
+}
+
+// xXhashReadFromPool recycles xxhashReadFrom values. Each newly created
+// value gets a fresh digest and its own 48 KiB read buffer.
+var xXhashReadFromPool = sync.Pool{
+	New: func() any {
+		scratch := make([]byte, 48*1024)
+		return &xxhashReadFrom{buff: scratch, Digest: xxhash.New()}
+	},
+}
+
+// getXxHashReadFrom fetches a hasher from xXhashReadFromPool.
+func getXxHashReadFrom() *xxhashReadFrom {
+	pooled := xXhashReadFromPool.Get()
+	return pooled.(*xxhashReadFrom)
+}
+
+// putXxHashReadFrom resets the digest of h and returns h to
+// xXhashReadFromPool so that it can be reused.
+func putXxHashReadFrom(h *xxhashReadFrom) {
+	h.Digest.Reset()
+	xXhashReadFromPool.Put(h)
+}
diff --git a/common/hashing/hashing_test.go b/common/hashing/hashing_test.go
new file mode 100644
index 000000000..2e79b36b9
--- /dev/null
+++ b/common/hashing/hashing_test.go
@@ -0,0 +1,79 @@
+// Copyright 2024 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package hashing
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/cespare/xxhash/v2"
+ qt "github.com/frankban/quicktest"
+)
+
+func TestXxHashFromReader(t *testing.T) {
+ c := qt.New(t)
+ s := "Hello World"
+ r := strings.NewReader(s)
+ got, size, err := XXHashFromReader(r)
+ c.Assert(err, qt.IsNil)
+ c.Assert(size, qt.Equals, int64(len(s)))
+ c.Assert(got, qt.Equals, uint64(7148569436472236994))
+}
+
+func TestXxHashFromString(t *testing.T) {
+ c := qt.New(t)
+ s := "Hello World"
+ got, err := XXHashFromString(s)
+ c.Assert(err, qt.IsNil)
+ c.Assert(got, qt.Equals, uint64(7148569436472236994))
+}
+
+func TestXxHashFromStringHexEncoded(t *testing.T) {
+ c := qt.New(t)
+ s := "The quick brown fox jumps over the lazy dog"
+ got := XxHashFromStringHexEncoded(s)
+ // Facit: https://asecuritysite.com/encryption/xxhash?val=The%20quick%20brown%20fox%20jumps%20over%20the%20lazy%20dog
+ c.Assert(got, qt.Equals, "0b242d361fda71bc")
+}
+
+// BenchmarkXXHashFromReader measures hashing a short in-memory reader. The
+// reader is rewound after every iteration so that each pass hashes the
+// full input. Any error aborts the benchmark, so a failing code path is
+// never what gets measured.
+func BenchmarkXXHashFromReader(b *testing.B) {
+	r := strings.NewReader("Hello World")
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		if _, _, err := XXHashFromReader(r); err != nil {
+			b.Fatal(err)
+		}
+		// Offset 0 with whence 0 seeks back to the start of the reader.
+		if _, err := r.Seek(0, 0); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+func BenchmarkXXHashFromString(b *testing.B) {
+ s := "Hello World"
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ XXHashFromString(s)
+ }
+}
+
+func BenchmarkXXHashFromStringHexEncoded(b *testing.B) {
+ s := "The quick brown fox jumps over the lazy dog"
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ XxHashFromStringHexEncoded(s)
+ }
+}
+
+// xxHashFromString returns the 64-bit xxHash of f using a fresh digest.
+// NOTE(review): not referenced by the tests visible in this diff; confirm
+// whether it is still needed.
+func xxHashFromString(f string) uint64 {
+	digest := xxhash.New()
+	digest.WriteString(f)
+	return digest.Sum64()
+}