diff options
author | Bjørn Erik Pedersen <[email protected]> | 2024-07-30 12:52:54 +0200 |
---|---|---|
committer | Bjørn Erik Pedersen <[email protected]> | 2024-07-31 16:44:06 +0200 |
commit | d5eda13cb2e57998210b66e080dc96e95b38e5f0 (patch) | |
tree | b52bc254cf9d6dcb768a491d73c725d7104dd9cd /common | |
parent | 8b5d796989cf0798ee61003159ba8b332675bdf2 (diff) | |
download | hugo-d5eda13cb2e57998210b66e080dc96e95b38e5f0.tar.gz hugo-d5eda13cb2e57998210b66e080dc96e95b38e5f0.zip |
Replace the MD5 hashing of images with xxHash
Note that we only use this for change detection.
The previous implementation invoked `MD5FromReaderFast` that created a MD5 has from 8 64 bytes chunks in the file, which is obviously very fast. The new implementation creates the hash from the entire file and ... seems to be even more effective:
```
name old time/op new time/op delta
HashImage-10 9.45µs ±21% 10.89µs ± 1% ~ (p=0.343 n=4+4)
name old alloc/op new alloc/op delta
HashImage-10 144B ± 0% 8B ± 0% -94.44% (p=0.029 n=4+4)
name old allocs/op new allocs/op delta
HashImage-10 4.00 ± 0% 1.00 ± 0% -75.00% (p=0.029 n=4+4)
```
Diffstat (limited to 'common')
-rw-r--r-- | common/hashing/hashing.go | 86 | ||||
-rw-r--r-- | common/hashing/hashing_test.go | 79 |
2 files changed, 165 insertions, 0 deletions
diff --git a/common/hashing/hashing.go b/common/hashing/hashing.go new file mode 100644 index 000000000..abf8e6b14 --- /dev/null +++ b/common/hashing/hashing.go @@ -0,0 +1,86 @@ +// Copyright 2024 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package hashing provides common hashing utilities. +package hashing + +import ( + "encoding/hex" + "io" + "sync" + + "github.com/cespare/xxhash/v2" +) + +// XXHashFromReader calculates the xxHash for the given reader. +func XXHashFromReader(r io.ReadSeeker) (uint64, int64, error) { + h := getXxHashReadFrom() + defer putXxHashReadFrom(h) + + size, err := io.Copy(h, r) + if err != nil { + return 0, 0, err + } + return h.Sum64(), size, nil +} + +// XXHashFromString calculates the xxHash for the given string. +func XXHashFromString(s string) (uint64, error) { + h := xxhash.New() + h.WriteString(s) + return h.Sum64(), nil +} + +// XxHashFromStringHexEncoded calculates the xxHash for the given string +// and returns the hash as a hex encoded string. +func XxHashFromStringHexEncoded(f string) string { + h := xxhash.New() + h.WriteString(f) + hash := h.Sum(nil) + return hex.EncodeToString(hash) +} + +type xxhashReadFrom struct { + buff []byte + *xxhash.Digest +} + +func (x *xxhashReadFrom) ReadFrom(r io.Reader) (int64, error) { + for { + n, err := r.Read(x.buff) + if n > 0 { + x.Digest.Write(x.buff[:n]) + } + if err != nil { + if err == io.EOF { + err = nil + } + return int64(n), err + } + } +} + +var xXhashReadFromPool = sync.Pool{ + New: func() any { + return &xxhashReadFrom{Digest: xxhash.New(), buff: make([]byte, 48*1024)} + }, +} + +func getXxHashReadFrom() *xxhashReadFrom { + return xXhashReadFromPool.Get().(*xxhashReadFrom) +} + +func putXxHashReadFrom(h *xxhashReadFrom) { + h.Reset() + xXhashReadFromPool.Put(h) +} diff --git a/common/hashing/hashing_test.go b/common/hashing/hashing_test.go new file mode 100644 index 000000000..2e79b36b9 --- /dev/null +++ b/common/hashing/hashing_test.go @@ -0,0 +1,79 @@ +// Copyright 2024 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package hashing + +import ( + "strings" + "testing" + + "github.com/cespare/xxhash/v2" + qt "github.com/frankban/quicktest" +) + +func TestXxHashFromReader(t *testing.T) { + c := qt.New(t) + s := "Hello World" + r := strings.NewReader(s) + got, size, err := XXHashFromReader(r) + c.Assert(err, qt.IsNil) + c.Assert(size, qt.Equals, int64(len(s))) + c.Assert(got, qt.Equals, uint64(7148569436472236994)) +} + +func TestXxHashFromString(t *testing.T) { + c := qt.New(t) + s := "Hello World" + got, err := XXHashFromString(s) + c.Assert(err, qt.IsNil) + c.Assert(got, qt.Equals, uint64(7148569436472236994)) +} + +func TestXxHashFromStringHexEncoded(t *testing.T) { + c := qt.New(t) + s := "The quick brown fox jumps over the lazy dog" + got := XxHashFromStringHexEncoded(s) + // Facit: https://asecuritysite.com/encryption/xxhash?val=The%20quick%20brown%20fox%20jumps%20over%20the%20lazy%20dog + c.Assert(got, qt.Equals, "0b242d361fda71bc") +} + +func BenchmarkXXHashFromReader(b *testing.B) { + r := strings.NewReader("Hello World") + b.ResetTimer() + for i := 0; i < b.N; i++ { + XXHashFromReader(r) + r.Seek(0, 0) + } +} + +func BenchmarkXXHashFromString(b *testing.B) { + s := "Hello World" + b.ResetTimer() + for i := 0; i < b.N; i++ { + XXHashFromString(s) + } +} + +func BenchmarkXXHashFromStringHexEncoded(b *testing.B) { + s := "The quick brown fox jumps over the lazy dog" + b.ResetTimer() + for i := 0; i < b.N; i++ { + XxHashFromStringHexEncoded(s) + } +} + +func xxHashFromString(f string) uint64 { + h := xxhash.New() + h.WriteString(f) + return h.Sum64() +} |