aboutsummaryrefslogtreecommitdiffhomepage
path: root/hugofs
diff options
context:
space:
mode:
author Bjørn Erik Pedersen <[email protected]> 2024-07-06 16:06:24 +0200
committer Bjørn Erik Pedersen <[email protected]> 2024-07-06 16:57:16 +0200
commit fb8909d5b0ed01e6d12e0c5c5bd679cbc80159ed (patch)
tree 999f8ebb4842e92fab54355a272bc6897fb8c277 /hugofs
parent 0ee2610d7c67bad1c3d8de579c6aa368ebcf761e (diff)
download hugo-fb8909d5b0ed01e6d12e0c5c5bd679cbc80159ed.tar.gz
hugo-fb8909d5b0ed01e6d12e0c5c5bd679cbc80159ed.zip
Use xxHash for the change detector
Much faster compared to MD5:

```
name          old time/op    new time/op    delta
HashingFs-10    21.3µs ± 2%     3.2µs ±17%  -84.96%  (p=0.029 n=4+4)

name          old alloc/op   new alloc/op   delta
HashingFs-10    12.9kB ± 0%    12.8kB ± 1%   -1.31%  (p=0.029 n=4+4)

name          old allocs/op  new allocs/op  delta
HashingFs-10      10.0 ± 0%       7.0 ± 0%  -30.00%  (p=0.029 n=4+4)
```

Updates #12643
Diffstat (limited to 'hugofs')
-rw-r--r-- hugofs/hashing_fs.go 32
-rw-r--r-- hugofs/hashing_fs_test.go 35
2 files changed, 45 insertions, 22 deletions
diff --git a/hugofs/hashing_fs.go b/hugofs/hashing_fs.go
index d15ba5863..5072e2586 100644
--- a/hugofs/hashing_fs.go
+++ b/hugofs/hashing_fs.go
@@ -14,25 +14,24 @@
package hugofs
import (
- "crypto/md5"
- "encoding/hex"
"hash"
"os"
+ "github.com/cespare/xxhash/v2"
"github.com/spf13/afero"
)
var (
- _ afero.Fs = (*md5HashingFs)(nil)
- _ FilesystemUnwrapper = (*md5HashingFs)(nil)
+ _ afero.Fs = (*hashingFs)(nil)
+ _ FilesystemUnwrapper = (*hashingFs)(nil)
)
// FileHashReceiver will receive the filename and the content's checksum on file close.
type FileHashReceiver interface {
- OnFileClose(name, md5sum string)
+ OnFileClose(name string, checksum uint64)
}
-type md5HashingFs struct {
+type hashingFs struct {
afero.Fs
hashReceiver FileHashReceiver
}
@@ -45,14 +44,14 @@ type md5HashingFs struct {
// Note that this will only work for file operations that use the io.Writer
// to write content to file, but that is fine for the "publish content" use case.
func NewHashingFs(delegate afero.Fs, hashReceiver FileHashReceiver) afero.Fs {
- return &md5HashingFs{Fs: delegate, hashReceiver: hashReceiver}
+ return &hashingFs{Fs: delegate, hashReceiver: hashReceiver}
}
-func (fs *md5HashingFs) UnwrapFilesystem() afero.Fs {
+func (fs *hashingFs) UnwrapFilesystem() afero.Fs {
return fs.Fs
}
-func (fs *md5HashingFs) Create(name string) (afero.File, error) {
+func (fs *hashingFs) Create(name string) (afero.File, error) {
f, err := fs.Fs.Create(name)
if err == nil {
f = fs.wrapFile(f)
@@ -60,7 +59,7 @@ func (fs *md5HashingFs) Create(name string) (afero.File, error) {
return f, err
}
-func (fs *md5HashingFs) OpenFile(name string, flag int, perm os.FileMode) (afero.File, error) {
+func (fs *hashingFs) OpenFile(name string, flag int, perm os.FileMode) (afero.File, error) {
f, err := fs.Fs.OpenFile(name, flag, perm)
if err == nil && isWrite(flag) {
f = fs.wrapFile(f)
@@ -68,17 +67,17 @@ func (fs *md5HashingFs) OpenFile(name string, flag int, perm os.FileMode) (afero
return f, err
}
-func (fs *md5HashingFs) wrapFile(f afero.File) afero.File {
- return &hashingFile{File: f, h: md5.New(), hashReceiver: fs.hashReceiver}
+func (fs *hashingFs) wrapFile(f afero.File) afero.File {
+ return &hashingFile{File: f, h: xxhash.New(), hashReceiver: fs.hashReceiver}
}
-func (fs *md5HashingFs) Name() string {
- return "md5HashingFs"
+func (fs *hashingFs) Name() string {
+ return "hashingFs"
}
type hashingFile struct {
hashReceiver FileHashReceiver
- h hash.Hash
+ h hash.Hash64
afero.File
}
@@ -91,7 +90,6 @@ func (h *hashingFile) Write(p []byte) (n int, err error) {
}
func (h *hashingFile) Close() error {
- sum := hex.EncodeToString(h.h.Sum(nil))
- h.hashReceiver.OnFileClose(h.Name(), sum)
+ h.hashReceiver.OnFileClose(h.Name(), h.h.Sum64())
return h.File.Close()
}
diff --git a/hugofs/hashing_fs_test.go b/hugofs/hashing_fs_test.go
index 3e1f6c41d..74781ba8d 100644
--- a/hugofs/hashing_fs_test.go
+++ b/hugofs/hashing_fs_test.go
@@ -14,6 +14,8 @@
package hugofs
import (
+ "fmt"
+ "strings"
"testing"
qt "github.com/frankban/quicktest"
@@ -21,13 +23,13 @@ import (
)
type testHashReceiver struct {
- sum string
name string
+ sum uint64
}
-func (t *testHashReceiver) OnFileClose(name, md5hash string) {
+func (t *testHashReceiver) OnFileClose(name string, checksum uint64) {
t.name = name
- t.sum = md5hash
+ t.sum = checksum
}
func TestHashingFs(t *testing.T) {
@@ -42,11 +44,34 @@ func TestHashingFs(t *testing.T) {
_, err = f.Write([]byte("content"))
c.Assert(err, qt.IsNil)
c.Assert(f.Close(), qt.IsNil)
- c.Assert(observer.sum, qt.Equals, "9a0364b9e99bb480dd25e1f0284c8555")
+ c.Assert(observer.sum, qt.Equals, uint64(7807861979271768572))
c.Assert(observer.name, qt.Equals, "hashme")
f, err = ofs.Create("nowrites")
c.Assert(err, qt.IsNil)
c.Assert(f.Close(), qt.IsNil)
- c.Assert(observer.sum, qt.Equals, "d41d8cd98f00b204e9800998ecf8427e")
+ c.Assert(observer.sum, qt.Equals, uint64(17241709254077376921))
+}
+
+func BenchmarkHashingFs(b *testing.B) {
+ fs := afero.NewMemMapFs()
+ observer := &testHashReceiver{}
+ ofs := NewHashingFs(fs, observer)
+ content := []byte(strings.Repeat("lorem ipsum ", 1000))
+
+ b.ResetTimer()
+
+ for i := 0; i < b.N; i++ {
+ f, err := ofs.Create(fmt.Sprintf("file%d", i))
+ if err != nil {
+ b.Fatal(err)
+ }
+ _, err = f.Write(content)
+ if err != nil {
+ b.Fatal(err)
+ }
+ if err := f.Close(); err != nil {
+ b.Fatal(err)
+ }
+ }
}