diff options
author | Bjørn Erik Pedersen <[email protected]> | 2024-07-06 16:06:24 +0200 |
---|---|---|
committer | Bjørn Erik Pedersen <[email protected]> | 2024-07-06 16:57:16 +0200 |
commit | fb8909d5b0ed01e6d12e0c5c5bd679cbc80159ed (patch) | |
tree | 999f8ebb4842e92fab54355a272bc6897fb8c277 /hugofs | |
parent | 0ee2610d7c67bad1c3d8de579c6aa368ebcf761e (diff) | |
download | hugo-fb8909d5b0ed01e6d12e0c5c5bd679cbc80159ed.tar.gz hugo-fb8909d5b0ed01e6d12e0c5c5bd679cbc80159ed.zip |
Use xxHash for the change detector
Much faster compared to MD5:
```
name          old time/op    new time/op    delta
HashingFs-10  21.3µs ± 2%    3.2µs ±17%     -84.96%  (p=0.029 n=4+4)

name          old alloc/op   new alloc/op   delta
HashingFs-10  12.9kB ± 0%    12.8kB ± 1%    -1.31%  (p=0.029 n=4+4)

name          old allocs/op  new allocs/op  delta
HashingFs-10  10.0 ± 0%      7.0 ± 0%       -30.00%  (p=0.029 n=4+4)
```
Updates #12643
Diffstat (limited to 'hugofs')
-rw-r--r-- | hugofs/hashing_fs.go | 32 | ||||
-rw-r--r-- | hugofs/hashing_fs_test.go | 35 |
2 files changed, 45 insertions, 22 deletions
diff --git a/hugofs/hashing_fs.go b/hugofs/hashing_fs.go index d15ba5863..5072e2586 100644 --- a/hugofs/hashing_fs.go +++ b/hugofs/hashing_fs.go @@ -14,25 +14,24 @@ package hugofs import ( - "crypto/md5" - "encoding/hex" "hash" "os" + "github.com/cespare/xxhash/v2" "github.com/spf13/afero" ) var ( - _ afero.Fs = (*md5HashingFs)(nil) - _ FilesystemUnwrapper = (*md5HashingFs)(nil) + _ afero.Fs = (*hashingFs)(nil) + _ FilesystemUnwrapper = (*hashingFs)(nil) ) // FileHashReceiver will receive the filename an the content's MD5 sum on file close. type FileHashReceiver interface { - OnFileClose(name, md5sum string) + OnFileClose(name string, checksum uint64) } -type md5HashingFs struct { +type hashingFs struct { afero.Fs hashReceiver FileHashReceiver } @@ -45,14 +44,14 @@ type md5HashingFs struct { // Note that this will only work for file operations that use the io.Writer // to write content to file, but that is fine for the "publish content" use case. func NewHashingFs(delegate afero.Fs, hashReceiver FileHashReceiver) afero.Fs { - return &md5HashingFs{Fs: delegate, hashReceiver: hashReceiver} + return &hashingFs{Fs: delegate, hashReceiver: hashReceiver} } -func (fs *md5HashingFs) UnwrapFilesystem() afero.Fs { +func (fs *hashingFs) UnwrapFilesystem() afero.Fs { return fs.Fs } -func (fs *md5HashingFs) Create(name string) (afero.File, error) { +func (fs *hashingFs) Create(name string) (afero.File, error) { f, err := fs.Fs.Create(name) if err == nil { f = fs.wrapFile(f) @@ -60,7 +59,7 @@ func (fs *md5HashingFs) Create(name string) (afero.File, error) { return f, err } -func (fs *md5HashingFs) OpenFile(name string, flag int, perm os.FileMode) (afero.File, error) { +func (fs *hashingFs) OpenFile(name string, flag int, perm os.FileMode) (afero.File, error) { f, err := fs.Fs.OpenFile(name, flag, perm) if err == nil && isWrite(flag) { f = fs.wrapFile(f) @@ -68,17 +67,17 @@ func (fs *md5HashingFs) OpenFile(name string, flag int, perm os.FileMode) (afero return f, err } -func (fs 
*md5HashingFs) wrapFile(f afero.File) afero.File { - return &hashingFile{File: f, h: md5.New(), hashReceiver: fs.hashReceiver} +func (fs *hashingFs) wrapFile(f afero.File) afero.File { + return &hashingFile{File: f, h: xxhash.New(), hashReceiver: fs.hashReceiver} } -func (fs *md5HashingFs) Name() string { - return "md5HashingFs" +func (fs *hashingFs) Name() string { + return "hashingFs" } type hashingFile struct { hashReceiver FileHashReceiver - h hash.Hash + h hash.Hash64 afero.File } @@ -91,7 +90,6 @@ func (h *hashingFile) Write(p []byte) (n int, err error) { } func (h *hashingFile) Close() error { - sum := hex.EncodeToString(h.h.Sum(nil)) - h.hashReceiver.OnFileClose(h.Name(), sum) + h.hashReceiver.OnFileClose(h.Name(), h.h.Sum64()) return h.File.Close() } diff --git a/hugofs/hashing_fs_test.go b/hugofs/hashing_fs_test.go index 3e1f6c41d..74781ba8d 100644 --- a/hugofs/hashing_fs_test.go +++ b/hugofs/hashing_fs_test.go @@ -14,6 +14,8 @@ package hugofs import ( + "fmt" + "strings" "testing" qt "github.com/frankban/quicktest" @@ -21,13 +23,13 @@ import ( ) type testHashReceiver struct { - sum string name string + sum uint64 } -func (t *testHashReceiver) OnFileClose(name, md5hash string) { +func (t *testHashReceiver) OnFileClose(name string, checksum uint64) { t.name = name - t.sum = md5hash + t.sum = checksum } func TestHashingFs(t *testing.T) { @@ -42,11 +44,34 @@ func TestHashingFs(t *testing.T) { _, err = f.Write([]byte("content")) c.Assert(err, qt.IsNil) c.Assert(f.Close(), qt.IsNil) - c.Assert(observer.sum, qt.Equals, "9a0364b9e99bb480dd25e1f0284c8555") + c.Assert(observer.sum, qt.Equals, uint64(7807861979271768572)) c.Assert(observer.name, qt.Equals, "hashme") f, err = ofs.Create("nowrites") c.Assert(err, qt.IsNil) c.Assert(f.Close(), qt.IsNil) - c.Assert(observer.sum, qt.Equals, "d41d8cd98f00b204e9800998ecf8427e") + c.Assert(observer.sum, qt.Equals, uint64(17241709254077376921)) +} + +func BenchmarkHashingFs(b *testing.B) { + fs := afero.NewMemMapFs() + 
observer := &testHashReceiver{} + ofs := NewHashingFs(fs, observer) + content := []byte(strings.Repeat("lorem ipsum ", 1000)) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + f, err := ofs.Create(fmt.Sprintf("file%d", i)) + if err != nil { + b.Fatal(err) + } + _, err = f.Write(content) + if err != nil { + b.Fatal(err) + } + if err := f.Close(); err != nil { + b.Fatal(err) + } + } } |