From 3c29c5af8ee865ef20741f576088e031e940c3d2 Mon Sep 17 00:00:00 2001 From: Bjørn Erik Pedersen Date: Wed, 14 Nov 2018 12:06:46 +0100 Subject: cache/filecache: Add a cache prune func Fixes #5439 --- cache/filecache/filecache.go | 48 ++++++++++++--- cache/filecache/filecache_pruner.go | 80 +++++++++++++++++++++++++ cache/filecache/filecache_pruner_test.go | 100 +++++++++++++++++++++++++++++++ cache/filecache/filecache_test.go | 6 ++ 4 files changed, 225 insertions(+), 9 deletions(-) create mode 100644 cache/filecache/filecache_pruner.go create mode 100644 cache/filecache/filecache_pruner_test.go (limited to 'cache/filecache') diff --git a/cache/filecache/filecache.go b/cache/filecache/filecache.go index a934dd89c..e9f72cb92 100644 --- a/cache/filecache/filecache.go +++ b/cache/filecache/filecache.go @@ -19,6 +19,7 @@ import ( "io/ioutil" "path/filepath" "strings" + "sync" "time" "github.com/gohugoio/hugo/common/hugio" @@ -44,7 +45,30 @@ type Cache struct { // 0 is effectively turning this cache off. maxAge time.Duration - nlocker *locker.Locker + nlocker *lockTracker +} + +type lockTracker struct { + seenMu sync.RWMutex + seen map[string]struct{} + + *locker.Locker +} + +// Lock tracks the ids in use. We use this information to do garbage collection +// after a Hugo build. +func (l *lockTracker) Lock(id string) { + l.seenMu.RLock() + if _, seen := l.seen[id]; !seen { + l.seenMu.RUnlock() + l.seenMu.Lock() + l.seen[id] = struct{}{} + l.seenMu.Unlock() + } else { + l.seenMu.RUnlock() + } + + l.Locker.Lock(id) } // ItemInfo contains info about a cached file. 
@@ -57,7 +81,7 @@ type ItemInfo struct { func NewCache(fs afero.Fs, maxAge time.Duration) *Cache { return &Cache{ Fs: fs, - nlocker: locker.NewLocker(), + nlocker: &lockTracker{Locker: locker.NewLocker(), seen: make(map[string]struct{})}, maxAge: maxAge, } } @@ -232,7 +256,7 @@ func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser { return nil } - if time.Now().Sub(fi.ModTime()) > c.maxAge { + if c.isExpired(fi.ModTime()) { c.Fs.Remove(id) return nil } @@ -247,6 +271,10 @@ func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser { return f } +func (c *Cache) isExpired(modTime time.Time) bool { + return c.maxAge >= 0 && time.Now().Sub(modTime) > c.maxAge +} + // For testing func (c *Cache) getString(id string) string { id = cleanID(id) @@ -254,13 +282,15 @@ func (c *Cache) getString(id string) string { c.nlocker.Lock(id) defer c.nlocker.Unlock(id) - if r := c.getOrRemove(id); r != nil { - defer r.Close() - b, _ := ioutil.ReadAll(r) - return string(b) + f, err := c.Fs.Open(id) + + if err != nil { + return "" } + defer f.Close() - return "" + b, _ := ioutil.ReadAll(f) + return string(b) } @@ -309,5 +339,5 @@ func NewCachesFromPaths(p *paths.Paths) (Caches, error) { } func cleanID(name string) string { - return filepath.Clean(name) + return strings.TrimPrefix(filepath.Clean(name), helpers.FilePathSeparator) } diff --git a/cache/filecache/filecache_pruner.go b/cache/filecache/filecache_pruner.go new file mode 100644 index 000000000..322eabf92 --- /dev/null +++ b/cache/filecache/filecache_pruner.go @@ -0,0 +1,80 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package filecache
+
+import (
+	"io"
+	"os"
+
+	"github.com/pkg/errors"
+	"github.com/spf13/afero"
+)
+
+// Prune removes expired and unused items from these caches and returns the
+// number of files removed. Unused items can only be detected after a full
+// build, when cache usage has been tracked. Note that we operate directly on
+// the filesystem here, so this is not thread safe.
+func (c Caches) Prune() (int, error) {
+	counter := 0
+	for k, cache := range c {
+		err := afero.Walk(cache.Fs, "", func(name string, info os.FileInfo, err error) error {
+			if info == nil {
+				return nil
+			}
+
+			name = cleanID(name)
+
+			if info.IsDir() {
+				f, err := cache.Fs.Open(name)
+				if err != nil {
+					// This cache dir may not exist.
+					return nil
+				}
+				_, err = f.Readdirnames(1)
+				f.Close()
+				if err == io.EOF {
+					// Empty dir. f is already closed: an open handle can block removal (e.g. on Windows).
+					return cache.Fs.Remove(name)
+				}
+
+				return nil
+			}
+
+			shouldRemove := cache.isExpired(info.ModTime())
+
+			if !shouldRemove && len(cache.nlocker.seen) > 0 {
+				// Remove it if it's not been touched/used in the last build.
+				_, seen := cache.nlocker.seen[name]
+				shouldRemove = !seen
+			}
+
+			if shouldRemove {
+				err := cache.Fs.Remove(name)
+				if err == nil {
+					counter++
+				}
+				return err
+			}
+
+			return nil
+		})
+
+		if err != nil {
+			return counter, errors.Wrapf(err, "failed to prune cache %q", k)
+		}
+
+	}
+
+	return counter, nil
+}
diff --git a/cache/filecache/filecache_pruner_test.go b/cache/filecache/filecache_pruner_test.go
new file mode 100644
index 000000000..64d10149d
--- /dev/null
+++ b/cache/filecache/filecache_pruner_test.go
@@ -0,0 +1,100 @@
+// Copyright 2018 The Hugo Authors.
All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package filecache + +import ( + "fmt" + "testing" + "time" + + "github.com/gohugoio/hugo/config" + "github.com/gohugoio/hugo/hugofs" + "github.com/gohugoio/hugo/hugolib/paths" + + "github.com/stretchr/testify/require" +) + +func TestPrune(t *testing.T) { + t.Parallel() + + assert := require.New(t) + + configStr := ` +resourceDir = "myresources" +[caches] +[caches.getjson] +maxAge = "200ms" +dir = "/cache/c" + +` + + cfg, err := config.FromConfigString(configStr, "toml") + assert.NoError(err) + fs := hugofs.NewMem(cfg) + p, err := paths.New(fs, cfg) + assert.NoError(err) + + caches, err := NewCachesFromPaths(p) + assert.NoError(err) + + jsonCache := caches.GetJSONCache() + for i := 0; i < 10; i++ { + id := fmt.Sprintf("i%d", i) + jsonCache.GetOrCreateBytes(id, func() ([]byte, error) { + return []byte("abc"), nil + }) + if i == 4 { + // This will expire the first 5 + time.Sleep(201 * time.Millisecond) + } + } + + count, err := caches.Prune() + assert.NoError(err) + assert.Equal(5, count) + + for i := 0; i < 10; i++ { + id := fmt.Sprintf("i%d", i) + v := jsonCache.getString(id) + if i < 5 { + assert.Equal("", v, id) + } else { + assert.Equal("abc", v, id) + } + } + + caches, err = NewCachesFromPaths(p) + assert.NoError(err) + jsonCache = caches.GetJSONCache() + // Touch one and then prune. 
+ jsonCache.GetOrCreateBytes("i5", func() ([]byte, error) { + return []byte("abc"), nil + }) + + count, err = caches.Prune() + assert.NoError(err) + assert.Equal(4, count) + + // Now only the i5 should be left. + for i := 0; i < 10; i++ { + id := fmt.Sprintf("i%d", i) + v := jsonCache.getString(id) + if i != 5 { + assert.Equal("", v, id) + } else { + assert.Equal("abc", v, id) + } + } + +} diff --git a/cache/filecache/filecache_test.go b/cache/filecache/filecache_test.go index 986d41f7b..9696b6143 100644 --- a/cache/filecache/filecache_test.go +++ b/cache/filecache/filecache_test.go @@ -209,3 +209,9 @@ dir = "/cache/c" } wg.Wait() } + +func TestCleanID(t *testing.T) { + assert := require.New(t) + assert.Equal(filepath.FromSlash("a/b/c.txt"), cleanID(filepath.FromSlash("/a/b//c.txt"))) + assert.Equal(filepath.FromSlash("a/b/c.txt"), cleanID(filepath.FromSlash("a/b//c.txt"))) +} -- cgit v1.2.3