From eada236f87d9669885da1ff647672bb3dc6b4954 Mon Sep 17 00:00:00 2001 From: Bjørn Erik Pedersen Date: Tue, 10 Sep 2019 11:26:34 +0200 Subject: Introduce a tree map for all content MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit introduces a new data structure to store pages and their resources. This data structure is backed by radix trees. This simplifies tree operations, makes all pages a bundle, and paves the way for #6310. It also solves a set of annoying issues (see list below). Although not a motivation behind this change, this commit also makes Hugo in general a little bit faster and more memory efficient (see benchmarks), especially for partial rebuilds on content edits, but also when taxonomies are in use. ``` name old time/op new time/op delta SiteNew/Bundle_with_image/Edit-16 1.32ms ± 8% 1.00ms ± 9% -24.42% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file/Edit-16 1.28ms ± 0% 0.94ms ± 0% -26.26% (p=0.029 n=4+4) SiteNew/Tags_and_categories/Edit-16 33.9ms ± 2% 21.8ms ± 1% -35.67% (p=0.029 n=4+4) SiteNew/Canonify_URLs/Edit-16 40.6ms ± 1% 37.7ms ± 3% -7.20% (p=0.029 n=4+4) SiteNew/Deep_content_tree/Edit-16 56.7ms ± 0% 51.7ms ± 1% -8.82% (p=0.029 n=4+4) SiteNew/Many_HTML_templates/Edit-16 19.9ms ± 2% 18.3ms ± 3% -7.64% (p=0.029 n=4+4) SiteNew/Page_collections/Edit-16 37.9ms ± 4% 34.0ms ± 2% -10.28% (p=0.029 n=4+4) SiteNew/Bundle_with_image-16 10.7ms ± 0% 10.6ms ± 0% -1.15% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file-16 10.8ms ± 0% 10.7ms ± 0% -1.05% (p=0.029 n=4+4) SiteNew/Tags_and_categories-16 43.2ms ± 1% 39.6ms ± 1% -8.35% (p=0.029 n=4+4) SiteNew/Canonify_URLs-16 47.6ms ± 1% 47.3ms ± 0% ~ (p=0.057 n=4+4) SiteNew/Deep_content_tree-16 73.0ms ± 1% 74.2ms ± 1% ~ (p=0.114 n=4+4) SiteNew/Many_HTML_templates-16 37.9ms ± 0% 38.1ms ± 1% ~ (p=0.114 n=4+4) SiteNew/Page_collections-16 53.6ms ± 1% 54.7ms ± 1% +2.09% (p=0.029 n=4+4) name old alloc/op new alloc/op delta SiteNew/Bundle_with_image/Edit-16 486kB ± 0% 430kB ± 0% -11.47% 
(p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file/Edit-16 265kB ± 0% 209kB ± 0% -21.06% (p=0.029 n=4+4) SiteNew/Tags_and_categories/Edit-16 13.6MB ± 0% 8.8MB ± 0% -34.93% (p=0.029 n=4+4) SiteNew/Canonify_URLs/Edit-16 66.5MB ± 0% 63.9MB ± 0% -3.95% (p=0.029 n=4+4) SiteNew/Deep_content_tree/Edit-16 28.8MB ± 0% 25.8MB ± 0% -10.55% (p=0.029 n=4+4) SiteNew/Many_HTML_templates/Edit-16 6.16MB ± 0% 5.56MB ± 0% -9.86% (p=0.029 n=4+4) SiteNew/Page_collections/Edit-16 16.9MB ± 0% 16.0MB ± 0% -5.19% (p=0.029 n=4+4) SiteNew/Bundle_with_image-16 2.28MB ± 0% 2.29MB ± 0% +0.35% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file-16 2.07MB ± 0% 2.07MB ± 0% ~ (p=0.114 n=4+4) SiteNew/Tags_and_categories-16 14.3MB ± 0% 13.2MB ± 0% -7.30% (p=0.029 n=4+4) SiteNew/Canonify_URLs-16 69.1MB ± 0% 69.0MB ± 0% ~ (p=0.343 n=4+4) SiteNew/Deep_content_tree-16 31.3MB ± 0% 31.8MB ± 0% +1.49% (p=0.029 n=4+4) SiteNew/Many_HTML_templates-16 10.8MB ± 0% 10.9MB ± 0% +1.11% (p=0.029 n=4+4) SiteNew/Page_collections-16 21.4MB ± 0% 21.6MB ± 0% +1.15% (p=0.029 n=4+4) name old allocs/op new allocs/op delta SiteNew/Bundle_with_image/Edit-16 4.74k ± 0% 3.86k ± 0% -18.57% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file/Edit-16 4.73k ± 0% 3.85k ± 0% -18.58% (p=0.029 n=4+4) SiteNew/Tags_and_categories/Edit-16 301k ± 0% 198k ± 0% -34.14% (p=0.029 n=4+4) SiteNew/Canonify_URLs/Edit-16 389k ± 0% 373k ± 0% -4.07% (p=0.029 n=4+4) SiteNew/Deep_content_tree/Edit-16 338k ± 0% 262k ± 0% -22.63% (p=0.029 n=4+4) SiteNew/Many_HTML_templates/Edit-16 102k ± 0% 88k ± 0% -13.81% (p=0.029 n=4+4) SiteNew/Page_collections/Edit-16 176k ± 0% 152k ± 0% -13.32% (p=0.029 n=4+4) SiteNew/Bundle_with_image-16 26.8k ± 0% 26.8k ± 0% +0.05% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file-16 26.8k ± 0% 26.8k ± 0% +0.05% (p=0.029 n=4+4) SiteNew/Tags_and_categories-16 273k ± 0% 245k ± 0% -10.36% (p=0.029 n=4+4) SiteNew/Canonify_URLs-16 396k ± 0% 398k ± 0% +0.39% (p=0.029 n=4+4) SiteNew/Deep_content_tree-16 317k ± 0% 325k ± 0% +2.53% (p=0.029 n=4+4) 
SiteNew/Many_HTML_templates-16 146k ± 0% 147k ± 0% +0.98% (p=0.029 n=4+4) SiteNew/Page_collections-16 210k ± 0% 215k ± 0% +2.44% (p=0.029 n=4+4) ``` Fixes #6312 Fixes #6087 Fixes #6738 Fixes #6412 Fixes #6743 Fixes #6875 Fixes #6034 Fixes #6902 Fixes #6173 Fixes #6590 --- parser/metadecoders/format.go | 18 ------------ parser/metadecoders/format_test.go | 18 ------------ parser/pageparser/pageparser.go | 56 ++++++++++++++++++++++++++++++++++++ parser/pageparser/pageparser_test.go | 19 ++++++++++++ 4 files changed, 75 insertions(+), 36 deletions(-) (limited to 'parser') diff --git a/parser/metadecoders/format.go b/parser/metadecoders/format.go index 4f81528c3..9e9cc2e1f 100644 --- a/parser/metadecoders/format.go +++ b/parser/metadecoders/format.go @@ -18,8 +18,6 @@ import ( "strings" "github.com/gohugoio/hugo/media" - - "github.com/gohugoio/hugo/parser/pageparser" ) type Format string @@ -72,22 +70,6 @@ func FormatFromMediaType(m media.Type) Format { return "" } -// FormatFromFrontMatterType will return empty if not supported. -func FormatFromFrontMatterType(typ pageparser.ItemType) Format { - switch typ { - case pageparser.TypeFrontMatterJSON: - return JSON - case pageparser.TypeFrontMatterORG: - return ORG - case pageparser.TypeFrontMatterTOML: - return TOML - case pageparser.TypeFrontMatterYAML: - return YAML - default: - return "" - } -} - // FormatFromContentString tries to detect the format (JSON, YAML or TOML) // in the given string. // It return an empty string if no format could be detected. 
diff --git a/parser/metadecoders/format_test.go b/parser/metadecoders/format_test.go index 74d105010..2f625935e 100644 --- a/parser/metadecoders/format_test.go +++ b/parser/metadecoders/format_test.go @@ -18,8 +18,6 @@ import ( "github.com/gohugoio/hugo/media" - "github.com/gohugoio/hugo/parser/pageparser" - qt "github.com/frankban/quicktest" ) @@ -57,22 +55,6 @@ func TestFormatFromMediaType(t *testing.T) { } } -func TestFormatFromFrontMatterType(t *testing.T) { - c := qt.New(t) - for _, test := range []struct { - typ pageparser.ItemType - expect Format - }{ - {pageparser.TypeFrontMatterJSON, JSON}, - {pageparser.TypeFrontMatterTOML, TOML}, - {pageparser.TypeFrontMatterYAML, YAML}, - {pageparser.TypeFrontMatterORG, ORG}, - {pageparser.TypeIgnore, ""}, - } { - c.Assert(FormatFromFrontMatterType(test.typ), qt.Equals, test.expect) - } -} - func TestFormatFromContentString(t *testing.T) { t.Parallel() c := qt.New(t) diff --git a/parser/pageparser/pageparser.go b/parser/pageparser/pageparser.go index acdb09587..f73eee706 100644 --- a/parser/pageparser/pageparser.go +++ b/parser/pageparser/pageparser.go @@ -22,6 +22,7 @@ import ( "io" "io/ioutil" + "github.com/gohugoio/hugo/parser/metadecoders" "github.com/pkg/errors" ) @@ -43,6 +44,61 @@ func Parse(r io.Reader, cfg Config) (Result, error) { return parseSection(r, cfg, lexIntroSection) } +type ContentFrontMatter struct { + Content []byte + FrontMatter map[string]interface{} + FrontMatterFormat metadecoders.Format +} + +// ParseFrontMatterAndContent is a convenience method to extract front matter +// and content from a content page. +func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) { + var cf ContentFrontMatter + + psr, err := Parse(r, Config{}) + if err != nil { + return cf, err + } + + var frontMatterSource []byte + + iter := psr.Iterator() + + walkFn := func(item Item) bool { + if frontMatterSource != nil { + // The rest is content. 
+ cf.Content = psr.Input()[item.Pos:] + // Done + return false + } else if item.IsFrontMatter() { + cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type) + frontMatterSource = item.Val + } + return true + + } + + iter.PeekWalk(walkFn) + + cf.FrontMatter, err = metadecoders.Default.UnmarshalToMap(frontMatterSource, cf.FrontMatterFormat) + return cf, err +} + +func FormatFromFrontMatterType(typ ItemType) metadecoders.Format { + switch typ { + case TypeFrontMatterJSON: + return metadecoders.JSON + case TypeFrontMatterORG: + return metadecoders.ORG + case TypeFrontMatterTOML: + return metadecoders.TOML + case TypeFrontMatterYAML: + return metadecoders.YAML + default: + return "" + } +} + // ParseMain parses starting with the main section. Used in tests. func ParseMain(r io.Reader, cfg Config) (Result, error) { return parseSection(r, cfg, lexMainSection) diff --git a/parser/pageparser/pageparser_test.go b/parser/pageparser/pageparser_test.go index f54376c33..f7f719938 100644 --- a/parser/pageparser/pageparser_test.go +++ b/parser/pageparser/pageparser_test.go @@ -16,6 +16,9 @@ package pageparser import ( "strings" "testing" + + qt "github.com/frankban/quicktest" + "github.com/gohugoio/hugo/parser/metadecoders" ) func BenchmarkParse(b *testing.B) { @@ -69,3 +72,19 @@ This is some summary. This is some summary. This is some summary. This is some s } } } + +func TestFormatFromFrontMatterType(t *testing.T) { + c := qt.New(t) + for _, test := range []struct { + typ ItemType + expect metadecoders.Format + }{ + {TypeFrontMatterJSON, metadecoders.JSON}, + {TypeFrontMatterTOML, metadecoders.TOML}, + {TypeFrontMatterYAML, metadecoders.YAML}, + {TypeFrontMatterORG, metadecoders.ORG}, + {TypeIgnore, ""}, + } { + c.Assert(FormatFromFrontMatterType(test.typ), qt.Equals, test.expect) + } +} -- cgit v1.2.3