aboutsummaryrefslogtreecommitdiffhomepage
path: root/parser
diff options
context:
space:
mode:
authorBjørn Erik Pedersen <[email protected]>2019-09-10 11:26:34 +0200
committerBjørn Erik Pedersen <[email protected]>2020-02-18 09:49:42 +0100
commiteada236f87d9669885da1ff647672bb3dc6b4954 (patch)
treea0303f067b2cbe06c55637013dbd7702a551c64f /parser
parente5329f13c02b87f0c30f8837759c810cd90ff8da (diff)
downloadhugo-eada236f87d9669885da1ff647672bb3dc6b4954.tar.gz
hugo-eada236f87d9669885da1ff647672bb3dc6b4954.zip
Introduce a tree map for all content
This commit introduces a new data structure to store pages and their resources. This data structure is backed by radix trees. This simplifies tree operations, makes all pages a bundle, and paves the way for #6310. It also solves a set of annoying issues (see list below). Although not a motivation behind this change, this commit also makes Hugo in general a little bit faster and more memory efficient (see benchmarks), especially for partial rebuilds on content edits, but also when taxonomies are in use. ``` name old time/op new time/op delta SiteNew/Bundle_with_image/Edit-16 1.32ms ± 8% 1.00ms ± 9% -24.42% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file/Edit-16 1.28ms ± 0% 0.94ms ± 0% -26.26% (p=0.029 n=4+4) SiteNew/Tags_and_categories/Edit-16 33.9ms ± 2% 21.8ms ± 1% -35.67% (p=0.029 n=4+4) SiteNew/Canonify_URLs/Edit-16 40.6ms ± 1% 37.7ms ± 3% -7.20% (p=0.029 n=4+4) SiteNew/Deep_content_tree/Edit-16 56.7ms ± 0% 51.7ms ± 1% -8.82% (p=0.029 n=4+4) SiteNew/Many_HTML_templates/Edit-16 19.9ms ± 2% 18.3ms ± 3% -7.64% (p=0.029 n=4+4) SiteNew/Page_collections/Edit-16 37.9ms ± 4% 34.0ms ± 2% -10.28% (p=0.029 n=4+4) SiteNew/Bundle_with_image-16 10.7ms ± 0% 10.6ms ± 0% -1.15% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file-16 10.8ms ± 0% 10.7ms ± 0% -1.05% (p=0.029 n=4+4) SiteNew/Tags_and_categories-16 43.2ms ± 1% 39.6ms ± 1% -8.35% (p=0.029 n=4+4) SiteNew/Canonify_URLs-16 47.6ms ± 1% 47.3ms ± 0% ~ (p=0.057 n=4+4) SiteNew/Deep_content_tree-16 73.0ms ± 1% 74.2ms ± 1% ~ (p=0.114 n=4+4) SiteNew/Many_HTML_templates-16 37.9ms ± 0% 38.1ms ± 1% ~ (p=0.114 n=4+4) SiteNew/Page_collections-16 53.6ms ± 1% 54.7ms ± 1% +2.09% (p=0.029 n=4+4) name old alloc/op new alloc/op delta SiteNew/Bundle_with_image/Edit-16 486kB ± 0% 430kB ± 0% -11.47% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file/Edit-16 265kB ± 0% 209kB ± 0% -21.06% (p=0.029 n=4+4) SiteNew/Tags_and_categories/Edit-16 13.6MB ± 0% 8.8MB ± 0% -34.93% (p=0.029 n=4+4) SiteNew/Canonify_URLs/Edit-16 66.5MB ± 0% 63.9MB ± 0% -3.95% (p=0.029 n=4+4) 
SiteNew/Deep_content_tree/Edit-16 28.8MB ± 0% 25.8MB ± 0% -10.55% (p=0.029 n=4+4) SiteNew/Many_HTML_templates/Edit-16 6.16MB ± 0% 5.56MB ± 0% -9.86% (p=0.029 n=4+4) SiteNew/Page_collections/Edit-16 16.9MB ± 0% 16.0MB ± 0% -5.19% (p=0.029 n=4+4) SiteNew/Bundle_with_image-16 2.28MB ± 0% 2.29MB ± 0% +0.35% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file-16 2.07MB ± 0% 2.07MB ± 0% ~ (p=0.114 n=4+4) SiteNew/Tags_and_categories-16 14.3MB ± 0% 13.2MB ± 0% -7.30% (p=0.029 n=4+4) SiteNew/Canonify_URLs-16 69.1MB ± 0% 69.0MB ± 0% ~ (p=0.343 n=4+4) SiteNew/Deep_content_tree-16 31.3MB ± 0% 31.8MB ± 0% +1.49% (p=0.029 n=4+4) SiteNew/Many_HTML_templates-16 10.8MB ± 0% 10.9MB ± 0% +1.11% (p=0.029 n=4+4) SiteNew/Page_collections-16 21.4MB ± 0% 21.6MB ± 0% +1.15% (p=0.029 n=4+4) name old allocs/op new allocs/op delta SiteNew/Bundle_with_image/Edit-16 4.74k ± 0% 3.86k ± 0% -18.57% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file/Edit-16 4.73k ± 0% 3.85k ± 0% -18.58% (p=0.029 n=4+4) SiteNew/Tags_and_categories/Edit-16 301k ± 0% 198k ± 0% -34.14% (p=0.029 n=4+4) SiteNew/Canonify_URLs/Edit-16 389k ± 0% 373k ± 0% -4.07% (p=0.029 n=4+4) SiteNew/Deep_content_tree/Edit-16 338k ± 0% 262k ± 0% -22.63% (p=0.029 n=4+4) SiteNew/Many_HTML_templates/Edit-16 102k ± 0% 88k ± 0% -13.81% (p=0.029 n=4+4) SiteNew/Page_collections/Edit-16 176k ± 0% 152k ± 0% -13.32% (p=0.029 n=4+4) SiteNew/Bundle_with_image-16 26.8k ± 0% 26.8k ± 0% +0.05% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file-16 26.8k ± 0% 26.8k ± 0% +0.05% (p=0.029 n=4+4) SiteNew/Tags_and_categories-16 273k ± 0% 245k ± 0% -10.36% (p=0.029 n=4+4) SiteNew/Canonify_URLs-16 396k ± 0% 398k ± 0% +0.39% (p=0.029 n=4+4) SiteNew/Deep_content_tree-16 317k ± 0% 325k ± 0% +2.53% (p=0.029 n=4+4) SiteNew/Many_HTML_templates-16 146k ± 0% 147k ± 0% +0.98% (p=0.029 n=4+4) SiteNew/Page_collections-16 210k ± 0% 215k ± 0% +2.44% (p=0.029 n=4+4) ``` Fixes #6312 Fixes #6087 Fixes #6738 Fixes #6412 Fixes #6743 Fixes #6875 Fixes #6034 Fixes #6902 Fixes #6173 Fixes #6590
Diffstat (limited to 'parser')
-rw-r--r--parser/metadecoders/format.go18
-rw-r--r--parser/metadecoders/format_test.go18
-rw-r--r--parser/pageparser/pageparser.go56
-rw-r--r--parser/pageparser/pageparser_test.go19
4 files changed, 75 insertions, 36 deletions
diff --git a/parser/metadecoders/format.go b/parser/metadecoders/format.go
index 4f81528c3..9e9cc2e1f 100644
--- a/parser/metadecoders/format.go
+++ b/parser/metadecoders/format.go
@@ -18,8 +18,6 @@ import (
"strings"
"github.com/gohugoio/hugo/media"
-
- "github.com/gohugoio/hugo/parser/pageparser"
)
type Format string
@@ -72,22 +70,6 @@ func FormatFromMediaType(m media.Type) Format {
return ""
}
-// FormatFromFrontMatterType will return empty if not supported.
-func FormatFromFrontMatterType(typ pageparser.ItemType) Format {
- switch typ {
- case pageparser.TypeFrontMatterJSON:
- return JSON
- case pageparser.TypeFrontMatterORG:
- return ORG
- case pageparser.TypeFrontMatterTOML:
- return TOML
- case pageparser.TypeFrontMatterYAML:
- return YAML
- default:
- return ""
- }
-}
-
// FormatFromContentString tries to detect the format (JSON, YAML or TOML)
// in the given string.
// It return an empty string if no format could be detected.
diff --git a/parser/metadecoders/format_test.go b/parser/metadecoders/format_test.go
index 74d105010..2f625935e 100644
--- a/parser/metadecoders/format_test.go
+++ b/parser/metadecoders/format_test.go
@@ -18,8 +18,6 @@ import (
"github.com/gohugoio/hugo/media"
- "github.com/gohugoio/hugo/parser/pageparser"
-
qt "github.com/frankban/quicktest"
)
@@ -57,22 +55,6 @@ func TestFormatFromMediaType(t *testing.T) {
}
}
-func TestFormatFromFrontMatterType(t *testing.T) {
- c := qt.New(t)
- for _, test := range []struct {
- typ pageparser.ItemType
- expect Format
- }{
- {pageparser.TypeFrontMatterJSON, JSON},
- {pageparser.TypeFrontMatterTOML, TOML},
- {pageparser.TypeFrontMatterYAML, YAML},
- {pageparser.TypeFrontMatterORG, ORG},
- {pageparser.TypeIgnore, ""},
- } {
- c.Assert(FormatFromFrontMatterType(test.typ), qt.Equals, test.expect)
- }
-}
-
func TestFormatFromContentString(t *testing.T) {
t.Parallel()
c := qt.New(t)
diff --git a/parser/pageparser/pageparser.go b/parser/pageparser/pageparser.go
index acdb09587..f73eee706 100644
--- a/parser/pageparser/pageparser.go
+++ b/parser/pageparser/pageparser.go
@@ -22,6 +22,7 @@ import (
"io"
"io/ioutil"
+ "github.com/gohugoio/hugo/parser/metadecoders"
"github.com/pkg/errors"
)
@@ -43,6 +44,61 @@ func Parse(r io.Reader, cfg Config) (Result, error) {
return parseSection(r, cfg, lexIntroSection)
}
+type ContentFrontMatter struct {
+ Content []byte // input bytes from the first item after the front matter onward
+ FrontMatter map[string]interface{} // front matter source decoded into a map
+ FrontMatterFormat metadecoders.Format // format derived from the front matter item's type
+}
+
+// ParseFrontMatterAndContent is a convenience method to extract front matter
+// and content from a content page.
+func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) {
+ var cf ContentFrontMatter
+
+ psr, err := Parse(r, Config{})
+ if err != nil {
+ return cf, err
+ }
+
+ var frontMatterSource []byte // stays nil until a front matter item has been seen
+
+ iter := psr.Iterator()
+
+ walkFn := func(item Item) bool {
+ if frontMatterSource != nil {
+ // Front matter was already captured: everything from this item's position onward is content.
+ cf.Content = psr.Input()[item.Pos:]
+ // Done; returning false presumably stops the walk (confirm against PeekWalk).
+ return false
+ } else if item.IsFrontMatter() {
+ cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type)
+ frontMatterSource = item.Val
+ }
+ return true
+
+ }
+
+ iter.PeekWalk(walkFn)
+
+ cf.FrontMatter, err = metadecoders.Default.UnmarshalToMap(frontMatterSource, cf.FrontMatterFormat) // NOTE(review): when no front matter item was found, frontMatterSource is nil here and cf.Content was never assigned; confirm callers expect that.
+ return cf, err
+}
+
+func FormatFromFrontMatterType(typ ItemType) metadecoders.Format {
+ switch typ {
+ case TypeFrontMatterJSON:
+ return metadecoders.JSON
+ case TypeFrontMatterORG:
+ return metadecoders.ORG
+ case TypeFrontMatterTOML:
+ return metadecoders.TOML
+ case TypeFrontMatterYAML:
+ return metadecoders.YAML
+ default:
+ return "" // any other item type is not a supported front matter type: empty Format
+ }
+}
+
// ParseMain parses starting with the main section. Used in tests.
func ParseMain(r io.Reader, cfg Config) (Result, error) {
return parseSection(r, cfg, lexMainSection)
diff --git a/parser/pageparser/pageparser_test.go b/parser/pageparser/pageparser_test.go
index f54376c33..f7f719938 100644
--- a/parser/pageparser/pageparser_test.go
+++ b/parser/pageparser/pageparser_test.go
@@ -16,6 +16,9 @@ package pageparser
import (
"strings"
"testing"
+
+ qt "github.com/frankban/quicktest"
+ "github.com/gohugoio/hugo/parser/metadecoders"
)
func BenchmarkParse(b *testing.B) {
@@ -69,3 +72,19 @@ This is some summary. This is some summary. This is some summary. This is some s
}
}
}
+
+func TestFormatFromFrontMatterType(t *testing.T) {
+ c := qt.New(t)
+ for _, test := range []struct {
+ typ ItemType
+ expect metadecoders.Format
+ }{
+ {TypeFrontMatterJSON, metadecoders.JSON},
+ {TypeFrontMatterTOML, metadecoders.TOML},
+ {TypeFrontMatterYAML, metadecoders.YAML},
+ {TypeFrontMatterORG, metadecoders.ORG},
+ {TypeIgnore, ""}, // an item type that is not front matter maps to the empty Format
+ } {
+ c.Assert(FormatFromFrontMatterType(test.typ), qt.Equals, test.expect)
+ }
+}