diff options
author | Bjørn Erik Pedersen <[email protected]> | 2023-02-11 16:20:24 +0100 |
---|---|---|
committer | Bjørn Erik Pedersen <[email protected]> | 2023-02-21 17:56:41 +0100 |
commit | 90da7664bf1f3a0ca2e18144b5deacf532c6e3cf (patch) | |
tree | 78d8ac72ebb2ccee4ca4bbeeb9add3365c743e90 /related | |
parent | 0afec0a9f4aace1f5f4af6822aeda6223ee3e3a9 (diff) | |
download | hugo-90da7664bf1f3a0ca2e18144b5deacf532c6e3cf.tar.gz hugo-90da7664bf1f3a0ca2e18144b5deacf532c6e3cf.zip |
Add page fragments support to Related
The main topic of this commit is that you can now index fragments (content heading identifiers) when calling `.Related`.
You can do this by:
* Configure one or more indices with type `fragments`
* The name of those index configurations maps to an (optional) front matter slice with fragment references. This allows you to link
page<->fragment and page<->page.
* This also will index all the fragments (heading identifiers) of the pages.
It's also possible to use type `fragments` indices in shortcode, e.g.:
```
{{ $related := site.RegularPages.Related .Page }}
```
But, and this is important, you need to include the shortcode using the `{{<` delimiter. Not doing so will create infinite loops and timeouts.
This commit also:
* Adds two new methods to Page: Fragments (can also be used to build ToC) and HeadingsFiltered (this is only used in Related Content with
index type `fragments` and `applyFilter` set to true).
* Consolidates all `.Related*` methods into one, which takes either a `Page` or an options map as its only argument.
* Add `context.Context` to all of the content related Page API. Turns out it wasn't strictly needed for this particular feature, but it will
soon become useful, e.g. in #9339.
Closes #10711
Updates #9339
Updates #10725
Diffstat (limited to 'related')
-rw-r--r-- | related/integration_test.go | 121 | ||||
-rw-r--r-- | related/inverted_index.go | 275 | ||||
-rw-r--r-- | related/inverted_index_test.go | 41 |
3 files changed, 346 insertions, 91 deletions
diff --git a/related/integration_test.go b/related/integration_test.go index 31af01614..70875b212 100644 --- a/related/integration_test.go +++ b/related/integration_test.go @@ -21,6 +21,123 @@ import ( "github.com/gohugoio/hugo/hugolib" ) +func TestRelatedFragments(t *testing.T) { + t.Parallel() + + files := ` +-- hugo.toml -- +baseURL = "http://example.com/" +disableKinds = ["taxonomy", "term", "RSS", "sitemap", "robotsTXT"] +[related] + includeNewer = false + threshold = 80 + toLower = false +[[related.indices]] + name = 'pagerefs' + type = 'fragments' + applyFilter = true + weight = 90 +[[related.indices]] + name = 'keywords' + weight = 80 +-- content/p1.md -- +--- +title: p1 +pagerefs: ['ref1'] +--- +{{< see-also >}} + +## P1 title + +-- content/p2.md -- +--- +title: p2 +--- + +## P2 title 1 + +## P2 title 2 + +## First title {#ref1} +{{< see-also "ref1" >}} +-- content/p3.md -- +--- +title: p3 +keywords: ['foo'] +--- + +## P3 title 1 + +## P3 title 2 + +## Common p3, p4, p5 +-- content/p4.md -- +--- +title: p4 +--- + +## Common p3, p4, p5 + +## P4 title 1 + +-- content/p5.md -- +--- +title: p5 +keywords: ['foo'] +--- + +## P5 title 1 + +## Common p3, p4, p5 + +-- layouts/shortcodes/see-also.html -- +{{ $p1 := site.GetPage "p1" }} +{{ $p2 := site.GetPage "p2" }} +{{ $p3 := site.GetPage "p3" }} +P1 Fragments: {{ $p1.Fragments.Identifiers }} +P2 Fragments: {{ $p2.Fragments.Identifiers }} +Contains ref1: {{ $p2.Fragments.Identifiers.Contains "ref1" }} +Count ref1: {{ $p2.Fragments.Identifiers.Count "ref1" }} +{{ $opts := dict "document" .Page "fragments" $.Params }} +{{ $related1 := site.RegularPages.Related $opts }} +{{ $related2 := site.RegularPages.Related $p3 }} +Len Related 1: {{ len $related1 }} +Len Related 2: {{ len $related2 }} +Related 1: {{ template "list-related" $related1 }} +Related 2: {{ template "list-related" $related2 }} + +{{ define "list-related" }}{{ range $i, $e := . 
}} {{ $i }}: {{ .Title }}: {{ with .HeadingsFiltered}}{{ range $i, $e := .}}h{{ $i }}: {{ .Title }}|{{ .ID }}|{{ end }}{{ end }}::END{{ end }}{{ end }} + +-- layouts/_default/single.html -- +Content: {{ .Content }} + + +` + + b := hugolib.NewIntegrationTestBuilder( + hugolib.IntegrationTestConfig{ + T: t, + TxtarString: files, + }).Build() + + expect := ` +P1 Fragments: [p1-title] +P2 Fragments: [p2-title-1 p2-title-2 ref1] +Len Related 1: 1 +Related 2: 2 +` + + for _, p := range []string{"p1", "p2"} { + b.AssertFileContent("public/"+p+"/index.html", expect) + } + + b.AssertFileContent("public/p1/index.html", + "Related 1: 0: p2: h0: First title|ref1|::END", + "Related 2: 0: p5: h0: Common p3, p4, p5|common-p3-p4-p5|::END 1: p4: h0: Common p3, p4, p5|common-p3-p4-p5|::END", + ) + +} + func BenchmarkRelatedSite(b *testing.B) { files := ` -- config.toml -- @@ -33,6 +150,10 @@ disableKinds = ["taxonomy", "term", "RSS", "sitemap", "robotsTXT"] [[related.indices]] name = 'keywords' weight = 70 +[[related.indices]] + name = 'pagerefs' + type = 'fragments' + weight = 30 -- layouts/_default/single.html -- {{ range site.RegularPages }} {{ $tmp := .WordCount }} diff --git a/related/inverted_index.go b/related/inverted_index.go index 5502f9f11..eab97098a 100644 --- a/related/inverted_index.go +++ b/related/inverted_index.go @@ -15,20 +15,37 @@ package related import ( + "context" "errors" "fmt" "math" "sort" "strings" + "sync" "time" + xmaps "golang.org/x/exp/maps" + + "github.com/gohugoio/hugo/common/collections" "github.com/gohugoio/hugo/common/maps" + "github.com/gohugoio/hugo/compare" + "github.com/gohugoio/hugo/markup/tableofcontents" "github.com/spf13/cast" "github.com/gohugoio/hugo/common/types" "github.com/mitchellh/mapstructure" ) +const ( + TypeBasic = "basic" + TypeFragments = "fragments" +) + +var validTypes = map[string]bool{ + TypeBasic: true, + TypeFragments: true, +} + var ( _ Keyword = (*StringKeyword)(nil) zeroDate = time.Time{} @@ -37,8 +54,8 @@ var ( 
DefaultConfig = Config{ Threshold: 80, Indices: IndexConfigs{ - IndexConfig{Name: "keywords", Weight: 100}, - IndexConfig{Name: "date", Weight: 10}, + IndexConfig{Name: "keywords", Weight: 100, Type: TypeBasic}, + IndexConfig{Name: "date", Weight: 10, Type: TypeBasic}, }, } ) @@ -84,6 +101,15 @@ func (c *Config) Add(index IndexConfig) { c.Indices = append(c.Indices, index) } +func (c *Config) HasType(s string) bool { + for _, i := range c.Indices { + if i.Type == s { + return true + } + } + return false +} + // IndexConfigs holds a set of index configurations. type IndexConfigs []IndexConfig @@ -92,6 +118,13 @@ type IndexConfig struct { // The index name. This directly maps to a field or Param name. Name string + // The index type. + Type string + + // Enable to apply a type specific filter to the results. + // This is currently only used for the "fragments" type. + ApplyFilter bool + // Contextual pattern used to convert the Param value into a string. // Currently only used for dates. Can be used to, say, bump posts in the same // time frame when searching for related documents. @@ -120,6 +153,14 @@ type Document interface { Name() string } +// FragmentProvider is an optional interface that can be implemented by a Document. +type FragmentProvider interface { + Fragments(context.Context) *tableofcontents.Fragments + + // For internal use. + ApplyFilterToHeadings(context.Context, func(*tableofcontents.Heading) bool) Document +} + // InvertedIndex holds an inverted index, also sometimes named posting list, which // lists, for every possible search term, the documents that contain that term. type InvertedIndex struct { @@ -160,7 +201,7 @@ func NewInvertedIndex(cfg Config) *InvertedIndex { // Add documents to the inverted index. // The value must support == and !=. 
-func (idx *InvertedIndex) Add(docs ...Document) error { +func (idx *InvertedIndex) Add(ctx context.Context, docs ...Document) error { var err error for _, config := range idx.cfg.Indices { if config.Weight == 0 { @@ -179,6 +220,14 @@ func (idx *InvertedIndex) Add(docs ...Document) error { for _, keyword := range words { setm[keyword] = append(setm[keyword], doc) } + + if config.Type == TypeFragments { + if fp, ok := doc.(FragmentProvider); ok { + for _, fragment := range fp.Fragments(ctx).Identifiers { + setm[FragmentKeyword(fragment)] = append(setm[FragmentKeyword(fragment)], doc) + } + } + } } } @@ -209,8 +258,22 @@ func (r *rank) addWeight(w int) { r.Matches++ } -func newRank(doc Document, weight int) *rank { - return &rank{Doc: doc, Weight: weight, Matches: 1} +var rankPool = sync.Pool{ + New: func() interface{} { + return &rank{} + }, +} + +func getRank(doc Document, weight int) *rank { + r := rankPool.Get().(*rank) + r.Doc = doc + r.Weight = weight + r.Matches = 1 + return r +} + +func putRank(r *rank) { + rankPool.Put(r) } func (r ranks) Len() int { return len(r) } @@ -225,22 +288,41 @@ func (r ranks) Less(i, j int) bool { return r[i].Weight > r[j].Weight } -// SearchDoc finds the documents matching any of the keywords in the given indices -// against the given document. +// SearchOpts holds the options for a related search. +type SearchOpts struct { + // The Document to search for related content for. + Document Document + + // The keywords to search for. + NamedSlices []types.KeyValues + + // The indices to search in. + Indices []string + + // Fragments holds a a list of special keywords that is used + // for indices configured as type "fragments". + // This will match the fragment identifiers of the documents. + Fragments []string +} + +// Search finds the documents matching any of the keywords in the given indices +// against query options in opts. 
// The resulting document set will be sorted according to number of matches // and the index weights, and any matches with a rank below the configured // threshold (normalize to 0..100) will be removed. // If an index name is provided, only that index will be queried. -func (idx *InvertedIndex) SearchDoc(doc Document, indices ...string) ([]Document, error) { - var q []queryElement +func (idx *InvertedIndex) Search(ctx context.Context, opts SearchOpts) ([]Document, error) { - var configs IndexConfigs + var ( + queryElements []queryElement + configs IndexConfigs + ) - if len(indices) == 0 { + if len(opts.Indices) == 0 { configs = idx.cfg.Indices } else { - configs = make(IndexConfigs, len(indices)) - for i, indexName := range indices { + configs = make(IndexConfigs, len(opts.Indices)) + for i, indexName := range opts.Indices { cfg, found := idx.getIndexCfg(indexName) if !found { return nil, fmt.Errorf("index %q not found", indexName) @@ -250,40 +332,78 @@ func (idx *InvertedIndex) SearchDoc(doc Document, indices ...string) ([]Document } for _, cfg := range configs { - keywords, err := doc.RelatedKeywords(cfg) - if err != nil { - return nil, err + var keywords []Keyword + if opts.Document != nil { + k, err := opts.Document.RelatedKeywords(cfg) + if err != nil { + return nil, err + } + keywords = append(keywords, k...) 
+ } + if cfg.Type == TypeFragments { + for _, fragment := range opts.Fragments { + keywords = append(keywords, FragmentKeyword(fragment)) + } + if opts.Document != nil { + if fp, ok := opts.Document.(FragmentProvider); ok { + for _, fragment := range fp.Fragments(ctx).Identifiers { + keywords = append(keywords, FragmentKeyword(fragment)) + } + } + } + } + queryElements = append(queryElements, newQueryElement(cfg.Name, keywords...)) + } + for _, slice := range opts.NamedSlices { + var keywords []Keyword + key := slice.KeyString() + if key == "" { + return nil, fmt.Errorf("index %q not valid", slice.Key) + } + conf, found := idx.getIndexCfg(key) + if !found { + return nil, fmt.Errorf("index %q not found", key) } - q = append(q, newQueryElement(cfg.Name, keywords...)) + for _, val := range slice.Values { + k, err := conf.ToKeywords(val) + if err != nil { + return nil, err + } + keywords = append(keywords, k...) + } + queryElements = append(queryElements, newQueryElement(conf.Name, keywords...)) + } + if opts.Document != nil { + return idx.searchDate(ctx, opts.Document, opts.Document.PublishDate(), queryElements...) } + return idx.search(ctx, queryElements...) +} - return idx.searchDate(doc.PublishDate(), q...) +func (cfg IndexConfig) stringToKeyword(s string) Keyword { + if cfg.ToLower { + s = strings.ToLower(s) + } + if cfg.Type == TypeFragments { + return FragmentKeyword(s) + } + return StringKeyword(s) } // ToKeywords returns a Keyword slice of the given input. 
func (cfg IndexConfig) ToKeywords(v any) ([]Keyword, error) { - var ( - keywords []Keyword - toLower = cfg.ToLower - ) + var keywords []Keyword + switch vv := v.(type) { case string: - if toLower { - vv = strings.ToLower(vv) - } - keywords = append(keywords, StringKeyword(vv)) + keywords = append(keywords, cfg.stringToKeyword(vv)) case []string: - if toLower { - vc := make([]string, len(vv)) - copy(vc, vv) - for i := 0; i < len(vc); i++ { - vc[i] = strings.ToLower(vc[i]) - } - vv = vc + vvv := make([]Keyword, len(vv)) + for i := 0; i < len(vvv); i++ { + vvv[i] = cfg.stringToKeyword(vv[i]) } - keywords = append(keywords, StringsToKeywords(vv...)...) + keywords = append(keywords, vvv...) case []any: return cfg.ToKeywords(cast.ToStringSlice(vv)) case time.Time: @@ -301,46 +421,20 @@ func (cfg IndexConfig) ToKeywords(v any) ([]Keyword, error) { return keywords, nil } -// SearchKeyValues finds the documents matching any of the keywords in the given indices. -// The resulting document set will be sorted according to number of matches -// and the index weights, and any matches with a rank below the configured -// threshold (normalize to 0..100) will be removed. -func (idx *InvertedIndex) SearchKeyValues(args ...types.KeyValues) ([]Document, error) { - q := make([]queryElement, len(args)) - - for i, arg := range args { - var keywords []Keyword - key := arg.KeyString() - if key == "" { - return nil, fmt.Errorf("index %q not valid", arg.Key) - } - conf, found := idx.getIndexCfg(key) - if !found { - return nil, fmt.Errorf("index %q not found", key) - } - - for _, val := range arg.Values { - k, err := conf.ToKeywords(val) - if err != nil { - return nil, err - } - keywords = append(keywords, k...) - } - - q[i] = newQueryElement(conf.Name, keywords...) - - } - - return idx.search(q...) +func (idx *InvertedIndex) search(ctx context.Context, query ...queryElement) ([]Document, error) { + return idx.searchDate(ctx, nil, zeroDate, query...) 
} -func (idx *InvertedIndex) search(query ...queryElement) ([]Document, error) { - return idx.searchDate(zeroDate, query...) -} - -func (idx *InvertedIndex) searchDate(upperDate time.Time, query ...queryElement) ([]Document, error) { +func (idx *InvertedIndex) searchDate(ctx context.Context, self Document, upperDate time.Time, query ...queryElement) ([]Document, error) { matchm := make(map[Document]*rank, 200) + defer func() { + for _, r := range matchm { + putRank(r) + } + }() + applyDateFilter := !idx.cfg.IncludeNewer && !upperDate.IsZero() + var fragmentsFilter collections.SortedStringSlice for _, el := range query { setm, found := idx.index[el.Index] @@ -356,15 +450,27 @@ func (idx *InvertedIndex) searchDate(upperDate time.Time, query ...queryElement) for _, kw := range el.Keywords { if docs, found := setm[kw]; found { for _, doc := range docs { + if compare.Eq(doc, self) { + continue + } + if applyDateFilter { // Exclude newer than the limit given if doc.PublishDate().After(upperDate) { continue } } + + if config.Type == TypeFragments && config.ApplyFilter { + if fkw, ok := kw.(FragmentKeyword); ok { + fragmentsFilter = append(fragmentsFilter, string(fkw)) + } + } + r, found := matchm[doc] if !found { - matchm[doc] = newRank(doc, config.Weight) + r = getRank(doc, config.Weight) + matchm[doc] = r } else { r.addWeight(config.Weight) } @@ -390,11 +496,19 @@ func (idx *InvertedIndex) searchDate(upperDate time.Time, query ...queryElement) } sort.Stable(matches) + sort.Strings(fragmentsFilter) result := make([]Document, len(matches)) for i, m := range matches { result[i] = m.Doc + if len(fragmentsFilter) > 0 { + if dp, ok := result[i].(FragmentProvider); ok { + result[i] = dp.ApplyFilterToHeadings(ctx, func(h *tableofcontents.Heading) bool { + return fragmentsFilter.Contains(h.ID) + }) + } + } } return result, nil @@ -433,6 +547,14 @@ func DecodeConfig(m maps.Params) (Config, error) { c.Indices[i].ToLower = true } } + for i := range c.Indices { + if 
c.Indices[i].Type == "" { + c.Indices[i].Type = TypeBasic + } + if !validTypes[c.Indices[i].Type] { + return c, fmt.Errorf("invalid index type %q. Must be one of %v", c.Indices[i].Type, xmaps.Keys(validTypes)) + } + } return c, nil } @@ -444,17 +566,24 @@ func (s StringKeyword) String() string { return string(s) } +// FragmentKeyword represents a document fragment. +type FragmentKeyword string + +func (f FragmentKeyword) String() string { + return string(f) +} + // Keyword is the interface a keyword in the search index must implement. type Keyword interface { String() string } // StringsToKeywords converts the given slice of strings to a slice of Keyword. -func StringsToKeywords(s ...string) []Keyword { +func (cfg IndexConfig) StringsToKeywords(s ...string) []Keyword { kw := make([]Keyword, len(s)) for i := 0; i < len(s); i++ { - kw[i] = StringKeyword(s[i]) + kw[i] = cfg.stringToKeyword(s[i]) } return kw diff --git a/related/inverted_index_test.go b/related/inverted_index_test.go index 7a3bd89c7..d38a7f6eb 100644 --- a/related/inverted_index_test.go +++ b/related/inverted_index_test.go @@ -14,6 +14,7 @@ package related import ( + "context" "fmt" "math/rand" "testing" @@ -105,7 +106,7 @@ func TestSearch(t *testing.T) { newTestDoc("tags", "g", "h").addKeywords("keywords", "a", "b"), } - idx.Add(docs...) + idx.Add(context.Background(), docs...) 
t.Run("count", func(t *testing.T) { c := qt.New(t) @@ -122,7 +123,8 @@ func TestSearch(t *testing.T) { t.Run("search-tags", func(t *testing.T) { c := qt.New(t) - m, err := idx.search(newQueryElement("tags", StringsToKeywords("a", "b", "d", "z")...)) + var cfg IndexConfig + m, err := idx.search(context.Background(), newQueryElement("tags", cfg.StringsToKeywords("a", "b", "d", "z")...)) c.Assert(err, qt.IsNil) c.Assert(len(m), qt.Equals, 2) c.Assert(m[0], qt.Equals, docs[0]) @@ -131,9 +133,10 @@ func TestSearch(t *testing.T) { t.Run("search-tags-and-keywords", func(t *testing.T) { c := qt.New(t) - m, err := idx.search( - newQueryElement("tags", StringsToKeywords("a", "b", "z")...), - newQueryElement("keywords", StringsToKeywords("a", "b")...)) + var cfg IndexConfig + m, err := idx.search(context.Background(), + newQueryElement("tags", cfg.StringsToKeywords("a", "b", "z")...), + newQueryElement("keywords", cfg.StringsToKeywords("a", "b")...)) c.Assert(err, qt.IsNil) c.Assert(len(m), qt.Equals, 3) c.Assert(m[0], qt.Equals, docs[3]) @@ -144,7 +147,7 @@ func TestSearch(t *testing.T) { t.Run("searchdoc-all", func(t *testing.T) { c := qt.New(t) doc := newTestDoc("tags", "a").addKeywords("keywords", "a") - m, err := idx.SearchDoc(doc) + m, err := idx.Search(context.Background(), SearchOpts{Document: doc}) c.Assert(err, qt.IsNil) c.Assert(len(m), qt.Equals, 2) c.Assert(m[0], qt.Equals, docs[3]) @@ -154,7 +157,7 @@ func TestSearch(t *testing.T) { t.Run("searchdoc-tags", func(t *testing.T) { c := qt.New(t) doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b") - m, err := idx.SearchDoc(doc, "tags") + m, err := idx.Search(context.Background(), SearchOpts{Document: doc, Indices: []string{"tags"}}) c.Assert(err, qt.IsNil) c.Assert(len(m), qt.Equals, 2) c.Assert(m[0], qt.Equals, docs[0]) @@ -166,9 +169,9 @@ func TestSearch(t *testing.T) { doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b") // This will get a date newer than the 
others. newDoc := newTestDoc("keywords", "a", "b") - idx.Add(newDoc) + idx.Add(context.Background(), newDoc) - m, err := idx.SearchDoc(doc, "keywords") + m, err := idx.Search(context.Background(), SearchOpts{Document: doc, Indices: []string{"keywords"}}) c.Assert(err, qt.IsNil) c.Assert(len(m), qt.Equals, 2) c.Assert(m[0], qt.Equals, docs[3]) @@ -186,10 +189,10 @@ func TestSearch(t *testing.T) { for i := 0; i < 10; i++ { docc := *doc docc.name = fmt.Sprintf("doc%d", i) - idx.Add(&docc) + idx.Add(context.Background(), &docc) } - m, err := idx.SearchDoc(doc, "keywords") + m, err := idx.Search(context.Background(), SearchOpts{Document: doc, Indices: []string{"keywords"}}) c.Assert(err, qt.IsNil) c.Assert(len(m), qt.Equals, 10) for i := 0; i < 10; i++ { @@ -265,7 +268,7 @@ func BenchmarkRelatedNewIndex(b *testing.B) { for i := 0; i < b.N; i++ { idx := NewInvertedIndex(cfg) for _, doc := range pages { - idx.Add(doc) + idx.Add(context.Background(), doc) } } }) @@ -277,14 +280,15 @@ func BenchmarkRelatedNewIndex(b *testing.B) { for i := 0; i < len(pages); i++ { docs[i] = pages[i] } - idx.Add(docs...) + idx.Add(context.Background(), docs...) } }) } func BenchmarkRelatedMatchesIn(b *testing.B) { - q1 := newQueryElement("tags", StringsToKeywords("keyword2", "keyword5", "keyword32", "asdf")...) - q2 := newQueryElement("keywords", StringsToKeywords("keyword3", "keyword4")...) + var icfg IndexConfig + q1 := newQueryElement("tags", icfg.StringsToKeywords("keyword2", "keyword5", "keyword32", "asdf")...) + q2 := newQueryElement("keywords", icfg.StringsToKeywords("keyword3", "keyword4")...) 
docs := make([]*testDoc, 1000) numkeywords := 20 @@ -315,15 +319,16 @@ func BenchmarkRelatedMatchesIn(b *testing.B) { index = "keywords" } - idx.Add(newTestDoc(index, allKeywords[start:end]...)) + idx.Add(context.Background(), newTestDoc(index, allKeywords[start:end]...)) } b.ResetTimer() + ctx := context.Background() for i := 0; i < b.N; i++ { if i%10 == 0 { - idx.search(q2) + idx.search(ctx, q2) } else { - idx.search(q1) + idx.search(ctx, q1) } } } |