From 6968ad205726258117f996b8ef59ba9020182dc0 Mon Sep 17 00:00:00 2001 From: "Xiaochao Dong (@damnever)" Date: Thu, 11 Aug 2022 21:17:32 +0800 Subject: [PATCH] Store: improve index header reading performance by sorting labels first Signed-off-by: Xiaochao Dong (@damnever) --- CHANGELOG.md | 1 + pkg/store/bucket.go | 17 +++++++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5d84137fbf..e008dbd46d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#5451](https://github.com/thanos-io/thanos/pull/5451) Azure: Reduce memory usage by not buffering file downloads entirely in memory. - [#5484](https://github.com/thanos-io/thanos/pull/5484) Update Prometheus deps to v2.36.2. - [#5511](https://github.com/thanos-io/thanos/pull/5511) Update Prometheus deps to v2.37.0. +- [#5588](https://github.com/thanos-io/thanos/pull/5588) Store: improve index header reading performance by sorting labels first. ### Removed diff --git a/pkg/store/bucket.go b/pkg/store/bucket.go index 8cd3d264196..a617dd49e2f 100644 --- a/pkg/store/bucket.go +++ b/pkg/store/bucket.go @@ -1855,6 +1855,10 @@ func (r *bucketIndexReader) ExpandedPostings(ctx context.Context, ms []*labels.M keys = append(keys, allPostingsLabel) } + // Sorting the keys by label name and value improves performance dramatically + // when the dataset is relatively large: entries in the postings offset table + // are sorted by label name and value, so reading them sequentially is always faster. + sort.Sort(labels.Labels(keys)) fetchedPostings, err := r.fetchPostings(ctx, keys) if err != nil { return nil, errors.Wrap(err, "get postings") @@ -1934,13 +1938,14 @@ func checkNilPosting(l labels.Label, p index.Postings) index.Postings { // NOTE: Derived from tsdb.postingsForMatcher. index.Merge is equivalent to map duplication. 
func toPostingGroup(lvalsFn func(name string) ([]string, error), m *labels.Matcher) (*postingGroup, error) { - if m.Type == labels.MatchRegexp && len(findSetMatches(m.Value)) > 0 { - vals := findSetMatches(m.Value) - toAdd := make([]labels.Label, 0, len(vals)) - for _, val := range vals { - toAdd = append(toAdd, labels.Label{Name: m.Name, Value: val}) + if m.Type == labels.MatchRegexp { + if vals := findSetMatches(m.Value); len(vals) > 0 { + toAdd := make([]labels.Label, 0, len(vals)) + for _, val := range vals { + toAdd = append(toAdd, labels.Label{Name: m.Name, Value: val}) + } + return newPostingGroup(false, toAdd, nil), nil } - return newPostingGroup(false, toAdd, nil), nil } // If the matcher selects an empty value, it selects all the series which don't