From 6968ad205726258117f996b8ef59ba9020182dc0 Mon Sep 17 00:00:00 2001
From: "Xiaochao Dong (@damnever)" <the.xcdong@gmail.com>
Date: Thu, 11 Aug 2022 21:17:32 +0800
Subject: [PATCH] Store: improve index header reading performance by sorting
 labels first

Signed-off-by: Xiaochao Dong (@damnever) <the.xcdong@gmail.com>
---
 CHANGELOG.md        |  1 +
 pkg/store/bucket.go | 17 +++++++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f5d84137fbf..e008dbd46d0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -40,6 +40,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
 - [#5451](https://github.com/thanos-io/thanos/pull/5451) Azure: Reduce memory usage by not buffering file downloads entirely in memory.
 - [#5484](https://github.com/thanos-io/thanos/pull/5484) Update Prometheus deps to v2.36.2.
 - [#5511](https://github.com/thanos-io/thanos/pull/5511) Update Prometheus deps to v2.37.0.
+- [#5588](https://github.com/thanos-io/thanos/pull/5588) Store: improve index header reading performance by sorting labels first.
 
 ### Removed
 
diff --git a/pkg/store/bucket.go b/pkg/store/bucket.go
index 8cd3d264196..a617dd49e2f 100644
--- a/pkg/store/bucket.go
+++ b/pkg/store/bucket.go
@@ -1855,6 +1855,10 @@ func (r *bucketIndexReader) ExpandedPostings(ctx context.Context, ms []*labels.M
 		keys = append(keys, allPostingsLabel)
 	}
 
+	// Sorting the keys by label name and value improves performance dramatically
+	// when the dataset is relatively large: entries in the postings offset table
+	// are sorted by label name and value, so reading them sequentially is always faster.
+	sort.Sort(labels.Labels(keys))
 	fetchedPostings, err := r.fetchPostings(ctx, keys)
 	if err != nil {
 		return nil, errors.Wrap(err, "get postings")
@@ -1934,13 +1938,14 @@ func checkNilPosting(l labels.Label, p index.Postings) index.Postings {
 
 // NOTE: Derived from tsdb.postingsForMatcher. index.Merge is equivalent to map duplication.
 func toPostingGroup(lvalsFn func(name string) ([]string, error), m *labels.Matcher) (*postingGroup, error) {
-	if m.Type == labels.MatchRegexp && len(findSetMatches(m.Value)) > 0 {
-		vals := findSetMatches(m.Value)
-		toAdd := make([]labels.Label, 0, len(vals))
-		for _, val := range vals {
-			toAdd = append(toAdd, labels.Label{Name: m.Name, Value: val})
+	if m.Type == labels.MatchRegexp {
+		if vals := findSetMatches(m.Value); len(vals) > 0 {
+			toAdd := make([]labels.Label, 0, len(vals))
+			for _, val := range vals {
+				toAdd = append(toAdd, labels.Label{Name: m.Name, Value: val})
+			}
+			return newPostingGroup(false, toAdd, nil), nil
 		}
-		return newPostingGroup(false, toAdd, nil), nil
 	}
 
 	// If the matcher selects an empty value, it selects all the series which don't