Skip to content

Commit

Permalink
querier: Allows single store in case of duplicates. Drop others. (#1338)
Browse files Browse the repository at this point in the history
* querier: Allows single store in case of duplicates. Drop others.

Fixes #1337

Signed-off-by: Bartek Plotka <bwplotka@gmail.com>

* Updated CHANGELOG.

Signed-off-by: Bartek Plotka <bwplotka@gmail.com>
  • Loading branch information
bwplotka authored Jul 18, 2019
1 parent aa47901 commit ed1a59f
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 44 deletions.
9 changes: 7 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ We use *breaking* word for marking changes that are not backward compatible (rel

## Unreleased.

- [#1338](https://github.com/improbable-eng/thanos/pull/1338) Querier still warns on store API duplicate, but allows a single one from duplicated set. This is gracefully warn about the problematic logic and not disrupt immediately.

### Fixed

- [#1327](https://github.com/improbable-eng/thanos/pull/1327) `/series` API end-point now properly returns an empty array just like Prometheus if there are no results
Expand Down Expand Up @@ -42,6 +44,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel
To migrate over the old `--gcloudtrace.*` configuration, your tracing configuration should look like this:

```yaml

---
type: STACKDRIVER
config:
Expand All @@ -54,7 +57,9 @@ The other `type` you can use is `JAEGER` now. The `config` keys and values are J

### Changed

- [#1284](https://github.com/improbable-eng/thanos/pull/1284) Add support for multiple label-sets in Info gRPC service. This deprecates the single `Labels` slice of the `InfoResponse`, in a future release backward compatible handling for the single set of Labels will be removed. Upgrading to v0.6.0 or higher is advised.
- [#1284](https://github.com/improbable-eng/thanos/pull/1284) Add support for multiple label-sets in Info gRPC service.
This deprecates the single `Labels` slice of the `InfoResponse`, in a future release backward compatible handling for the single set of Labels will be removed. Upgrading to v0.6.0 or higher is advised.
*breaking* If you run have duplicate queries in your Querier configuration with hierarchical federation of multiple Queries this PR makes Thanos Querier to detect this case and block all duplicates. Refer to 0.6.1 which at least allows for single replica to work.

- [#1314](https://github.com/improbable-eng/thanos/pull/1314) Removes `http_request_duration_microseconds` (Summary) and adds `http_request_duration_seconds` (Histogram) from http server instrumentation used in Thanos APIs and UIs.

Expand All @@ -70,7 +75,7 @@ The other `type` you can use is `JAEGER` now. The `config` keys and values are J

- [#1227](https://github.com/improbable-eng/thanos/pull/1227) Some context handling issues were fixed in Thanos Compact; some unnecessary memory allocations were removed in the hot path of Thanos Store.

- [#1183](https://github.com/improbable-eng/thanos/pull/1183) Compactor now correctly propogates retriable/haltable errors which means that it will not unnecessarily restart if such an error occurs
- [#1183](https://github.com/improbable-eng/thanos/pull/1183) Compactor now correctly propagates retriable/haltable errors which means that it will not unnecessarily restart if such an error occurs

- [#1231](https://github.com/improbable-eng/thanos/pull/1231) Receive now correctly handles SIGINT and closes without deadlocking

Expand Down
26 changes: 15 additions & 11 deletions pkg/query/storeset.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,28 +240,32 @@ func (s *StoreSet) Update(ctx context.Context) {

// Add stores that are not yet in s.stores.
for addr, store := range healthyStores {
// Check if it has some external labels specified.
// No external labels means strictly store gateway or ruler and it is fine to have access to multiple instances of them.
if _, ok := s.stores[addr]; ok {
s.updateStoreStatus(store, nil)
continue
}

// Check if the store has some external labels specified and if any if there are duplicates.
// We warn and exclude duplicates because it unnecessarily puts additional load on querier, network and underlying StoreAPIs and
// it indicates misconfiguration.
//
// Sidecar will error out if it will be configured with empty external labels.
// Note: No external labels means strictly store gateway or ruler and it is fine to have access to multiple instances of them.
// Any other component will error out if it will be configured with empty external labels.
externalLabels := externalLabelsFromStore(store)
storesWithExternalLabels := externalLabelOccurrencesInStores[externalLabels]
if len(store.LabelSets()) > 0 && storesWithExternalLabels != 1 {
if len(store.LabelSets()) > 0 && externalLabelOccurrencesInStores[externalLabels] != 1 {
store.close()
s.updateStoreStatus(store, errors.New(droppingStoreMessage))
level.Warn(s.logger).Log("msg", droppingStoreMessage, "address", addr, "extLset", externalLabels, "duplicates", storesWithExternalLabels)
continue
}

if _, ok := s.stores[addr]; ok {
s.updateStoreStatus(store, nil)
level.Warn(s.logger).Log("msg", droppingStoreMessage, "address", addr, "extLset", externalLabels, "duplicates", externalLabelOccurrencesInStores[externalLabels])
// We don't want to block all of them. Leave one to not disrupt in terms of migration.
externalLabelOccurrencesInStores[externalLabels]--
continue
}

s.stores[addr] = store
s.updateStoreStatus(store, nil)
level.Info(s.logger).Log("msg", "adding new store to query storeset", "address", addr)
}

s.externalLabelOccurrencesInStores = externalLabelOccurrencesInStores
s.storeNodeConnections.Set(float64(len(s.stores)))
s.cleanUpStoreStatuses()
Expand Down
55 changes: 24 additions & 31 deletions pkg/query/storeset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,38 +241,35 @@ func TestStoreSet_AllAvailable_BlockExtLsetDuplicates(t *testing.T) {

storeExtLabels := [][]storepb.Label{
{
{
Name: "l1",
Value: "v1",
},
{Name: "l1", Value: "v1"},
},
{
{
Name: "l1",
Value: "v2",
},
{Name: "l1", Value: "v2"},
{Name: "l2", Value: "v3"},
},
{
{
// Duplicate with above.
Name: "l1",
Value: "v2",
},
// Duplicate with above.
{Name: "l1", Value: "v2"},
{Name: "l2", Value: "v3"},
},
// Two store nodes, they don't have ext labels set.
nil,
nil,
{
// Duplicate with two others.
{Name: "l1", Value: "v2"},
{Name: "l2", Value: "v3"},
},
}
st, err := newTestStores(5, storeExtLabels...)

st, err := newTestStores(6, storeExtLabels...)
testutil.Ok(t, err)
defer st.Close()

initialStoreAddr := st.StoreAddresses()

logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))
logger = level.NewFilter(logger, level.AllowDebug())
logger = log.With(logger, "ts", log.DefaultTimestampUTC, "caller", log.DefaultCaller)
storeSet := NewStoreSet(logger, nil, specsFromAddrFunc(initialStoreAddr), testGRPCOpts, time.Minute)
storeSet := NewStoreSet(logger, nil, specsFromAddrFunc(st.StoreAddresses()), testGRPCOpts, time.Minute)
storeSet.gRPCInfoCallTimeout = 2 * time.Second
defer storeSet.Close()

Expand All @@ -282,12 +279,9 @@ func TestStoreSet_AllAvailable_BlockExtLsetDuplicates(t *testing.T) {
storeSet.Update(context.Background())
storeSet.Update(context.Background())

storeNum := len(storeSet.stores)
expectedStoreNum := 5 - 2
testutil.Assert(t, storeNum == expectedStoreNum, fmt.Sprintf("all services should respond just fine, but we expect duplicates being blocked. Expected %d stores, got %d", expectedStoreNum, storeNum))
testutil.Assert(t, len(storeSet.stores) == 4, fmt.Sprintf("all services should respond just fine, but we expect duplicates being blocked. Expected %d stores, got %d", 4, len(storeSet.stores)))

// Sort result to be able to compare.

var existingStoreLabels [][]storepb.Label
for _, store := range storeSet.stores {
for _, ls := range store.LabelSets() {
Expand All @@ -298,14 +292,13 @@ func TestStoreSet_AllAvailable_BlockExtLsetDuplicates(t *testing.T) {
return len(existingStoreLabels[i]) > len(existingStoreLabels[j])
})

var i int
for _, lset := range existingStoreLabels {
testutil.Equals(t, storeExtLabels[i], lset)

i++
if i == 1 {
// Store 1 and 2 should be blocked, so fast forward.
i += 2
}
}
testutil.Equals(t, [][]storepb.Label{
{
{Name: "l1", Value: "v2"},
{Name: "l2", Value: "v3"},
},
{
{Name: "l1", Value: "v1"},
},
}, existingStoreLabels)
}

0 comments on commit ed1a59f

Please sign in to comment.