Skip to content

Commit

Permalink
Add a circuit-breaker to memcached dial calls
Browse files Browse the repository at this point in the history
If the server is unavailable, this will avoid Cortex hammering it with
new connection requests and flooding the log.

We use a 3rd-party fork of the gomemcache library so we can set a
custom dialer function - see bradfitz/gomemcache#86
(this branch is slightly behind the version we used before, but only
in code we don't call)

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
  • Loading branch information
bboreham committed Aug 17, 2020
1 parent e8a6686 commit fadcd89
Show file tree
Hide file tree
Showing 12 changed files with 587 additions and 34 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
* [ENHANCEMENT] Query-tee: added a small tolerance to floating point sample values comparison. #2994
* [ENHANCEMENT] Query-tee: add support for doing a passthrough of requests to preferred backend for unregistered routes #3018
* [ENHANCEMENT] Expose `storage.aws.dynamodb.backoff_config` configuration file field. #3026
* [ENHANCEMENT] Memcached dial() calls now have a circuit-breaker to avoid hammering a broken cache #3051
* [BUGFIX] Query-frontend: Fixed rounding for incoming query timestamps, to be 100% Prometheus compatible. #2990
* [BUGFIX] Querier: query /series from ingesters regardless of the `-querier.query-ingesters-within` setting. #3035
* [BUGFIX] Experimental blocks storage: Ingester is less likely to hit gRPC message size limit when streaming data to queriers. #3015
Expand Down
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ require (
github.com/prometheus/prometheus v1.8.2-0.20200811193703-869f1bc587e6
github.com/rafaeljusto/redigomock v0.0.0-20190202135759-257e089e14a1
github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e
github.com/sony/gobreaker v0.4.1
github.com/spf13/afero v1.2.2
github.com/stretchr/testify v1.5.1
github.com/thanos-io/thanos v0.13.1-0.20200807203500-9b578afb4763
Expand Down Expand Up @@ -79,3 +80,6 @@ replace github.com/gocql/gocql => github.com/grafana/gocql v0.0.0-20200605141915

// We can't upgrade to grpc 1.30.0 until go.etcd.io/etcd will support it.
replace google.golang.org/grpc => google.golang.org/grpc v1.29.1

// Using a 3rd-party branch for custom dialer - see https://github.com/bradfitz/gomemcache/pull/86
replace github.com/bradfitz/gomemcache => github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1018,6 +1018,7 @@ github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIK
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
github.com/sony/gobreaker v0.4.1 h1:oMnRNZXX5j85zso6xCPRNPtmAycat+WcoKbklScLDgQ=
github.com/sony/gobreaker v0.4.1/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY=
github.com/soundcloud/go-runit v0.0.0-20150630195641-06ad41a06c4a/go.mod h1:LeFCbQYJ3KJlPs/FvPz2dy1tkpxyeNESVyCNNzRXFR0=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
Expand Down Expand Up @@ -1050,6 +1051,8 @@ github.com/thanos-io/thanos v0.8.1-0.20200109203923-552ffa4c1a0d/go.mod h1:usT/T
github.com/thanos-io/thanos v0.13.1-0.20200731083140-69b87607decf/go.mod h1:G8caR6G7pSDreRDvFm9wFuyjEBztmr8Ag3kBYpa/fEc=
github.com/thanos-io/thanos v0.13.1-0.20200807203500-9b578afb4763 h1:c84P3YUu8bxLWE2csCSK4XJNi5FxcC+HL4WDNDEbTwA=
github.com/thanos-io/thanos v0.13.1-0.20200807203500-9b578afb4763/go.mod h1:KyW0a93tsh7v4hXAwo2CVAIRYuZT1Kkf4e04gisQjAg=
github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab h1:7ZR3hmisBWw77ZpO1/o86g+JV3VKlk3d48jopJxzTjU=
github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab/go.mod h1:eheTFp954zcWZXCU8d0AT76ftsQOTo4DTqkN/h3k1MY=
github.com/tidwall/pretty v0.0.0-20180105212114-65a9db5fad51/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
Expand Down
48 changes: 48 additions & 0 deletions pkg/chunk/cache/memcached_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/sony/gobreaker"
"github.com/thanos-io/thanos/pkg/discovery/dns"

"github.com/cortexproject/cortex/pkg/util"
Expand All @@ -35,6 +36,8 @@ type serverSelector interface {
// memcachedClient is a memcache client that gets its server list from SRV
// records, and periodically updates that ServerList.
type memcachedClient struct {
sync.Mutex
name string
*memcache.Client
serverList serverSelector

Expand All @@ -43,6 +46,7 @@ type memcachedClient struct {

addresses []string
provider *dns.Provider
cbs map[ /*address*/ string]*gobreaker.CircuitBreaker

quit chan struct{}
wait sync.WaitGroup
Expand Down Expand Up @@ -93,12 +97,14 @@ func NewMemcachedClient(cfg MemcachedClientConfig, name string, r prometheus.Reg
}, r))

newClient := &memcachedClient{
name: name,
Client: client,
serverList: selector,
hostname: cfg.Host,
service: cfg.Service,
logger: logger,
provider: dns.NewProvider(logger, dnsProviderRegisterer, dns.GolangResolverType),
cbs: make(map[string]*gobreaker.CircuitBreaker),
quit: make(chan struct{}),

numServers: promauto.With(r).NewGauge(prometheus.GaugeOpts{
Expand All @@ -108,6 +114,7 @@ func NewMemcachedClient(cfg MemcachedClientConfig, name string, r prometheus.Reg
ConstLabels: prometheus.Labels{"name": name},
}),
}
newClient.Client.DialTimeout = newClient.dial

if len(cfg.Addresses) > 0 {
util.WarnExperimentalUse("DNS-based memcached service discovery")
Expand All @@ -124,6 +131,33 @@ func NewMemcachedClient(cfg MemcachedClientConfig, name string, r prometheus.Reg
return newClient
}

// circuitBreakerStateChange records, at info level, a transition of the
// circuit-breaker called name from one state to another. dial installs it as
// the OnStateChange callback of every breaker it creates.
func (c *memcachedClient) circuitBreakerStateChange(name string, from gobreaker.State, to gobreaker.State) {
	infoLogger := level.Info(c.logger)
	infoLogger.Log(
		"msg", "circuit-breaker state change",
		"name", name,
		"from", from,
		"to", to,
	)
}

// dial connects to address over network, giving up after timeout, with the
// attempt routed through a circuit-breaker that is kept per address. Breakers
// are created lazily on the first dial to an address and stored in c.cbs.
//
// Any error returned by the breaker's Execute call is returned unchanged with
// a nil connection.
func (c *memcachedClient) dial(network, address string, timeout time.Duration) (net.Conn, error) {
	// The mutex only guards the breaker map; it is released before dialling
	// so that a slow connection attempt does not hold up other dials.
	c.Lock()
	breaker := c.cbs[address]
	if breaker == nil {
		// ReadyToTrip is left unset, so gobreaker's default trip condition applies.
		settings := gobreaker.Settings{
			Name: c.name + ":" + address,
			// While the breaker is closed, failure counts are cleared at this interval.
			Interval: 10 * time.Second,
			// Once tripped, the breaker stays open for this long before
			// moving to half-open (per the gobreaker Settings documentation).
			Timeout:       10 * time.Second,
			OnStateChange: c.circuitBreakerStateChange,
		}
		breaker = gobreaker.NewCircuitBreaker(settings)
		c.cbs[address] = breaker
	}
	c.Unlock()

	attempt := func() (interface{}, error) {
		return net.DialTimeout(network, address, timeout)
	}
	result, err := breaker.Execute(attempt)
	if err != nil {
		return nil, err
	}
	return result.(net.Conn), nil
}

// Stop the memcache client.
func (c *memcachedClient) Stop() {
close(c.quit)
Expand Down Expand Up @@ -186,6 +220,20 @@ func (c *memcachedClient) updateMemcacheServers() error {
}
}

if len(servers) > 0 {
// Copy across circuit-breakers for current set of addresses, thus
// leaving behind any for servers we won't talk to again
c.Lock()
newCBs := make(map[string]*gobreaker.CircuitBreaker, len(servers))
for _, address := range servers {
if cb, exists := c.cbs[address]; exists {
newCBs[address] = cb
}
}
c.cbs = newCBs
c.Unlock()
}

// ServerList deterministically maps keys to _index_ of the server list.
// Since DNS returns records in different order each time, we sort to
// guarantee best possible match between nodes.
Expand Down
39 changes: 6 additions & 33 deletions vendor/github.com/bradfitz/gomemcache/memcache/memcache.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions vendor/github.com/sony/gobreaker/.travis.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions vendor/github.com/sony/gobreaker/LICENSE

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

128 changes: 128 additions & 0 deletions vendor/github.com/sony/gobreaker/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions vendor/github.com/sony/gobreaker/go.mod

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions vendor/github.com/sony/gobreaker/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit fadcd89

Please sign in to comment.