From bb4657ced40d0f8b4787c1b543e4cad1d1eb7b1e Mon Sep 17 00:00:00 2001 From: disksing Date: Tue, 10 Jul 2018 11:11:04 +0800 Subject: [PATCH] schedule: add options to disable replica checker features (#1140) --- server/cluster_info.go | 20 +++++++++ server/config.go | 54 ++++++++++++++++-------- server/option.go | 20 +++++++++ server/schedule/mockcluster.go | 66 ++++++++++++++++++++++-------- server/schedule/opts.go | 7 ++++ server/schedule/replica_checker.go | 18 ++++++-- server/schedulers/balance_test.go | 44 ++++++++++++++++++++ 7 files changed, 192 insertions(+), 37 deletions(-) diff --git a/server/cluster_info.go b/server/cluster_info.go index 2820bc06e43..6b2c3f5b8ae 100644 --- a/server/cluster_info.go +++ b/server/cluster_info.go @@ -621,6 +621,26 @@ func (c *clusterInfo) IsRaftLearnerEnabled() bool { return c.opt.IsRaftLearnerEnabled() } +func (c *clusterInfo) IsRemoveDownReplicaEnabled() bool { + return c.opt.IsRemoveDownReplicaEnabled() +} + +func (c *clusterInfo) IsReplaceOfflineReplicaEnabled() bool { + return c.opt.IsReplaceOfflineReplicaEnabled() +} + +func (c *clusterInfo) IsMakeUpReplicaEnabled() bool { + return c.opt.IsMakeUpReplicaEnabled() +} + +func (c *clusterInfo) IsRemoveExtraReplicaEnabled() bool { + return c.opt.IsRemoveExtraReplicaEnabled() +} + +func (c *clusterInfo) IsLocationReplacementEnabled() bool { + return c.opt.IsLocationReplacementEnabled() +} + func (c *clusterInfo) CheckLabelProperty(typ string, labels []*metapb.StoreLabel) bool { return c.opt.CheckLabelProperty(typ, labels) } diff --git a/server/config.go b/server/config.go index c5b00d4db95..791da94314c 100644 --- a/server/config.go +++ b/server/config.go @@ -410,6 +410,23 @@ type ScheduleConfig struct { HighSpaceRatio float64 `toml:"high-space-ratio,omitempty" json:"high-space-ratio"` // DisableLearner is the option to disable using AddLearnerNode instead of AddNode DisableLearner bool `toml:"disable-raft-learner" json:"disable-raft-learner,string"` + + // 
DisableRemoveDownReplica is the option to prevent replica checker from + // removing down replicas. + DisableRemoveDownReplica bool `toml:"disable-remove-down-replica" json:"disable-remove-down-replica,string"` + // DisableReplaceOfflineReplica is the option to prevent replica checker from + // replacing offline replicas. + DisableReplaceOfflineReplica bool `toml:"disable-replace-offline-replica" json:"disable-replace-offline-replica,string"` + // DisableMakeUpReplica is the option to prevent replica checker from making up + // replicas when replica count is less than expected. + DisableMakeUpReplica bool `toml:"disable-make-up-replica" json:"disable-make-up-replica,string"` + // DisableRemoveExtraReplica is the option to prevent replica checker from + // removing extra replicas. + DisableRemoveExtraReplica bool `toml:"disable-remove-extra-replica" json:"disable-remove-extra-replica,string"` + // DisableLocationReplacement is the option to prevent replica checker from + // moving replica to a better location. 
+ DisableLocationReplacement bool `toml:"disable-location-replacement" json:"disable-location-replacement,string"` + // Schedulers support for loding customized schedulers Schedulers SchedulerConfigs `toml:"schedulers,omitempty" json:"schedulers-v2"` // json v2 is for the sake of compatible upgrade } @@ -418,22 +435,27 @@ func (c *ScheduleConfig) clone() *ScheduleConfig { schedulers := make(SchedulerConfigs, len(c.Schedulers)) copy(schedulers, c.Schedulers) return &ScheduleConfig{ - MaxSnapshotCount: c.MaxSnapshotCount, - MaxPendingPeerCount: c.MaxPendingPeerCount, - MaxMergeRegionSize: c.MaxMergeRegionSize, - MaxMergeRegionRows: c.MaxMergeRegionRows, - SplitMergeInterval: c.SplitMergeInterval, - PatrolRegionInterval: c.PatrolRegionInterval, - MaxStoreDownTime: c.MaxStoreDownTime, - LeaderScheduleLimit: c.LeaderScheduleLimit, - RegionScheduleLimit: c.RegionScheduleLimit, - ReplicaScheduleLimit: c.ReplicaScheduleLimit, - MergeScheduleLimit: c.MergeScheduleLimit, - TolerantSizeRatio: c.TolerantSizeRatio, - LowSpaceRatio: c.LowSpaceRatio, - HighSpaceRatio: c.HighSpaceRatio, - DisableLearner: c.DisableLearner, - Schedulers: schedulers, + MaxSnapshotCount: c.MaxSnapshotCount, + MaxPendingPeerCount: c.MaxPendingPeerCount, + MaxMergeRegionSize: c.MaxMergeRegionSize, + MaxMergeRegionRows: c.MaxMergeRegionRows, + SplitMergeInterval: c.SplitMergeInterval, + PatrolRegionInterval: c.PatrolRegionInterval, + MaxStoreDownTime: c.MaxStoreDownTime, + LeaderScheduleLimit: c.LeaderScheduleLimit, + RegionScheduleLimit: c.RegionScheduleLimit, + ReplicaScheduleLimit: c.ReplicaScheduleLimit, + MergeScheduleLimit: c.MergeScheduleLimit, + TolerantSizeRatio: c.TolerantSizeRatio, + LowSpaceRatio: c.LowSpaceRatio, + HighSpaceRatio: c.HighSpaceRatio, + DisableLearner: c.DisableLearner, + DisableRemoveDownReplica: c.DisableRemoveDownReplica, + DisableReplaceOfflineReplica: c.DisableReplaceOfflineReplica, + DisableMakeUpReplica: c.DisableMakeUpReplica, + DisableRemoveExtraReplica: 
c.DisableRemoveExtraReplica, + DisableLocationReplacement: c.DisableLocationReplacement, + Schedulers: schedulers, } } diff --git a/server/option.go b/server/option.go index 645a31df647..5298d9288b7 100644 --- a/server/option.go +++ b/server/option.go @@ -144,6 +144,26 @@ func (o *scheduleOption) IsRaftLearnerEnabled() bool { return !o.load().DisableLearner } +func (o *scheduleOption) IsRemoveDownReplicaEnabled() bool { + return !o.load().DisableRemoveDownReplica +} + +func (o *scheduleOption) IsReplaceOfflineReplicaEnabled() bool { + return !o.load().DisableReplaceOfflineReplica +} + +func (o *scheduleOption) IsMakeUpReplicaEnabled() bool { + return !o.load().DisableMakeUpReplica +} + +func (o *scheduleOption) IsRemoveExtraReplicaEnabled() bool { + return !o.load().DisableRemoveExtraReplica +} + +func (o *scheduleOption) IsLocationReplacementEnabled() bool { + return !o.load().DisableLocationReplacement +} + func (o *scheduleOption) GetSchedulers() SchedulerConfigs { return o.load().Schedulers } diff --git a/server/schedule/mockcluster.go b/server/schedule/mockcluster.go index 2c4a07ac69a..d0aa69f22f4 100644 --- a/server/schedule/mockcluster.go +++ b/server/schedule/mockcluster.go @@ -432,24 +432,29 @@ const ( // MockSchedulerOptions is a mock of SchedulerOptions // which implements Options interface type MockSchedulerOptions struct { - RegionScheduleLimit uint64 - LeaderScheduleLimit uint64 - ReplicaScheduleLimit uint64 - MergeScheduleLimit uint64 - MaxSnapshotCount uint64 - MaxPendingPeerCount uint64 - MaxMergeRegionSize uint64 - MaxMergeRegionRows uint64 - SplitMergeInterval time.Duration - MaxStoreDownTime time.Duration - MaxReplicas int - LocationLabels []string - HotRegionLowThreshold int - TolerantSizeRatio float64 - LowSpaceRatio float64 - HighSpaceRatio float64 - DisableLearner bool - LabelProperties map[string][]*metapb.StoreLabel + RegionScheduleLimit uint64 + LeaderScheduleLimit uint64 + ReplicaScheduleLimit uint64 + MergeScheduleLimit uint64 + 
MaxSnapshotCount uint64 + MaxPendingPeerCount uint64 + MaxMergeRegionSize uint64 + MaxMergeRegionRows uint64 + SplitMergeInterval time.Duration + MaxStoreDownTime time.Duration + MaxReplicas int + LocationLabels []string + HotRegionLowThreshold int + TolerantSizeRatio float64 + LowSpaceRatio float64 + HighSpaceRatio float64 + DisableLearner bool + DisableRemoveDownReplica bool + DisableReplaceOfflineReplica bool + DisableMakeUpReplica bool + DisableRemoveExtraReplica bool + DisableLocationReplacement bool + LabelProperties map[string][]*metapb.StoreLabel } // NewMockSchedulerOptions creates a mock schedule option. @@ -562,3 +567,28 @@ func (mso *MockSchedulerOptions) SetMaxReplicas(replicas int) { func (mso *MockSchedulerOptions) IsRaftLearnerEnabled() bool { return !mso.DisableLearner } + +// IsRemoveDownReplicaEnabled mock method. +func (mso *MockSchedulerOptions) IsRemoveDownReplicaEnabled() bool { + return !mso.DisableRemoveDownReplica +} + +// IsReplaceOfflineReplicaEnabled mock method. +func (mso *MockSchedulerOptions) IsReplaceOfflineReplicaEnabled() bool { + return !mso.DisableReplaceOfflineReplica +} + +// IsMakeUpReplicaEnabled mock method. +func (mso *MockSchedulerOptions) IsMakeUpReplicaEnabled() bool { + return !mso.DisableMakeUpReplica +} + +// IsRemoveExtraReplicaEnabled mock method. +func (mso *MockSchedulerOptions) IsRemoveExtraReplicaEnabled() bool { + return !mso.DisableRemoveExtraReplica +} + +// IsLocationReplacementEnabled mock method. 
+func (mso *MockSchedulerOptions) IsLocationReplacementEnabled() bool { + return !mso.DisableLocationReplacement +} diff --git a/server/schedule/opts.go b/server/schedule/opts.go index 73849de5428..45b954cccc2 100644 --- a/server/schedule/opts.go +++ b/server/schedule/opts.go @@ -46,6 +46,13 @@ type Options interface { GetHighSpaceRatio() float64 IsRaftLearnerEnabled() bool + + IsRemoveDownReplicaEnabled() bool + IsReplaceOfflineReplicaEnabled() bool + IsMakeUpReplicaEnabled() bool + IsRemoveExtraReplicaEnabled() bool + IsLocationReplacementEnabled() bool + CheckLabelProperty(typ string, labels []*metapb.StoreLabel) bool } diff --git a/server/schedule/replica_checker.go b/server/schedule/replica_checker.go index 0ce3ccfb229..571d19941f2 100644 --- a/server/schedule/replica_checker.go +++ b/server/schedule/replica_checker.go @@ -55,7 +55,7 @@ func (r *ReplicaChecker) Check(region *core.RegionInfo) *Operator { return op } - if len(region.GetPeers()) < r.cluster.GetMaxReplicas() { + if len(region.GetPeers()) < r.cluster.GetMaxReplicas() && r.cluster.IsMakeUpReplicaEnabled() { log.Debugf("[region %d] has %d peers fewer than max replicas", region.GetId(), len(region.GetPeers())) newPeer, _ := r.selectBestPeerToAddReplica(region, NewStorageThresholdFilter()) if newPeer == nil { @@ -79,7 +79,7 @@ func (r *ReplicaChecker) Check(region *core.RegionInfo) *Operator { // when add learner peer, the number of peer will exceed max replicas for a wille, // just comparing the the number of voters to avoid too many cancel add operator log. 
- if len(region.GetVoters()) > r.cluster.GetMaxReplicas() { + if len(region.GetVoters()) > r.cluster.GetMaxReplicas() && r.cluster.IsRemoveExtraReplicaEnabled() { log.Debugf("[region %d] has %d peers more than max replicas", region.GetId(), len(region.GetPeers())) oldPeer, _ := r.selectWorstPeer(region) if oldPeer == nil { @@ -150,6 +150,10 @@ func (r *ReplicaChecker) selectWorstPeer(region *core.RegionInfo) (*metapb.Peer, } func (r *ReplicaChecker) checkDownPeer(region *core.RegionInfo) *Operator { + if !r.cluster.IsRemoveDownReplicaEnabled() { + return nil + } + for _, stats := range region.DownPeers { peer := stats.GetPeer() if peer == nil { @@ -172,6 +176,10 @@ func (r *ReplicaChecker) checkDownPeer(region *core.RegionInfo) *Operator { } func (r *ReplicaChecker) checkOfflinePeer(region *core.RegionInfo) *Operator { + if !r.cluster.IsReplaceOfflineReplicaEnabled() { + return nil + } + // just skip learner if len(region.Learners) != 0 { return nil @@ -209,13 +217,17 @@ func (r *ReplicaChecker) checkOfflinePeer(region *core.RegionInfo) *Operator { if err != nil { return nil } - return CreateMovePeerOperator("makeUpOfflineReplica", r.cluster, region, OpReplica, peer.GetStoreId(), newPeer.GetStoreId(), newPeer.GetId()) + return CreateMovePeerOperator("replaceOfflineReplica", r.cluster, region, OpReplica, peer.GetStoreId(), newPeer.GetStoreId(), newPeer.GetId()) } return nil } func (r *ReplicaChecker) checkBestReplacement(region *core.RegionInfo) *Operator { + if !r.cluster.IsLocationReplacementEnabled() { + return nil + } + oldPeer, oldScore := r.selectWorstPeer(region) if oldPeer == nil { checkerCounter.WithLabelValues("replica_checker", "all_right").Inc() diff --git a/server/schedulers/balance_test.go b/server/schedulers/balance_test.go index fbd99db5eea..e1968c51b74 100644 --- a/server/schedulers/balance_test.go +++ b/server/schedulers/balance_test.go @@ -496,6 +496,11 @@ func (s *testReplicaCheckerSuite) TestBasic(c *C) { region := tc.GetRegion(1) 
testutil.CheckAddPeer(c, rc.Check(region), schedule.OpReplica, 4) + // Disable make up replica feature. + opt.DisableMakeUpReplica = true + c.Assert(rc.Check(region), IsNil) + opt.DisableMakeUpReplica = false + // Test healthFilter. // If store 4 is down, we add to store 3. tc.SetStoreDown(4) @@ -520,6 +525,12 @@ func (s *testReplicaCheckerSuite) TestBasic(c *C) { peer3, _ := tc.AllocPeer(3) region.AddPeer(peer3) testutil.CheckRemovePeer(c, rc.Check(region), 1) + + // Disable remove extra replica feature. + opt.DisableRemoveExtraReplica = true + c.Assert(rc.Check(region), IsNil) + opt.DisableRemoveExtraReplica = false + region.RemoveStorePeer(1) // Peer in store 2 is down, remove it. @@ -663,6 +674,10 @@ func (s *testReplicaCheckerSuite) TestDistinctScore(c *C) { // Replace peer in store 1 with store 6 because it has a different rack. testutil.CheckTransferPeer(c, rc.Check(region), schedule.OpReplica, 1, 6) + // Disable locationReplacement feature. + opt.DisableLocationReplacement = true + c.Assert(rc.Check(region), IsNil) + opt.DisableLocationReplacement = false peer6, _ := tc.AllocPeer(6) region.AddPeer(peer6) testutil.CheckRemovePeer(c, rc.Check(region), 1) @@ -751,6 +766,35 @@ func (s *testReplicaCheckerSuite) TestStorageThreshold(c *C) { testutil.CheckAddPeer(c, rc.Check(region), schedule.OpReplica, 2) } +func (s *testReplicaCheckerSuite) TestOpts(c *C) { + opt := schedule.NewMockSchedulerOptions() + tc := schedule.NewMockCluster(opt) + rc := schedule.NewReplicaChecker(tc, namespace.DefaultClassifier) + + tc.AddRegionStore(1, 100) + tc.AddRegionStore(2, 100) + tc.AddRegionStore(3, 100) + tc.AddRegionStore(4, 100) + tc.AddLeaderRegion(1, 1, 2, 3) + + region := tc.GetRegion(1) + // Test remove down replica and replace offline replica. + tc.SetStoreDown(1) + region.DownPeers = []*pdpb.PeerStats{ + { + Peer: region.GetStorePeer(1), + DownSeconds: 24 * 60 * 60, + }, + } + tc.SetStoreOffline(2) + // RemoveDownReplica has higher priority than replaceOfflineReplica. 
+ testutil.CheckRemovePeer(c, rc.Check(region), 1) + opt.DisableRemoveDownReplica = true + testutil.CheckTransferPeer(c, rc.Check(region), schedule.OpReplica, 2, 4) + opt.DisableReplaceOfflineReplica = true + c.Assert(rc.Check(region), IsNil) +} + var _ = Suite(&testMergeCheckerSuite{}) type testMergeCheckerSuite struct {