Skip to content

Commit

Permalink
schedule: add options to disable replica checker features (#1140)
Browse files Browse the repository at this point in the history
  • Loading branch information
disksing authored and nolouch committed Jul 10, 2018
1 parent 8e72f3b commit bb4657c
Show file tree
Hide file tree
Showing 7 changed files with 192 additions and 37 deletions.
20 changes: 20 additions & 0 deletions server/cluster_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,26 @@ func (c *clusterInfo) IsRaftLearnerEnabled() bool {
return c.opt.IsRaftLearnerEnabled()
}

// IsRemoveDownReplicaEnabled reports whether removing down replicas is
// enabled; the answer is delegated to c.opt.
func (c *clusterInfo) IsRemoveDownReplicaEnabled() bool {
	enabled := c.opt.IsRemoveDownReplicaEnabled()
	return enabled
}

// IsReplaceOfflineReplicaEnabled reports whether replacing offline replicas
// is enabled; the answer is delegated to c.opt.
func (c *clusterInfo) IsReplaceOfflineReplicaEnabled() bool {
	enabled := c.opt.IsReplaceOfflineReplicaEnabled()
	return enabled
}

// IsMakeUpReplicaEnabled reports whether making up missing replicas is
// enabled; the answer is delegated to c.opt.
func (c *clusterInfo) IsMakeUpReplicaEnabled() bool {
	enabled := c.opt.IsMakeUpReplicaEnabled()
	return enabled
}

// IsRemoveExtraReplicaEnabled reports whether removing extra replicas is
// enabled; the answer is delegated to c.opt.
func (c *clusterInfo) IsRemoveExtraReplicaEnabled() bool {
	enabled := c.opt.IsRemoveExtraReplicaEnabled()
	return enabled
}

// IsLocationReplacementEnabled reports whether location replacement is
// enabled; the answer is delegated to c.opt.
func (c *clusterInfo) IsLocationReplacementEnabled() bool {
	enabled := c.opt.IsLocationReplacementEnabled()
	return enabled
}

// CheckLabelProperty forwards typ and labels to c.opt.CheckLabelProperty and
// returns its result unchanged.
func (c *clusterInfo) CheckLabelProperty(typ string, labels []*metapb.StoreLabel) bool {
	matched := c.opt.CheckLabelProperty(typ, labels)
	return matched
}
Expand Down
54 changes: 38 additions & 16 deletions server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,23 @@ type ScheduleConfig struct {
HighSpaceRatio float64 `toml:"high-space-ratio,omitempty" json:"high-space-ratio"`
// DisableLearner is the option to disable using AddLearnerNode instead of AddNode
DisableLearner bool `toml:"disable-raft-learner" json:"disable-raft-learner,string"`

// DisableRemoveDownReplica is the option to prevent replica checker from
// removing down replicas.
DisableRemoveDownReplica bool `toml:"disable-remove-down-replica" json:"disable-remove-down-replica,string"`
// DisableReplaceOfflineReplica is the option to prevent replica checker from
// replacing offline replicas.
DisableReplaceOfflineReplica bool `toml:"disable-replace-offline-replica" json:"disable-replace-offline-replica,string"`
// DisableMakeUpReplica is the option to prevent replica checker from making up
// replicas when replica count is less than expected.
DisableMakeUpReplica bool `toml:"disable-make-up-replica" json:"disable-make-up-replica,string"`
// DisableRemoveExtraReplica is the option to prevent replica checker from
// removing extra replicas.
DisableRemoveExtraReplica bool `toml:"disable-remove-extra-replica" json:"disable-remove-extra-replica,string"`
// DisableLocationReplacement is the option to prevent replica checker from
// moving replica to a better location.
DisableLocationReplacement bool `toml:"disable-location-replacement" json:"disable-location-replacement,string"`

// Schedulers support for loading customized schedulers
Schedulers SchedulerConfigs `toml:"schedulers,omitempty" json:"schedulers-v2"` // json v2 is for the sake of compatible upgrade
}
Expand All @@ -418,22 +435,27 @@ func (c *ScheduleConfig) clone() *ScheduleConfig {
schedulers := make(SchedulerConfigs, len(c.Schedulers))
copy(schedulers, c.Schedulers)
return &ScheduleConfig{
MaxSnapshotCount: c.MaxSnapshotCount,
MaxPendingPeerCount: c.MaxPendingPeerCount,
MaxMergeRegionSize: c.MaxMergeRegionSize,
MaxMergeRegionRows: c.MaxMergeRegionRows,
SplitMergeInterval: c.SplitMergeInterval,
PatrolRegionInterval: c.PatrolRegionInterval,
MaxStoreDownTime: c.MaxStoreDownTime,
LeaderScheduleLimit: c.LeaderScheduleLimit,
RegionScheduleLimit: c.RegionScheduleLimit,
ReplicaScheduleLimit: c.ReplicaScheduleLimit,
MergeScheduleLimit: c.MergeScheduleLimit,
TolerantSizeRatio: c.TolerantSizeRatio,
LowSpaceRatio: c.LowSpaceRatio,
HighSpaceRatio: c.HighSpaceRatio,
DisableLearner: c.DisableLearner,
Schedulers: schedulers,
MaxSnapshotCount: c.MaxSnapshotCount,
MaxPendingPeerCount: c.MaxPendingPeerCount,
MaxMergeRegionSize: c.MaxMergeRegionSize,
MaxMergeRegionRows: c.MaxMergeRegionRows,
SplitMergeInterval: c.SplitMergeInterval,
PatrolRegionInterval: c.PatrolRegionInterval,
MaxStoreDownTime: c.MaxStoreDownTime,
LeaderScheduleLimit: c.LeaderScheduleLimit,
RegionScheduleLimit: c.RegionScheduleLimit,
ReplicaScheduleLimit: c.ReplicaScheduleLimit,
MergeScheduleLimit: c.MergeScheduleLimit,
TolerantSizeRatio: c.TolerantSizeRatio,
LowSpaceRatio: c.LowSpaceRatio,
HighSpaceRatio: c.HighSpaceRatio,
DisableLearner: c.DisableLearner,
DisableRemoveDownReplica: c.DisableRemoveDownReplica,
DisableReplaceOfflineReplica: c.DisableReplaceOfflineReplica,
DisableMakeUpReplica: c.DisableMakeUpReplica,
DisableRemoveExtraReplica: c.DisableRemoveExtraReplica,
DisableLocationReplacement: c.DisableLocationReplacement,
Schedulers: schedulers,
}
}

Expand Down
20 changes: 20 additions & 0 deletions server/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,26 @@ func (o *scheduleOption) IsRaftLearnerEnabled() bool {
return !o.load().DisableLearner
}

// IsRemoveDownReplicaEnabled reports whether removing down replicas is
// enabled, i.e. DisableRemoveDownReplica is not set on the value returned by
// load.
func (o *scheduleOption) IsRemoveDownReplicaEnabled() bool {
	disabled := o.load().DisableRemoveDownReplica
	return !disabled
}

// IsReplaceOfflineReplicaEnabled reports whether replacing offline replicas
// is enabled, i.e. DisableReplaceOfflineReplica is not set on the value
// returned by load.
func (o *scheduleOption) IsReplaceOfflineReplicaEnabled() bool {
	disabled := o.load().DisableReplaceOfflineReplica
	return !disabled
}

// IsMakeUpReplicaEnabled reports whether making up missing replicas is
// enabled, i.e. DisableMakeUpReplica is not set on the value returned by load.
func (o *scheduleOption) IsMakeUpReplicaEnabled() bool {
	disabled := o.load().DisableMakeUpReplica
	return !disabled
}

// IsRemoveExtraReplicaEnabled reports whether removing extra replicas is
// enabled, i.e. DisableRemoveExtraReplica is not set on the value returned by
// load.
func (o *scheduleOption) IsRemoveExtraReplicaEnabled() bool {
	disabled := o.load().DisableRemoveExtraReplica
	return !disabled
}

// IsLocationReplacementEnabled reports whether location replacement is
// enabled, i.e. DisableLocationReplacement is not set on the value returned by
// load.
func (o *scheduleOption) IsLocationReplacementEnabled() bool {
	disabled := o.load().DisableLocationReplacement
	return !disabled
}

// GetSchedulers returns the Schedulers field of the value returned by load.
func (o *scheduleOption) GetSchedulers() SchedulerConfigs {
	current := o.load()
	return current.Schedulers
}
Expand Down
66 changes: 48 additions & 18 deletions server/schedule/mockcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -432,24 +432,29 @@ const (
// MockSchedulerOptions is a mock of SchedulerOptions
// which implements Options interface.
//
// Each Disable* field is a plain switch; the matching Is*Enabled mock method
// returns the negation of the field, so the zero value leaves every feature
// enabled.
type MockSchedulerOptions struct {
	RegionScheduleLimit          uint64
	LeaderScheduleLimit          uint64
	ReplicaScheduleLimit         uint64
	MergeScheduleLimit           uint64
	MaxSnapshotCount             uint64
	MaxPendingPeerCount          uint64
	MaxMergeRegionSize           uint64
	MaxMergeRegionRows           uint64
	SplitMergeInterval           time.Duration
	MaxStoreDownTime             time.Duration
	MaxReplicas                  int
	LocationLabels               []string
	HotRegionLowThreshold        int
	TolerantSizeRatio            float64
	LowSpaceRatio                float64
	HighSpaceRatio               float64
	DisableLearner               bool
	DisableRemoveDownReplica     bool
	DisableReplaceOfflineReplica bool
	DisableMakeUpReplica         bool
	DisableRemoveExtraReplica    bool
	DisableLocationReplacement   bool
	LabelProperties              map[string][]*metapb.StoreLabel
}

// NewMockSchedulerOptions creates a mock schedule option.
Expand Down Expand Up @@ -562,3 +567,28 @@ func (mso *MockSchedulerOptions) SetMaxReplicas(replicas int) {
func (mso *MockSchedulerOptions) IsRaftLearnerEnabled() bool {
return !mso.DisableLearner
}

// IsRemoveDownReplicaEnabled is a mock method; it reports true unless
// DisableRemoveDownReplica is set.
func (mso *MockSchedulerOptions) IsRemoveDownReplicaEnabled() bool {
	disabled := mso.DisableRemoveDownReplica
	return !disabled
}

// IsReplaceOfflineReplicaEnabled is a mock method; it reports true unless
// DisableReplaceOfflineReplica is set.
func (mso *MockSchedulerOptions) IsReplaceOfflineReplicaEnabled() bool {
	disabled := mso.DisableReplaceOfflineReplica
	return !disabled
}

// IsMakeUpReplicaEnabled is a mock method; it reports true unless
// DisableMakeUpReplica is set.
func (mso *MockSchedulerOptions) IsMakeUpReplicaEnabled() bool {
	disabled := mso.DisableMakeUpReplica
	return !disabled
}

// IsRemoveExtraReplicaEnabled is a mock method; it reports true unless
// DisableRemoveExtraReplica is set.
func (mso *MockSchedulerOptions) IsRemoveExtraReplicaEnabled() bool {
	disabled := mso.DisableRemoveExtraReplica
	return !disabled
}

// IsLocationReplacementEnabled is a mock method; it reports true unless
// DisableLocationReplacement is set.
func (mso *MockSchedulerOptions) IsLocationReplacementEnabled() bool {
	disabled := mso.DisableLocationReplacement
	return !disabled
}
7 changes: 7 additions & 0 deletions server/schedule/opts.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ type Options interface {
GetHighSpaceRatio() float64

IsRaftLearnerEnabled() bool

IsRemoveDownReplicaEnabled() bool
IsReplaceOfflineReplicaEnabled() bool
IsMakeUpReplicaEnabled() bool
IsRemoveExtraReplicaEnabled() bool
IsLocationReplacementEnabled() bool

CheckLabelProperty(typ string, labels []*metapb.StoreLabel) bool
}

Expand Down
18 changes: 15 additions & 3 deletions server/schedule/replica_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func (r *ReplicaChecker) Check(region *core.RegionInfo) *Operator {
return op
}

if len(region.GetPeers()) < r.cluster.GetMaxReplicas() {
if len(region.GetPeers()) < r.cluster.GetMaxReplicas() && r.cluster.IsMakeUpReplicaEnabled() {
log.Debugf("[region %d] has %d peers fewer than max replicas", region.GetId(), len(region.GetPeers()))
newPeer, _ := r.selectBestPeerToAddReplica(region, NewStorageThresholdFilter())
if newPeer == nil {
Expand All @@ -79,7 +79,7 @@ func (r *ReplicaChecker) Check(region *core.RegionInfo) *Operator {

// When adding a learner peer, the number of peers will exceed max replicas for a while,
// so just compare the number of voters to avoid too many cancel add operator logs.
if len(region.GetVoters()) > r.cluster.GetMaxReplicas() {
if len(region.GetVoters()) > r.cluster.GetMaxReplicas() && r.cluster.IsRemoveExtraReplicaEnabled() {
log.Debugf("[region %d] has %d peers more than max replicas", region.GetId(), len(region.GetPeers()))
oldPeer, _ := r.selectWorstPeer(region)
if oldPeer == nil {
Expand Down Expand Up @@ -150,6 +150,10 @@ func (r *ReplicaChecker) selectWorstPeer(region *core.RegionInfo) (*metapb.Peer,
}

func (r *ReplicaChecker) checkDownPeer(region *core.RegionInfo) *Operator {
if !r.cluster.IsRemoveDownReplicaEnabled() {
return nil
}

for _, stats := range region.DownPeers {
peer := stats.GetPeer()
if peer == nil {
Expand All @@ -172,6 +176,10 @@ func (r *ReplicaChecker) checkDownPeer(region *core.RegionInfo) *Operator {
}

func (r *ReplicaChecker) checkOfflinePeer(region *core.RegionInfo) *Operator {
if !r.cluster.IsReplaceOfflineReplicaEnabled() {
return nil
}

// just skip learner
if len(region.Learners) != 0 {
return nil
Expand Down Expand Up @@ -209,13 +217,17 @@ func (r *ReplicaChecker) checkOfflinePeer(region *core.RegionInfo) *Operator {
if err != nil {
return nil
}
return CreateMovePeerOperator("makeUpOfflineReplica", r.cluster, region, OpReplica, peer.GetStoreId(), newPeer.GetStoreId(), newPeer.GetId())
return CreateMovePeerOperator("replaceOfflineReplica", r.cluster, region, OpReplica, peer.GetStoreId(), newPeer.GetStoreId(), newPeer.GetId())
}

return nil
}

func (r *ReplicaChecker) checkBestReplacement(region *core.RegionInfo) *Operator {
if !r.cluster.IsLocationReplacementEnabled() {
return nil
}

oldPeer, oldScore := r.selectWorstPeer(region)
if oldPeer == nil {
checkerCounter.WithLabelValues("replica_checker", "all_right").Inc()
Expand Down
44 changes: 44 additions & 0 deletions server/schedulers/balance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,11 @@ func (s *testReplicaCheckerSuite) TestBasic(c *C) {
region := tc.GetRegion(1)
testutil.CheckAddPeer(c, rc.Check(region), schedule.OpReplica, 4)

// Disable make up replica feature.
opt.DisableMakeUpReplica = true
c.Assert(rc.Check(region), IsNil)
opt.DisableMakeUpReplica = false

// Test healthFilter.
// If store 4 is down, we add to store 3.
tc.SetStoreDown(4)
Expand All @@ -520,6 +525,12 @@ func (s *testReplicaCheckerSuite) TestBasic(c *C) {
peer3, _ := tc.AllocPeer(3)
region.AddPeer(peer3)
testutil.CheckRemovePeer(c, rc.Check(region), 1)

// Disable remove extra replica feature.
opt.DisableRemoveExtraReplica = true
c.Assert(rc.Check(region), IsNil)
opt.DisableRemoveExtraReplica = false

region.RemoveStorePeer(1)

// Peer in store 2 is down, remove it.
Expand Down Expand Up @@ -663,6 +674,10 @@ func (s *testReplicaCheckerSuite) TestDistinctScore(c *C) {

// Replace peer in store 1 with store 6 because it has a different rack.
testutil.CheckTransferPeer(c, rc.Check(region), schedule.OpReplica, 1, 6)
// Disable locationReplacement feature.
opt.DisableLocationReplacement = true
c.Assert(rc.Check(region), IsNil)
opt.DisableLocationReplacement = false
peer6, _ := tc.AllocPeer(6)
region.AddPeer(peer6)
testutil.CheckRemovePeer(c, rc.Check(region), 1)
Expand Down Expand Up @@ -751,6 +766,35 @@ func (s *testReplicaCheckerSuite) TestStorageThreshold(c *C) {
testutil.CheckAddPeer(c, rc.Check(region), schedule.OpReplica, 2)
}

// TestOpts checks that the DisableRemoveDownReplica and
// DisableReplaceOfflineReplica switches each turn off the corresponding
// replica checker step.
func (s *testReplicaCheckerSuite) TestOpts(c *C) {
	options := schedule.NewMockSchedulerOptions()
	cluster := schedule.NewMockCluster(options)
	checker := schedule.NewReplicaChecker(cluster, namespace.DefaultClassifier)

	cluster.AddRegionStore(1, 100)
	cluster.AddRegionStore(2, 100)
	cluster.AddRegionStore(3, 100)
	cluster.AddRegionStore(4, 100)
	cluster.AddLeaderRegion(1, 1, 2, 3)

	region := cluster.GetRegion(1)

	// Scenario: store 1 is down with its peer reported down for a full day,
	// and store 2 is offline.
	cluster.SetStoreDown(1)
	downPeer := &pdpb.PeerStats{
		Peer:        region.GetStorePeer(1),
		DownSeconds: 24 * 60 * 60,
	}
	region.DownPeers = []*pdpb.PeerStats{downPeer}
	cluster.SetStoreOffline(2)

	// With everything enabled, removing the down replica wins over replacing
	// the offline one.
	testutil.CheckRemovePeer(c, checker.Check(region), 1)

	// Once down-replica removal is disabled, the offline replica is replaced.
	options.DisableRemoveDownReplica = true
	testutil.CheckTransferPeer(c, checker.Check(region), schedule.OpReplica, 2, 4)

	// With offline replacement disabled as well, no operator is produced.
	options.DisableReplaceOfflineReplica = true
	c.Assert(checker.Check(region), IsNil)
}

var _ = Suite(&testMergeCheckerSuite{})

type testMergeCheckerSuite struct {
Expand Down

0 comments on commit bb4657c

Please sign in to comment.