Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

schedule: add options to disable replica checker features #1140

Merged
merged 5 commits into from
Jul 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions server/cluster_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,26 @@ func (c *clusterInfo) IsRaftLearnerEnabled() bool {
return c.opt.IsRaftLearnerEnabled()
}

// IsRemoveDownReplicaEnabled reports whether the schedule options allow the
// replica checker to remove down replicas.
func (c *clusterInfo) IsRemoveDownReplicaEnabled() bool {
	enabled := c.opt.IsRemoveDownReplicaEnabled()
	return enabled
}

// IsReplaceOfflineReplicaEnabled reports whether the schedule options allow
// the replica checker to replace offline replicas.
func (c *clusterInfo) IsReplaceOfflineReplicaEnabled() bool {
	enabled := c.opt.IsReplaceOfflineReplicaEnabled()
	return enabled
}

// IsMakeUpReplicaEnabled reports whether the schedule options allow the
// replica checker to make up replicas when a region has fewer than expected.
func (c *clusterInfo) IsMakeUpReplicaEnabled() bool {
	enabled := c.opt.IsMakeUpReplicaEnabled()
	return enabled
}

// IsRemoveExtraReplicaEnabled reports whether the schedule options allow the
// replica checker to remove extra replicas.
func (c *clusterInfo) IsRemoveExtraReplicaEnabled() bool {
	enabled := c.opt.IsRemoveExtraReplicaEnabled()
	return enabled
}

// IsLocationReplacementEnabled reports whether the schedule options allow the
// replica checker to move a replica to a better location.
func (c *clusterInfo) IsLocationReplacementEnabled() bool {
	enabled := c.opt.IsLocationReplacementEnabled()
	return enabled
}

// CheckLabelProperty forwards typ and labels to the cluster's schedule options
// and returns their verdict unchanged.
func (c *clusterInfo) CheckLabelProperty(typ string, labels []*metapb.StoreLabel) bool {
	matched := c.opt.CheckLabelProperty(typ, labels)
	return matched
}
Expand Down
54 changes: 38 additions & 16 deletions server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,23 @@ type ScheduleConfig struct {
HighSpaceRatio float64 `toml:"high-space-ratio,omitempty" json:"high-space-ratio"`
// DisableLearner is the option to disable using AddLearnerNode instead of AddNode
DisableLearner bool `toml:"disable-raft-learner" json:"disable-raft-learner,string"`

// DisableRemoveDownReplica is the option to prevent replica checker from
// removing down replicas.
DisableRemoveDownReplica bool `toml:"disable-remove-down-replica" json:"disable-remove-down-replica,string"`
// DisableReplaceOfflineReplica is the option to prevent replica checker from
// replacing offline replicas.
DisableReplaceOfflineReplica bool `toml:"disable-replace-offline-replica" json:"disable-replace-offline-replica,string"`
// DisableMakeUpReplica is the option to prevent replica checker from making up
// replicas when replica count is less than expected.
DisableMakeUpReplica bool `toml:"disable-make-up-replica" json:"disable-make-up-replica,string"`
// DisableRemoveExtraReplica is the option to prevent replica checker from
// removing extra replicas.
DisableRemoveExtraReplica bool `toml:"disable-remove-extra-replica" json:"disable-remove-extra-replica,string"`
// DisableLocationReplacement is the option to prevent replica checker from
// moving replica to a better location.
DisableLocationReplacement bool `toml:"disable-location-replacement" json:"disable-location-replacement,string"`

// Schedulers support for loading customized schedulers
Schedulers SchedulerConfigs `toml:"schedulers,omitempty" json:"schedulers-v2"` // json v2 is for the sake of compatible upgrade
}
Expand All @@ -418,22 +435,27 @@ func (c *ScheduleConfig) clone() *ScheduleConfig {
schedulers := make(SchedulerConfigs, len(c.Schedulers))
copy(schedulers, c.Schedulers)
return &ScheduleConfig{
MaxSnapshotCount: c.MaxSnapshotCount,
MaxPendingPeerCount: c.MaxPendingPeerCount,
MaxMergeRegionSize: c.MaxMergeRegionSize,
MaxMergeRegionRows: c.MaxMergeRegionRows,
SplitMergeInterval: c.SplitMergeInterval,
PatrolRegionInterval: c.PatrolRegionInterval,
MaxStoreDownTime: c.MaxStoreDownTime,
LeaderScheduleLimit: c.LeaderScheduleLimit,
RegionScheduleLimit: c.RegionScheduleLimit,
ReplicaScheduleLimit: c.ReplicaScheduleLimit,
MergeScheduleLimit: c.MergeScheduleLimit,
TolerantSizeRatio: c.TolerantSizeRatio,
LowSpaceRatio: c.LowSpaceRatio,
HighSpaceRatio: c.HighSpaceRatio,
DisableLearner: c.DisableLearner,
Schedulers: schedulers,
MaxSnapshotCount: c.MaxSnapshotCount,
MaxPendingPeerCount: c.MaxPendingPeerCount,
MaxMergeRegionSize: c.MaxMergeRegionSize,
MaxMergeRegionRows: c.MaxMergeRegionRows,
SplitMergeInterval: c.SplitMergeInterval,
PatrolRegionInterval: c.PatrolRegionInterval,
MaxStoreDownTime: c.MaxStoreDownTime,
LeaderScheduleLimit: c.LeaderScheduleLimit,
RegionScheduleLimit: c.RegionScheduleLimit,
ReplicaScheduleLimit: c.ReplicaScheduleLimit,
MergeScheduleLimit: c.MergeScheduleLimit,
TolerantSizeRatio: c.TolerantSizeRatio,
LowSpaceRatio: c.LowSpaceRatio,
HighSpaceRatio: c.HighSpaceRatio,
DisableLearner: c.DisableLearner,
DisableRemoveDownReplica: c.DisableRemoveDownReplica,
DisableReplaceOfflineReplica: c.DisableReplaceOfflineReplica,
DisableMakeUpReplica: c.DisableMakeUpReplica,
DisableRemoveExtraReplica: c.DisableRemoveExtraReplica,
DisableLocationReplacement: c.DisableLocationReplacement,
Schedulers: schedulers,
}
}

Expand Down
20 changes: 20 additions & 0 deletions server/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,26 @@ func (o *scheduleOption) IsRaftLearnerEnabled() bool {
return !o.load().DisableLearner
}

// IsRemoveDownReplicaEnabled reports whether removing down replicas is
// permitted, i.e. DisableRemoveDownReplica is not set in the loaded config.
func (o *scheduleOption) IsRemoveDownReplicaEnabled() bool {
	disabled := o.load().DisableRemoveDownReplica
	return !disabled
}

// IsReplaceOfflineReplicaEnabled reports whether replacing offline replicas is
// permitted, i.e. DisableReplaceOfflineReplica is not set in the loaded config.
func (o *scheduleOption) IsReplaceOfflineReplicaEnabled() bool {
	disabled := o.load().DisableReplaceOfflineReplica
	return !disabled
}

// IsMakeUpReplicaEnabled reports whether making up missing replicas is
// permitted, i.e. DisableMakeUpReplica is not set in the loaded config.
func (o *scheduleOption) IsMakeUpReplicaEnabled() bool {
	disabled := o.load().DisableMakeUpReplica
	return !disabled
}

// IsRemoveExtraReplicaEnabled reports whether removing extra replicas is
// permitted, i.e. DisableRemoveExtraReplica is not set in the loaded config.
func (o *scheduleOption) IsRemoveExtraReplicaEnabled() bool {
	disabled := o.load().DisableRemoveExtraReplica
	return !disabled
}

// IsLocationReplacementEnabled reports whether moving replicas to a better
// location is permitted, i.e. DisableLocationReplacement is not set in the
// loaded config.
func (o *scheduleOption) IsLocationReplacementEnabled() bool {
	disabled := o.load().DisableLocationReplacement
	return !disabled
}

// GetSchedulers returns the Schedulers field of the config obtained from
// o.load().
func (o *scheduleOption) GetSchedulers() SchedulerConfigs {
	cfg := o.load()
	return cfg.Schedulers
}
Expand Down
66 changes: 48 additions & 18 deletions server/schedule/mockcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -432,24 +432,29 @@ const (
// MockSchedulerOptions is a mock of SchedulerOptions
// which implements Options interface
type MockSchedulerOptions struct {
RegionScheduleLimit uint64
LeaderScheduleLimit uint64
ReplicaScheduleLimit uint64
MergeScheduleLimit uint64
MaxSnapshotCount uint64
MaxPendingPeerCount uint64
MaxMergeRegionSize uint64
MaxMergeRegionRows uint64
SplitMergeInterval time.Duration
MaxStoreDownTime time.Duration
MaxReplicas int
LocationLabels []string
HotRegionLowThreshold int
TolerantSizeRatio float64
LowSpaceRatio float64
HighSpaceRatio float64
DisableLearner bool
LabelProperties map[string][]*metapb.StoreLabel
RegionScheduleLimit uint64
LeaderScheduleLimit uint64
ReplicaScheduleLimit uint64
MergeScheduleLimit uint64
MaxSnapshotCount uint64
MaxPendingPeerCount uint64
MaxMergeRegionSize uint64
MaxMergeRegionRows uint64
SplitMergeInterval time.Duration
MaxStoreDownTime time.Duration
MaxReplicas int
LocationLabels []string
HotRegionLowThreshold int
TolerantSizeRatio float64
LowSpaceRatio float64
HighSpaceRatio float64
DisableLearner bool
DisableRemoveDownReplica bool
DisableReplaceOfflineReplica bool
DisableMakeUpReplica bool
DisableRemoveExtraReplica bool
DisableLocationReplacement bool
LabelProperties map[string][]*metapb.StoreLabel
}

// NewMockSchedulerOptions creates a mock schedule option.
Expand Down Expand Up @@ -562,3 +567,28 @@ func (mso *MockSchedulerOptions) SetMaxReplicas(replicas int) {
func (mso *MockSchedulerOptions) IsRaftLearnerEnabled() bool {
	// The mock stores the "disable" flag, so the feature is on when it is unset.
	disabled := mso.DisableLearner
	return !disabled
}

// IsRemoveDownReplicaEnabled mock method. It reports true unless
// DisableRemoveDownReplica has been set on the mock.
func (mso *MockSchedulerOptions) IsRemoveDownReplicaEnabled() bool {
	disabled := mso.DisableRemoveDownReplica
	return !disabled
}

// IsReplaceOfflineReplicaEnabled mock method. It reports true unless
// DisableReplaceOfflineReplica has been set on the mock.
func (mso *MockSchedulerOptions) IsReplaceOfflineReplicaEnabled() bool {
	disabled := mso.DisableReplaceOfflineReplica
	return !disabled
}

// IsMakeUpReplicaEnabled mock method. It reports true unless
// DisableMakeUpReplica has been set on the mock.
func (mso *MockSchedulerOptions) IsMakeUpReplicaEnabled() bool {
	disabled := mso.DisableMakeUpReplica
	return !disabled
}

// IsRemoveExtraReplicaEnabled mock method. It reports true unless
// DisableRemoveExtraReplica has been set on the mock.
func (mso *MockSchedulerOptions) IsRemoveExtraReplicaEnabled() bool {
	disabled := mso.DisableRemoveExtraReplica
	return !disabled
}

// IsLocationReplacementEnabled mock method. It reports true unless
// DisableLocationReplacement has been set on the mock.
func (mso *MockSchedulerOptions) IsLocationReplacementEnabled() bool {
	disabled := mso.DisableLocationReplacement
	return !disabled
}
7 changes: 7 additions & 0 deletions server/schedule/opts.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ type Options interface {
GetHighSpaceRatio() float64

IsRaftLearnerEnabled() bool

IsRemoveDownReplicaEnabled() bool
IsReplaceOfflineReplicaEnabled() bool
IsMakeUpReplicaEnabled() bool
IsRemoveExtraReplicaEnabled() bool
IsLocationReplacementEnabled() bool

CheckLabelProperty(typ string, labels []*metapb.StoreLabel) bool
}

Expand Down
18 changes: 15 additions & 3 deletions server/schedule/replica_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func (r *ReplicaChecker) Check(region *core.RegionInfo) *Operator {
return op
}

if len(region.GetPeers()) < r.cluster.GetMaxReplicas() {
if len(region.GetPeers()) < r.cluster.GetMaxReplicas() && r.cluster.IsMakeUpReplicaEnabled() {
log.Debugf("[region %d] has %d peers fewer than max replicas", region.GetId(), len(region.GetPeers()))
newPeer, _ := r.selectBestPeerToAddReplica(region, NewStorageThresholdFilter())
if newPeer == nil {
Expand All @@ -79,7 +79,7 @@ func (r *ReplicaChecker) Check(region *core.RegionInfo) *Operator {

// when adding a learner peer, the number of peers will exceed max replicas for a while,
// so just compare the number of voters to avoid too many cancel-add-operator logs.
if len(region.GetVoters()) > r.cluster.GetMaxReplicas() {
if len(region.GetVoters()) > r.cluster.GetMaxReplicas() && r.cluster.IsRemoveExtraReplicaEnabled() {
log.Debugf("[region %d] has %d peers more than max replicas", region.GetId(), len(region.GetPeers()))
oldPeer, _ := r.selectWorstPeer(region)
if oldPeer == nil {
Expand Down Expand Up @@ -150,6 +150,10 @@ func (r *ReplicaChecker) selectWorstPeer(region *core.RegionInfo) (*metapb.Peer,
}

func (r *ReplicaChecker) checkDownPeer(region *core.RegionInfo) *Operator {
if !r.cluster.IsRemoveDownReplicaEnabled() {
return nil
}

for _, stats := range region.DownPeers {
peer := stats.GetPeer()
if peer == nil {
Expand All @@ -172,6 +176,10 @@ func (r *ReplicaChecker) checkDownPeer(region *core.RegionInfo) *Operator {
}

func (r *ReplicaChecker) checkOfflinePeer(region *core.RegionInfo) *Operator {
if !r.cluster.IsReplaceOfflineReplicaEnabled() {
return nil
}

// just skip learner
if len(region.Learners) != 0 {
return nil
Expand Down Expand Up @@ -209,13 +217,17 @@ func (r *ReplicaChecker) checkOfflinePeer(region *core.RegionInfo) *Operator {
if err != nil {
return nil
}
return CreateMovePeerOperator("makeUpOfflineReplica", r.cluster, region, OpReplica, peer.GetStoreId(), newPeer.GetStoreId(), newPeer.GetId())
return CreateMovePeerOperator("replaceOfflineReplica", r.cluster, region, OpReplica, peer.GetStoreId(), newPeer.GetStoreId(), newPeer.GetId())
}

return nil
}

func (r *ReplicaChecker) checkBestReplacement(region *core.RegionInfo) *Operator {
if !r.cluster.IsLocationReplacementEnabled() {
return nil
}

oldPeer, oldScore := r.selectWorstPeer(region)
if oldPeer == nil {
checkerCounter.WithLabelValues("replica_checker", "all_right").Inc()
Expand Down
44 changes: 44 additions & 0 deletions server/schedulers/balance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,11 @@ func (s *testReplicaCheckerSuite) TestBasic(c *C) {
region := tc.GetRegion(1)
testutil.CheckAddPeer(c, rc.Check(region), schedule.OpReplica, 4)

// Disable make up replica feature.
opt.DisableMakeUpReplica = true
c.Assert(rc.Check(region), IsNil)
opt.DisableMakeUpReplica = false

// Test healthFilter.
// If store 4 is down, we add to store 3.
tc.SetStoreDown(4)
Expand All @@ -520,6 +525,12 @@ func (s *testReplicaCheckerSuite) TestBasic(c *C) {
peer3, _ := tc.AllocPeer(3)
region.AddPeer(peer3)
testutil.CheckRemovePeer(c, rc.Check(region), 1)

// Disable remove extra replica feature.
opt.DisableRemoveExtraReplica = true
c.Assert(rc.Check(region), IsNil)
opt.DisableRemoveExtraReplica = false

region.RemoveStorePeer(1)

// Peer in store 2 is down, remove it.
Expand Down Expand Up @@ -663,6 +674,10 @@ func (s *testReplicaCheckerSuite) TestDistinctScore(c *C) {

// Replace peer in store 1 with store 6 because it has a different rack.
testutil.CheckTransferPeer(c, rc.Check(region), schedule.OpReplica, 1, 6)
// Disable locationReplacement feature.
opt.DisableLocationReplacement = true
c.Assert(rc.Check(region), IsNil)
opt.DisableLocationReplacement = false
peer6, _ := tc.AllocPeer(6)
region.AddPeer(peer6)
testutil.CheckRemovePeer(c, rc.Check(region), 1)
Expand Down Expand Up @@ -751,6 +766,35 @@ func (s *testReplicaCheckerSuite) TestStorageThreshold(c *C) {
testutil.CheckAddPeer(c, rc.Check(region), schedule.OpReplica, 2)
}

// TestOpts checks that the replica checker honours the options that disable
// removing down replicas and replacing offline replicas.
func (s *testReplicaCheckerSuite) TestOpts(c *C) {
	options := schedule.NewMockSchedulerOptions()
	mc := schedule.NewMockCluster(options)
	checker := schedule.NewReplicaChecker(mc, namespace.DefaultClassifier)

	mc.AddRegionStore(1, 100)
	mc.AddRegionStore(2, 100)
	mc.AddRegionStore(3, 100)
	mc.AddRegionStore(4, 100)
	mc.AddLeaderRegion(1, 1, 2, 3)

	target := mc.GetRegion(1)

	// Set up both conditions at once: store 1 is down (and its peer is
	// reported as a down peer) while store 2 is offline.
	mc.SetStoreDown(1)
	downStats := &pdpb.PeerStats{
		Peer:        target.GetStorePeer(1),
		DownSeconds: 24 * 60 * 60,
	}
	target.DownPeers = []*pdpb.PeerStats{downStats}
	mc.SetStoreOffline(2)

	// RemoveDownReplica has higher priority than replaceOfflineReplica, so
	// with both features enabled the down peer on store 1 is removed first.
	testutil.CheckRemovePeer(c, checker.Check(target), 1)

	// With down-replica removal disabled, the offline peer is moved instead.
	options.DisableRemoveDownReplica = true
	testutil.CheckTransferPeer(c, checker.Check(target), schedule.OpReplica, 2, 4)

	// With both features disabled, the checker produces no operator.
	options.DisableReplaceOfflineReplica = true
	c.Assert(checker.Check(target), IsNil)
}

var _ = Suite(&testMergeCheckerSuite{})

type testMergeCheckerSuite struct {
Expand Down