Skip to content
This repository has been archived by the owner on Jul 24, 2024. It is now read-only.

Commit

Permalink
scatter: check pending peers before scattering (#31691)
Browse files Browse the repository at this point in the history
  • Loading branch information
YuJuncen authored Feb 7, 2022
1 parent ab8f063 commit 79a78bd
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 7 deletions.
6 changes: 4 additions & 2 deletions pkg/restore/range.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,10 @@ func SortRanges(ranges []rtree.Range, rewriteRules *RewriteRules) ([]rtree.Range

// RegionInfo includes a region and the leader of the region.
type RegionInfo struct {
Region *metapb.Region
Leader *metapb.Peer
Region *metapb.Region
Leader *metapb.Peer
PendingPeers []*metapb.Peer
DownPeers []*metapb.Peer
}

// ContainsInterior returns whether the region contains the given key, and also
Expand Down
21 changes: 18 additions & 3 deletions pkg/restore/split.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,27 @@ SplitRegions:
return nil
}

func (rs *RegionSplitter) hasRegion(ctx context.Context, regionID uint64) (bool, error) {
func (rs *RegionSplitter) hasHealthyRegion(ctx context.Context, regionID uint64) (bool, error) {
regionInfo, err := rs.client.GetRegionByID(ctx, regionID)
if err != nil {
return false, errors.Trace(err)
}
return regionInfo != nil, nil
// the region isn't ready yet.
if regionInfo == nil {
return false, nil
}

// check whether the region is healthy and report.
// TODO: the log may be too verbose. We should use Prometheus metrics once they are ready for BR.
for _, peer := range regionInfo.PendingPeers {
log.Debug("unhealthy region detected", logutil.Peer(peer), zap.String("type", "pending"))
}
for _, peer := range regionInfo.DownPeers {
log.Debug("unhealthy region detected", logutil.Peer(peer), zap.String("type", "down"))
}
// We ignore down peers because they are (normally) hard to fix in a reasonable time.
// (If we waited for them, a single down peer could leave us stuck waiting for the region to become ready.)
return len(regionInfo.PendingPeers) == 0, nil
}

func (rs *RegionSplitter) isScatterRegionFinished(ctx context.Context, regionID uint64) (bool, error) {
Expand Down Expand Up @@ -218,7 +233,7 @@ func (rs *RegionSplitter) isScatterRegionFinished(ctx context.Context, regionID
func (rs *RegionSplitter) waitForSplit(ctx context.Context, regionID uint64) {
interval := SplitCheckInterval
for i := 0; i < SplitCheckMaxRetryTimes; i++ {
ok, err := rs.hasRegion(ctx, regionID)
ok, err := rs.hasHealthyRegion(ctx, regionID)
if err != nil {
log.Warn("wait for split failed", zap.Error(err))
return
Expand Down
6 changes: 4 additions & 2 deletions pkg/restore/split_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,10 @@ func (c *pdClient) GetRegionByID(ctx context.Context, regionID uint64) (*RegionI
return nil, nil
}
return &RegionInfo{
Region: region.Meta,
Leader: region.Leader,
Region: region.Meta,
Leader: region.Leader,
PendingPeers: region.PendingPeers,
DownPeers: region.DownPeers,
}, nil
}

Expand Down

0 comments on commit 79a78bd

Please sign in to comment.