Skip to content
This repository has been archived by the owner on Jul 24, 2024. It is now read-only.

validate: manual reset pd config back #530

Merged
merged 9 commits into from
Sep 27, 2020
Merged
43 changes: 43 additions & 0 deletions cmd/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"strings"

"github.com/gogo/protobuf/proto"
"github.com/pingcap/errors"
Expand All @@ -20,6 +21,7 @@ import (
"github.com/tikv/pd/pkg/mock/mockid"
"go.uber.org/zap"

"github.com/pingcap/br/pkg/conn"
"github.com/pingcap/br/pkg/restore"
"github.com/pingcap/br/pkg/rtree"
"github.com/pingcap/br/pkg/task"
Expand All @@ -45,6 +47,7 @@ func NewValidateCommand() *cobra.Command {
meta.AddCommand(newBackupMetaCommand())
meta.AddCommand(decodeBackupMetaCommand())
meta.AddCommand(encodeBackupMetaCommand())
meta.AddCommand(setPDConfigCommand())
meta.Hidden = true

return meta
Expand Down Expand Up @@ -323,3 +326,43 @@ func encodeBackupMetaCommand() *cobra.Command {
}
return encodeBackupMetaCmd
}

func setPDConfigCommand() *cobra.Command {
pdConfigCmd := &cobra.Command{
Use: "reset-pd-config-as-default",
Short: "reset pd scheduler and config adjusted by BR to default value",
RunE: func(cmd *cobra.Command, args []string) error {
ctx, cancel := context.WithCancel(GetDefaultContext())
defer cancel()

var cfg task.Config
if err := cfg.ParseFromFlags(cmd.Flags()); err != nil {
return err
}

mgr, err := task.NewMgr(ctx, tidbGlue, cfg.PD, cfg.TLS, cfg.CheckRequirements)
if err != nil {
return err
}
defer mgr.Close()

for scheduler := range conn.Schedulers {
if strings.HasPrefix(scheduler, "balance") {
err := mgr.AddScheduler(ctx, scheduler)
if err != nil {
return err
}
log.Info("add pd schedulers succeed",
zap.String("schedulers", scheduler))
}
}

if err := mgr.UpdatePDScheduleConfig(ctx, conn.DefaultPDCfg); err != nil {
return errors.Annotate(err, "fail to update PD merge config")
}
log.Info("add pd configs succeed", zap.Any("config", conn.DefaultPDCfg))
return nil
},
}
return pdConfigCmd
}
15 changes: 13 additions & 2 deletions pkg/conn/scheduler_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ type clusterConfig struct {
}

var (
schedulers = map[string]struct{}{
// Schedulers represent region/leader schedulers which can impact on performance.
Schedulers = map[string]struct{}{
"balance-leader-scheduler": {},
"balance-hot-region-scheduler": {},
"balance-region-scheduler": {},
Expand All @@ -30,6 +31,7 @@ var (
"shuffle-region-scheduler": {},
"shuffle-hot-region-scheduler": {},
}

pdRegionMergeCfg = []string{
"max-merge-region-keys",
"max-merge-region-size",
Expand All @@ -39,6 +41,15 @@ var (
"region-schedule-limit",
"max-snapshot-count",
}

// DefaultPDCfg find by https://github.com/tikv/pd/blob/master/conf/config.toml.
DefaultPDCfg = map[string]interface{}{
"max-merge-region-keys": 200000,
"max-merge-region-size": 20,
"leader-schedule-limit": 4,
"region-schedule-limit": 2048,
"max-snapshot-count": 3,
}
)

func addPDLeaderScheduler(ctx context.Context, mgr *Mgr, removedSchedulers []string) error {
Expand Down Expand Up @@ -101,7 +112,7 @@ func (mgr *Mgr) RemoveSchedulers(ctx context.Context) (undo utils.UndoFunc, err
}
needRemoveSchedulers := make([]string, 0, len(existSchedulers))
for _, s := range existSchedulers {
if _, ok := schedulers[s]; ok {
if _, ok := Schedulers[s]; ok {
needRemoveSchedulers = append(needRemoveSchedulers, s)
}
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/task/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func RunBackup(c context.Context, g glue.Glue, cmdName string, cfg *BackupConfig
if err != nil {
return err
}
mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/task/backup_raw.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ func RunBackupRaw(c context.Context, g glue.Glue, cmdName string, cfg *RawKvConf
if err != nil {
return err
}
mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
if err != nil {
return err
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/task/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,8 @@ func (cfg *Config) ParseFromFlags(flags *pflag.FlagSet) error {
return cfg.TLS.ParseFromFlags(flags)
}

// newMgr creates a new mgr at the given PD address.
func newMgr(ctx context.Context,
// NewMgr creates a new mgr at the given PD address.
func NewMgr(ctx context.Context,
g glue.Glue, pds []string,
tlsConfig TLSConfig,
checkRequirements bool) (*conn.Mgr, error) {
Expand Down
4 changes: 2 additions & 2 deletions pkg/task/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf
ctx, cancel := context.WithCancel(c)
defer cancel()

mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
if err != nil {
return err
}
Expand Down Expand Up @@ -369,7 +369,7 @@ func RunRestoreTiflashReplica(c context.Context, g glue.Glue, cmdName string, cf
ctx, cancel := context.WithCancel(c)
defer cancel()

mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/task/restore_log.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func RunLogRestore(c context.Context, g glue.Glue, cfg *LogRestoreConfig) error
ctx, cancel := context.WithCancel(c)
defer cancel()

mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/task/restore_raw.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func RunRestoreRaw(c context.Context, g glue.Glue, cmdName string, cfg *RestoreR
ctx, cancel := context.WithCancel(c)
defer cancel()

mgr, err := newMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, cfg.CheckRequirements)
if err != nil {
return err
}
Expand Down
29 changes: 27 additions & 2 deletions tests/br_other/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,33 @@ fi

# make sure we won't stuck in non-scheduler state, even we send a SIGTERM to it.
# give enough time to BR so it can gracefully stop.
sleep 10
! curl http://$PD_ADDR/pd/api/v1/config/schedule | grep '"disable": true'
sleep 5
if curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][0]' | grep -q '"disable": false'
then
echo "TEST: [$TEST_NAME] failed because scheduler has not been removed"
exit 1
fi

pd_settings=5
# we need reset pd scheduler/config to default
# until pd has the solution to temporary set these scheduler/configs.
run_br validate reset-pd-config-as-default

# max-merge-region-size set to default 20
curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-merge-region-size"' | grep "20" || ((pd_settings--))
# max-merge-region-keys set to default 200000
curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '."max-merge-region-keys"' | grep "200000" || ((pd_settings--))
# balance-region scheduler enabled
curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][0]' | grep '"disable": false' || ((pd_settings--))
# balance-leader scheduler enabled
curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][1]' | grep '"disable": false' || ((pd_settings--))
# hot region scheduler enabled
curl http://$PD_ADDR/pd/api/v1/config/schedule | jq '[."schedulers-v2"][0][2]' | grep '"disable": false' || ((pd_settings--))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Try jq '."schedulers-v2"[] | {disable: .disable, type: ."type" | select (.=="hot-region")}'


if [ "$pd_settings" -ne "5" ];then
echo "TEST: [$TEST_NAME] test validate reset pd config failed!"
exit 1
fi

run_sql "DROP DATABASE $DB;"

Expand Down