From 8c5ca7b2008f6b6ee3a1a4c42de08d651e359cca Mon Sep 17 00:00:00 2001 From: 3pointer Date: Fri, 28 Jul 2023 16:06:05 +0800 Subject: [PATCH] restore: precheck cluster is empty when first time full restore (#45014) close pingcap/tidb#35744 --- br/cmd/br/restore.go | 6 +++--- br/pkg/backup/push.go | 2 +- br/pkg/task/restore.go | 38 +++++++++++++++++++-------------- br/tests/_utils/run_br | 2 +- br/tests/br_backup_empty/run.sh | 3 +++ br/tests/br_full_ddl/run.sh | 1 + br/tests/br_systables/run.sh | 2 +- 7 files changed, 32 insertions(+), 22 deletions(-) diff --git a/br/cmd/br/restore.go b/br/cmd/br/restore.go index 96f5a048ee042..41635e9fc1e25 100644 --- a/br/cmd/br/restore.go +++ b/br/cmd/br/restore.go @@ -77,12 +77,12 @@ func printWorkaroundOnFullRestoreError(command *cobra.Command, err error) { fmt.Println("#######################################################################") switch { case errors.ErrorEqual(err, berrors.ErrRestoreNotFreshCluster): - fmt.Println("# the target cluster is not fresh, br cannot restore system tables.") + fmt.Println("# the target cluster is not fresh, cannot restore.") + fmt.Println("# you can drop existing databases and tables and start restore again") case errors.ErrorEqual(err, berrors.ErrRestoreIncompatibleSys): fmt.Println("# the target cluster is not compatible with the backup data,") - fmt.Println("# br cannot restore system tables.") + fmt.Println("# you can remove 'with-sys-table' flag to skip restoring system tables") } - fmt.Println("# you can remove 'with-sys-table' flag to skip restoring system tables") fmt.Println("#######################################################################") } diff --git a/br/pkg/backup/push.go b/br/pkg/backup/push.go index 4713509024a25..83df6f46ff3a3 100644 --- a/br/pkg/backup/push.go +++ b/br/pkg/backup/push.go @@ -130,7 +130,7 @@ func (push *pushDown) pushBackup( } failpoint.Inject("backup-timeout-error", func(val failpoint.Value) { msg := val.(string) - 
logutil.CL(ctx).Debug("failpoint backup-timeout-error injected.", zap.String("msg", msg)) + logutil.CL(ctx).Info("failpoint backup-timeout-error injected.", zap.String("msg", msg)) resp.Error = &backuppb.Error{ Msg: msg, } diff --git a/br/pkg/task/restore.go b/br/pkg/task/restore.go index 975114d0e5835..b7a04f2426992 100644 --- a/br/pkg/task/restore.go +++ b/br/pkg/task/restore.go @@ -708,22 +708,6 @@ func runRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf return errors.Trace(err) } - // todo: move this check into InitFullClusterRestore, we should move restore config into a separate package - // to avoid import cycle problem which we won't do it in this pr, then refactor this - // - // if it's point restore and reached here, then cmdName=FullRestoreCmd and len(cfg.FullBackupStorage) > 0 - if cmdName == FullRestoreCmd && cfg.WithSysTable { - client.InitFullClusterRestore(cfg.ExplicitFilter) - } - if client.IsFullClusterRestore() && client.HasBackedUpSysDB() { - if err = client.CheckTargetClusterFresh(ctx); err != nil { - return errors.Trace(err) - } - if err = client.CheckSysTableCompatibility(mgr.GetDomain(), tables); err != nil { - return errors.Trace(err) - } - } - if client.IsIncremental() { // don't support checkpoint for the ddl restore log.Info("the incremental snapshot restore doesn't support checkpoint mode, so unuse checkpoint.") @@ -769,6 +753,28 @@ func runRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf }() } + if isFullRestore(cmdName) { + // we need to check that the cluster is fresh every time, except when restoring from a checkpoint. 
+ if client.IsFull() && len(checkpointSetWithTableID) == 0 { + if err = client.CheckTargetClusterFresh(ctx); err != nil { + return errors.Trace(err) + } + } + // todo: move this check into InitFullClusterRestore, we should move restore config into a separate package + // to avoid the import cycle problem, which we won't do in this pr, then refactor this + // + // if it's a point restore and reached here, then cmdName=FullRestoreCmd and len(cfg.FullBackupStorage) > 0 + if cfg.WithSysTable { + client.InitFullClusterRestore(cfg.ExplicitFilter) + } + } + + if client.IsFullClusterRestore() && client.HasBackedUpSysDB() { + if err = client.CheckSysTableCompatibility(mgr.GetDomain(), tables); err != nil { + return errors.Trace(err) + } + } + sp := utils.BRServiceSafePoint{ BackupTS: restoreTS, TTL: utils.DefaultBRGCSafePointTTL, diff --git a/br/tests/_utils/run_br b/br/tests/_utils/run_br index 5e6588ee3198c..4151e2e92ccda 100755 --- a/br/tests/_utils/run_br +++ b/br/tests/_utils/run_br @@ -17,7 +17,7 @@ set -eux br.test -test.coverprofile="$TEST_DIR/cov.$TEST_NAME.$$.out.log" DEVEL "$@" \ - -L "debug" \ + -L "info" \ --ca "$TEST_DIR/certs/ca.pem" \ --cert "$TEST_DIR/certs/br.pem" \ --key "$TEST_DIR/certs/br.key" diff --git a/br/tests/br_backup_empty/run.sh b/br/tests/br_backup_empty/run.sh index 0289f1d401068..2d1de4796a1d6 100644 --- a/br/tests/br_backup_empty/run.sh +++ b/br/tests/br_backup_empty/run.sh @@ -48,6 +48,7 @@ if [ $? -ne 0 ]; then exit 1 fi +i=1 while [ $i -le $DB_COUNT ]; do run_sql "DROP DATABASE $DB$i;" i=$(($i+1)) @@ -71,6 +72,7 @@ run_sql "CREATE TABLE ${DB}1.usertable1 ( \ echo "backup empty table start..." run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/empty_table" +i=1 while [ $i -le $DB_COUNT ]; do run_sql "DROP DATABASE $DB$i;" i=$(($i+1)) @@ -83,6 +85,7 @@ run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/empty_table" # insert one row to make sure table is restored. 
run_sql "INSERT INTO ${DB}1.usertable1 VALUES (\"a\", \"b\");" +i=1 while [ $i -le $DB_COUNT ]; do run_sql "DROP DATABASE $DB$i;" i=$(($i+1)) diff --git a/br/tests/br_full_ddl/run.sh b/br/tests/br_full_ddl/run.sh index 68ea7266a042f..2e18e280fb691 100755 --- a/br/tests/br_full_ddl/run.sh +++ b/br/tests/br_full_ddl/run.sh @@ -136,6 +136,7 @@ fi # clear restore environment run_sql "DROP DATABASE $DB;" +run_sql "DROP DATABASE __tidb_br_temporary_mysql;" # restore full echo "restore start..." export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/pdutil/PDEnabledPauseConfig=return(true)" diff --git a/br/tests/br_systables/run.sh b/br/tests/br_systables/run.sh index e4074e6e4814c..c0d74ae369960 100644 --- a/br/tests/br_systables/run.sh +++ b/br/tests/br_systables/run.sh @@ -43,7 +43,7 @@ add_test_data() { } delete_test_data() { - run_sql "DROP TABLE usertest.test;" + run_sql "DROP DATABASE usertest;" } rollback_modify() {