Skip to content
This repository has been archived by the owner on Jul 24, 2024. It is now read-only.

restore: support online restore #114

Merged
merged 33 commits into from
Mar 13, 2020
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
e47560d
*: update tidb
disksing Dec 13, 2019
7148fa4
Merge branch 'master' into disksing/update-tidb
disksing Dec 13, 2019
b36fd13
fix test
disksing Dec 13, 2019
4f19e9e
fix deprecated config
disksing Dec 13, 2019
cd14fec
Merge branch 'master' into disksing/update-tidb
disksing Dec 13, 2019
e0aad97
fix panic
disksing Dec 13, 2019
8ce26ac
restore: support online restore
disksing Dec 16, 2019
9b6c4b6
Merge branch 'master' into disksing/online-restore
disksing Dec 17, 2019
af99c96
add logs
disksing Dec 23, 2019
d64e9e2
Merge branch 'master' into disksing/online-restore
disksing Dec 23, 2019
1a1e755
fix key codec and log
disksing Dec 26, 2019
de6de8f
Merge branch 'master' into disksing/online-restore
disksing Dec 26, 2019
b4aefb9
Merge branch 'master' into disksing/online-restore
disksing Feb 20, 2020
2e9f38d
merge master
disksing Feb 25, 2020
ff52237
resolve conflicts
disksing Feb 25, 2020
8539fb9
Merge branch 'master' into disksing/online-restore
disksing Feb 25, 2020
5f12413
fix import
disksing Feb 25, 2020
b884609
add online restore tests
disksing Feb 26, 2020
ee4217e
add log
disksing Feb 26, 2020
0da88b3
add log
disksing Feb 26, 2020
e317849
fix path
disksing Feb 26, 2020
c8fe26d
update pd-ctl usage
disksing Feb 26, 2020
cb95c14
cleanup log
disksing Feb 26, 2020
dd272d3
update restore_util dependency
disksing Feb 27, 2020
23dc3ec
Merge branch 'master' into disksing/online-restore
disksing Feb 27, 2020
b18f7e1
fix compile
disksing Feb 27, 2020
8b8290f
go mod tidy
disksing Feb 27, 2020
ed6c35d
Merge branch 'master' into disksing/online-restore
disksing Mar 2, 2020
32e6f40
Merge branch 'master' into disksing/online-restore
disksing Mar 4, 2020
8c54b53
address comment
disksing Mar 6, 2020
c03aa62
Merge branch 'master' into disksing/online-restore
disksing Mar 6, 2020
2db0475
fix compile
disksing Mar 9, 2020
317b145
Merge branch 'master' into disksing/online-restore
disksing Mar 13, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ require (
github.com/onsi/gomega v1.8.1 // indirect
github.com/pingcap/check v0.0.0-20200212061837-5e12011dc712
github.com/pingcap/errors v0.11.5-0.20190809092503-95897b64e011
github.com/pingcap/kvproto v0.0.0-20200217103621-528e82bf7248
github.com/pingcap/kvproto v0.0.0-20200221125103-35b65c96516e
github.com/pingcap/log v0.0.0-20200117041106-d28c14d3b1cd
github.com/pingcap/parser v0.0.0-20200218113622-517beb2e39c2
github.com/pingcap/pd v1.1.0-beta.0.20200106144140-f5a7aa985497
github.com/pingcap/tidb v1.1.0-beta.0.20200223044457-aedea3ec5e1e
github.com/pingcap/tidb v1.1.0-beta.0.20200225081522-143b3bd72848
github.com/pingcap/tidb-tools v4.0.0-beta+incompatible
github.com/pingcap/tipb v0.0.0-20200212061130-c4d518eb1d60
github.com/prometheus/client_golang v1.0.0
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,8 @@ github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 h1:surzm05a8C9dN
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw=
github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w=
github.com/pingcap/kvproto v0.0.0-20191213111810-93cb7c623c8b/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w=
github.com/pingcap/kvproto v0.0.0-20200217103621-528e82bf7248 h1:DhGKu4ACa5v0Z70J1NWrc9ti+OqihhxmyzsK7YDTpVQ=
github.com/pingcap/kvproto v0.0.0-20200217103621-528e82bf7248/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI=
github.com/pingcap/kvproto v0.0.0-20200221125103-35b65c96516e h1:z7j9uyuG/6I4god5h5NbsbMDSfhoOYAvVW6JxhwdHHw=
github.com/pingcap/kvproto v0.0.0-20200221125103-35b65c96516e/go.mod h1:IOdRDPLyda8GX2hE/jO7gqaCV/PNFh8BZQCQZXfIOqI=
github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9 h1:AJD9pZYm72vMgPcQDww9rkZ1DnWfl0pXV3BOWlkYIjA=
github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8=
github.com/pingcap/log v0.0.0-20200117041106-d28c14d3b1cd h1:CV3VsP3Z02MVtdpTMfEgRJ4T9NGgGTxdHpJerent7rM=
Expand All @@ -323,8 +323,8 @@ github.com/pingcap/sysutil v0.0.0-20191216090214-5f9620d22b3b h1:EEyo/SCRswLGuSk
github.com/pingcap/sysutil v0.0.0-20191216090214-5f9620d22b3b/go.mod h1:EB/852NMQ+aRKioCpToQ94Wl7fktV+FNnxf3CX/TTXI=
github.com/pingcap/sysutil v0.0.0-20200206130906-2bfa6dc40bcd h1:k7CIHMFVKjHsda3PKkiN4zv++NEnexlUwiJEhryWpG0=
github.com/pingcap/sysutil v0.0.0-20200206130906-2bfa6dc40bcd/go.mod h1:EB/852NMQ+aRKioCpToQ94Wl7fktV+FNnxf3CX/TTXI=
github.com/pingcap/tidb v1.1.0-beta.0.20200223044457-aedea3ec5e1e h1:HPSJdnkI6mt0qEIbSkJzVsq99929Ki5VblkJMmlqhI0=
github.com/pingcap/tidb v1.1.0-beta.0.20200223044457-aedea3ec5e1e/go.mod h1:zzO/kysmwHMkr0caH2NmuSAKLdsySXKDQGTCYrb7Gx8=
github.com/pingcap/tidb v1.1.0-beta.0.20200225081522-143b3bd72848 h1:10dV8P+KWfSIG0cX2OINtfh17HxPK8ZTS4vztyIuMjc=
github.com/pingcap/tidb v1.1.0-beta.0.20200225081522-143b3bd72848/go.mod h1:eg/iVKxfMTQLn81eKT9yrczAhnWQtt0z7jnt3QjbTfM=
github.com/pingcap/tidb-tools v3.0.6-0.20191106033616-90632dda3863+incompatible h1:H1jg0aDWz2SLRh3hNBo2HFtnuHtudIUvBumU7syRkic=
github.com/pingcap/tidb-tools v3.0.6-0.20191106033616-90632dda3863+incompatible/go.mod h1:XGdcy9+yqlDSEMTpOXnwf3hiTeqrV6MN/u1se9N8yIM=
github.com/pingcap/tidb-tools v4.0.0-beta+incompatible h1:+XJdcVLCM8GDgXiMS6lFV59N3XPVOqtNHeWNLVrb2pg=
Expand Down
174 changes: 169 additions & 5 deletions pkg/restore/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,28 @@ package restore
import (
"context"
"crypto/tls"
"encoding/hex"
"encoding/json"
"fmt"
"math"
"sort"
"strconv"
"sync"
"time"

"github.com/pingcap/errors"
"github.com/pingcap/kvproto/pkg/backup"
"github.com/pingcap/kvproto/pkg/import_sstpb"
"github.com/pingcap/kvproto/pkg/metapb"
"github.com/pingcap/log"
"github.com/pingcap/parser/model"
pd "github.com/pingcap/pd/client"
"github.com/pingcap/pd/server/schedule/placement"
"github.com/pingcap/tidb/domain"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/store/tikv/oracle"
"github.com/pingcap/tidb/tablecodec"
"github.com/pingcap/tidb/util/codec"
"go.uber.org/zap"
"google.golang.org/grpc"
"google.golang.org/grpc/backoff"
Expand All @@ -40,6 +47,7 @@ type Client struct {
cancel context.CancelFunc

pdClient pd.Client
toolClient SplitClient
fileImporter FileImporter
workerPool *utils.WorkerPool
tlsConf *tls.Config
Expand All @@ -51,6 +59,8 @@ type Client struct {
rateLimit uint64
isOnline bool
hasSpeedLimited bool

restoreStores []uint64
}

// NewRestoreClient returns a new RestoreClient
Expand All @@ -69,11 +79,12 @@ func NewRestoreClient(
}

return &Client{
ctx: ctx,
cancel: cancel,
pdClient: pdClient,
db: db,
tlsConf: tlsConf,
ctx: ctx,
cancel: cancel,
pdClient: pdClient,
toolClient: NewSplitClient(pdClient, tlsConf),
db: db,
tlsConf: tlsConf,
}, nil
}

Expand Down Expand Up @@ -455,6 +466,159 @@ func (rc *Client) ValidateChecksum(
return nil
}

// Store label that identifies the stores taking part in an online restore.
// LoadRestoreStores selects stores carrying restoreLabelKey with the value
// restoreLabelValue, and SetupPlacementRules adds the same key/value pair as a
// label constraint on the placement rule it creates for each table.
const (
restoreLabelKey = "exclusive"
restoreLabelValue = "restore"
)

// LoadRestoreStores loads the stores used to restore data.
//
// It is a no-op unless online restore is enabled. Otherwise it asks PD for
// all stores and records the IDs of those that are in the Up state and carry
// the label restoreLabelKey=restoreLabelValue.
//
// The recorded list is rebuilt from scratch on every call, so calling this
// method more than once never accumulates duplicate store IDs.
func (rc *Client) LoadRestoreStores(ctx context.Context) error {
	if !rc.isOnline {
		return nil
	}

	stores, err := rc.pdClient.GetAllStores(ctx)
	if err != nil {
		return errors.Trace(err)
	}

	// Collect into a local slice first so that rc.restoreStores is replaced
	// rather than appended to.
	var restoreStores []uint64
	for _, s := range stores {
		if s.GetState() != metapb.StoreState_Up {
			continue
		}
		for _, l := range s.GetLabels() {
			if l.GetKey() == restoreLabelKey && l.GetValue() == restoreLabelValue {
				restoreStores = append(restoreStores, s.GetId())
				// One matching label is enough; avoid adding the store twice.
				break
			}
		}
	}
	rc.restoreStores = restoreStores

	log.Info("load restore stores", zap.Uint64s("store-ids", rc.restoreStores))
	return nil
}

// ResetRestoreLabels removes the exclusive labels of the restore stores by
// setting restoreLabelKey to the empty value on every recorded restore store.
// It does nothing when online restore is disabled.
func (rc *Client) ResetRestoreLabels(ctx context.Context) error {
	if rc.isOnline {
		log.Info("start reseting store labels")
		const emptyValue = ""
		return rc.toolClient.SetStoresLabel(ctx, rc.restoreStores, restoreLabelKey, emptyValue)
	}
	return nil
}

// SetupPlacementRules sets rules for the tables' regions.
func (rc *Client) SetupPlacementRules(ctx context.Context, tables []*model.TableInfo) error {
if !rc.isOnline || len(rc.restoreStores) == 0 {
3pointer marked this conversation as resolved.
Show resolved Hide resolved
return nil
}
log.Info("start setting placement rules")
rule, err := rc.toolClient.GetPlacementRule(ctx, "pd", "default")
if err != nil {
return err
}
rule.Index = 100
rule.Override = true
rule.LabelConstraints = append(rule.LabelConstraints, placement.LabelConstraint{
Key: restoreLabelKey,
Op: "in",
Values: []string{restoreLabelValue},
})
for _, t := range tables {
rule.ID = rc.getRuleID(t.ID)
rule.StartKeyHex = hex.EncodeToString(codec.EncodeBytes([]byte{}, tablecodec.EncodeTablePrefix(t.ID)))
rule.EndKeyHex = hex.EncodeToString(codec.EncodeBytes([]byte{}, tablecodec.EncodeTablePrefix(t.ID+1)))
err = rc.toolClient.SetPlacementRule(ctx, rule)
if err != nil {
return err
}
}
log.Info("finish setting placement rules")
return nil
}

// WaitPlacementSchedule waits PD to move tables to restore stores.
//
// It returns immediately when online restore is disabled or there are no
// restore stores. Otherwise it polls checkRegions every ten seconds until the
// check succeeds, the check fails with an error, or ctx is cancelled.
func (rc *Client) WaitPlacementSchedule(ctx context.Context, tables []*model.TableInfo) error {
	if !rc.isOnline {
		return nil
	}
	if len(rc.restoreStores) == 0 {
		return nil
	}
	log.Info("start waiting placement schedule")

	const pollInterval = 10 * time.Second
	tick := time.NewTicker(pollInterval)
	defer tick.Stop()

	for {
		// Block until either the next poll is due or the caller gives up.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-tick.C:
		}

		done, detail, err := rc.checkRegions(ctx, tables)
		switch {
		case err != nil:
			return err
		case done:
			log.Info("finish waiting placement schedule")
			return nil
		}
		log.Info("placement schedule progress: " + detail)
	}
}

// checkRegions runs checkRange over the key range of every table, in order.
//
// It returns true only if checkRange reports true for all tables. On the
// first table that is not ready it returns false together with a progress
// string of the form "table <index>/<total>, <region progress>". Any error
// from checkRange is returned as is.
func (rc *Client) checkRegions(ctx context.Context, tables []*model.TableInfo) (bool, string, error) {
	total := len(tables)
	for idx, tbl := range tables {
		lower := codec.EncodeBytes([]byte{}, tablecodec.EncodeTablePrefix(tbl.ID))
		upper := codec.EncodeBytes([]byte{}, tablecodec.EncodeTablePrefix(tbl.ID+1))

		placed, regionProgress, err := rc.checkRange(ctx, lower, upper)
		if err != nil {
			return false, "", err
		}
		if placed {
			continue
		}
		return false, fmt.Sprintf("table %v/%v, %s", idx, total, regionProgress), nil
	}
	return true, "", nil
}

func (rc *Client) checkRange(ctx context.Context, start, end []byte) (bool, string, error) {
regions, err := rc.toolClient.ScanRegions(ctx, start, end, -1)
3pointer marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return false, "", err
}
for i, r := range regions {
NEXT_PEER:
for _, p := range r.Region.GetPeers() {
for _, storeID := range rc.restoreStores {
if p.GetStoreId() == storeID {
continue NEXT_PEER
}
}
return false, fmt.Sprintf("region %v/%v", i, len(regions)), nil
}
}
return true, "", nil
}

// ResetPlacementRules removes placement rules for tables.
func (rc *Client) ResetPlacementRules(ctx context.Context, tables []*model.TableInfo) error {
if !rc.isOnline || len(rc.restoreStores) == 0 {
return nil
}
log.Info("start reseting placement rules")
var failedTables []int64
for _, t := range tables {
err := rc.toolClient.DeletePlacementRule(ctx, "pd", rc.getRuleID(t.ID))
if err != nil {
3pointer marked this conversation as resolved.
Show resolved Hide resolved
log.Info("failed to delete placement rule for table", zap.Int64("table-id", t.ID))
failedTables = append(failedTables, t.ID)
}
}
if len(failedTables) > 0 {
return errors.Errorf("failed to delete placement rules for tables %v", failedTables)
}
return nil
}

// getRuleID builds the placement-rule ID for a table: the fixed prefix
// "restore-t" followed by the table ID in decimal.
func (rc *Client) getRuleID(tableID int64) string {
	const prefix = "restore-t"
	decimalID := strconv.FormatInt(tableID, 10)
	return prefix + decimalID
}

// IsIncremental returns whether this backup is incremental
func (rc *Client) IsIncremental() bool {
return !(rc.backupMeta.StartVersion == rc.backupMeta.EndVersion ||
Expand Down
41 changes: 41 additions & 0 deletions pkg/task/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"github.com/pingcap/errors"
"github.com/pingcap/kvproto/pkg/backup"
"github.com/pingcap/log"
"github.com/pingcap/parser/model"
"github.com/pingcap/tidb-tools/pkg/filter"
"github.com/spf13/pflag"
"go.uber.org/zap"
Expand Down Expand Up @@ -90,6 +91,10 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf
if cfg.Online {
client.EnableOnline()
}
err = client.LoadRestoreStores(ctx)
if err != nil {
return err
}

defer summary.Summary(cmdName)

Expand Down Expand Up @@ -136,6 +141,10 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf
}
summary.CollectInt("restore ranges", len(ranges))

if err = splitPrepareWork(ctx, client, newTables); err != nil {
return err
}

ranges = restore.AttachFilesToRanges(files, ranges)

// Redirect to log if there is no log file to avoid unreadable output.
Expand Down Expand Up @@ -202,6 +211,10 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf
return err
}

if err = splitPostWork(ctx, client, newTables); err != nil {
return err
}

// Restore has finished.
close(updateCh)

Expand Down Expand Up @@ -304,3 +317,31 @@ func addPDLeaderScheduler(ctx context.Context, mgr *conn.Mgr, removedSchedulers
}
return nil
}

// splitPrepareWork sets up the placement rules for the given tables and then
// waits for the placement schedule to finish. A failure in either step is
// logged and returned with a stack trace attached; the second step is skipped
// if the first one fails.
func splitPrepareWork(ctx context.Context, client *restore.Client, tables []*model.TableInfo) error {
	if setupErr := client.SetupPlacementRules(ctx, tables); setupErr != nil {
		log.Error("setup placement rules failed", zap.Error(setupErr))
		return errors.Trace(setupErr)
	}

	if waitErr := client.WaitPlacementSchedule(ctx, tables); waitErr != nil {
		log.Error("wait placement schedule failed", zap.Error(waitErr))
		return errors.Trace(waitErr)
	}
	return nil
}

// splitPostWork undoes splitPrepareWork: it resets the placement rules of the
// given tables and then resets the restore labels. The first failing step
// aborts the sequence and its error is returned with a stack trace attached.
func splitPostWork(ctx context.Context, client *restore.Client, tables []*model.TableInfo) error {
	if rulesErr := client.ResetPlacementRules(ctx, tables); rulesErr != nil {
		return errors.Trace(rulesErr)
	}

	if labelsErr := client.ResetRestoreLabels(ctx); labelsErr != nil {
		return errors.Trace(labelsErr)
	}
	return nil
}
54 changes: 54 additions & 0 deletions tests/br_db_online/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/sh
#
# Copyright 2020 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.

# Online-restore smoke test: create a database with two small tables, back it
# up, drop it, restore it with --online, and check that both tables are back.
# run_sql and run_br, as well as TEST_NAME, TEST_DIR and PD_ADDR, are expected
# to be provided by the test harness that invokes this script.

# Abort on the first failing command or on use of an unset variable.
set -eu
DB="$TEST_NAME"

run_sql "CREATE DATABASE $DB;"

run_sql "CREATE TABLE $DB.usertable1 ( \
YCSB_KEY varchar(64) NOT NULL, \
FIELD0 varchar(1) DEFAULT NULL, \
PRIMARY KEY (YCSB_KEY) \
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;"

run_sql "INSERT INTO $DB.usertable1 VALUES (\"a\", \"b\");"
run_sql "INSERT INTO $DB.usertable1 VALUES (\"aa\", \"b\");"

run_sql "CREATE TABLE $DB.usertable2 ( \
YCSB_KEY varchar(64) NOT NULL, \
FIELD0 varchar(1) DEFAULT NULL, \
PRIMARY KEY (YCSB_KEY) \
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;"

run_sql "INSERT INTO $DB.usertable2 VALUES (\"c\", \"d\");"

# backup db
echo "backup start..."
run_br --pd "$PD_ADDR" backup db --db "$DB" -s "local://$TEST_DIR/$DB" --ratelimit 5 --concurrency 4

# Drop the source data so that the restore below has to recreate it.
run_sql "DROP DATABASE $DB;"

# restore db
echo "restore start..."
run_br restore db --db "$DB" -s "local://$TEST_DIR/$DB" --pd "$PD_ADDR" --online

# Count the lines of the "show tables" output that mention Tables_in.
# NOTE(review): presumably run_sql prints one such line per table — confirm
# against the harness. The grep | wc pipeline is kept on purpose: under
# "set -e", "grep -c" would abort the script on zero matches before the
# failure message below could be printed.
table_count=$(run_sql "use $DB; show tables;" | grep "Tables_in" | wc -l)
if [ "$table_count" -ne "2" ];then
    echo "TEST: [$TEST_NAME] failed!"
    exit 1
fi

run_sql "DROP DATABASE $DB;"
Loading