From 101368ff0e3d31cda131bfa39221cb486a4a7395 Mon Sep 17 00:00:00 2001 From: ti-srebot <66930949+ti-srebot@users.noreply.github.com> Date: Thu, 8 Sep 2022 13:00:56 +0800 Subject: [PATCH] lightning: add retryable error on checksum (#37691) (#37702) close pingcap/tidb#37690 --- br/pkg/lightning/common/retry.go | 31 +++++++++++++++++++-------- br/pkg/lightning/common/retry_test.go | 8 +++++++ 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/br/pkg/lightning/common/retry.go b/br/pkg/lightning/common/retry.go index a3dbd2dd539e2..074edaebf9ce3 100644 --- a/br/pkg/lightning/common/retry.go +++ b/br/pkg/lightning/common/retry.go @@ -25,8 +25,8 @@ import ( "github.com/go-sql-driver/mysql" "github.com/pingcap/errors" - berrors "github.com/pingcap/tidb/br/pkg/errors" tmysql "github.com/pingcap/tidb/errno" + drivererr "github.com/pingcap/tidb/store/driver/error" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) @@ -67,6 +67,26 @@ func IsRetryableError(err error) bool { return true } +var retryableErrorIDs = map[errors.ErrorID]struct{}{ + ErrKVEpochNotMatch.ID(): {}, + ErrKVNotLeader.ID(): {}, + ErrKVRegionNotFound.ID(): {}, + // common.ErrKVServerIsBusy is a little duplication with tmysql.ErrTiKVServerBusy + // it's because the response of sst.ingest gives us a sst.IngestResponse which doesn't contain error code, + // so we have to transform it into a defined code + ErrKVServerIsBusy.ID(): {}, + ErrKVReadIndexNotReady.ID(): {}, + ErrKVIngestFailed.ID(): {}, + ErrKVRaftProposalDropped.ID(): {}, + // during checksum, the coprocessor transforms errors into driver errors in handleCopResponse using ToTiDBErr + // ErrRegionUnavailable has been seen on free-tier import during checksum; the others have not been seen yet + drivererr.ErrRegionUnavailable.ID(): {}, + drivererr.ErrTiKVStaleCommand.ID(): {}, + drivererr.ErrTiKVServerTimeout.ID(): {}, + drivererr.ErrTiKVServerBusy.ID(): {}, + drivererr.ErrUnknown.ID(): {}, +} + func isSingleRetryableError(err error) bool { err = 
errors.Cause(err) @@ -101,14 +121,7 @@ func isSingleRetryableError(err error) bool { return false } case *errors.Error: - switch { - case berrors.Is(nerr, ErrKVEpochNotMatch), berrors.Is(nerr, ErrKVNotLeader), - berrors.Is(nerr, ErrKVRegionNotFound), berrors.Is(nerr, ErrKVServerIsBusy), - berrors.Is(nerr, ErrKVReadIndexNotReady), berrors.Is(nerr, ErrKVIngestFailed), - berrors.Is(nerr, ErrKVRaftProposalDropped): - // common.ErrKVServerIsBusy is a little duplication with tmysql.ErrTiKVServerBusy - // it's because the response of sst.ingest gives us a sst.IngestResponse which doesn't contain error code, - // so we have to transform it into a defined code + if _, ok := retryableErrorIDs[nerr.ID()]; ok { return true } return false diff --git a/br/pkg/lightning/common/retry_test.go b/br/pkg/lightning/common/retry_test.go index 78719f28c53a5..5d7369e272262 100644 --- a/br/pkg/lightning/common/retry_test.go +++ b/br/pkg/lightning/common/retry_test.go @@ -24,6 +24,7 @@ import ( "github.com/go-sql-driver/mysql" "github.com/pingcap/errors" tmysql "github.com/pingcap/tidb/errno" + drivererr "github.com/pingcap/tidb/store/driver/error" "github.com/stretchr/testify/require" "go.uber.org/multierr" "google.golang.org/grpc/codes" @@ -54,6 +55,13 @@ func TestIsRetryableError(t *testing.T) { require.True(t, IsRetryableError(ErrKVIngestFailed.GenWithStack("test"))) require.True(t, IsRetryableError(ErrKVRaftProposalDropped.GenWithStack("test"))) + // tidb error + require.True(t, IsRetryableError(drivererr.ErrRegionUnavailable)) + require.True(t, IsRetryableError(drivererr.ErrTiKVStaleCommand)) + require.True(t, IsRetryableError(drivererr.ErrTiKVServerTimeout)) + require.True(t, IsRetryableError(drivererr.ErrTiKVServerBusy)) + require.True(t, IsRetryableError(drivererr.ErrUnknown)) + // net: connection refused _, err := net.Dial("tcp", "localhost:65533") require.Error(t, err)