From 130c189bae8df8b7ea046796b2576f16a7d3e7a8 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Mon, 7 Nov 2022 10:50:58 -0500 Subject: [PATCH] schemachanger: improve panic- and error handling Previously, the declarative schema changer would only recover from runtime errors in certain subsystems like building the targets or planning the execution of operations. Consequently an implementation bug leading to a runtime error in the execution layer would trigger a panic which would not be recovered and which would cause the whole process to crash. This commit fixes this by introducing a common error handler in the form of scerrors.HandleErrorOrPanic, which recovers from panics, wraps errors, and prints informative log messages in a uniform way, to be used at the top of (or near the top of) the declarative schema changer call stack. Fixes #91400. Release note (bug fix): fixed a bug in which panics triggered by certain DDL statements were not properly recovered, leading to the cluster node crashing. --- pkg/sql/schemachanger/scbuild/BUILD.bazel | 2 - pkg/sql/schemachanger/scbuild/build.go | 29 ++----------- pkg/sql/schemachanger/scerrors/BUILD.bazel | 2 + pkg/sql/schemachanger/scerrors/errors.go | 43 +++++++++++++++++++ pkg/sql/schemachanger/scplan/BUILD.bazel | 1 + pkg/sql/schemachanger/scplan/plan.go | 14 ++---- pkg/sql/schemachanger/scrun/BUILD.bazel | 3 +- pkg/sql/schemachanger/scrun/scrun.go | 25 +++++------ .../lint/passes/fmtsafe/functions.go | 2 + 9 files changed, 66 insertions(+), 55 deletions(-) diff --git a/pkg/sql/schemachanger/scbuild/BUILD.bazel b/pkg/sql/schemachanger/scbuild/BUILD.bazel index b152068161a0..75a7c4a9a84a 100644 --- a/pkg/sql/schemachanger/scbuild/BUILD.bazel +++ b/pkg/sql/schemachanger/scbuild/BUILD.bazel @@ -47,8 +47,6 @@ go_library( "//pkg/sql/sessiondatapb", "//pkg/sql/sqlerrors", "//pkg/sql/types", - "//pkg/util/log", - "//pkg/util/timeutil", "//pkg/util/uuid", "@com_github_cockroachdb_errors//:errors", "@com_github_cockroachdb_redact//:redact", diff --git a/pkg/sql/schemachanger/scbuild/build.go b/pkg/sql/schemachanger/scbuild/build.go index 2c76fc3c9da6..1651dc97b79e 100644 --- a/pkg/sql/schemachanger/scbuild/build.go +++ b/pkg/sql/schemachanger/scbuild/build.go @@ -12,7 +12,6 @@ package scbuild import ( "context" - "runtime" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog" @@ -27,9 +26,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb" - "github.com/cockroachdb/cockroach/pkg/util/log" - "github.com/cockroachdb/cockroach/pkg/util/timeutil" - "github.com/cockroachdb/errors" ) // Build constructs a new state from an initial state and a statement. @@ -39,13 +35,9 @@ import ( func Build( ctx context.Context, dependencies Dependencies, initial scpb.CurrentState, n tree.Statement, ) (_ scpb.CurrentState, err error) { - start := timeutil.Now() - defer func() { - if err != nil || !log.ExpensiveLogEnabled(ctx, 2) { - return - } - log.Infof(ctx, "build for %s took %v", n.StatementTag(), timeutil.Since(start)) - }() + defer scerrors.HandleErrorOrPanic(ctx, &err, + "building declarative schema change targets for %s", n.StatementTag(), + )() initial = initial.DeepCopy() bs := newBuilderState(ctx, dependencies, initial) els := newEventLogState(dependencies, initial, n) @@ -64,21 +56,6 @@ func Build( TreeAnnotator: an, SchemaFeatureChecker: dependencies.FeatureChecker(), } - defer func() { - switch recErr := recover().(type) { - case nil: - // No error. - case runtime.Error: - err = errors.WithAssertionFailure(recErr) - case error: - err = recErr - default: - err = errors.AssertionFailedf( - "unexpected error encountered while building schema change plan %s", - recErr, - ) - } - }() scbuildstmt.Process(b, an.GetStatement()) an.ValidateAnnotations() els.statements[len(els.statements)-1].RedactedStatement = diff --git a/pkg/sql/schemachanger/scerrors/BUILD.bazel b/pkg/sql/schemachanger/scerrors/BUILD.bazel index 54658eb0a929..3e60521621fe 100644 --- a/pkg/sql/schemachanger/scerrors/BUILD.bazel +++ b/pkg/sql/schemachanger/scerrors/BUILD.bazel @@ -10,6 +10,8 @@ go_library( "//pkg/sql/catalog", "//pkg/sql/catalog/descpb", "//pkg/sql/sem/tree", + "//pkg/util/log", + "//pkg/util/timeutil", "@com_github_cockroachdb_errors//:errors", ], ) diff --git a/pkg/sql/schemachanger/scerrors/errors.go b/pkg/sql/schemachanger/scerrors/errors.go index 785b5db3978b..1ba387f9d22c 100644 --- a/pkg/sql/schemachanger/scerrors/errors.go +++ b/pkg/sql/schemachanger/scerrors/errors.go @@ -11,15 +11,58 @@ package scerrors import ( + "context" "fmt" + "runtime" "strings" + "time" "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" ) +// HandleErrorOrPanic generates a closure, intended to be deferred, to handle +// panics which reach all the way up to the declarative schema changer's entry +// points. +// Due to it being called at the entry points, this function also handles +// logging and wrapping errors, for convenience. +func HandleErrorOrPanic( + ctx context.Context, err *error, wrapMsgFmt string, wrapMsgArgs ...interface{}, +) func() { + isExpensive := log.ExpensiveLogEnabled(ctx, 2) + var start time.Time + if isExpensive { + start = timeutil.Now() + } + log.Infof(ctx, wrapMsgFmt, wrapMsgArgs...) + return func() { + switch recErr := recover().(type) { + case nil: + // No panicked error. + case runtime.Error: + *err = errors.WithAssertionFailure(recErr) + case error: + *err = recErr + default: + *err = errors.AssertionFailedf("recovered from uncategorizable panic: %v", recErr) + } + *err = errors.Wrapf(*err, wrapMsgFmt, wrapMsgArgs...) + wrapMsgFmt = "done " + wrapMsgFmt + if *err != nil { + wrapMsgFmt = wrapMsgFmt + " with error: %v" + wrapMsgArgs = append(wrapMsgArgs, *err) + } else if isExpensive { + wrapMsgFmt = wrapMsgFmt + "in %s" + wrapMsgArgs = append(wrapMsgArgs, timeutil.Since(start)) + } + log.Infof(ctx, wrapMsgFmt, wrapMsgArgs...) + } +} + type notImplementedError struct { n tree.NodeFormatter detail string diff --git a/pkg/sql/schemachanger/scplan/BUILD.bazel b/pkg/sql/schemachanger/scplan/BUILD.bazel index 7efd047612b5..db072f07e070 100644 --- a/pkg/sql/schemachanger/scplan/BUILD.bazel +++ b/pkg/sql/schemachanger/scplan/BUILD.bazel @@ -12,6 +12,7 @@ go_library( deps = [ "//pkg/clusterversion", "//pkg/jobs/jobspb", + "//pkg/sql/schemachanger/scerrors", "//pkg/sql/schemachanger/scop", "//pkg/sql/schemachanger/scpb", "//pkg/sql/schemachanger/scplan/internal/opgen", diff --git a/pkg/sql/schemachanger/scplan/plan.go b/pkg/sql/schemachanger/scplan/plan.go index 0bb0e47b1b4c..b76d35425763 100644 --- a/pkg/sql/schemachanger/scplan/plan.go +++ b/pkg/sql/schemachanger/scplan/plan.go @@ -15,6 +15,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" + "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scerrors" "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scop" "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scpb" "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scplan/internal/opgen" @@ -79,6 +80,9 @@ func (p Plan) StagesForCurrentPhase() []scstage.Stage { // MakePlan generates a Plan for a particular phase of a schema change, given // the initial state for a set of targets. Returns an error when planning fails. func MakePlan(ctx context.Context, initial scpb.CurrentState, params Params) (p Plan, err error) { + defer scerrors.HandleErrorOrPanic(ctx, &err, + "building declarative schema changer plan", + )() p = Plan{ CurrentState: initial, Params: params, @@ -91,16 +95,6 @@ func MakePlan(ctx context.Context, initial scpb.CurrentState, params Params) (p } func makePlan(ctx context.Context, p *Plan) (err error) { - defer func() { - if r := recover(); r != nil { - rAsErr, ok := r.(error) - if !ok { - rAsErr = errors.Errorf("panic during MakePlan: %v", r) - } - err = rAsErr - } - err = errors.WithAssertionFailure(err) - }() { start := timeutil.Now() p.Graph = buildGraph(ctx, p.Params.ActiveVersion, p.CurrentState) diff --git a/pkg/sql/schemachanger/scrun/BUILD.bazel b/pkg/sql/schemachanger/scrun/BUILD.bazel index 2255f7cccce5..db75cea6687d 100644 --- a/pkg/sql/schemachanger/scrun/BUILD.bazel +++ b/pkg/sql/schemachanger/scrun/BUILD.bazel @@ -21,9 +21,8 @@ go_library( "//pkg/sql/schemachanger/scop", "//pkg/sql/schemachanger/scpb", "//pkg/sql/schemachanger/scplan", - "//pkg/util/log", - "//pkg/util/timeutil", "@com_github_cockroachdb_errors//:errors", + "@com_github_cockroachdb_redact//:redact", ], ) diff --git a/pkg/sql/schemachanger/scrun/scrun.go b/pkg/sql/schemachanger/scrun/scrun.go index 0df19a0194db..b9352064750f 100644 --- a/pkg/sql/schemachanger/scrun/scrun.go +++ b/pkg/sql/schemachanger/scrun/scrun.go @@ -24,9 +24,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scop" "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scpb" "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scplan" - "github.com/cockroachdb/cockroach/pkg/util/log" - "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" + "github.com/cockroachdb/redact" ) // RunStatementPhase executes in-transaction schema changes for the targeted @@ -158,22 +157,15 @@ func executeStage( stageIdx int, stage scplan.Stage, ) (err error) { - - log.Infof(ctx, "executing %s (rollback=%v)", stage, p.InRollback) - + defer scerrors.HandleErrorOrPanic(ctx, &err, + "executing declarative schema change %s (rollback=%v)", + redact.Safe(stage), redact.Safe(p.InRollback), + )() if knobs != nil && knobs.BeforeStage != nil { if err := knobs.BeforeStage(p, stageIdx); err != nil { return err } } - - start := timeutil.Now() - defer func() { - if log.ExpensiveLogEnabled(ctx, 2) { - log.Infof(ctx, "executing %s (rollback=%v) took %v: err = %v", - stage, p.InRollback, timeutil.Since(start), err) - } - }() if err := scexec.ExecuteStage(ctx, deps, stage.Ops()); err != nil { // Don't go through the effort to wrap the error if it's a retry or it's a // cancelation. @@ -209,7 +201,10 @@ func makeState( descriptorIDs []descpb.ID, rollback bool, withCatalog withCatalogFunc, -) (scpb.CurrentState, error) { +) (state scpb.CurrentState, err error) { + defer scerrors.HandleErrorOrPanic(ctx, &err, + "rebuilding declarative schema change state from descriptors %v", descriptorIDs, + )() descError := func(desc catalog.Descriptor, err error) error { return errors.Wrapf(err, "descriptor %q (%d)", desc.GetName(), desc.GetID()) } @@ -273,7 +268,7 @@ func makeState( }); err != nil { return scpb.CurrentState{}, err } - state, err := scpb.MakeCurrentStateFromDescriptors(descriptorStates) + state, err = scpb.MakeCurrentStateFromDescriptors(descriptorStates) if err != nil { return scpb.CurrentState{}, err } diff --git a/pkg/testutils/lint/passes/fmtsafe/functions.go b/pkg/testutils/lint/passes/fmtsafe/functions.go index d5a2ffa82b73..0432edb60e9c 100644 --- a/pkg/testutils/lint/passes/fmtsafe/functions.go +++ b/pkg/testutils/lint/passes/fmtsafe/functions.go @@ -138,6 +138,8 @@ var requireConstFmt = map[string]bool{ "(*github.com/cockroachdb/cockroach/pkg/sql/logictest.logicTest).Errorf": true, "(*github.com/cockroachdb/cockroach/pkg/sql/logictest.logicTest).Fatalf": true, + "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scerrors.HandleErrorOrPanic": true, + "github.com/cockroachdb/cockroach/pkg/server.serverErrorf": true, "github.com/cockroachdb/cockroach/pkg/server.guaranteedExitFatal": true,