Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

op-node: handle sequencer errors with derivation-levels, reset when L1 origins are inconsistent #4930

Merged
merged 3 commits into from
Feb 24, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
op-node: fix-sequencer-err-handling PR 4930 suggestions
  • Loading branch information
protolambda committed Feb 24, 2023
commit 91516a6d2d42bcca817413b6341a2c1ddd429af5
6 changes: 4 additions & 2 deletions op-e2e/actions/l2_sequencer.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/stretchr/testify/require"

"github.com/ethereum-optimism/optimism/op-node/eth"
"github.com/ethereum-optimism/optimism/op-node/metrics"
"github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-node/rollup/derive"
"github.com/ethereum-optimism/optimism/op-node/rollup/driver"
Expand Down Expand Up @@ -47,7 +48,7 @@ func NewL2Sequencer(t Testing, log log.Logger, l1 derive.L1Fetcher, eng L2API, c
}
return &L2Sequencer{
L2Verifier: *ver,
sequencer: driver.NewSequencer(log, cfg, ver.derivation, attrBuilder, l1OriginSelector),
sequencer: driver.NewSequencer(log, cfg, ver.derivation, attrBuilder, l1OriginSelector, metrics.NoopMetrics),
mockL1OriginSelector: l1OriginSelector,
failL2GossipUnsafeBlock: nil,
}
Expand Down Expand Up @@ -121,7 +122,7 @@ func (s *L2Sequencer) ActBuildToL1Head(t Testing) {
// ActBuildToL1HeadUnsafe builds empty blocks until (incl.) the L1 head becomes the L1 origin of the L2 head
func (s *L2Sequencer) ActBuildToL1HeadUnsafe(t Testing) {
for s.derivation.UnsafeL2Head().L1Origin.Number < s.l1State.L1Head().Number {
// Note: the
// Note: the derivation pipeline does not run, we are just sequencing a block on top of the existing L2 chain.
s.ActL2StartBlock(t)
s.ActL2EndBlock(t)
}
Expand All @@ -144,6 +145,7 @@ func (s *L2Sequencer) ActBuildToL1HeadExcl(t Testing) {
// ActBuildToL1HeadExclUnsafe builds empty blocks until (excl.) the L1 head becomes the L1 origin of the L2 head, without safe-head progression.
func (s *L2Sequencer) ActBuildToL1HeadExclUnsafe(t Testing) {
for {
// Note: the derivation pipeline does not run, we are just sequencing a block on top of the existing L2 chain.
nextOrigin, err := s.mockL1OriginSelector.FindL1Origin(t.Ctx(), s.derivation.UnsafeL2Head())
require.NoError(t, err)
if nextOrigin.Number >= s.l1State.L1Head().Number {
Expand Down
7 changes: 1 addition & 6 deletions op-e2e/actions/l2_sequencer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,7 @@ func TestL2Sequencer_SequencerDrift(gt *testing.T) {
// while the verifier-codepath only ever sees the valid post-reorg L1 chain.
func TestL2Sequencer_SequencerOnlyReorg(gt *testing.T) {
t := NewDefaultTesting(gt)
p := &e2eutils.TestParams{
MaxSequencerDrift: 20, // larger than L1 block time we simulate in this test (12)
SequencerWindowSize: 24,
ChannelTimeout: 20,
}
dp := e2eutils.MakeDeployParams(t, p)
dp := e2eutils.MakeDeployParams(t, defaultRollupTestParams)
sd := e2eutils.Setup(t, dp, defaultAlloc)
log := testlog.Logger(t, log.LvlDebug)
miner, _, sequencer := setupSequencerTest(t, sd, log)
Expand Down
24 changes: 24 additions & 0 deletions op-node/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ type Metricer interface {
RecordUnsafePayloadsBuffer(length uint64, memSize uint64, next eth.BlockID)
CountSequencedTxs(count int)
RecordL1ReorgDepth(d uint64)
RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID)
RecordSequencerReset()
RecordGossipEvent(evType int32)
IncPeerCount()
DecPeerCount()
Expand Down Expand Up @@ -86,6 +88,9 @@ type Metrics struct {
SequencingErrors *EventMetrics
PublishingErrors *EventMetrics

SequencerInconsistentL1Origin *EventMetrics
SequencerResets *EventMetrics

SequencerBuildingDiffDurationSeconds prometheus.Histogram
SequencerBuildingDiffTotal prometheus.Counter

Expand Down Expand Up @@ -204,6 +209,9 @@ func NewMetrics(procName string) *Metrics {
SequencingErrors: NewEventMetrics(factory, ns, "sequencing_errors", "sequencing errors"),
PublishingErrors: NewEventMetrics(factory, ns, "publishing_errors", "p2p publishing errors"),

SequencerInconsistentL1Origin: NewEventMetrics(factory, ns, "sequencer_inconsistent_l1_origin", "events when the sequencer selects an inconsistent L1 origin"),
SequencerResets: NewEventMetrics(factory, ns, "sequencer_resets", "sequencer resets"),

UnsafePayloadsBufferLen: factory.NewGauge(prometheus.GaugeOpts{
Namespace: ns,
Name: "unsafe_payloads_buffer_len",
Expand Down Expand Up @@ -455,6 +463,16 @@ func (m *Metrics) RecordL1ReorgDepth(d uint64) {
m.L1ReorgDepth.Observe(float64(d))
}

func (m *Metrics) RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID) {
m.SequencerInconsistentL1Origin.RecordEvent()
m.recordRef("l1_origin", "inconsistent_from", from.Number, 0, from.Hash)
m.recordRef("l1_origin", "inconsistent_to", to.Number, 0, to.Hash)
}

func (m *Metrics) RecordSequencerReset() {
m.SequencerResets.RecordEvent()
}

func (m *Metrics) RecordGossipEvent(evType int32) {
m.GossipEventsTotal.WithLabelValues(pb.TraceEvent_Type_name[evType]).Inc()
}
Expand Down Expand Up @@ -584,6 +602,12 @@ func (n *noopMetricer) CountSequencedTxs(count int) {
func (n *noopMetricer) RecordL1ReorgDepth(d uint64) {
}

func (n *noopMetricer) RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID) {
}

func (n *noopMetricer) RecordSequencerReset() {
}

func (n *noopMetricer) RecordGossipEvent(evType int32) {
}

Expand Down
3 changes: 3 additions & 0 deletions op-node/rollup/derive/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ type L1Fetcher interface {
L1TransactionFetcher
}

protolambda marked this conversation as resolved.
Show resolved Hide resolved
// ResettableEngineControl wraps EngineControl with reset-functionality,
// which handles reorgs like the derivation pipeline:
// by determining the last valid block references to continue from.
type ResettableEngineControl interface {
EngineControl
Reset()
Expand Down
3 changes: 2 additions & 1 deletion op-node/rollup/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ type Metrics interface {
RecordL1ReorgDepth(d uint64)

EngineMetrics
SequencerMetrics
}

type L1Chain interface {
Expand Down Expand Up @@ -88,7 +89,7 @@ func NewDriver(driverCfg *Config, cfg *rollup.Config, l2 L2Chain, l1 L1Chain, ne
attrBuilder := derive.NewFetchingAttributesBuilder(cfg, l1, l2)
engine := derivationPipeline
meteredEngine := NewMeteredEngine(cfg, engine, metrics, log)
sequencer := NewSequencer(log, cfg, meteredEngine, attrBuilder, findL1Origin)
sequencer := NewSequencer(log, cfg, meteredEngine, attrBuilder, findL1Origin, metrics)

return &Driver{
l1State: l1State,
Expand Down
2 changes: 1 addition & 1 deletion op-node/rollup/driver/metered_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ type MeteredEngine struct {
buildingStartTime time.Time
}

// MeteredEngine implements derive.EngineControl
// MeteredEngine implements derive.ResettableEngineControl
var _ derive.ResettableEngineControl = (*MeteredEngine)(nil)

func NewMeteredEngine(cfg *rollup.Config, inner derive.ResettableEngineControl, metrics EngineMetrics, log log.Logger) *MeteredEngine {
Expand Down
14 changes: 13 additions & 1 deletion op-node/rollup/driver/sequencer.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ type L1OriginSelectorIface interface {
FindL1Origin(ctx context.Context, l2Head eth.L2BlockRef) (eth.L1BlockRef, error)
}

type SequencerMetrics interface {
RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID)
RecordSequencerReset()
}

// Sequencer implements the sequencing interface of the driver: it starts and completes block building jobs.
type Sequencer struct {
log log.Logger
Expand All @@ -34,20 +39,23 @@ type Sequencer struct {
attrBuilder derive.AttributesBuilder
l1OriginSelector L1OriginSelectorIface

metrics SequencerMetrics

// timeNow enables sequencer testing to mock the time
timeNow func() time.Time

nextAction time.Time
}

func NewSequencer(log log.Logger, cfg *rollup.Config, engine derive.ResettableEngineControl, attributesBuilder derive.AttributesBuilder, l1OriginSelector L1OriginSelectorIface) *Sequencer {
func NewSequencer(log log.Logger, cfg *rollup.Config, engine derive.ResettableEngineControl, attributesBuilder derive.AttributesBuilder, l1OriginSelector L1OriginSelectorIface, metrics SequencerMetrics) *Sequencer {
return &Sequencer{
log: log,
config: cfg,
engine: engine,
timeNow: time.Now,
attrBuilder: attributesBuilder,
l1OriginSelector: l1OriginSelector,
metrics: metrics,
}
}

Expand All @@ -63,6 +71,7 @@ func (d *Sequencer) StartBuildingBlock(ctx context.Context) error {
}

if !(l2Head.L1Origin.Hash == l1Origin.ParentHash || l2Head.L1Origin.Hash == l1Origin.Hash) {
d.metrics.RecordSequencerInconsistentL1Origin(l2Head.L1Origin, l1Origin.ID())
return derive.NewResetError(fmt.Errorf("cannot build new L2 block with L1 origin %s (parent L1 %s) on current L2 head %s with L1 origin %s", l1Origin, l1Origin.ParentHash, l2Head, l2Head.L1Origin))
}

Expand Down Expand Up @@ -169,6 +178,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP
return nil, err // bubble up critical errors.
} else if errors.Is(err, derive.ErrReset) {
d.log.Error("sequencer failed to seal new block, requiring derivation reset", "err", err)
d.metrics.RecordSequencerReset()
d.nextAction = d.timeNow().Add(time.Second * time.Duration(d.config.BlockTime)) // hold off from sequencing for a full block
if buildingID != (eth.PayloadID{}) { // cancel what we were doing
protolambda marked this conversation as resolved.
Show resolved Hide resolved
d.CancelBuildingBlock(ctx)
Expand All @@ -180,6 +190,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP
// Any unfinished block building work eventually times out, and will be cleaned up that way.
} else {
d.log.Error("sequencer failed to seal block with unclassified error", "err", err)
d.nextAction = d.timeNow().Add(time.Second)
if buildingID != (eth.PayloadID{}) { // don't keep stale block building jobs around, try to cancel them
d.CancelBuildingBlock(ctx)
}
Expand All @@ -196,6 +207,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP
return nil, err
} else if errors.Is(err, derive.ErrReset) {
d.log.Error("sequencer failed to seal new block, requiring derivation reset", "err", err)
d.metrics.RecordSequencerReset()
d.nextAction = d.timeNow().Add(time.Second * time.Duration(d.config.BlockTime)) // hold off from sequencing for a full block
d.engine.Reset()
} else if errors.Is(err, derive.ErrTemporary) {
Expand Down
3 changes: 2 additions & 1 deletion op-node/rollup/driver/sequencer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/stretchr/testify/require"

"github.com/ethereum-optimism/optimism/op-node/eth"
"github.com/ethereum-optimism/optimism/op-node/metrics"
"github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-node/rollup/derive"
"github.com/ethereum-optimism/optimism/op-node/testlog"
Expand Down Expand Up @@ -301,7 +302,7 @@ func TestSequencerChaosMonkey(t *testing.T) {
}
})

seq := NewSequencer(log, cfg, engControl, attrBuilder, originSelector)
seq := NewSequencer(log, cfg, engControl, attrBuilder, originSelector, metrics.NoopMetrics)
seq.timeNow = clockFn

// try to build 1000 blocks, with 5x as many planning attempts, to handle errors and clock problems
Expand Down