Skip to content

Commit

Permalink
raft: add a batch of interaction-driven conf change tests
Browse files Browse the repository at this point in the history
Verifiy the behavior in various v1 and v2 conf change operations.
This also includes various fixups, notably it adds protection
against transitioning in and out of new configs when this is not
permissible.

There are more threads to pull, but those are left for future commits.
  • Loading branch information
tbg committed Aug 15, 2019
1 parent 4e19150 commit 06f17c1
Show file tree
Hide file tree
Showing 10 changed files with 782 additions and 27 deletions.
34 changes: 30 additions & 4 deletions raft/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -1036,10 +1036,36 @@ func stepLeader(r *raft, m pb.Message) error {

for i := range m.Entries {
e := &m.Entries[i]
if e.Type == pb.EntryConfChange || e.Type == pb.EntryConfChangeV2 {
if r.pendingConfIndex > r.raftLog.applied {
r.logger.Infof("%x propose conf %s ignored since pending unapplied configuration [index %d, applied %d]",
r.id, e, r.pendingConfIndex, r.raftLog.applied)
var cc pb.ConfChangeI
if e.Type == pb.EntryConfChange {
var ccc pb.ConfChange
if err := ccc.Unmarshal(e.Data); err != nil {
panic(err)
}
cc = ccc
} else if e.Type == pb.EntryConfChangeV2 {
var ccc pb.ConfChangeV2
if err := ccc.Unmarshal(e.Data); err != nil {
panic(err)
}
cc = ccc
}
if cc != nil {
alreadyPending := r.pendingConfIndex > r.raftLog.applied
alreadyJoint := len(r.prs.Config.Voters[1]) > 0
wantsLeaveJoint := len(cc.AsV2().Changes) == 0

var refused string
if alreadyPending {
refused = fmt.Sprintf("possible unapplied conf change at index %d (applied to %d)", r.pendingConfIndex, r.raftLog.applied)
} else if alreadyJoint && !wantsLeaveJoint {
refused = "must transition out of joint config first"
} else if !alreadyJoint && wantsLeaveJoint {
refused = "not in joint state; refusing empty conf change"
}

if refused != "" {
r.logger.Infof("%x ignoring conf change %v at config %s: %s", r.id, cc, r.prs.Config, refused)
m.Entries[i] = pb.Entry{Type: pb.EntryNormal}
} else {
r.pendingConfIndex = r.raftLog.lastIndex() + uint64(i) + 1
Expand Down
16 changes: 7 additions & 9 deletions raft/rafttest/interaction_env_handler_process_ready.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (

"github.com/cockroachdb/datadriven"
"go.etcd.io/etcd/raft"
"go.etcd.io/etcd/raft/quorum"
"go.etcd.io/etcd/raft/raftpb"
)

Expand Down Expand Up @@ -50,20 +49,21 @@ func (env *InteractionEnv) ProcessReady(idx int) error {
}
for _, ent := range rd.CommittedEntries {
var update []byte
var cs *raftpb.ConfState
switch ent.Type {
case raftpb.EntryConfChange:
var cc raftpb.ConfChange
if err := cc.Unmarshal(ent.Data); err != nil {
return err
}
update = cc.Context
rn.ApplyConfChange(cc)
cs = rn.ApplyConfChange(cc)
case raftpb.EntryConfChangeV2:
var cc raftpb.ConfChangeV2
if err := cc.Unmarshal(ent.Data); err != nil {
return err
}
rn.ApplyConfChange(cc)
cs = rn.ApplyConfChange(cc)
update = cc.Context
default:
update = ent.Data
Expand All @@ -78,13 +78,11 @@ func (env *InteractionEnv) ProcessReady(idx int) error {
snap.Data = append(snap.Data, update...)
snap.Metadata.Index = ent.Index
snap.Metadata.Term = ent.Term
cfg := rn.Status().Config
snap.Metadata.ConfState = raftpb.ConfState{
Voters: cfg.Voters[0].Slice(),
VotersOutgoing: cfg.Voters[1].Slice(),
Learners: quorum.MajorityConfig(cfg.Learners).Slice(),
LearnersNext: quorum.MajorityConfig(cfg.LearnersNext).Slice(),
if cs == nil {
sl := env.Nodes[idx].History
cs = &sl[len(sl)-1].Metadata.ConfState
}
snap.Metadata.ConfState = *cs
env.Nodes[idx].History = append(env.Nodes[idx].History, snap)
}
for _, msg := range rd.Messages {
Expand Down
97 changes: 97 additions & 0 deletions raft/testdata/confchange_v1_add_single.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Run a V1 membership change that adds a single voter.

# Bootstrap n1.
add-nodes 1 voters=(1) index=2
----
INFO 1 switched to configuration voters=(1)
INFO 1 became follower at term 0
INFO newRaft 1 [peers: [1], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1]

campaign 1
----
INFO 1 is starting a new election at term 0
INFO 1 became candidate at term 1
INFO 1 received MsgVoteResp from 1 at term 1
INFO 1 became leader at term 1

# Add v2 (with an auto transition).
propose-conf-change 1 v1=true
v2
----
ok

# Pull n2 out of thin air.
add-nodes 1
----
INFO 2 switched to configuration voters=()
INFO 2 became follower at term 0
INFO newRaft 2 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0]

# n1 commits the conf change using itself as commit quorum, immediately transitions into
# the final config, and catches up n2. Note that it's using an EntryConfChange, not an
# EntryConfChangeV2, so this is compatible with nodes that don't know about V2 conf changes.
stabilize
----
> 1 handling Ready
INFO 1 switched to configuration voters=(1 2)
Ready MustSync=true:
Lead:1 State:StateLeader
HardState Term:1 Vote:1 Commit:4
Entries:
1/3 EntryNormal ""
1/4 EntryConfChange v2
CommittedEntries:
1/3 EntryNormal ""
1/4 EntryConfChange v2
> 1 handling Ready
Ready MustSync=false:
Messages:
1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChange v2]
> delivering messages
1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChange v2]
INFO 2 [term: 0] received a MsgApp message with higher term from 1 [term: 1]
INFO 2 became follower at term 1
DEBUG 2 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1
> 2 handling Ready
Ready MustSync=true:
Lead:1 State:StateFollower
HardState Term:1 Commit:0
Messages:
2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0)
> delivering messages
2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0)
DEBUG 1 received MsgAppResp(MsgApp was rejected, lastindex: 0) from 2 for index 3
DEBUG 1 decreased progress of 2 to [StateProbe match=0 next=1]
DEBUG 1 [firstindex: 3, commit: 4] sent snapshot[index: 4, term: 1] to 2 [StateProbe match=0 next=1]
DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=1 paused pendingSnap=4]
> 1 handling Ready
Ready MustSync=false:
Messages:
1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false
> delivering messages
1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false
INFO log [committed=0, applied=0, unstable.offset=1, len(unstable.Entries)=0] starts to restore snapshot [index: 4, term: 1]
INFO 2 switched to configuration voters=(1 2)
INFO 2 [commit: 4, lastindex: 4, lastterm: 1] restored snapshot [index: 4, term: 1]
INFO 2 [commit: 4] restored snapshot [index: 4, term: 1]
> 2 handling Ready
Ready MustSync=false:
HardState Term:1 Commit:4
Snapshot Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false
Messages:
2->1 MsgAppResp Term:1 Log:0/4
> delivering messages
2->1 MsgAppResp Term:1 Log:0/4
DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 2 [StateSnapshot match=4 next=5 paused pendingSnap=4]
> 1 handling Ready
Ready MustSync=false:
Messages:
1->2 MsgApp Term:1 Log:1/4 Commit:4
> delivering messages
1->2 MsgApp Term:1 Log:1/4 Commit:4
> 2 handling Ready
Ready MustSync=false:
Messages:
2->1 MsgAppResp Term:1 Log:0/4
> delivering messages
2->1 MsgAppResp Term:1 Log:0/4
103 changes: 103 additions & 0 deletions raft/testdata/confchange_v1_remove_leader.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# We'll turn this back on after the boilerplate.
log-level none
----
ok

# Run a V1 membership change that removes the leader.
# Bootstrap n1, n2, n3.
add-nodes 3 voters=(1,2,3) index=2
----
ok

campaign 1
----
ok

stabilize
----
ok (quiet)

log-level debug
----
ok

# Remove n1.
propose-conf-change 1 v1=true
r1
----
ok

stabilize
----
> 1 handling Ready
Ready MustSync=true:
Entries:
1/4 EntryConfChange r1
Messages:
1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChange r1]
1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChange r1]
> delivering messages
1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChange r1]
> delivering messages
1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChange r1]
> 2 handling Ready
Ready MustSync=true:
Entries:
1/4 EntryConfChange r1
Messages:
2->1 MsgAppResp Term:1 Log:0/4
> 3 handling Ready
Ready MustSync=true:
Entries:
1/4 EntryConfChange r1
Messages:
3->1 MsgAppResp Term:1 Log:0/4
> delivering messages
2->1 MsgAppResp Term:1 Log:0/4
3->1 MsgAppResp Term:1 Log:0/4
> 1 handling Ready
INFO 1 switched to configuration voters=(2 3)
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:4
CommittedEntries:
1/4 EntryConfChange r1
Messages:
1->2 MsgApp Term:1 Log:1/4 Commit:4
1->3 MsgApp Term:1 Log:1/4 Commit:4
> delivering messages
1->2 MsgApp Term:1 Log:1/4 Commit:4
> delivering messages
1->3 MsgApp Term:1 Log:1/4 Commit:4
> 2 handling Ready
INFO 2 switched to configuration voters=(2 3)
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:4
CommittedEntries:
1/4 EntryConfChange r1
Messages:
2->1 MsgAppResp Term:1 Log:0/4
> 3 handling Ready
INFO 3 switched to configuration voters=(2 3)
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:4
CommittedEntries:
1/4 EntryConfChange r1
Messages:
3->1 MsgAppResp Term:1 Log:0/4
> delivering messages
2->1 MsgAppResp Term:1 Log:0/4
3->1 MsgAppResp Term:1 Log:0/4

status 1
----
2: StateReplicate match=4 next=5
3: StateReplicate match=4 next=5

# TODO(tbg): the leader now drops any proposals, but if it has any other
# uncommitted proposals in its log already, it will likely try to distribute
# them which could be buggy. Test that.
propose-conf-change 1 v1=true
v1
----
raft proposal dropped

Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ INFO 1 became candidate at term 1
INFO 1 received MsgVoteResp from 1 at term 1
INFO 1 became leader at term 1

propose-conf-change 1
propose-conf-change 1 transition=auto
v2 v3
----
ok
Expand All @@ -32,7 +32,9 @@ INFO newRaft 3 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastter

# n1 immediately gets to commit & apply the conf change using only itself. We see that
# it starts transitioning out of that joint configuration (though we will only see that
# proposal in the next ready handling loop, when it is emitted).
# proposal in the next ready handling loop, when it is emitted). We also see that this
# is using joint consensus, which it has to since we're carrying out two additions at
# once.
process-ready 1
----
INFO 1 switched to configuration voters=(1 2 3)&&(1) autoleave
Expand Down Expand Up @@ -85,17 +87,17 @@ stabilize 1 2
> 1 handling Ready
Ready MustSync=false:
Messages:
1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1] Learners:[] LearnersNext:[]
1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:true
> delivering messages
1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1] Learners:[] LearnersNext:[]
1->2 MsgSnap Term:1 Log:0/0 Snapshot: Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:true
INFO log [committed=0, applied=0, unstable.offset=1, len(unstable.Entries)=0] starts to restore snapshot [index: 4, term: 1]
INFO 2 switched to configuration voters=(1 2 3)&&(1)
INFO 2 switched to configuration voters=(1 2 3)&&(1) autoleave
INFO 2 [commit: 4, lastindex: 4, lastterm: 1] restored snapshot [index: 4, term: 1]
INFO 2 [commit: 4] restored snapshot [index: 4, term: 1]
> 2 handling Ready
Ready MustSync=false:
HardState Term:1 Commit:4
Snapshot Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1] Learners:[] LearnersNext:[]
Snapshot Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:true
Messages:
2->1 MsgAppResp Term:1 Log:0/4
> delivering messages
Expand Down Expand Up @@ -159,17 +161,17 @@ stabilize 1 3
> 1 handling Ready
Ready MustSync=false:
Messages:
1->3 MsgSnap Term:1 Log:0/0 Snapshot: Index:5 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[]
1->3 MsgSnap Term:1 Log:0/0 Snapshot: Index:5 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false
> delivering messages
1->3 MsgSnap Term:1 Log:0/0 Snapshot: Index:5 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[]
1->3 MsgSnap Term:1 Log:0/0 Snapshot: Index:5 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false
INFO log [committed=0, applied=0, unstable.offset=1, len(unstable.Entries)=0] starts to restore snapshot [index: 5, term: 1]
INFO 3 switched to configuration voters=(1 2 3)
INFO 3 [commit: 5, lastindex: 5, lastterm: 1] restored snapshot [index: 5, term: 1]
INFO 3 [commit: 5] restored snapshot [index: 5, term: 1]
> 3 handling Ready
Ready MustSync=false:
HardState Term:1 Commit:5
Snapshot Index:5 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[]
Snapshot Index:5 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false
Messages:
3->1 MsgAppResp Term:1 Log:0/5
> delivering messages
Expand Down
Loading

0 comments on commit 06f17c1

Please sign in to comment.