Skip to content

Commit

Permalink
Merge pull request #79 from erikgrinaker/step-down-on-removal
Browse files Browse the repository at this point in the history
  • Loading branch information
tbg authored Jul 17, 2023
2 parents 9903aa6 + ee0fe9d commit 72a6e6c
Show file tree
Hide file tree
Showing 6 changed files with 471 additions and 19 deletions.
15 changes: 13 additions & 2 deletions raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,13 @@ type Config struct {
// This option may be removed once false positives are no longer possible.
// See: https://github.com/etcd-io/raft/issues/80
DisableConfChangeValidation bool

// StepDownOnRemoval makes the leader step down when it is removed from the
// group or demoted to a learner.
//
// This behavior will become unconditional in the future. See:
// https://github.com/etcd-io/raft/issues/83
StepDownOnRemoval bool
}

func (c *Config) validate() error {
Expand Down Expand Up @@ -408,6 +415,7 @@ type raft struct {
// when raft changes its state to follower or candidate.
randomizedElectionTimeout int
disableProposalForwarding bool
stepDownOnRemoval bool

tick func()
step stepFunc
Expand Down Expand Up @@ -447,6 +455,7 @@ func newRaft(c *Config) *raft {
readOnly: newReadOnly(c.ReadOnlyOption),
disableProposalForwarding: c.DisableProposalForwarding,
disableConfChangeValidation: c.DisableConfChangeValidation,
stepDownOnRemoval: c.StepDownOnRemoval,
}

cfg, prs, err := confchange.Restore(confchange.Changer{
Expand Down Expand Up @@ -1918,15 +1927,17 @@ func (r *raft) switchToConfig(cfg tracker.Config, prs tracker.ProgressMap) pb.Co
r.isLearner = ok && pr.IsLearner

if (!ok || r.isLearner) && r.state == StateLeader {
// This node is leader and was removed or demoted, step down.
// This node is leader and was removed or demoted, step down if requested.
//
// We prevent demotions at the time writing but hypothetically we handle
// them the same way as removing the leader.
//
// TODO(tbg): ask follower with largest Match to TimeoutNow (to avoid
// interruption). This might still drop some proposals but it's better than
// nothing.
r.becomeFollower(r.Term, None)
if r.stepDownOnRemoval {
r.becomeFollower(r.Term, None)
}
return cs
}

Expand Down
2 changes: 2 additions & 0 deletions rafttest/interaction_env_handler_add_nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ func (env *InteractionEnv) handleAddNodes(t *testing.T, d datadriven.TestData) e
default:
return fmt.Errorf("invalid read-only option %q", arg.Vals[i])
}
case "step-down-on-removal":
arg.Scan(t, i, &cfg.StepDownOnRemoval)
}
}
}
Expand Down
87 changes: 74 additions & 13 deletions testdata/confchange_v1_remove_leader.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,10 @@ propose 1 bar
----
ok

# n1 applies the conf change, removing itself and stepping down. But it still
# has an uncommitted 'bar' entry in the log that it sends out appends for first.
# n1 applies the conf change, so it has now removed itself. But it still has
# an uncommitted entry in the log. If the leader unconditionally counted itself
# as part of the commit quorum, we'd be in trouble. In the block below, we see
# it send out appends to the other nodes for the 'bar' entry.
stabilize 1
----
> 1 handling Ready
Expand All @@ -104,14 +106,10 @@ stabilize 1
1->2 MsgApp Term:1 Log:1/6 Commit:5
1->3 MsgApp Term:1 Log:1/6 Commit:5
INFO 1 switched to configuration voters=(2 3)
INFO 1 became follower at term 1
> 1 handling Ready
Ready MustSync=false:
Lead:0 State:StateFollower

raft-state
----
1: StateFollower (Non-Voter) Term:1 Lead:0
1: StateLeader (Non-Voter) Term:1 Lead:1
2: StateFollower (Voter) Term:1 Lead:1
3: StateFollower (Voter) Term:1 Lead:1

Expand All @@ -136,7 +134,7 @@ stabilize 2
2->1 MsgAppResp Term:1 Log:0/6
INFO 2 switched to configuration voters=(2 3)

# ...because the old leader n1 ignores the append responses.
# ... which thankfully is what we see on the leader.
stabilize 1
----
> 1 receiving messages
Expand Down Expand Up @@ -176,14 +174,77 @@ stabilize
3->1 MsgAppResp Term:1 Log:0/6
3->1 MsgAppResp Term:1 Log:0/6
3->1 MsgAppResp Term:1 Log:0/6
> 1 handling Ready
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:6
CommittedEntries:
1/6 EntryNormal "bar"
Messages:
1->2 MsgApp Term:1 Log:1/6 Commit:6
1->3 MsgApp Term:1 Log:1/6 Commit:6
> 2 receiving messages
1->2 MsgApp Term:1 Log:1/6 Commit:6
> 3 receiving messages
1->3 MsgApp Term:1 Log:1/6 Commit:6
> 2 handling Ready
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:6
CommittedEntries:
1/6 EntryNormal "bar"
Messages:
2->1 MsgAppResp Term:1 Log:0/6
> 3 handling Ready
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:6
CommittedEntries:
1/6 EntryNormal "bar"
Messages:
3->1 MsgAppResp Term:1 Log:0/6
> 1 receiving messages
2->1 MsgAppResp Term:1 Log:0/6
3->1 MsgAppResp Term:1 Log:0/6

# n1 can no longer propose.
# However not all is well. n1 is still leader but unconditionally drops all
# proposals on the floor, so we're effectively stuck if it still heartbeats
# its followers...
propose 1 baz
----
INFO 1 no leader at term 1; dropping proposal
raft proposal dropped

# Nor can it campaign to become leader.
campaign 1
tick-heartbeat 1
----
ok

# ... which, uh oh, it does.
# TODO(tbg): change behavior so that a leader that is removed immediately steps
# down, and initiates an optimistic handover.
stabilize
----
WARN 1 is unpromotable and can not campaign
> 1 handling Ready
Ready MustSync=false:
Messages:
1->2 MsgHeartbeat Term:1 Log:0/0 Commit:6
1->3 MsgHeartbeat Term:1 Log:0/0 Commit:6
> 2 receiving messages
1->2 MsgHeartbeat Term:1 Log:0/0 Commit:6
> 3 receiving messages
1->3 MsgHeartbeat Term:1 Log:0/0 Commit:6
> 2 handling Ready
Ready MustSync=false:
Messages:
2->1 MsgHeartbeatResp Term:1 Log:0/0
> 3 handling Ready
Ready MustSync=false:
Messages:
3->1 MsgHeartbeatResp Term:1 Log:0/0
> 1 receiving messages
2->1 MsgHeartbeatResp Term:1 Log:0/0
3->1 MsgHeartbeatResp Term:1 Log:0/0

# Just confirming the issue above - leader does not automatically step down.
# Expected behavior: a new leader is elected after an election timeout.
raft-state
----
1: StateLeader (Non-Voter) Term:1 Lead:1
2: StateFollower (Voter) Term:1 Lead:1
3: StateFollower (Voter) Term:1 Lead:1
190 changes: 190 additions & 0 deletions testdata/confchange_v1_remove_leader_stepdown.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
# We'll turn this back on after the boilerplate.
log-level none
----
ok

# Run a V1 membership change that removes the leader, asking it
# to step down on removal.
# Bootstrap n1, n2, n3.
add-nodes 3 voters=(1,2,3) index=2 step-down-on-removal=true
----
ok

campaign 1
----
ok

stabilize
----
ok

log-level debug
----
ok

raft-state
----
1: StateLeader (Voter) Term:1 Lead:1
2: StateFollower (Voter) Term:1 Lead:1
3: StateFollower (Voter) Term:1 Lead:1

# Start removing n1.
propose-conf-change 1 v1=true
r1
----
ok

raft-state
----
1: StateLeader (Voter) Term:1 Lead:1
2: StateFollower (Voter) Term:1 Lead:1
3: StateFollower (Voter) Term:1 Lead:1

# Propose an extra entry which will be sent out together with the conf change.
propose 1 foo
----
ok

# Send out the corresponding appends.
process-ready 1
----
Ready MustSync=true:
Entries:
1/4 EntryConfChange r1
1/5 EntryNormal "foo"
Messages:
1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChange r1]
1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChange r1]
1->2 MsgApp Term:1 Log:1/4 Commit:3 Entries:[1/5 EntryNormal "foo"]
1->3 MsgApp Term:1 Log:1/4 Commit:3 Entries:[1/5 EntryNormal "foo"]

# Send response from n2 (which is enough to commit the entries so far next time
# n1 runs).
stabilize 2
----
> 2 receiving messages
1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChange r1]
1->2 MsgApp Term:1 Log:1/4 Commit:3 Entries:[1/5 EntryNormal "foo"]
> 2 handling Ready
Ready MustSync=true:
Entries:
1/4 EntryConfChange r1
1/5 EntryNormal "foo"
Messages:
2->1 MsgAppResp Term:1 Log:0/4
2->1 MsgAppResp Term:1 Log:0/5

# Put another entry in n1's log.
propose 1 bar
----
ok

# n1 applies the conf change, removing itself and stepping down. But it still
# has an uncommitted 'bar' entry in the log that it sends out appends for first.
stabilize 1
----
> 1 handling Ready
Ready MustSync=true:
Entries:
1/6 EntryNormal "bar"
Messages:
1->2 MsgApp Term:1 Log:1/5 Commit:3 Entries:[1/6 EntryNormal "bar"]
1->3 MsgApp Term:1 Log:1/5 Commit:3 Entries:[1/6 EntryNormal "bar"]
> 1 receiving messages
2->1 MsgAppResp Term:1 Log:0/4
2->1 MsgAppResp Term:1 Log:0/5
> 1 handling Ready
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:5
CommittedEntries:
1/4 EntryConfChange r1
1/5 EntryNormal "foo"
Messages:
1->2 MsgApp Term:1 Log:1/6 Commit:4
1->3 MsgApp Term:1 Log:1/6 Commit:4
1->2 MsgApp Term:1 Log:1/6 Commit:5
1->3 MsgApp Term:1 Log:1/6 Commit:5
INFO 1 switched to configuration voters=(2 3)
INFO 1 became follower at term 1
> 1 handling Ready
Ready MustSync=false:
Lead:0 State:StateFollower

raft-state
----
1: StateFollower (Non-Voter) Term:1 Lead:0
2: StateFollower (Voter) Term:1 Lead:1
3: StateFollower (Voter) Term:1 Lead:1

# n2 responds, n3 doesn't yet. Quorum for 'bar' should not be reached...
stabilize 2
----
> 2 receiving messages
1->2 MsgApp Term:1 Log:1/5 Commit:3 Entries:[1/6 EntryNormal "bar"]
1->2 MsgApp Term:1 Log:1/6 Commit:4
1->2 MsgApp Term:1 Log:1/6 Commit:5
> 2 handling Ready
Ready MustSync=true:
HardState Term:1 Vote:1 Commit:5
Entries:
1/6 EntryNormal "bar"
CommittedEntries:
1/4 EntryConfChange r1
1/5 EntryNormal "foo"
Messages:
2->1 MsgAppResp Term:1 Log:0/6
2->1 MsgAppResp Term:1 Log:0/6
2->1 MsgAppResp Term:1 Log:0/6
INFO 2 switched to configuration voters=(2 3)

# ...because the old leader n1 ignores the append responses.
stabilize 1
----
> 1 receiving messages
2->1 MsgAppResp Term:1 Log:0/6
2->1 MsgAppResp Term:1 Log:0/6
2->1 MsgAppResp Term:1 Log:0/6

# When n3 responds, quorum is reached and everything falls into place.
stabilize
----
> 3 receiving messages
1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChange r1]
1->3 MsgApp Term:1 Log:1/4 Commit:3 Entries:[1/5 EntryNormal "foo"]
1->3 MsgApp Term:1 Log:1/5 Commit:3 Entries:[1/6 EntryNormal "bar"]
1->3 MsgApp Term:1 Log:1/6 Commit:4
1->3 MsgApp Term:1 Log:1/6 Commit:5
> 3 handling Ready
Ready MustSync=true:
HardState Term:1 Vote:1 Commit:5
Entries:
1/4 EntryConfChange r1
1/5 EntryNormal "foo"
1/6 EntryNormal "bar"
CommittedEntries:
1/4 EntryConfChange r1
1/5 EntryNormal "foo"
Messages:
3->1 MsgAppResp Term:1 Log:0/4
3->1 MsgAppResp Term:1 Log:0/5
3->1 MsgAppResp Term:1 Log:0/6
3->1 MsgAppResp Term:1 Log:0/6
3->1 MsgAppResp Term:1 Log:0/6
INFO 3 switched to configuration voters=(2 3)
> 1 receiving messages
3->1 MsgAppResp Term:1 Log:0/4
3->1 MsgAppResp Term:1 Log:0/5
3->1 MsgAppResp Term:1 Log:0/6
3->1 MsgAppResp Term:1 Log:0/6
3->1 MsgAppResp Term:1 Log:0/6

# n1 can no longer propose.
propose 1 baz
----
INFO 1 no leader at term 1; dropping proposal
raft proposal dropped

# Nor can it campaign to become leader.
campaign 1
----
WARN 1 is unpromotable and can not campaign
4 changes: 0 additions & 4 deletions testdata/confchange_v2_replace_leader.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@ raft-state
2: StateFollower (Voter) Term:1 Lead:1
3: StateFollower (Voter) Term:1 Lead:1

log-level info
----
ok

# create n4
add-nodes 1
----
Expand Down
Loading

0 comments on commit 72a6e6c

Please sign in to comment.