Skip to content

Commit

Permalink
raft: only read after committing an entry from the current term
Browse files Browse the repository at this point in the history
Otherwise, a newly elected leader that's behind on commit/apply may serve stale
reads.
  • Loading branch information
erikgrinaker committed Jun 7, 2024
1 parent 9a9e022 commit 42230e5
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 15 deletions.
22 changes: 17 additions & 5 deletions src/raft/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -865,10 +865,6 @@ impl RawNode<Leader> {
}

// A follower received our heartbeat and confirms our leadership.
//
// TODO: this needs to commit and apply entries following a leader
// change too, otherwise we can serve stale reads if the new leader
// hadn't fully committed and applied entries yet.
Message::HeartbeatResponse { match_index, read_seq } => {
let (last_index, _) = self.log.get_last_index();
assert!(match_index <= last_index, "future match index");
Expand Down Expand Up @@ -1072,7 +1068,7 @@ impl RawNode<Leader> {
// If the commit index doesn't advance, do nothing. We don't assert on
// this, since the quorum value may regress e.g. following a restart or
// leader change where followers are initialized with log index 0.
let mut commit_index = self.log.get_commit_index().0;
let (mut commit_index, old_commit_term) = self.log.get_commit_index();
if quorum_index <= commit_index {
return Ok(commit_index);
}
Expand Down Expand Up @@ -1105,6 +1101,12 @@ impl RawNode<Leader> {
Ok(())
})?;

// If the commit term changed, there may be pending reads waiting for us
// to commit an entry from our own term. Execute them.
if old_commit_term != self.term {
self.maybe_read()?;
}

Ok(commit_index)
}

Expand All @@ -1115,6 +1117,16 @@ impl RawNode<Leader> {
return Ok(());
}

// It's only safe to read if we've committed and applied an entry from
// our own term (the leader appends an entry when elected). Otherwise we
// may be behind on application and serve stale reads, violating
// linearizability.
let (commit_index, commit_term) = self.log.get_commit_index();
let applied_index = self.state.get_applied_index();
if commit_term < self.term || applied_index < commit_index {
return Ok(());
}

// Determine the maximum read sequence confirmed by quorum.
let read_seq = self.quorum_value(
self.role
Expand Down
30 changes: 20 additions & 10 deletions src/raft/testscripts/node/request_leader_change_linearizability
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ n1@1 leader last=3@1 commit=3@1 apply=3 progress={2:3→4 3:3→4}
n2@1 follower(n1) last=3@1 commit=2@1 apply=2
n3@1 follower(n1) last=3@1 commit=2@1 apply=2

# n2 campaigns and wins.
# n2 now campaigns and wins, while being behind on commit/apply.
campaign 2
deliver
---
Expand All @@ -36,8 +36,8 @@ n1@2 → n2 CampaignResponse vote=true
n3@1 follower(n1) ⇨ n3@2 follower()
n3@2 → n2 CampaignResponse vote=true

# However, the initial append/heartbeat messages don't make it to the followers,
# so its commit index trails the previous leader.
# The initial append doesn't make it to the followers, so its commit index
# trails the previous leader.
partition 2
deliver 2
---
Expand All @@ -57,12 +57,11 @@ n1@2 follower() last=3@1 commit=3@1 apply=3
n2@2 leader last=4@2 commit=2@1 apply=2 progress={1:0→5 3:0→5}
n3@2 follower() last=3@1 commit=2@1 apply=2

# Reading from n2 should not result in a stale read.
#
# TODO: this currently results in a stale read, since n2 does not wait to commit
# and apply an entry from its own term before serving reads.
# Reading from n2 should not result in a stale read even if followers
# confirm the read sequence.
get 2 b
stabilize
deliver
deliver
---
c2@2 → n2 ClientRequest id=0x03 read 0x000162
n2@2 → n1 Heartbeat last_index=4 commit_index=2 read_seq=1
Expand All @@ -71,18 +70,29 @@ n1@2 follower() ⇨ n1@2 follower(n2)
n1@2 → n2 HeartbeatResponse match_index=0 read_seq=1
n3@2 follower() ⇨ n3@2 follower(n2)
n3@2 → n2 HeartbeatResponse match_index=0 read_seq=1
n2@2 → c2 ClientResponse id=0x03 read 0x0000
c2@2 get b ⇒ None
n2@2 → n1 Append base=3@1 []
n2@2 → n3 Append base=3@1 []

# However, the heartbeat resulted in probes for the dropped appends.
# The leader resends them.
deliver
deliver
---
n1@2 → n2 AppendResponse match_index=3
n3@2 → n2 AppendResponse match_index=3
n2@2 → n1 Append base=3@1 [4@2]
n2@2 → n3 Append base=3@1 [4@2]

# Once the followers acknowledge the appends the leader commits them. The read
# can now be served, resulting in an up-to-date b=2.
stabilize
---
n1@2 append 4@2 None
n1@2 → n2 AppendResponse match_index=4
n3@2 append 4@2 None
n3@2 → n2 AppendResponse match_index=4
n2@2 commit 4@2
n2@2 apply 3@1 put b=2
n2@2 apply 4@2 None
n2@2 → c2 ClientResponse id=0x03 read 0x00010132
c2@2 get b ⇒ 2

0 comments on commit 42230e5

Please sign in to comment.