From a5fdfc618e8b3aab514f3c48577650560e5a021e Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Sun, 9 Jun 2024 00:02:22 +0200 Subject: [PATCH] raft: use current term for `Log.append()` and as `splice()` bound --- src/raft/log.rs | 41 +++++++++++++++------------------ src/raft/node.rs | 2 +- src/raft/testscripts/log/append | 38 ++++++++++++++---------------- src/raft/testscripts/log/commit | 13 +++++++---- src/raft/testscripts/log/get | 8 ++++--- src/raft/testscripts/log/has | 8 ++++--- src/raft/testscripts/log/init | 11 +++++---- src/raft/testscripts/log/scan | 8 ++++--- src/raft/testscripts/log/splice | 33 +++++++++++++++++++++----- src/raft/testscripts/log/status | 7 +++--- 10 files changed, 97 insertions(+), 72 deletions(-) diff --git a/src/raft/log.rs b/src/raft/log.rs index e81531c43..a97fb6135 100644 --- a/src/raft/log.rs +++ b/src/raft/log.rs @@ -83,7 +83,9 @@ impl encoding::Key<'_> for KeyPrefix {} /// /// * Entry indexes are contiguous starting at 1 (no index gaps). /// * Entry terms never decrease from the previous entry. +/// * Entry terms are at or below the current term. /// * Appended entries are durable (flushed to disk). +/// * Appended entries use the current term. /// * Committed entries are never changed or removed (no log truncation). /// * Committed entries will eventually be replicated to all nodes. /// * Entries with the same index/term contain the same command. @@ -153,8 +155,9 @@ impl Log { (self.term, self.vote) } - /// Stores the most recent term and cast vote (if any). Enforces that the - /// term does not regress, and that we only vote for one node in a term. + /// Stores the current term and cast vote (if any). Enforces that the term + /// does not regress, and that we only vote for one node in a term. append() + /// will use this term, and splice() can't write entries beyond it. pub fn set_term(&mut self, term: Term, vote: Option) -> Result<()> { assert!(term > 0, "can't set term 0"); assert!(term >= self.term, "term regression {} → {}", self.term, term); @@ -169,20 +172,14 @@ impl Log { Ok(()) } - /// Appends a command to the log and flushes it to disk, returning its - /// index. None implies a noop command, typically after Raft leader changes. - /// The term must be equal to or greater than the previous entry. - /// TODO: use self.term for the term. - pub fn append(&mut self, term: Term, command: Option>) -> Result { - match self.get(self.last_index)? { - Some(e) if term < e.term => panic!("term regression {} → {term}", e.term), - None if self.last_index > 0 => panic!("log gap at {}", self.last_index), - None if term == 0 => panic!("can't append entry with term 0"), - Some(_) | None => {} - } + /// Appends a command to the log at the current term, and flushes it to + /// disk, returning its index. None implies a noop command, typically after + /// Raft leader changes. + pub fn append(&mut self, command: Option>) -> Result { + assert!(self.term > 0, "can't append entry in term 0"); // We could omit the index in the encoded value, since it's also stored // in the key, but we keep it simple. - let entry = Entry { index: self.last_index + 1, term, command }; + let entry = Entry { index: self.last_index + 1, term: self.term, command }; self.engine.set(&Key::Entry(entry.index).encode()?, entry.encode()?)?; self.engine.flush()?; self.last_index = entry.index; @@ -249,11 +246,11 @@ impl Log { /// Splices a set of entries into the log and flushes it to disk. The /// entries must have contiguous indexes and equal/increasing terms, and the /// first entry must be in the range [1,last_index+1] with a term at or - /// equal to the previous (base) entry's term. New indexes will be appended. - /// Overlapping indexes with the same term must be equal and will be - /// ignored. Overlapping indexes with different terms will truncate the - /// existing log at the first conflict and then splice the new entries. - /// TODO: check against self.term. + /// above the previous (base) entry's term and at or below the current term. + /// New indexes will be appended. Overlapping indexes with the same term + /// must be equal and will be ignored. Overlapping indexes with different + /// terms will truncate the existing log at the first conflict and then + /// splice the new entries. pub fn splice(&mut self, entries: Vec) -> Result { let (Some(first), Some(last)) = (entries.first(), entries.last()) else { return Ok(self.last_index); // empty input is noop @@ -272,6 +269,7 @@ impl Log { // Check that the entries connect to the existing log (if any), and that the // term doesn't regress. + assert!(last.term <= self.term, "splice term {} beyond current {}", last.term, self.term); match self.get(first.index - 1)? { Some(base) if first.term < base.term => { panic!("splice term regression {} → {}", base.term, first.term) @@ -349,14 +347,13 @@ mod tests { fn run(&mut self, command: &goldenscript::Command) -> Result> { let mut output = String::new(); match command.name.as_str() { - // append TERM [COMMAND] [oplog=BOOL] + // append [COMMAND] [oplog=BOOL] "append" => { let mut args = command.consume_args(); - let term = args.next_pos().ok_or("term not given")?.parse()?; let command = args.next_pos().map(|a| a.value.as_bytes().to_vec()); let oplog = args.lookup_parse("oplog")?.unwrap_or(false); args.reject_rest()?; - let index = self.log.append(term, command)?; + let index = self.log.append(command)?; let entry = self.log.get(index)?.expect("entry not found"); self.maybe_oplog(oplog, &mut output); output.push_str(&format!("append → {}\n", Self::format_entry(&entry))); diff --git a/src/raft/node.rs b/src/raft/node.rs index 1e731fcbe..73d68b55c 100644 --- a/src/raft/node.rs +++ b/src/raft/node.rs @@ -1028,7 +1028,7 @@ impl RawNode { /// replicating it to peers. If successful, it will eventually be committed /// and applied to the state machine. fn propose(&mut self, command: Option>) -> Result { - let index = self.log.append(self.term, command)?; + let index = self.log.append(command)?; for peer in self.peers.iter().copied().sorted() { // Eagerly send the entry to the peer, but only if it is in steady // state where we've already sent the previous entries. Otherwise, diff --git a/src/raft/testscripts/log/append b/src/raft/testscripts/log/append index b25ed633a..d0ab4dd23 100644 --- a/src/raft/testscripts/log/append +++ b/src/raft/testscripts/log/append @@ -1,19 +1,19 @@ -# Appending an entry with term 0 fails, even on an empty log. -!append 0 foo +# Appending an entry with term 0 fails. +!append foo --- -Panic: can't append entry with term 0 +Panic: can't append entry in term 0 -# Appending to an empty log works. The term doesn't have to be 1, and doesn't -# have to match get_term(). The entry is written to the engine and flushed -# to durable storage. -append 2 foo oplog=true +# Appending to an empty log works. The term doesn't have to be 1. The entry is +# written to the engine and flushed to durable storage. +set_term 2 +append foo oplog=true --- engine: set Entry(1) 0x000000000000000001 = 0x01020103666f6f engine: flush append → 1@2 foo # Appending a noop entry (no command) also works. -append 2 oplog=true +append oplog=true --- engine: set Entry(2) 0x000000000000000002 = 0x020200 engine: flush @@ -25,33 +25,28 @@ status scan dump --- -term=0 last=2@2 commit=0@0 vote=None +term=2 last=2@2 commit=0@0 vote=None 1@2 foo 2@2 None Entry(1) 0x000000000000000001 = 0x01020103666f6f Entry(2) 0x000000000000000002 = 0x020200 +TermVote 0x01 = 0x0200 -# Bumping the term with a command is allowed. Skipping a term and omitting the -# command is also allowed. -append 3 command -append 5 +# Skipping a term then appending is allowed. +set_term 3 +append command +set_term 5 +append --- append → 3@3 command append → 4@5 None -# A term regression fails, as does a 0 term. -!append 4 old -!append 0 ---- -Panic: term regression 5 → 4 -Panic: term regression 5 → 0 - # Dump the final status and data. status scan dump --- -term=0 last=4@5 commit=0@0 vote=None +term=5 last=4@5 commit=0@0 vote=None 1@2 foo 2@2 None 3@3 command @@ -60,3 +55,4 @@ Entry(1) 0x000000000000000001 = 0x01020103666f6f Entry(2) 0x000000000000000002 = 0x020200 Entry(3) 0x000000000000000003 = 0x03030107636f6d6d616e64 Entry(4) 0x000000000000000004 = 0x040500 +TermVote 0x01 = 0x0500 diff --git a/src/raft/testscripts/log/commit b/src/raft/testscripts/log/commit index e00a79e1b..c30613030 100644 --- a/src/raft/testscripts/log/commit +++ b/src/raft/testscripts/log/commit @@ -4,6 +4,7 @@ Panic: commit index 1 does not exist # Add some entries. +set_term 2 splice 1@1= 2@1=foo 3@2=bar --- splice → 3@2 bar @@ -22,7 +23,7 @@ status --- engine: set CommitIndex 0x02 = 0x0101 commit → 1@1 None -term=0 last=3@2 commit=1@1 vote=None +term=2 last=3@2 commit=1@1 vote=None # Dump the raw engine contents tdump dump @@ -30,6 +31,7 @@ dump Entry(1) 0x000000000000000001 = 0x010100 Entry(2) 0x000000000000000002 = 0x02010103666f6f Entry(3) 0x000000000000000003 = 0x03020103626172 +TermVote 0x01 = 0x0200 CommitIndex 0x02 = 0x0101 # Commits are idempotent, which doesn't incur an engine set. @@ -37,28 +39,28 @@ commit 1 oplog=true status --- commit → 1@1 None -term=0 last=3@2 commit=1@1 vote=None +term=2 last=3@2 commit=1@1 vote=None # Commits can skip an entry. commit 3 status --- commit → 3@2 bar -term=0 last=3@2 commit=3@2 vote=None +term=2 last=3@2 commit=3@2 vote=None # Commit regressions error. !commit 2 status --- Panic: commit index regression 3 → 2 -term=0 last=3@2 commit=3@2 vote=None +term=2 last=3@2 commit=3@2 vote=None # Committing non-existant indexes error. !commit 4 status --- Panic: commit index 4 does not exist -term=0 last=3@2 commit=3@2 vote=None +term=2 last=3@2 commit=3@2 vote=None # Dump the raw values. dump @@ -66,4 +68,5 @@ dump Entry(1) 0x000000000000000001 = 0x010100 Entry(2) 0x000000000000000002 = 0x02010103666f6f Entry(3) 0x000000000000000003 = 0x03020103626172 +TermVote 0x01 = 0x0200 CommitIndex 0x02 = 0x0302 diff --git a/src/raft/testscripts/log/get b/src/raft/testscripts/log/get index 45ac7031a..7d821397c 100644 --- a/src/raft/testscripts/log/get +++ b/src/raft/testscripts/log/get @@ -4,9 +4,11 @@ get 1 None # Append a few entries. -append 1 -append 1 foo -append 2 bar +set_term 1 +append +append foo +set_term 2 +append bar --- append → 1@1 None append → 2@1 foo diff --git a/src/raft/testscripts/log/has b/src/raft/testscripts/log/has index b317a217d..4c8841497 100644 --- a/src/raft/testscripts/log/has +++ b/src/raft/testscripts/log/has @@ -4,9 +4,11 @@ has 1@1 false # Append a few entries. -append 1 -append 1 foo -append 2 bar +set_term 1 +append +append foo +set_term 2 +append bar --- append → 1@1 None append → 2@1 foo diff --git a/src/raft/testscripts/log/init b/src/raft/testscripts/log/init index e5ed9758c..4d4be6caa 100644 --- a/src/raft/testscripts/log/init +++ b/src/raft/testscripts/log/init @@ -1,11 +1,12 @@ # Tests that the log correctly initializes cached state when opened. -set_term 1 7 +set_term 1 --- ok -append 1 foo -append 2 bar +append foo +set_term 2 7 +append bar commit 1 --- append → 1@1 foo @@ -14,7 +15,7 @@ commit → 1@1 foo status --- -term=1 last=2@2 commit=1@1 vote=7 +term=2 last=2@2 commit=1@1 vote=7 reload --- @@ -22,7 +23,7 @@ ok status --- -term=1 last=2@2 commit=1@1 vote=7 +term=2 last=2@2 commit=1@1 vote=7 scan --- diff --git a/src/raft/testscripts/log/scan b/src/raft/testscripts/log/scan index 078feadc6..b4f793427 100644 --- a/src/raft/testscripts/log/scan +++ b/src/raft/testscripts/log/scan @@ -6,9 +6,11 @@ scan 3..7 # Append a few entries. -append 1 -append 1 foo -append 2 bar +set_term 1 +append +append foo +set_term 2 +append bar --- append → 1@1 None append → 2@1 foo diff --git a/src/raft/testscripts/log/splice b/src/raft/testscripts/log/splice index 38aacff02..2bc0cf144 100644 --- a/src/raft/testscripts/log/splice +++ b/src/raft/testscripts/log/splice @@ -3,7 +3,15 @@ --- Panic: spliced entry has index or term 0 +# Splicing without a term should fail. +!splice 1@1=foo +--- +Panic: splice term 1 beyond current 0 + + + # Splicing at index 2 should fail (creates gap). +set_term 1 !splice 2@1=foo --- Panic: first index 2 must touch existing log @@ -11,6 +19,7 @@ Panic: first index 2 must touch existing log # Splicing entries at start should work, both with and without commands, and # starting at a term after 1. They should be written to the engine and flushed # to durable storage. It should also update the state. +set_term 2 splice 1@2= 2@2=command oplog=true status scan @@ -19,7 +28,7 @@ engine: set Entry(1) 0x000000000000000001 = 0x010200 engine: set Entry(2) 0x000000000000000002 = 0x02020107636f6d6d616e64 engine: flush splice → 2@2 command -term=0 last=2@2 commit=0@0 vote=None +term=2 last=2@2 commit=0@0 vote=None 1@2 None 2@2 command @@ -29,7 +38,7 @@ status scan --- splice → 2@2 command -term=0 last=2@2 commit=0@0 vote=None +term=2 last=2@2 commit=0@0 vote=None 1@2 None 2@2 command @@ -58,6 +67,13 @@ Panic: first index 4 must touch existing log --- Panic: splice term regression 2 → 1 +# Splicing with a term beyond the current term should fail. +!splice 3@3= +!splice 3@4= +--- +Panic: splice term 3 beyond current 2 +Panic: splice term 4 beyond current 2 + # Fully overlapping entries is a noop. splice 1@2= 2@2=command oplog=true scan @@ -93,6 +109,7 @@ Panic: command mismatch at Entry { index: 2, term: 2, command: Some([99, 111, 10 # Appending a new entry in the same term should work, as should # appending one in a new term. splice 3@2=bar +set_term 3 splice 4@3= scan --- @@ -119,6 +136,7 @@ splice → 6@3 bar 6@3 bar # Splicing at an existing index with a new term should replace the tail. +set_term 4 splice 4@4= oplog=true status scan @@ -128,13 +146,14 @@ engine: delete Entry(5) 0x000000000000000005 engine: delete Entry(6) 0x000000000000000006 engine: flush splice → 4@4 None -term=0 last=4@4 commit=0@0 vote=None +term=4 last=4@4 commit=0@0 vote=None 1@2 None 2@2 command 3@2 bar 4@4 None # This also holds at the start of the log. +set_term 5 splice 1@5= 2@5=foo 3@5=bar oplog=true status scan @@ -145,7 +164,7 @@ engine: set Entry(3) 0x000000000000000003 = 0x03050103626172 engine: delete Entry(4) 0x000000000000000004 engine: flush splice → 3@5 bar -term=0 last=3@5 commit=0@0 vote=None +term=5 last=3@5 commit=0@0 vote=None 1@5 None 2@5 foo 3@5 bar @@ -158,19 +177,20 @@ scan --- commit → 2@5 foo splice → 4@5 None -term=0 last=4@5 commit=2@5 vote=None +term=5 last=4@5 commit=2@5 vote=None 1@5 None 2@5 foo 3@5 bar 4@5 None # Splicing across the commit index can replace a tail after the commit index. +set_term 9 splice 3@6= 4@6=bar status scan --- splice → 4@6 bar -term=0 last=4@6 commit=2@5 vote=None +term=9 last=4@6 commit=2@5 vote=None 1@5 None 2@5 foo 3@6 None @@ -190,4 +210,5 @@ Entry(1) 0x000000000000000001 = 0x010500 Entry(2) 0x000000000000000002 = 0x02050103666f6f Entry(3) 0x000000000000000003 = 0x030600 Entry(4) 0x000000000000000004 = 0x04060103626172 +TermVote 0x01 = 0x0900 CommitIndex 0x02 = 0x0205 diff --git a/src/raft/testscripts/log/status b/src/raft/testscripts/log/status index e0952bff0..6d27dbacb 100644 --- a/src/raft/testscripts/log/status +++ b/src/raft/testscripts/log/status @@ -11,10 +11,11 @@ term=0 last=0@0 commit=0@0 vote=None engine=Status { } # Write some data. +set_term 1 +append +append foo set_term 2 1 -append 1 -append 1 foo -append 2 bar +append bar commit 2 --- append → 1@1 None