-
Notifications
You must be signed in to change notification settings - Fork 555
/
message.rs
205 lines (185 loc) · 8.19 KB
/
message.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
use super::{Entry, Index, NodeID, Term};
use crate::encoding;
use crate::error::Result;
use crate::storage;
use serde_derive::{Deserialize, Serialize};
/// A message envelope specifying the sender and receiver.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Envelope {
/// The sender.
pub from: NodeID,
/// The sender's current term.
pub term: Term,
/// The recipient.
pub to: NodeID,
/// The message.
pub message: Message,
}
impl encoding::Value for Envelope {}
/// A message sent between Raft nodes. Messages are sent asynchronously (i.e.
/// they are not request/response) and may be dropped or reordered.
///
/// In practice, they are sent across a TCP connection and crossbeam channels
/// ensuring messages are not dropped or reordered as long as the connection
/// remains intact. A message and its response are sent across separate TCP
/// connections (outbound from their respective senders).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum Message {
/// Candidates campaign for leadership by soliciting votes from peers.
/// Votes will only be granted if the candidate's log is at least as
/// up-to-date as the voter.
Campaign {
/// The index of the candidate's last log entry.
last_index: Index,
/// The term of the candidate's last log entry.
last_term: Term,
},
/// Followers may vote for a single candidate per term, but only if the
/// candidate's log is at least as up-to-date as the follower. Candidates
/// implicitly vote for themselves.
CampaignResponse {
/// If true, the follower granted the candidate a vote. A false response
/// isn't necessary, but is emitted for clarity.
vote: bool,
},
/// Leaders send periodic heartbeats. This serves several purposes:
///
/// * Inform nodes about the leader, and prevent elections.
/// * Detect lost appends and reads, as a retry mechanism.
/// * Advance followers' commit indexes, so they can apply entries.
///
/// The Raft paper does not have a distinct heartbeat message, and instead
/// uses an empty AppendEntries RPC, but we choose to add one for better
/// separation of concerns.
Heartbeat {
/// The index of the leader's last log entry. The term is the leader's
/// current term, since it appends a noop entry on election win. The
/// follower compares this to its own log to determine if it's
/// up-to-date.
last_index: Index,
/// The index of the leader's last committed log entry. Followers use
/// this to advance their commit index and apply entries. It's only safe
/// to commit this if the local log matches last_index, such that the
/// follower's log is identical to the leader at the commit index.
commit_index: Index,
/// The leader's latest read sequence number in this term.
read_seq: ReadSequence,
},
/// Followers respond to leader heartbeats if they still consider it leader.
HeartbeatResponse {
/// If non-zero, the heartbeat's last_index which was matched in the
/// follower's log. Otherwise, the follower is either divergent or
/// lagging behind the leader.
match_index: Index,
/// The heartbeat's read sequence number.
read_seq: ReadSequence,
},
/// Leaders replicate log entries to followers by appending to their logs
/// after the given base entry.
///
/// If the base entry matches the follower's log then their logs are
/// identical up to it (see section 5.3 in the Raft paper), and the entries
/// can be appended -- possibly replacing conflicting entries. Otherwise,
/// the append is rejected and the leader must retry an earlier base index
/// until a common base is found.
///
/// Empty appends messages (no entries) are used to probe follower logs for
/// a common match index in the case of divergent logs, restarted nodes, or
/// dropped messages. This is typically done by sending probes with a
/// decrementing base index until a match is found, at which point the
/// subsequent entries can be sent.
Append {
/// The index of the log entry to append after.
base_index: Index,
/// The term of the base entry.
base_term: Term,
/// Log entries to append. Must start at base_index + 1.
entries: Vec<Entry>,
},
/// Followers accept or reject appends from the leader depending on whether
/// the base entry matches their log.
AppendResponse {
/// If non-zero, the follower appended entries up to this index. The
/// entire log up to this index is consistent with the leader. If no
/// entries were sent (a probe), this will be the matching base index.
match_index: Index,
/// If non-zero, the follower rejected an append at this base index
/// because the base index/term did not match its log. If the follower's
/// log is shorter than the base index, the reject index will be lowered
/// to the index after its last local index, to avoid probing each
/// missing index.
reject_index: Index,
},
/// Leaders need to confirm they are still the leader before serving reads,
/// to guarantee linearizability in case a different leader has been
/// estalished elsewhere. Read requests are served once the sequence number
/// has been confirmed by a quorum.
Read { seq: ReadSequence },
/// Followers confirm leadership at the read sequence numbers.
ReadResponse { seq: ReadSequence },
/// A client request. This can be submitted to the leader, or to a follower
/// which will forward it to its leader. If there is no leader, or the
/// leader or term changes, the request is aborted with an Error::Abort
/// ClientResponse and the client must retry.
ClientRequest {
/// The request ID. Must be globally unique for the request duration.
id: RequestID,
/// The request itself.
request: Request,
},
/// A client response.
ClientResponse {
/// The ID of the original ClientRequest.
id: RequestID,
/// The response, or an error.
response: Result<Response>,
},
}
/// A client request ID. Must be globally unique for the duration of the
/// request. For simplicity, a random UUIDv4 is used -- the node ID and process
/// ID could be incorporated for further collision avoidance, but it does not
/// matter at this scale.
pub type RequestID = uuid::Uuid;
/// A read sequence number, used to confirm leadership for linearizable reads.
pub type ReadSequence = u64;
/// A client request, typically passed through to the state machine.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum Request {
/// A state machine read command, executed via `State::read`. This is not
/// replicated, and only evaluated on the leader.
Read(Vec<u8>),
/// A state machine write command, executed via `State::apply`. This is
/// replicated across all nodes, and must produce a deterministic result.
Write(Vec<u8>),
/// Requests Raft cluster status from the leader.
Status,
}
impl encoding::Value for Request {}
/// A client response. This will be wrapped in a Result for error handling.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum Response {
/// A state machine read result.
Read(Vec<u8>),
/// A state machine write result.
Write(Vec<u8>),
/// The current Raft leader status.
Status(Status),
}
impl encoding::Value for Response {}
/// Raft cluster status. Generated by the leader.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Status {
/// The current Raft leader, which generated this status.
pub leader: NodeID,
/// The current Raft term.
pub term: Term,
/// The match indexes of all nodes, indicating replication progress. Uses a
/// BTreeMap for test determinism.
pub match_index: std::collections::BTreeMap<NodeID, Index>,
/// The current commit index.
pub commit_index: Index,
/// The current applied index.
pub applied_index: Index,
/// The log storage engine status.
pub storage: storage::Status,
}