Skip to content

Commit

Permalink
Improve emitted change feed sequence after a split
Browse files Browse the repository at this point in the history
When we get sequence before the split, we'll fill in the missing (now split)
ranges with a special `{split, OldNodeUUid}` marker. However, when sequences
are emitted in the changes API, that will make the N- prefix (SeqSum) bounce
around from higher to lower numbers, while users expect those to be mostly
incrementing. So take a conservative approach and assume it will be a full
rewind for that range, and use 0 instead. This is a purely cosmetic thing, when
we decode the sequence that prefix gets thrown away anyway.

To emphasize that the sequence prefix is mostly a visual aid, rename the seq/1
function to fake_packed_seq/1 with a comment about what it does and a note
about the special split case.

Fixes a part of issue: #4640
  • Loading branch information
nickva committed Jun 16, 2023
1 parent bca3ab0 commit 484cdac
Showing 1 changed file with 33 additions and 4 deletions.
37 changes: 33 additions & 4 deletions src/fabric/src/fabric_view_changes.erl
Original file line number Diff line number Diff line change
Expand Up @@ -421,13 +421,28 @@ pending_count(Dict) ->

pack_seqs(Workers) ->
SeqList = [{N, R, S} || {#shard{node = N, range = R}, S} <- Workers],
SeqSum = lists:sum([seq(S) || {_, _, S} <- SeqList]),
SeqSum = lists:sum([fake_packed_seq(S) || {_, _, S} <- SeqList]),
Opaque = couch_util:encodeBase64Url(?term_to_bin(SeqList, [compressed])),
?l2b([integer_to_list(SeqSum), $-, Opaque]).

seq({Seq, _Uuid, _Node}) -> Seq;
seq({Seq, _Uuid}) -> Seq;
seq(Seq) -> Seq.
% Generate the sequence number used to build the emitted N-... prefix.
%
% The prefix is discarded when the sequence is decoded in the current CouchDB
% version. It's mostly there as a visual heuristic about all the packed
% sequences and to keep backwards compatibility with previous CouchDB versions.
%
% We handle of few backwards compatibility clauses, and also the when we get a
% sequence before a shard split. In that case we fill in the missing (now
% split) ranges with a special {split, OldNodeUUid} marker. However, when
% sequences are emitted, that will make the N- prefix (SeqSum) bounce around
% from higher to lower numbers if we leave it as is. To fix that, use 0 for
% that range, assuming it will be full rewind. That way the N-... prefix is
% more likely to be incrementing, which is what users would generally expect.
%
fake_packed_seq({Seq, {split, <<_/binary>>}, _Node}) when is_integer(Seq) -> 0;
fake_packed_seq({Seq, _Uuid, _Node}) -> Seq;
fake_packed_seq({Seq, _Uuid}) -> Seq;
fake_packed_seq(Seq) -> Seq.

unpack_seq_regex_match(Packed) ->
Pattern = "^\"?([0-9]+-)?(?<opaque>.*?)\"?$",
Expand Down Expand Up @@ -1065,4 +1080,18 @@ find_replacement_sequence_test() ->
?assertEqual(0, find_replacement_sequence(Dead4, [0, 10])),
?assertEqual(0, find_replacement_sequence(Dead4, [0, 5])).

pack_split_seq_test() ->
Shards = [{"n1", 0, 10}, {"n2", 11, ?RING_END}],
Workers = mk_workers(Shards, {42, {split, <<"abc1234">>}, '[email protected]'}),
PackedSeq = pack_seqs(Workers),
?assertMatch(<<"0-", _/binary>>, PackedSeq),
DecodedSeq = decode_seq(PackedSeq),
?assertEqual(
[
{n1, [0, 10], {42, {split, <<"abc1234">>}, '[email protected]'}},
{n2, [11, 4294967295], {42, {split, <<"abc1234">>}, '[email protected]'}}
],
DecodedSeq
).

-endif.

0 comments on commit 484cdac

Please sign in to comment.