Ensure deterministic revisions for attachments
This re-fixes a corner case when recreating a document with an attachment in a
single multipart request. Since we don't detect that we need a new revision
until after the document has been serialized, we need to be able to
deserialize the body so that we can generate the same revision regardless of
the contents of the database. If we don't do this, we end up including the
attachment's on-disk position in the revision calculation, which can introduce
branches in the revision tree.
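The intent, sketched below under assumptions (RevIdInput and every value are
illustrative placeholders, not CouchDB's actual hashing term or API), is that a
new revision id must be a pure function of the logical document: the deleted
flag, the previous revision, the body term, and per-attachment digests, never
the serialized summary, which embeds details of how attachments were written to
disk.

    % Hedged, illustrative sketch only; RevIdInput and the values below are
    % placeholders, not CouchDB's actual hashing term.
    Deleted = false,
    {OldStart, OldRev} = {1, <<"abc">>},
    Body = {[{<<"value">>, 1}]},
    AttSigs = [{<<"a.txt">>, <<"text/plain">>, crypto:hash(md5, <<"data">>)}],
    RevIdInput = [Deleted, OldStart, OldRev, Body, AttSigs],
    NewRevId = crypto:hash(md5, term_to_binary(RevIdInput)).

Hashing the same logical inputs always yields the same digest, so recreating an
identical document produces an identical revision id regardless of where its
attachments land in the file.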

I've left this as a separate commit from the pluggable storage engine
work so that it's called out clearly for us to revisit.

COUCHDB-3255
davisp committed May 15, 2017
1 parent 4211100 commit 6375cd4
Showing 3 changed files with 21 additions and 13 deletions.
10 changes: 9 additions & 1 deletion src/couch/src/couch_bt_engine.erl
@@ -325,7 +325,15 @@ serialize_doc(#st{} = St, #doc{} = Doc) ->
     SummaryBin = ?term_to_bin({Body, Atts}),
     Md5 = couch_crypto:hash(md5, SummaryBin),
     Data = couch_file:assemble_file_chunk(SummaryBin, Md5),
-    Doc#doc{body = Data}.
+    % TODO: This is a terrible hack to get around the issues
+    % in COUCHDB-3255. We'll need to come back and figure
+    % out a better approach to handling the case when we
+    % need to generate a new revision id after the doc
+    % has been serialized.
+    Doc#doc{
+        body = Data,
+        meta = [{comp_body, Body} | Doc#doc.meta]
+    }.


 write_doc_body(St, #doc{} = Doc) ->
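A minimal sketch of the stash pattern above, assuming couch_compress:compress/2
with the snappy method and couch_util:get_value/2: the compressed body placed
in #doc.meta round-trips to the original term, so anything later derived from
it is independent of where the summary was written.

    % Sketch under the assumptions stated above; values are illustrative.
    Body = {[{<<"value">>, 1}]},
    CompBody = couch_compress:compress(Body, snappy),
    Meta = [{comp_body, CompBody}],
    % The pattern match asserts the round trip recovers the exact same term.
    Body = couch_compress:decompress(couch_util:get_value(comp_body, Meta)).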
12 changes: 1 addition & 11 deletions src/couch/src/couch_db.erl
@@ -942,24 +942,14 @@ prep_and_validate_replicated_updates(Db, [Bucket|RestBuckets], [OldInfo|RestOldI



-new_revid(#doc{body=Body0, revs={OldStart,OldRevs}, atts=Atts, deleted=Deleted}) ->
+new_revid(#doc{body=Body, revs={OldStart,OldRevs}, atts=Atts, deleted=Deleted}) ->
     DigestedAtts = lists:foldl(fun(Att, Acc) ->
         [N, T, M] = couch_att:fetch([name, type, md5], Att),
         case M == <<>> of
             true -> Acc;
             false -> [{N, T, M} | Acc]
         end
     end, [], Atts),
-    Body = case Body0 of
-        {summary, [_Len, _Md5, BodyAtts], _SizeInfo, _AttsFd} ->
-            {CompBody, _CompAtts} = binary_to_term(BodyAtts),
-            couch_compress:decompress(CompBody);
-        {summary, [_Len, BodyAtts], _SizeInfo, _AttsFd} ->
-            {CompBody, _CompAtts} = binary_to_term(BodyAtts),
-            couch_compress:decompress(CompBody);
-        Else ->
-            Else
-    end,
     case DigestedAtts of
         Atts2 when length(Atts) =/= length(Atts2) ->
             % We must have old style non-md5 attachments
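For illustration, the attachment fold in new_revid can be mimicked with plain
{Name, Type, Md5} tuples standing in for couch_att records (a hedged sketch,
not the module's API): attachments with an empty md5 are dropped, and a length
mismatch is how old-style non-md5 attachments are detected.

    % Illustrative only: plain tuples stand in for couch_att records.
    Atts = [{<<"a.txt">>, <<"text/plain">>, crypto:hash(md5, <<"hi">>)},
            {<<"b.txt">>, <<"text/plain">>, <<>>}],
    DigestedAtts = [{N, T, M} || {N, T, M} <- Atts, M =/= <<>>],
    OldStyle = length(DigestedAtts) =/= length(Atts).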
12 changes: 11 additions & 1 deletion src/couch/src/couch_db_updater.erl
@@ -525,7 +525,17 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false)
     % Update the new doc based on revisions in OldInfo
     #doc_info{revs=[WinningRev | _]} = couch_doc:to_doc_info(OldInfo),
     #rev_info{rev={OldPos, OldRev}} = WinningRev,
-    NewRevId = couch_db:new_revid(NewDoc#doc{revs={OldPos, [OldRev]}}),
+    Body = case couch_util:get_value(comp_body, NewDoc#doc.meta) of
+        CompBody when is_binary(CompBody) ->
+            couch_compress:decompress(CompBody);
+        _ ->
+            NewDoc#doc.body
+    end,
+    RevIdDoc = NewDoc#doc{
+        revs = {OldPos, [OldRev]},
+        body = Body
+    },
+    NewRevId = couch_db:new_revid(RevIdDoc),
     NewDoc2 = NewDoc#doc{revs={OldPos + 1, [NewRevId, OldRev]}},

     % Merge our modified new doc into the tree
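As a hedged usage sketch of the lookup above (placeholder data, not the
updater's real inputs): a document that never passed through serialize_doc has
no comp_body entry in its meta, so its in-memory body is used unchanged.

    % Illustrative fallback path; MetaWithout and BodyIn are placeholders.
    MetaWithout = [],
    BodyIn = {[{<<"k">>, <<"v">>}]},
    BodyOut = case couch_util:get_value(comp_body, MetaWithout) of
        CompBody when is_binary(CompBody) -> couch_compress:decompress(CompBody);
        _ -> BodyIn
    end,
    BodyOut = BodyIn.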
