[DISCUSS] Validate new document writes against max_http_request_size #1253

Open · wants to merge 9 commits into main
Changes from 1 commit
2 changes: 1 addition & 1 deletion rel/overlay/etc/default.ini
@@ -135,7 +135,7 @@ enable_xframe_options = false
; x_forwarded_proto = X-Forwarded-Proto
; x_forwarded_ssl = X-Forwarded-Ssl
; Maximum allowed http request size. Applies to both clustered and local port.
max_http_request_size = 67108864 ; 64 MB
max_http_request_size = 4294967296 ; 4 GB

; [httpd_design_handlers]
; _view =
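The new default is far more permissive than the previous 64 MB cap. Operators who want to keep the old behaviour can lower the limit again; a hypothetical local.ini override (the value is in bytes) could look like this:

[httpd]
; hypothetical override restoring the previous 64 MB cap
max_http_request_size = 67108864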
6 changes: 5 additions & 1 deletion src/couch/src/couch_att.erl
@@ -47,7 +47,8 @@

-export([
upgrade/1,
downgrade/1
downgrade/1,
to_tuple/1
]).

-export([
@@ -708,6 +709,9 @@ upgrade(#att{} = Att) ->
upgrade(Att) ->
Att.

to_tuple(#att{name=Name, att_len=Len, type=Type, encoding=Encoding}) ->
Member: Not keen on the name; a record already is a tuple, so I'm guessing the only purpose here is to ignore other fields (and any new fields).

Member Author: The only reason this exists is that couch_httpd_multipart:length_multipart_stream() makes assumptions about the format of the stub, and it is usually called from chttpd. I didn't find a way around this without exposing couch_att's #att record.

But yeah, I'm very okay with any other name here. What would be better?

Member Author:
[16:26:10]  <+jan____>	rnewson: do you have a better name for to_tuple in https://github.com/apache/couchdb/pull/1253/files#r178029434?
[16:27:14]  <+rnewson>	I don't, proceed with to_tuple.

{att, Name, Len, Type, Encoding}.
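For illustration only (not part of this diff), here is roughly how the helper could be exercised, using the couch_att:new/1 options that appear in the tests further down and assuming the record's default encoding of identity:

%% Hypothetical shell snippet; names and values are taken from the tests below.
Att = couch_att:new([
    {name, <<"big.xml">>},
    {data, stub},
    {type, <<"xml/yay">>},
    {att_len, 319},
    {disk_len, 319},
    {revpos, 1}
]),
%% to_tuple/1 keeps only the fields couch_httpd_multipart needs and drops the rest.
{att, <<"big.xml">>, 319, <<"xml/yay">>, identity} = couch_att:to_tuple(Att).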


%% Downgrade is exposed for interactive convenience. In practice, unless done
%% manually, upgrades are always one-way.
15 changes: 15 additions & 0 deletions src/couch/src/couch_doc.erl
@@ -136,12 +136,27 @@ from_json_obj_validate(EJson, DbName) ->
case couch_ejson_size:encoded_size(Doc#doc.body) =< MaxSize of
true ->
validate_attachment_sizes(Doc#doc.atts),
validate_total_document_size(Doc),
Doc;
false ->
throw({request_entity_too_large, Doc#doc.id})
end.


% Sum up the JSON body size and the attachment sizes and
% make sure the total is =< max_http_request_size.
validate_total_document_size(#doc{id=DocId, body=Body, atts=Atts0}) ->
    MaxReqSize = config:get_integer("httpd", "max_http_request_size", 4294967296), % 4 GB
Boundary = couch_uuids:random(), % mock boundary, is only used for the length
Atts = lists:map(fun couch_att:to_tuple/1, Atts0),
{_, DocSum} = couch_httpd_multipart:length_multipart_stream(Boundary,
?JSON_ENCODE(Body), Atts),
Contributor: When we re-encode, we use jiffy, but unless the request was made by the replicator, the user probably didn't use Erlang to encode the data, so we could get a different value. There is also some performance loss in re-encoding larger document bodies back to JSON just to check their size.

There is no canonical JSON encoding, and so no canonical encoded JSON size. Above, we are calculating the encoded size using a conservative estimate (giving the user the benefit of the doubt) and with better performance (https://github.com/nickva/couch_ebench). Maybe make a version of the length calculation that takes sizes; then we'd pass the already-computed couch_ejson_size:encoded_size(Doc#doc.body) and 32 for the boundary size.

Member Author: Thanks @nickva, I'll give this a ponder!

case DocSum =< MaxReqSize of
true -> ok;
false -> throw({request_entity_too_large, DocId})
end.
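To make the Contributor's suggestion above concrete, a rough sketch (not part of this PR) of a size-based variant could look like the following. It reuses the already-computed body size, takes 32 for the boundary length (couch_uuids:random() returns a 32-byte binary), ignores the per-part multipart overhead, and is therefore a conservative lower bound, in line with the benefit-of-the-doubt approach described above. The function name and the exact accounting are hypothetical:

%% Hypothetical variant: sum known sizes instead of re-encoding the body.
validate_total_document_size_by_sizes(#doc{id = DocId, body = Body, atts = Atts}) ->
    MaxReqSize = config:get_integer("httpd", "max_http_request_size", 4294967296),
    BoundarySize = 32, % length of couch_uuids:random()
    BodySize = couch_ejson_size:encoded_size(Body),
    AttSize = lists:sum([couch_att:fetch(att_len, Att) || Att <- Atts]),
    case BodySize + AttSize + BoundarySize =< MaxReqSize of
        true -> ok;
        false -> throw({request_entity_too_large, DocId})
    end.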


validate_attachment_sizes([]) ->
ok;
validate_attachment_sizes(Atts) ->
88 changes: 86 additions & 2 deletions src/couch/test/couch_doc_json_tests.erl
@@ -38,8 +38,11 @@ mock(couch_log) ->
ok;
mock(config) ->
meck:new(config, [passthrough]),
meck:expect(config, get_integer,
fun("couchdb", "max_document_size", 4294967296) -> 1024 end),
meck:expect(config, get_integer, fun
("couchdb", "max_document_size", 4294967296) -> 1024;
("httpd", "max_http_request_size", 4294967296) -> 1024
end),

meck:expect(config, get, fun(_, _) -> undefined end),
meck:expect(config, get, fun(_, _, Default) -> Default end),
ok.
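With this mock in place, any code under test that asks config for either limit (using the real default as the fallback) gets 1024 back, for example:

%% Illustrative only: both limits report 1024 while the mock is active.
1024 = config:get_integer("couchdb", "max_document_size", 4294967296),
1024 = config:get_integer("httpd", "max_http_request_size", 4294967296).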
@@ -124,6 +127,44 @@ from_json_success_cases() ->
]},
"Attachments are parsed correctly."
},
% see if we count our bytes correctly. This doc should be *exactly* 1024 bytes
{
{[
{<<"_attachments">>, {[
{<<"big.xml">>, {[
{<<"content_type">>, <<"xml/yay">>},
{<<"revpos">>, 1},
{<<"length">>, 319},
{<<"stub">>, true}
]}},
{<<"big.json">>, {[
{<<"content_type">>, <<"json/ftw">>},
{<<"revpos">>, 1},
{<<"length">>, 319},
{<<"stub">>, true}
]}}
]}}
]},
#doc{atts = [
couch_att:new([
{name, <<"big.xml">>},
{data, stub},
{type, <<"xml/yay">>},
{att_len, 319},
{disk_len, 319},
{revpos, 1}
]),
couch_att:new([
{name, <<"big.json">>},
{data, stub},
{type, <<"json/ftw">>},
{att_len, 319},
{disk_len, 319},
{revpos, 1}
])
]},
"Document and attachments == max_http_request_size"
},
{
{[{<<"_deleted">>, true}]},
#doc{deleted = true},
@@ -281,6 +322,49 @@ from_json_error_cases() ->
end,
{request_entity_too_large, <<"large_doc">>},
"Document too large."
},
% doc json body and each attachment are small enough, but combined are >
% max_http_request_size
{
{[
{<<"_id">>, <<"normal_doc_with_atts">>},
{<<"_attachments">>, {[
{<<"big.xml">>, {[
{<<"content_type">>, <<"xml/yay">>},
{<<"revpos">>, 1},
{<<"length">>, 768},
{<<"stub">>, true}
]}},
{<<"big.json">>, {[
{<<"content_type">>, <<"json/ftw">>},
{<<"revpos">>, 1},
{<<"length">>, 768},
{<<"stub">>, true}
]}}
]}}
]},
{request_entity_too_large, <<"normal_doc_with_atts">>},
"Document too large because of attachments."
},
% see if we count our bytes correctly. This doc should be *exactly* 1025 bytes
{
{[
{<<"_attachments">>, {[
{<<"big.xml">>, {[
{<<"content_type">>, <<"xml/yay">>},
{<<"revpos">>, 1},
{<<"length">>, 320},
{<<"stub">>, true}
]}},
{<<"big.json">>, {[
{<<"content_type">>, <<"json/ftw">>},
{<<"revpos">>, 1},
{<<"length">>, 319},
{<<"stub">>, true}
]}}
]}}
]},
"Document and attachments == max_http_request_size + 1"
}
],
