From fe899d0cdf767a7289a8bf746b7f72c2907a1b4b Mon Sep 17 00:00:00 2001 From: pgjones Date: Sun, 29 Jan 2023 15:17:37 +0000 Subject: [PATCH 1/3] limit the maximum number of multipart form parts Add a limit to the number of multipart form data parts the parser will attempt to parse. If the limit is exceeded, it raises `RequestEntityTooLargeError`. A default of 1000 seems large enough to allow legitimate use cases while preventing the previous unlimited parsing. This differs from similar settings that are unset by default, as I think safe defaults are better practice. The limit can be adjusted per request by changing it on the request object before parsing. For example, it can be set based on what you expect a given endpoint to handle. ```python req.max_form_parts = 20 form = req.form ``` --- CHANGES.rst | 4 ++++ src/werkzeug/formparser.py | 13 ++++++++++++- src/werkzeug/sansio/multipart.py | 7 +++++++ src/werkzeug/wrappers/request.py | 11 +++++++++++ tests/test_formparser.py | 9 +++++++++ 5 files changed, 43 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 470f70cf7..0d89c6d01 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -21,6 +21,10 @@ Unreleased the requested size in one ``read`` call. :issue:`2558` - A cookie header that starts with ``=`` is treated as an empty key and discarded, rather than stripping the leading ``==``. +- Specify a maximum number of multipart parts, default 100, after + which a RequestEntityTooLarge exception is raised on parsing. The + mitigates a DOS attack whereby a larger number file/form parts are + sent resulting in a heavy parsing cost. Version 2.2.2 diff --git a/src/werkzeug/formparser.py b/src/werkzeug/formparser.py index 10d58ca3f..820190be2 100644 --- a/src/werkzeug/formparser.py +++ b/src/werkzeug/formparser.py @@ -179,6 +179,10 @@ class FormDataParser: :param cls: an optional dict class to use. If this is not specified or `None` the default :class:`MultiDict` is used. :param silent: If set to False parsing errors will not be caught. + :param max_form_parts: the maximum number of parts to be accepted for the + multipart data sent. If this is exceeded an + :exc:`~exceptions.RequestEntityTooLarge` exception + is raised. """ def __init__( @@ -190,6 +194,7 @@ def __init__( max_content_length: t.Optional[int] = None, cls: t.Optional[t.Type[MultiDict]] = None, silent: bool = True, + max_form_parts: t.Optional[int] = None, ) -> None: if stream_factory is None: stream_factory = default_stream_factory @@ -205,6 +210,7 @@ def __init__( self.cls = cls self.silent = silent + self.max_form_parts = max_form_parts def get_parse_func( self, mimetype: str, options: t.Dict[str, str] @@ -281,6 +287,7 @@ def _parse_multipart( self.errors, max_form_memory_size=self.max_form_memory_size, cls=self.cls, + max_form_parts=self.max_form_parts, ) boundary = options.get("boundary", "").encode("ascii") @@ -346,10 +353,12 @@ def __init__( max_form_memory_size: t.Optional[int] = None, cls: t.Optional[t.Type[MultiDict]] = None, buffer_size: int = 64 * 1024, + max_form_parts: t.Optional[int] = None, ) -> None: self.charset = charset self.errors = errors self.max_form_memory_size = max_form_memory_size + self.max_form_parts = max_form_parts if stream_factory is None: stream_factory = default_stream_factory @@ -409,7 +418,9 @@ def parse( [None], ) - parser = MultipartDecoder(boundary, self.max_form_memory_size) + parser = MultipartDecoder( + boundary, self.max_form_memory_size, self.max_form_parts + ) fields = [] files = [] diff --git a/src/werkzeug/sansio/multipart.py b/src/werkzeug/sansio/multipart.py index d8abeb354..d90b445b9 100644 --- a/src/werkzeug/sansio/multipart.py +++ b/src/werkzeug/sansio/multipart.py @@ -87,10 +87,12 @@ def __init__( self, boundary: bytes, max_form_memory_size: Optional[int] = None, + max_parts: Optional[int] = None, ) -> None: self.buffer = bytearray() self.complete = False self.max_form_memory_size = max_form_memory_size + self.max_parts = max_parts self.state = State.PREAMBLE self.boundary = boundary @@ -118,6 +120,7 @@ def __init__( re.MULTILINE, ) self._search_position = 0 + self._parts_decoded = 0 def last_newline(self) -> int: try: @@ -191,6 +194,10 @@ def next_event(self) -> Event: ) self.state = State.DATA self._search_position = 0 + self._parts_decoded += 1 + + if self.max_parts is not None and self._parts_decoded > self.max_parts: + raise RequestEntityTooLarge() else: # Update the search start position to be equal to the # current buffer length (already searched) minus a diff --git a/src/werkzeug/wrappers/request.py b/src/werkzeug/wrappers/request.py index 283e59229..fa5b18586 100644 --- a/src/werkzeug/wrappers/request.py +++ b/src/werkzeug/wrappers/request.py @@ -83,6 +83,16 @@ class Request(_SansIORequest): #: .. versionadded:: 0.5 max_form_memory_size: t.Optional[int] = None + #: the maximum number of multipart parts. This is forwarded to teh + #: form data parsing function (:func:`parse_form_data`). When the + #: :attr:`form` or :attr:`files` attribute is accessed and the + #: parsing fails because more parts than the specified value is + #: transmitted a :exc:`~werkzeug.exceptions.RequestEntityTooLarge` + #: exception is raised. + #: + #: .. versionadded:: 2.2.3 + max_form_parts = 1000 + #: The form data parser that should be used. Can be replaced to customize #: the form date parsing. form_data_parser_class: t.Type[FormDataParser] = FormDataParser @@ -246,6 +256,7 @@ def make_form_data_parser(self) -> FormDataParser: self.max_form_memory_size, self.max_content_length, self.parameter_storage_class, + max_form_parts=self.max_form_parts, ) def _load_form_data(self) -> None: diff --git a/tests/test_formparser.py b/tests/test_formparser.py index 49010b46c..4c518b14b 100644 --- a/tests/test_formparser.py +++ b/tests/test_formparser.py @@ -127,6 +127,15 @@ def test_limiting(self): req.max_form_memory_size = 400 assert req.form["foo"] == "Hello World" + req = Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) + req.max_form_parts = 1 + pytest.raises(RequestEntityTooLarge, lambda: req.form["foo"]) + def test_missing_multipart_boundary(self): data = ( b"--foo\r\nContent-Disposition: form-field; name=foo\r\n\r\n" From 09449ee77934a0c883f5959785864ecae6aaa2c9 Mon Sep 17 00:00:00 2001 From: David Lord Date: Wed, 1 Feb 2023 10:31:05 -0800 Subject: [PATCH 2/3] clean up docs --- CHANGES.rst | 8 ++++---- src/werkzeug/formparser.py | 11 +++++------ src/werkzeug/sansio/multipart.py | 1 + src/werkzeug/wrappers/request.py | 9 +++------ 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0d89c6d01..2e408af14 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -21,10 +21,10 @@ Unreleased the requested size in one ``read`` call. :issue:`2558` - A cookie header that starts with ``=`` is treated as an empty key and discarded, rather than stripping the leading ``==``. -- Specify a maximum number of multipart parts, default 100, after - which a RequestEntityTooLarge exception is raised on parsing. The - mitigates a DOS attack whereby a larger number file/form parts are - sent resulting in a heavy parsing cost. +- Specify a maximum number of multipart parts, default 1000, after which a + ``RequestEntityTooLarge`` exception is raised on parsing. This mitigates a DoS + attack where a larger number of form/file parts would result in disproportionate + resource use. Version 2.2.2 diff --git a/src/werkzeug/formparser.py b/src/werkzeug/formparser.py index 820190be2..bebb2fc8f 100644 --- a/src/werkzeug/formparser.py +++ b/src/werkzeug/formparser.py @@ -179,10 +179,8 @@ class FormDataParser: :param cls: an optional dict class to use. If this is not specified or `None` the default :class:`MultiDict` is used. :param silent: If set to False parsing errors will not be caught. - :param max_form_parts: the maximum number of parts to be accepted for the - multipart data sent. If this is exceeded an - :exc:`~exceptions.RequestEntityTooLarge` exception - is raised. + :param max_form_parts: The maximum number of parts to be parsed. If this is + exceeded, a :exc:`~exceptions.RequestEntityTooLarge` exception is raised. """ def __init__( @@ -194,6 +192,7 @@ def __init__( max_content_length: t.Optional[int] = None, cls: t.Optional[t.Type[MultiDict]] = None, silent: bool = True, + *, max_form_parts: t.Optional[int] = None, ) -> None: if stream_factory is None: @@ -204,13 +203,13 @@ def __init__( self.errors = errors self.max_form_memory_size = max_form_memory_size self.max_content_length = max_content_length + self.max_form_parts = max_form_parts if cls is None: cls = MultiDict self.cls = cls self.silent = silent - self.max_form_parts = max_form_parts def get_parse_func( self, mimetype: str, options: t.Dict[str, str] @@ -419,7 +418,7 @@ def parse( ) parser = MultipartDecoder( - boundary, self.max_form_memory_size, self.max_form_parts + boundary, self.max_form_memory_size, max_parts=self.max_form_parts ) fields = [] diff --git a/src/werkzeug/sansio/multipart.py b/src/werkzeug/sansio/multipart.py index d90b445b9..2684e5dd6 100644 --- a/src/werkzeug/sansio/multipart.py +++ b/src/werkzeug/sansio/multipart.py @@ -87,6 +87,7 @@ def __init__( self, boundary: bytes, max_form_memory_size: Optional[int] = None, + *, max_parts: Optional[int] = None, ) -> None: self.buffer = bytearray() diff --git a/src/werkzeug/wrappers/request.py b/src/werkzeug/wrappers/request.py index fa5b18586..2de77df42 100644 --- a/src/werkzeug/wrappers/request.py +++ b/src/werkzeug/wrappers/request.py @@ -83,12 +83,9 @@ class Request(_SansIORequest): #: .. versionadded:: 0.5 max_form_memory_size: t.Optional[int] = None - #: the maximum number of multipart parts. This is forwarded to teh - #: form data parsing function (:func:`parse_form_data`). When the - #: :attr:`form` or :attr:`files` attribute is accessed and the - #: parsing fails because more parts than the specified value is - #: transmitted a :exc:`~werkzeug.exceptions.RequestEntityTooLarge` - #: exception is raised. + #: The maximum number of multipart parts to parse, passed to + #: :attr:`form_data_parser_class`. Parsing form data with more than this + #: many parts will raise :exc:`~.RequestEntityTooLarge`. #: #: .. versionadded:: 2.2.3 max_form_parts = 1000 From babc8d9e8c9fa995ef26050698bc9b5a92803664 Mon Sep 17 00:00:00 2001 From: David Lord Date: Thu, 2 Feb 2023 09:19:15 -0800 Subject: [PATCH 3/3] rewrite docs about request data limits --- docs/request_data.rst | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/docs/request_data.rst b/docs/request_data.rst index 83c627804..e55841e34 100644 --- a/docs/request_data.rst +++ b/docs/request_data.rst @@ -73,23 +73,26 @@ read the stream *or* call :meth:`~Request.get_data`. Limiting Request Data --------------------- -To avoid being the victim of a DDOS attack you can set the maximum -accepted content length and request field sizes. The :class:`Request` -class has two attributes for that: :attr:`~Request.max_content_length` -and :attr:`~Request.max_form_memory_size`. - -The first one can be used to limit the total content length. For example -by setting it to ``1024 * 1024 * 16`` the request won't accept more than -16MB of transmitted data. - -Because certain data can't be moved to the hard disk (regular post data) -whereas temporary files can, there is a second limit you can set. The -:attr:`~Request.max_form_memory_size` limits the size of `POST` -transmitted form data. By setting it to ``1024 * 1024 * 2`` you can make -sure that all in memory-stored fields are not more than 2MB in size. - -This however does *not* affect in-memory stored files if the -`stream_factory` used returns a in-memory file. +The :class:`Request` class provides a few attributes to control how much data is +processed from the request body. This can help mitigate DoS attacks that craft the +request in such a way that the server uses too many resources to handle it. Each of +these limits will raise a :exc:`~werkzeug.exceptions.RequestEntityTooLarge` if they are +exceeded. + +- :attr:`~Request.max_content_length` Stop reading request data after this number + of bytes. It's better to configure this in the WSGI server or HTTP server, rather + than the WSGI application. +- :attr:`~Request.max_form_memory_size` Stop reading request data if any form part is + larger than this number of bytes. While file parts can be moved to disk, regular + form field data is stored in memory only. +- :attr:`~Request.max_form_parts` Stop reading request data if more than this number + of parts are sent in multipart form data. This is useful to stop a very large number + of very small parts, especially file parts. The default is 1000. + +Using Werkzeug to set these limits is only one layer of protection. WSGI servers +and HTTPS servers should set their own limits on size and timeouts. The operating system +or container manager should set limits on memory and processing time for server +processes. How to extend Parsing?