LibWeb: Support unbuffered fetch requests

Supporting unbuffered fetches is actually part of the fetch spec in its HTTP-network-fetch algorithm. We had previously implemented this method in a very ad-hoc manner as a simple wrapper around ResourceLoader. This is still the case, but we now implement a good amount of these steps according to spec, using ResourceLoader's unbuffered API. The response data is forwarded through to the fetch response using streams. This will eventually let us remove the use of ResourceLoader's buffered API, as all responses should just be streamed this way. The streams spec then supplies ways to wait for completion, thus allowing fully buffered responses. However, we have more work to do to make the other parts of our fetch implementation (namely, Body::fully_read) use streams before we can do this.
Cnidarias · May 26, 2024 · 6056428 · 6056428
1 parent 1e97ae6
commit 6056428
Show file tree

Hide file tree

Showing 8 changed files with 266 additions and 8 deletions.
diff --git a/Meta/gn/secondary/Userland/Libraries/LibWeb/Fetch/Fetching/BUILD.gn b/Meta/gn/secondary/Userland/Libraries/LibWeb/Fetch/Fetching/BUILD.gn
@@ -3,6 +3,7 @@ source_set("Fetching") {
  deps = [ "//Userland/Libraries/LibWeb:all_generated" ]
  sources = [
  "Checks.cpp",
+ "FetchedDataReceiver.cpp",
  "Fetching.cpp",
  "PendingResponse.cpp",
  "RefCountedFlag.cpp",

diff --git a/Userland/Libraries/LibWeb/CMakeLists.txt b/Userland/Libraries/LibWeb/CMakeLists.txt
@@ -204,6 +204,7 @@ set(SOURCES
  Fetch/BodyInit.cpp
  Fetch/Enums.cpp
  Fetch/Fetching/Checks.cpp
+ Fetch/Fetching/FetchedDataReceiver.cpp
  Fetch/Fetching/Fetching.cpp
  Fetch/Fetching/PendingResponse.cpp
  Fetch/Fetching/RefCountedFlag.cpp

diff --git a/Userland/Libraries/LibWeb/Fetch/Fetching/FetchedDataReceiver.cpp b/Userland/Libraries/LibWeb/Fetch/Fetching/FetchedDataReceiver.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2024, Tim Flynn <[email protected]>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibJS/Heap/HeapFunction.h>
+#include <LibWeb/Bindings/ExceptionOrUtils.h>
+#include <LibWeb/Bindings/HostDefined.h>
+#include <LibWeb/Fetch/Fetching/FetchedDataReceiver.h>
+#include <LibWeb/Fetch/Infrastructure/FetchParams.h>
+#include <LibWeb/Fetch/Infrastructure/Task.h>
+#include <LibWeb/HTML/Scripting/ExceptionReporter.h>
+#include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
+#include <LibWeb/Streams/AbstractOperations.h>
+#include <LibWeb/WebIDL/Promise.h>
+
+namespace Web::Fetch::Fetching {
+
+JS_DEFINE_ALLOCATOR(FetchedDataReceiver);
+
+FetchedDataReceiver::FetchedDataReceiver(JS::NonnullGCPtr<Infrastructure::FetchParams const> fetch_params, JS::NonnullGCPtr<Streams::ReadableStream> stream)
+ : m_fetch_params(fetch_params)
+ , m_stream(stream)
+{
+}
+
+FetchedDataReceiver::~FetchedDataReceiver() = default;
+
+void FetchedDataReceiver::visit_edges(Visitor& visitor)
+{
+ Base::visit_edges(visitor);
+ visitor.visit(m_fetch_params);
+ visitor.visit(m_stream);
+ visitor.visit(m_pending_promise);
+}
+
+void FetchedDataReceiver::set_pending_promise(JS::NonnullGCPtr<WebIDL::Promise> promise)
+{
+ auto had_pending_promise = m_pending_promise != nullptr;
+ m_pending_promise = promise;
+
+ if (!had_pending_promise && !m_buffer.is_empty()) {
+ on_data_received(m_buffer);
+ m_buffer.clear();
+ }
+}
+
+// This implements the parallel steps of the pullAlgorithm in HTTP-network-fetch.
+// https://fetch.spec.whatwg.org/#ref-for-in-parallel④
+void FetchedDataReceiver::on_data_received(ReadonlyBytes bytes)
+{
+ // FIXME: 1. If the size of buffer is smaller than a lower limit chosen by the user agent and the ongoing fetch
+ // is suspended, resume the fetch.
+ // FIXME: 2. Wait until buffer is not empty.
+
+ // If the remote end sends data immediately after we receive headers, we will often get that data here before the
+ // stream tasks have all been queued internally. Just hold onto that data.
+ if (!m_pending_promise) {
+ m_buffer.append(bytes);
+ return;
+ }
+
+ // 3. Queue a fetch task to run the following steps, with fetchParams’s task destination.
+ Infrastructure::queue_fetch_task(
+ m_fetch_params->controller(),
+ m_fetch_params->task_destination().get<JS::NonnullGCPtr<JS::Object>>(),
+ JS::create_heap_function(heap(), [this, bytes = MUST(ByteBuffer::copy(bytes))]() mutable {
+ HTML::TemporaryExecutionContext execution_context { Bindings::host_defined_environment_settings_object(m_stream->realm()), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };
+
+ // 1. Pull from bytes buffer into stream.
+ if (auto result = Streams::readable_stream_pull_from_bytes(m_stream, move(bytes)); result.is_error()) {
+ auto throw_completion = Bindings::dom_exception_to_throw_completion(m_stream->vm(), result.release_error());
+
+ dbgln("FetchedDataReceiver: Stream error pulling bytes");
+ HTML::report_exception(throw_completion, m_stream->realm());
+
+ return;
+ }
+
+ // 2. If stream is errored, then terminate fetchParams’s controller.
+ if (m_stream->is_errored())
+ m_fetch_params->controller()->terminate();
+
+ // 3. Resolve promise with undefined.
+ WebIDL::resolve_promise(m_stream->realm(), *m_pending_promise, JS::js_undefined());
+ }));
+}
+
+}
diff --git a/Userland/Libraries/LibWeb/Fetch/Fetching/FetchedDataReceiver.h b/Userland/Libraries/LibWeb/Fetch/Fetching/FetchedDataReceiver.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2024, Tim Flynn <[email protected]>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/ByteBuffer.h>
+#include <LibJS/Heap/Cell.h>
+#include <LibJS/Heap/CellAllocator.h>
+#include <LibWeb/Forward.h>
+
+namespace Web::Fetch::Fetching {
+
+class FetchedDataReceiver final : public JS::Cell {
+ JS_CELL(FetchedDataReceiver, JS::Cell);
+ JS_DECLARE_ALLOCATOR(FetchedDataReceiver);
+
+public:
+ virtual ~FetchedDataReceiver() override;
+
+ void set_pending_promise(JS::NonnullGCPtr<WebIDL::Promise>);
+ void on_data_received(ReadonlyBytes);
+
+private:
+ FetchedDataReceiver(JS::NonnullGCPtr<Infrastructure::FetchParams const>, JS::NonnullGCPtr<Streams::ReadableStream>);
+
+ virtual void visit_edges(Visitor& visitor) override;
+
+ JS::NonnullGCPtr<Infrastructure::FetchParams const> m_fetch_params;
+ JS::NonnullGCPtr<Streams::ReadableStream> m_stream;
+ JS::GCPtr<WebIDL::Promise> m_pending_promise;
+ ByteBuffer m_buffer;
+};
+
+}
diff --git a/Userland/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp b/Userland/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp
@@ -17,6 +17,7 @@
 #include <LibWeb/DOMURL/DOMURL.h>
 #include <LibWeb/Fetch/BodyInit.h>
 #include <LibWeb/Fetch/Fetching/Checks.h>
+#include <LibWeb/Fetch/Fetching/FetchedDataReceiver.h>
 #include <LibWeb/Fetch/Fetching/Fetching.h>
 #include <LibWeb/Fetch/Fetching/PendingResponse.h>
 #include <LibWeb/Fetch/Fetching/RefCountedFlag.h>
@@ -1962,8 +1963,10 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
  load_request.set_url(request->current_url());
  load_request.set_page(page);
  load_request.set_method(ByteString::copy(request->method()));
+
  for (auto const& header : *request->header_list())
  load_request.set_header(ByteString::copy(header.name), ByteString::copy(header.value));
+
  if (auto const* body = request->body().get_pointer<JS::NonnullGCPtr<Infrastructure::Body>>()) {
  TRY((*body)->source().visit(
  [&](ByteBuffer const& byte_buffer) -> WebIDL::ExceptionOr<void> {
@@ -1981,13 +1984,121 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
 
  auto pending_response = PendingResponse::create(vm, request);
 
- dbgln_if(WEB_FETCH_DEBUG, "Fetch: Invoking ResourceLoader");
- if constexpr (WEB_FETCH_DEBUG)
+ if constexpr (WEB_FETCH_DEBUG) {
+  dbgln("Fetch: Invoking ResourceLoader");
  log_load_request(load_request);
+ }
+
+ // FIXME: This check should be removed and all HTTP requests should go through the `ResourceLoader::load_unbuffered`
+ // path. The buffer option should then be supplied to the steps below that allow us to buffer data up to a
+ // user-agent-defined limit (or not). However, we will need to fully use stream operations throughout the
+ // fetch process to enable this (e.g. Body::fully_read must use streams for this to work).
+ if (request->buffer_policy() == Infrastructure::Request::BufferPolicy::DoNotBufferResponse) {
+ HTML::TemporaryExecutionContext execution_context { Bindings::host_defined_environment_settings_object(realm), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };
+
+ // 12. Let stream be a new ReadableStream.
+ auto stream = realm.heap().allocate<Streams::ReadableStream>(realm, realm);
+ auto fetched_data_receiver = realm.heap().allocate<FetchedDataReceiver>(realm, fetch_params, stream);
+
+ // 10. Let pullAlgorithm be the followings steps:
+ auto pull_algorithm = JS::create_heap_function(realm.heap(), [&realm, fetched_data_receiver]() {
+ // 1. Let promise be a new promise.
+ auto promise = WebIDL::create_promise(realm);
+
+ // 2. Run the following steps in parallel:
+ // NOTE: This is handled by FetchedDataReceiver.
+ fetched_data_receiver->set_pending_promise(promise);
+
+ // 3. Return promise.
+ return promise;
+ });
+
+ // 11. Let cancelAlgorithm be an algorithm that aborts fetchParams’s controller with reason, given reason.
+ auto cancel_algorithm = JS::create_heap_function(realm.heap(), [&realm, &fetch_params](JS::Value reason) {
+ fetch_params.controller()->abort(realm, reason);
+ return WebIDL::create_resolved_promise(realm, JS::js_undefined());
+ });
+
+ // 13. Set up stream with byte reading support with pullAlgorithm set to pullAlgorithm, cancelAlgorithm set to cancelAlgorithm.
+ Streams::set_up_readable_stream_controller_with_byte_reading_support(stream, pull_algorithm, cancel_algorithm);
+
+ auto on_headers_received = [&vm, request, pending_response, stream](auto const& response_headers, Optional<u32> status_code) {
+ if (pending_response->is_resolved()) {
+ // RequestServer will send us the response headers twice, the second time being for HTTP trailers. This
+ // fetch algorithm is not interested in trailers, so just drop them here.
+ return;
+ }
+
+ auto response = Infrastructure::Response::create(vm);
+ response->set_status(status_code.value_or(200));
+ // FIXME: Set response status message
+
+ if constexpr (WEB_FETCH_DEBUG) {
+ dbgln("Fetch: ResourceLoader load for '{}' {}: (status {})",
+ request->url(),
+ Infrastructure::is_ok_status(response->status()) ? "complete"sv : "failed"sv,
+ response->status());
+ log_response(status_code, response_headers, ReadonlyBytes {});
+ }
+
+ for (auto const& [name, value] : response_headers) {
+ auto header = Infrastructure::Header::from_string_pair(name, value);
+ response->header_list()->append(move(header));
+ }
+
+ // 14. Set response’s body to a new body whose stream is stream.
+ response->set_body(Infrastructure::Body::create(vm, stream));
+
+ // 17. Return response.
+ // NOTE: Typically response’s body’s stream is still being enqueued to after returning.
+ pending_response->resolve(response);
+ };
+
+ // 16. Run these steps in parallel:
+ // FIXME: 1. Run these steps, but abort when fetchParams is canceled:
+ auto on_data_received = [fetched_data_receiver](auto bytes) {
+ // 1. If one or more bytes have been transmitted from response’s message body, then:
+ if (!bytes.is_empty()) {
+ // 1. Let bytes be the transmitted bytes.
+
+ // FIXME: 2. Let codings be the result of extracting header list values given `Content-Encoding` and response’s header list.
+ // FIXME: 3. Increase response’s body info’s encoded size by bytes’s length.
+ // FIXME: 4. Set bytes to the result of handling content codings given codings and bytes.
+ // FIXME: 5. Increase response’s body info’s decoded size by bytes’s length.
+ // FIXME: 6. If bytes is failure, then terminate fetchParams’s controller.
+
+ // 7. Append bytes to buffer.
+ fetched_data_receiver->on_data_received(bytes);
+
+ // FIXME: 8. If the size of buffer is larger than an upper limit chosen by the user agent, ask the user agent
+ // to suspend the ongoing fetch.
+ }
+ };
+
+ auto on_complete = [&vm, &realm, pending_response, stream](auto success, auto error_message) {
+ HTML::TemporaryExecutionContext execution_context { Bindings::host_defined_environment_settings_object(realm), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };
+
+ // 16.1.1.2. Otherwise, if the bytes transmission for response’s message body is done normally and stream is readable,
+ // then close stream, and abort these in-parallel steps.
+ if (success) {
+ if (stream->is_readable())
+ stream->close();
+ }
+ // 16.1.2.2. Otherwise, if stream is readable, error stream with a TypeError.
+ else {
+ auto error = MUST(String::formatted("Load failed: {}", error_message));
 
- ResourceLoader::the().load(
- load_request,
- [&realm, &vm, request, pending_response](auto data, auto& response_headers, auto status_code) {
+ if (stream->is_readable())
+ stream->error(JS::TypeError::create(realm, error));
+
+ if (!pending_response->is_resolved())
+ pending_response->resolve(Infrastructure::Response::network_error(vm, error));
+ }
+ };
+
+ ResourceLoader::the().load_unbuffered(load_request, move(on_headers_received), move(on_data_received), move(on_complete));
+ } else {
+ auto on_load_success = [&realm, &vm, request, pending_response](auto data, auto& response_headers, auto status_code) {
  dbgln_if(WEB_FETCH_DEBUG, "Fetch: ResourceLoader load for '{}' complete", request->url());
  if constexpr (WEB_FETCH_DEBUG)
  log_response(status_code, response_headers, data);
@@ -2001,8 +2112,9 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
  }
  // FIXME: Set response status message
  pending_response->resolve(response);
- },
- [&realm, &vm, request, pending_response](auto& error, auto status_code, auto data, auto& response_headers) {
+ };
+
+ auto on_load_error = [&realm, &vm, request, pending_response](auto& error, auto status_code, auto data, auto& response_headers) {
  dbgln_if(WEB_FETCH_DEBUG, "Fetch: ResourceLoader load for '{}' failed: {} (status {})", request->url(), error, status_code.value_or(0));
  if constexpr (WEB_FETCH_DEBUG)
  log_response(status_code, response_headers, data);
@@ -2022,7 +2134,10 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
  // FIXME: Set response status message
  }
  pending_response->resolve(response);
- });
+ };
+
+ ResourceLoader::the().load(load_request, move(on_load_success), move(on_load_error));
+ }
 
  return pending_response;
 }

diff --git a/Userland/Libraries/LibWeb/Fetch/Fetching/PendingResponse.h b/Userland/Libraries/LibWeb/Fetch/Fetching/PendingResponse.h
@@ -30,6 +30,7 @@ class PendingResponse : public JS::Cell {
 
  void when_loaded(Callback);
  void resolve(JS::NonnullGCPtr<Infrastructure::Response>);
+ bool is_resolved() const { return m_response != nullptr; }
 
 private:
  PendingResponse(JS::NonnullGCPtr<Infrastructure::Request>, JS::GCPtr<Infrastructure::Response> = {});

diff --git a/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Requests.cpp b/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Requests.cpp
@@ -250,6 +250,7 @@ JS::NonnullGCPtr<Request> Request::clone(JS::Realm& realm) const
  new_request->set_prevent_no_cache_cache_control_header_modification(m_prevent_no_cache_cache_control_header_modification);
  new_request->set_done(m_done);
  new_request->set_timing_allow_failed(m_timing_allow_failed);
+ new_request->set_buffer_policy(m_buffer_policy);
 
  // 2. If request’s body is non-null, set newRequest’s body to the result of cloning request’s body.
  if (auto const* body = m_body.get_pointer<JS::NonnullGCPtr<Body>>())

diff --git a/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Requests.h b/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Requests.h
@@ -159,6 +159,13 @@ class Request final : public JS::Cell {
  Auto
  };
 
+ // AD-HOC: Some web features need to receive data as it arrives, rather than when the response is fully complete
+ // or when enough data has been buffered. Use this buffer policy to inform fetch of that requirement.
+ enum class BufferPolicy {
+ BufferResponse,
+ DoNotBufferResponse,
+ };
+
  // Members are implementation-defined
  struct InternalPriority { };
 
@@ -325,6 +332,9 @@ class Request final : public JS::Cell {
  m_pending_responses.remove_first_matching([&](auto gc_ptr) { return gc_ptr == pending_response; });
  }
 
+ [[nodiscard]] BufferPolicy buffer_policy() const { return m_buffer_policy; }
+ void set_buffer_policy(BufferPolicy buffer_policy) { m_buffer_policy = buffer_policy; }
+
 private:
  explicit Request(JS::NonnullGCPtr<HeaderList>);
 
@@ -515,6 +525,8 @@ class Request final : public JS::Cell {
 
  // Non-standard
  Vector<JS::NonnullGCPtr<Fetching::PendingResponse>> m_pending_responses;
+
+ BufferPolicy m_buffer_policy { BufferPolicy::BufferResponse };
 };
 
 StringView request_destination_to_string(Request::Destination);