Skip to content

Commit

Permalink
LibWeb: Support unbuffered fetch requests
Browse files Browse the repository at this point in the history
Supporting unbuffered fetches is actually part of the fetch spec in its
HTTP-network-fetch algorithm. We had previously implemented this method
in a very ad-hoc manner as a simple wrapper around ResourceLoader. This
is still the case, but we now implement a good amount of these steps
according to spec, using ResourceLoader's unbuffered API. The response
data is forwarded through to the fetch response using streams.

This will eventually let us remove the use of ResourceLoader's buffered
API, as all responses should just be streamed this way. The streams spec
then supplies ways to wait for completion, thus allowing fully buffered
responses. However, we have more work to do to make the other parts of
our fetch implementation (namely, Body::fully_read) use streams before
we can do this.
  • Loading branch information
trflynn89 authored and awesomekling committed May 26, 2024
1 parent 1e97ae6 commit 6056428
Show file tree
Hide file tree
Showing 8 changed files with 266 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ source_set("Fetching") {
deps = [ "//Userland/Libraries/LibWeb:all_generated" ]
sources = [
"Checks.cpp",
"FetchedDataReceiver.cpp",
"Fetching.cpp",
"PendingResponse.cpp",
"RefCountedFlag.cpp",
Expand Down
1 change: 1 addition & 0 deletions Userland/Libraries/LibWeb/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ set(SOURCES
Fetch/BodyInit.cpp
Fetch/Enums.cpp
Fetch/Fetching/Checks.cpp
Fetch/Fetching/FetchedDataReceiver.cpp
Fetch/Fetching/Fetching.cpp
Fetch/Fetching/PendingResponse.cpp
Fetch/Fetching/RefCountedFlag.cpp
Expand Down
90 changes: 90 additions & 0 deletions Userland/Libraries/LibWeb/Fetch/Fetching/FetchedDataReceiver.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright (c) 2024, Tim Flynn <[email protected]>
*
* SPDX-License-Identifier: BSD-2-Clause
*/

#include <LibJS/Heap/HeapFunction.h>
#include <LibWeb/Bindings/ExceptionOrUtils.h>
#include <LibWeb/Bindings/HostDefined.h>
#include <LibWeb/Fetch/Fetching/FetchedDataReceiver.h>
#include <LibWeb/Fetch/Infrastructure/FetchParams.h>
#include <LibWeb/Fetch/Infrastructure/Task.h>
#include <LibWeb/HTML/Scripting/ExceptionReporter.h>
#include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
#include <LibWeb/Streams/AbstractOperations.h>
#include <LibWeb/WebIDL/Promise.h>

namespace Web::Fetch::Fetching {

JS_DEFINE_ALLOCATOR(FetchedDataReceiver);

FetchedDataReceiver::FetchedDataReceiver(JS::NonnullGCPtr<Infrastructure::FetchParams const> fetch_params, JS::NonnullGCPtr<Streams::ReadableStream> stream)
: m_fetch_params(fetch_params)
, m_stream(stream)
{
}

// Defaulted out of line; the destructor is declared virtual in FetchedDataReceiver.h.
FetchedDataReceiver::~FetchedDataReceiver() = default;

// Lets the base class report its edges, then reports each GC pointer member held by this receiver.
void FetchedDataReceiver::visit_edges(Visitor& visitor)
{
    Base::visit_edges(visitor);

    // m_buffer is a plain ByteBuffer, so it is not handed to the visitor.
    visitor.visit(m_pending_promise);
    visitor.visit(m_stream);
    visitor.visit(m_fetch_params);
}

// Records `promise` as the promise to resolve once received bytes have been pulled into the stream.
//
// When this is the first promise ever set and bytes were buffered before it existed, those bytes are replayed
// through on_data_received() and the buffer is emptied afterwards.
void FetchedDataReceiver::set_pending_promise(JS::NonnullGCPtr<WebIDL::Promise> promise)
{
    bool const is_first_promise = !m_pending_promise;
    m_pending_promise = promise;

    // Nothing to replay unless data arrived before any promise was available.
    if (!is_first_promise || m_buffer.is_empty())
        return;

    on_data_received(m_buffer);
    m_buffer.clear();
}

// This implements the parallel steps of the pullAlgorithm in HTTP-network-fetch.
// https://fetch.spec.whatwg.org/#ref-for-in-parallel④
//
// Bytes that arrive while no pending promise is set are appended to m_buffer. Otherwise a fetch task is queued
// that pulls a copy of the bytes into the stream and then resolves the pending promise.
void FetchedDataReceiver::on_data_received(ReadonlyBytes bytes)
{
    // FIXME: 1. If the size of buffer is smaller than a lower limit chosen by the user agent and the ongoing fetch
    //           is suspended, resume the fetch.
    // FIXME: 2. Wait until buffer is not empty.

    // If the remote end sends data immediately after we receive headers, we will often get that data here before the
    // stream tasks have all been queued internally. Just hold onto that data.
    if (!m_pending_promise) {
        m_buffer.append(bytes);
        return;
    }

    // The closure owns its own copy of the bytes instead of referring to the caller's span.
    auto pull_task = JS::create_heap_function(heap(), [this, pending_bytes = MUST(ByteBuffer::copy(bytes))]() mutable {
        auto& stream_realm = m_stream->realm();
        HTML::TemporaryExecutionContext execution_context { Bindings::host_defined_environment_settings_object(stream_realm), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };

        // 1. Pull from bytes buffer into stream.
        if (auto pull_result = Streams::readable_stream_pull_from_bytes(m_stream, move(pending_bytes)); pull_result.is_error()) {
            auto throw_completion = Bindings::dom_exception_to_throw_completion(m_stream->vm(), pull_result.release_error());

            dbgln("FetchedDataReceiver: Stream error pulling bytes");
            HTML::report_exception(throw_completion, stream_realm);

            // The pending promise is left untouched on this path.
            return;
        }

        // 2. If stream is errored, then terminate fetchParams’s controller.
        if (m_stream->is_errored())
            m_fetch_params->controller()->terminate();

        // 3. Resolve promise with undefined.
        WebIDL::resolve_promise(stream_realm, *m_pending_promise, JS::js_undefined());
    });

    // 3. Queue a fetch task to run the following steps, with fetchParams’s task destination.
    Infrastructure::queue_fetch_task(
        m_fetch_params->controller(),
        m_fetch_params->task_destination().get<JS::NonnullGCPtr<JS::Object>>(),
        pull_task);
}

}
37 changes: 37 additions & 0 deletions Userland/Libraries/LibWeb/Fetch/Fetching/FetchedDataReceiver.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright (c) 2024, Tim Flynn <[email protected]>
*
* SPDX-License-Identifier: BSD-2-Clause
*/

#pragma once

#include <AK/ByteBuffer.h>
#include <LibJS/Heap/Cell.h>
#include <LibJS/Heap/CellAllocator.h>
#include <LibWeb/Forward.h>

namespace Web::Fetch::Fetching {

// Garbage-collected helper that takes response body bytes for a fetch and forwards them into a ReadableStream.
// Bytes handed over before a pending promise has been set are kept in m_buffer (see FetchedDataReceiver.cpp).
class FetchedDataReceiver final : public JS::Cell {
JS_CELL(FetchedDataReceiver, JS::Cell);
JS_DECLARE_ALLOCATOR(FetchedDataReceiver);

public:
virtual ~FetchedDataReceiver() override;

// Stores the promise that is resolved after received bytes have been pulled into the stream.
void set_pending_promise(JS::NonnullGCPtr<WebIDL::Promise>);
// Hands a chunk of received bytes to the receiver.
void on_data_received(ReadonlyBytes);

private:
// Private constructor taking the fetch params and the target stream.
FetchedDataReceiver(JS::NonnullGCPtr<Infrastructure::FetchParams const>, JS::NonnullGCPtr<Streams::ReadableStream>);

virtual void visit_edges(Visitor& visitor) override;

// Fetch params of the associated fetch; used for its controller and task destination.
JS::NonnullGCPtr<Infrastructure::FetchParams const> m_fetch_params;
// Stream that received bytes are pulled into.
JS::NonnullGCPtr<Streams::ReadableStream> m_stream;
// Null until set_pending_promise() has been called for the first time.
JS::GCPtr<WebIDL::Promise> m_pending_promise;
// Holds bytes that were received while m_pending_promise was still null.
ByteBuffer m_buffer;
};

}
131 changes: 123 additions & 8 deletions Userland/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <LibWeb/DOMURL/DOMURL.h>
#include <LibWeb/Fetch/BodyInit.h>
#include <LibWeb/Fetch/Fetching/Checks.h>
#include <LibWeb/Fetch/Fetching/FetchedDataReceiver.h>
#include <LibWeb/Fetch/Fetching/Fetching.h>
#include <LibWeb/Fetch/Fetching/PendingResponse.h>
#include <LibWeb/Fetch/Fetching/RefCountedFlag.h>
Expand Down Expand Up @@ -1962,8 +1963,10 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
load_request.set_url(request->current_url());
load_request.set_page(page);
load_request.set_method(ByteString::copy(request->method()));

for (auto const& header : *request->header_list())
load_request.set_header(ByteString::copy(header.name), ByteString::copy(header.value));

if (auto const* body = request->body().get_pointer<JS::NonnullGCPtr<Infrastructure::Body>>()) {
TRY((*body)->source().visit(
[&](ByteBuffer const& byte_buffer) -> WebIDL::ExceptionOr<void> {
Expand All @@ -1981,13 +1984,121 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load

auto pending_response = PendingResponse::create(vm, request);

dbgln_if(WEB_FETCH_DEBUG, "Fetch: Invoking ResourceLoader");
if constexpr (WEB_FETCH_DEBUG)
if constexpr (WEB_FETCH_DEBUG) {
dbgln("Fetch: Invoking ResourceLoader");
log_load_request(load_request);
}

// FIXME: This check should be removed and all HTTP requests should go through the `ResourceLoader::load_unbuffered`
// path. The buffer option should then be supplied to the steps below that allow us to buffer data up to a
// user-agent-defined limit (or not). However, we will need to fully use stream operations throughout the
// fetch process to enable this (e.g. Body::fully_read must use streams for this to work).
if (request->buffer_policy() == Infrastructure::Request::BufferPolicy::DoNotBufferResponse) {
HTML::TemporaryExecutionContext execution_context { Bindings::host_defined_environment_settings_object(realm), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };

// 12. Let stream be a new ReadableStream.
auto stream = realm.heap().allocate<Streams::ReadableStream>(realm, realm);
auto fetched_data_receiver = realm.heap().allocate<FetchedDataReceiver>(realm, fetch_params, stream);

// 10. Let pullAlgorithm be the following steps:
auto pull_algorithm = JS::create_heap_function(realm.heap(), [&realm, fetched_data_receiver]() {
// 1. Let promise be a new promise.
auto promise = WebIDL::create_promise(realm);

// 2. Run the following steps in parallel:
// NOTE: This is handled by FetchedDataReceiver.
fetched_data_receiver->set_pending_promise(promise);

// 3. Return promise.
return promise;
});

// 11. Let cancelAlgorithm be an algorithm that aborts fetchParams’s controller with reason, given reason.
auto cancel_algorithm = JS::create_heap_function(realm.heap(), [&realm, &fetch_params](JS::Value reason) {
fetch_params.controller()->abort(realm, reason);
return WebIDL::create_resolved_promise(realm, JS::js_undefined());
});

// 13. Set up stream with byte reading support with pullAlgorithm set to pullAlgorithm, cancelAlgorithm set to cancelAlgorithm.
Streams::set_up_readable_stream_controller_with_byte_reading_support(stream, pull_algorithm, cancel_algorithm);

auto on_headers_received = [&vm, request, pending_response, stream](auto const& response_headers, Optional<u32> status_code) {
if (pending_response->is_resolved()) {
// RequestServer will send us the response headers twice, the second time being for HTTP trailers. This
// fetch algorithm is not interested in trailers, so just drop them here.
return;
}

auto response = Infrastructure::Response::create(vm);
response->set_status(status_code.value_or(200));
// FIXME: Set response status message

if constexpr (WEB_FETCH_DEBUG) {
dbgln("Fetch: ResourceLoader load for '{}' {}: (status {})",
request->url(),
Infrastructure::is_ok_status(response->status()) ? "complete"sv : "failed"sv,
response->status());
log_response(status_code, response_headers, ReadonlyBytes {});
}

for (auto const& [name, value] : response_headers) {
auto header = Infrastructure::Header::from_string_pair(name, value);
response->header_list()->append(move(header));
}

// 14. Set response’s body to a new body whose stream is stream.
response->set_body(Infrastructure::Body::create(vm, stream));

// 17. Return response.
// NOTE: Typically response’s body’s stream is still being enqueued to after returning.
pending_response->resolve(response);
};

// 16. Run these steps in parallel:
// FIXME: 1. Run these steps, but abort when fetchParams is canceled:
auto on_data_received = [fetched_data_receiver](auto bytes) {
// 1. If one or more bytes have been transmitted from response’s message body, then:
if (!bytes.is_empty()) {
// 1. Let bytes be the transmitted bytes.

// FIXME: 2. Let codings be the result of extracting header list values given `Content-Encoding` and response’s header list.
// FIXME: 3. Increase response’s body info’s encoded size by bytes’s length.
// FIXME: 4. Set bytes to the result of handling content codings given codings and bytes.
// FIXME: 5. Increase response’s body info’s decoded size by bytes’s length.
// FIXME: 6. If bytes is failure, then terminate fetchParams’s controller.

// 7. Append bytes to buffer.
fetched_data_receiver->on_data_received(bytes);

// FIXME: 8. If the size of buffer is larger than an upper limit chosen by the user agent, ask the user agent
// to suspend the ongoing fetch.
}
};

auto on_complete = [&vm, &realm, pending_response, stream](auto success, auto error_message) {
HTML::TemporaryExecutionContext execution_context { Bindings::host_defined_environment_settings_object(realm), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };

// 16.1.1.2. Otherwise, if the bytes transmission for response’s message body is done normally and stream is readable,
// then close stream, and abort these in-parallel steps.
if (success) {
if (stream->is_readable())
stream->close();
}
// 16.1.2.2. Otherwise, if stream is readable, error stream with a TypeError.
else {
auto error = MUST(String::formatted("Load failed: {}", error_message));

ResourceLoader::the().load(
load_request,
[&realm, &vm, request, pending_response](auto data, auto& response_headers, auto status_code) {
if (stream->is_readable())
stream->error(JS::TypeError::create(realm, error));

if (!pending_response->is_resolved())
pending_response->resolve(Infrastructure::Response::network_error(vm, error));
}
};

ResourceLoader::the().load_unbuffered(load_request, move(on_headers_received), move(on_data_received), move(on_complete));
} else {
auto on_load_success = [&realm, &vm, request, pending_response](auto data, auto& response_headers, auto status_code) {
dbgln_if(WEB_FETCH_DEBUG, "Fetch: ResourceLoader load for '{}' complete", request->url());
if constexpr (WEB_FETCH_DEBUG)
log_response(status_code, response_headers, data);
Expand All @@ -2001,8 +2112,9 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
}
// FIXME: Set response status message
pending_response->resolve(response);
},
[&realm, &vm, request, pending_response](auto& error, auto status_code, auto data, auto& response_headers) {
};

auto on_load_error = [&realm, &vm, request, pending_response](auto& error, auto status_code, auto data, auto& response_headers) {
dbgln_if(WEB_FETCH_DEBUG, "Fetch: ResourceLoader load for '{}' failed: {} (status {})", request->url(), error, status_code.value_or(0));
if constexpr (WEB_FETCH_DEBUG)
log_response(status_code, response_headers, data);
Expand All @@ -2022,7 +2134,10 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
// FIXME: Set response status message
}
pending_response->resolve(response);
});
};

ResourceLoader::the().load(load_request, move(on_load_success), move(on_load_error));
}

return pending_response;
}
Expand Down
1 change: 1 addition & 0 deletions Userland/Libraries/LibWeb/Fetch/Fetching/PendingResponse.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class PendingResponse : public JS::Cell {

void when_loaded(Callback);
void resolve(JS::NonnullGCPtr<Infrastructure::Response>);
// Returns true once a response has been stored, i.e. m_response is non-null.
bool is_resolved() const { return m_response != nullptr; }

private:
PendingResponse(JS::NonnullGCPtr<Infrastructure::Request>, JS::GCPtr<Infrastructure::Response> = {});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ JS::NonnullGCPtr<Request> Request::clone(JS::Realm& realm) const
new_request->set_prevent_no_cache_cache_control_header_modification(m_prevent_no_cache_cache_control_header_modification);
new_request->set_done(m_done);
new_request->set_timing_allow_failed(m_timing_allow_failed);
new_request->set_buffer_policy(m_buffer_policy);

// 2. If request’s body is non-null, set newRequest’s body to the result of cloning request’s body.
if (auto const* body = m_body.get_pointer<JS::NonnullGCPtr<Body>>())
Expand Down
12 changes: 12 additions & 0 deletions Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Requests.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,13 @@ class Request final : public JS::Cell {
Auto
};

// AD-HOC: Some web features need to receive data as it arrives, rather than when the response is fully complete
// or when enough data has been buffered. Use this buffer policy to inform fetch of that requirement.
enum class BufferPolicy {
// Initial value of a Request's buffer policy.
BufferResponse,
// Requests that response data be delivered as it arrives instead of being buffered.
DoNotBufferResponse,
};

// Members are implementation-defined
struct InternalPriority { };

Expand Down Expand Up @@ -325,6 +332,9 @@ class Request final : public JS::Cell {
m_pending_responses.remove_first_matching([&](auto gc_ptr) { return gc_ptr == pending_response; });
}

// Returns the buffer policy currently stored on this request.
[[nodiscard]] BufferPolicy buffer_policy() const { return m_buffer_policy; }
// Replaces the buffer policy stored on this request.
void set_buffer_policy(BufferPolicy buffer_policy) { m_buffer_policy = buffer_policy; }

private:
explicit Request(JS::NonnullGCPtr<HeaderList>);

Expand Down Expand Up @@ -515,6 +525,8 @@ class Request final : public JS::Cell {

// Non-standard
Vector<JS::NonnullGCPtr<Fetching::PendingResponse>> m_pending_responses;

BufferPolicy m_buffer_policy { BufferPolicy::BufferResponse };
};

StringView request_destination_to_string(Request::Destination);
Expand Down

0 comments on commit 6056428

Please sign in to comment.