Skip to content

Commit

Permalink
LibWeb: Add basic support for dynamic markup insertion
Browse files Browse the repository at this point in the history
This implements basic support for dynamic markup insertion, adding
 * Document::open()
 * Document::write(Vector<String> const&)
 * Document::writeln(Vector<String> const&)
 * Document::close()

The HTMLParser is modified to make it possible to create a
script-created parser which initially only contains a HTMLTokenizer
without any data. Additionally, the HTMLParser::run method gains an
overload which does not modify the Document and does not run
HTMLParser::the_end() so that we can reenter the parser at a later time.
Furthermore, all FIXMEs that concern the insertion point (which is
defined in the HTMLTokenizer) are implemented. Additionally, the following
member variables of the HTMLParser are now exposed by getter functions:
 * m_tokenizer
 * m_aborted
 * m_script_nesting_level

The HTMLTokenizer is modified so that it contains an insertion
point which keeps track of where the next input from the Document::write
functions will be inserted. The insertion point is implemented as the
character offset into m_decoded_input and a boolean describing whether the
insertion point is defined. Functions to update, check and {re}store the
insertion point are also added.
The function HTMLTokenizer::insert_eof is added to tell a script-created
parser that Document::close was called and HTMLParser::the_end() should
be called.
Lastly, an explicit default constructor is added to HTMLTokenizer to
create an empty HTMLTokenizer into which data can be inserted.
  • Loading branch information
L0ric0 authored and awesomekling committed Feb 21, 2022
1 parent d29d946 commit db78981
Show file tree
Hide file tree
Showing 7 changed files with 282 additions and 19 deletions.
145 changes: 141 additions & 4 deletions Userland/Libraries/LibWeb/DOM/Document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
#include <LibWeb/HTML/HTMLScriptElement.h>
#include <LibWeb/HTML/HTMLTitleElement.h>
#include <LibWeb/HTML/MessageEvent.h>
#include <LibWeb/HTML/Parser/HTMLParser.h>
#include <LibWeb/HTML/Scripting/ExceptionReporter.h>
#include <LibWeb/HTML/Scripting/WindowEnvironmentSettingsObject.h>
#include <LibWeb/Layout/BlockFormattingContext.h>
Expand Down Expand Up @@ -148,15 +149,151 @@ void Document::removed_last_ref()
}

// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-document-write
void Document::write(Vector<String> const& strings)
// Implements document.write(): concatenates `strings` and inserts the result into the parser's
// tokenizer at its insertion point, then runs the parser unless a parsing-blocking script is pending.
// Returns an InvalidStateError when the document looks like an XML document or when the
// throw-on-dynamic-markup-insertion counter is non-zero; otherwise returns an empty result.
ExceptionOr<void> Document::write(Vector<String> const& strings)
{
// NOTE(review): this dbgln looks like a leftover from the previous stub implementation — confirm whether it should still be here.
dbgln("TODO: document.write({})", strings);
// 1. If document is an XML document, then throw an "InvalidStateError" DOMException.
// "XML document" is approximated here by the presence of a doctype whose name is "xml".
if (doctype() && doctype()->name() == "xml")
return DOM::InvalidStateError::create("write() called on XML document.");

// 2. If document's throw-on-dynamic-markup-insertion counter is greater than 0, then throw an "InvalidStateError" DOMException.
if (m_throw_on_dynamic_markup_insertion_counter > 0)
return DOM::InvalidStateError::create("throw-on-dynamic-markup-insertion-counter greater than zero.");

// 3. If document's active parser was aborted is true, then return.
if (m_active_parser_was_aborted)
return {};

// 4. If the insertion point is undefined, then:
// A missing parser is treated the same as an undefined insertion point.
if (!(m_parser && m_parser->tokenizer().is_insertion_point_defined())) {
// 1. If document's unload counter is greater than 0 or document's ignore-destructive-writes counter is greater than 0, then return.
if (m_unload_counter > 0 || m_ignore_destructive_writes_counter > 0)
return {};

// 2. Run the document open steps with document.
// NOTE(review): the ExceptionOr returned by open() is discarded, and m_parser is dereferenced
// unconditionally below — this relies on open() always leaving a parser in place; confirm.
open();
}

// 5. Insert input into the input stream just before the insertion point.
// All arguments are concatenated without a separator before being handed to the tokenizer.
StringBuilder builder;
builder.join("", strings);
m_parser->tokenizer().insert_input_at_insertion_point(builder.build());

// 6. If there is no pending parsing-blocking script, have the HTML parser process input, one code point at a time, processing resulting tokens as they are emitted, and stopping when the tokenizer reaches the insertion point or when the processing of the tokenizer is aborted by the tree construction stage (this can happen if a script end tag token is emitted by the tokenizer).
if (!pending_parsing_blocking_script())
m_parser->run();

return {};
}

// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-document-writeln
void Document::writeln(Vector<String> const& strings)
// Implements document.writeln(): behaves like write(), but with a single line feed (U+000A)
// appended after all of the given strings, as the specification requires.
// Returns whatever write() returns (an InvalidStateError in the cases write() rejects,
// otherwise an empty result).
ExceptionOr<void> Document::writeln(Vector<String> const& strings)
{
    // Concatenate the arguments exactly as write() would and terminate the result with
    // one line feed. The previous code appended "\n" to every argument and then passed
    // the unmodified `strings` to write(), so no line feed was ever written.
    StringBuilder builder;
    builder.join("", strings);
    builder.append('\n');

    Vector<String> text_with_line_feed;
    text_with_line_feed.append(builder.build());
    return write(text_with_line_feed);
}

// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-document-open
// Implements the two-argument document.open(): both String arguments are unnamed and ignored.
// On success the document's children are replaced with nothing, a fresh input-less HTMLParser is
// installed in m_parser with its insertion point updated, the readiness is set to "loading",
// and `this` is returned.
// Returns an InvalidStateError for XML documents or when the throw-on-dynamic-markup-insertion
// counter is non-zero, and a SecurityError when the origin check fails. Returns `this` without
// doing anything when a parser with a non-zero script nesting level exists, when the unload
// counter is non-zero, or when the active parser was aborted.
ExceptionOr<Document*> Document::open(String const&, String const&)
{
// 1. If document is an XML document, then throw an "InvalidStateError" DOMException exception.
// "XML document" is approximated here by the presence of a doctype whose name is "xml".
if (doctype() && doctype()->name() == "xml")
return DOM::InvalidStateError::create("open() called on XML document.");

// 2. If document's throw-on-dynamic-markup-insertion counter is greater than 0, then throw an "InvalidStateError" DOMException.
if (m_throw_on_dynamic_markup_insertion_counter > 0)
return DOM::InvalidStateError::create("throw-on-dynamic-markup-insertion-counter greater than zero.");

// FIXME: 3. Let entryDocument be the entry global object's associated Document.
// Until that is implemented, this document itself stands in for the entry document.
auto& entry_document = *this;

// 4. If document's origin is not same origin to entryDocument's origin, then throw a "SecurityError" DOMException.
// NOTE(review): with the stand-in above this compares the document's origin with itself,
// so this branch is presumably never taken for now — confirm.
if (origin() != entry_document.origin())
return DOM::SecurityError::create("Document.origin() not the same as entryDocument's.");

// 5. If document has an active parser whose script nesting level is greater than 0, then return document.
if (m_parser && m_parser->script_nesting_level() > 0)
return this;

// 6. Similarly, if document's unload counter is greater than 0, then return document.
if (m_unload_counter > 0)
return this;

// 7. If document's active parser was aborted is true, then return document.
if (m_active_parser_was_aborted)
return this;

// FIXME: 8. If document's browsing context is non-null and there is an existing attempt to navigate document's browsing context, then stop document loading given document.

// FIXME: 9. For each shadow-including inclusive descendant node of document, erase all event listeners and handlers given node.

// FIXME 10. If document is the associated Document of document's relevant global object, then erase all event listeners and handlers given document's relevant global object.

// 11. Replace all with null within document, without firing any mutation events.
replace_all(nullptr);

// 12. If document is fully active, then:
if (is_fully_active()) {
// 1. Let newURL be a copy of entryDocument's URL.
// new_url is not consumed yet because step 12.3 below is still a FIXME.
auto new_url = entry_document.url();
// 2. If entryDocument is not document, then set newURL's fragment to null.
// NOTE(review): the spec asks for a null fragment, while an empty string is passed here —
// confirm that URL treats the two the same.
if (&entry_document != this)
new_url.set_fragment("");

// FIXME: 3. Run the URL and history update steps with document and newURL.
}

// FIXME: 13. Set document's is initial about:blank to false.

// FIXME: 14. If document's iframe load in progress flag is set, then set document's mute iframe load flag.

// 15. Set document to no-quirks mode.
set_quirks_mode(QuirksMode::No);

// 16. Create a new HTML parser and associate it with document. This is a script-created parser (meaning that it can be closed by the document.open() and document.close() methods, and that the tokenizer will wait for an explicit call to document.close() before emitting an end-of-file token). The encoding confidence is irrelevant.
// Assigning to m_parser replaces whatever parser the document held before.
m_parser = make<HTML::HTMLParser>(*this);

// 17. Set the insertion point to point at just before the end of the input stream (which at this point will be empty).
m_parser->tokenizer().update_insertion_point();

// 18. Update the current document readiness of document to "loading".
update_readiness(HTML::DocumentReadyState::Loading);

// 19. Return document.
return this;
}

// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#closing-the-input-stream
// Implements document.close(): tells the parser's tokenizer that the end of input was reached and,
// unless a parsing-blocking script is pending, runs the parser once more.
// Returns an InvalidStateError for XML documents or when the throw-on-dynamic-markup-insertion
// counter is non-zero; otherwise returns an empty result (including when there is no parser).
ExceptionOr<void> Document::close()
{
// 1. If document is an XML document, then throw an "InvalidStateError" DOMException exception.
// "XML document" is approximated here by the presence of a doctype whose name is "xml".
if (doctype() && doctype()->name() == "xml")
return DOM::InvalidStateError::create("close() called on XML document.");

// 2. If document's throw-on-dynamic-markup-insertion counter is greater than 0, then throw an "InvalidStateError" DOMException.
if (m_throw_on_dynamic_markup_insertion_counter > 0)
return DOM::InvalidStateError::create("throw-on-dynamic-markup-insertion-counter greater than zero.");

// 3. If there is no script-created parser associated with the document, then return.
// NOTE(review): this only checks that some parser exists; whether m_parser can ever hold a
// parser that is not script-created is not visible here — confirm.
if (!m_parser)
return {};

// FIXME: 4. Insert an explicit "EOF" character at the end of the parser's input stream.
// NOTE(review): insert_eof() is called here, so the FIXME above presumably refers to a
// remaining gap in how the EOF is represented — confirm whether it is still needed.
m_parser->tokenizer().insert_eof();

// 5. If there is a pending parsing-blocking script, then return.
if (pending_parsing_blocking_script())
return {};

// FIXME: 6. Run the tokenizer, processing resulting tokens as they are emitted, and stopping when the tokenizer reaches the explicit "EOF" character or spins the event loop.
m_parser->run();

return {};
}

Origin Document::origin() const
Expand Down
17 changes: 14 additions & 3 deletions Userland/Libraries/LibWeb/DOM/Document.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,11 @@ class Document

Window& window() { return *m_window; }

void write(Vector<String> const& strings);
void writeln(Vector<String> const& strings);
ExceptionOr<void> write(Vector<String> const& strings);
ExceptionOr<void> writeln(Vector<String> const& strings);

ExceptionOr<Document*> open(String const& = "", String const& = "");
ExceptionOr<void> close();

Window* default_view() { return m_window; }

Expand Down Expand Up @@ -355,6 +358,9 @@ class Document
RefPtr<Core::Timer> m_style_update_timer;
RefPtr<Core::Timer> m_layout_update_timer;

OwnPtr<HTML::HTMLParser> m_parser;
bool m_active_parser_was_aborted { false };

String m_source;

OwnPtr<JS::Interpreter> m_interpreter;
Expand Down Expand Up @@ -385,6 +391,12 @@ class Document

u32 m_ignore_destructive_writes_counter { 0 };

// https://html.spec.whatwg.org/multipage/browsing-the-web.html#unload-counter
u32 m_unload_counter { 0 };

// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#throw-on-dynamic-markup-insertion-counter
u32 m_throw_on_dynamic_markup_insertion_counter { 0 };

// https://html.spec.whatwg.org/multipage/semantics.html#script-blocking-style-sheet-counter
u32 m_script_blocking_style_sheet_counter { 0 };

Expand All @@ -403,5 +415,4 @@ class Document

bool m_needs_layout { false };
};

}
8 changes: 6 additions & 2 deletions Userland/Libraries/LibWeb/DOM/Document.idl
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,12 @@ interface Document : Node {

readonly attribute Window? defaultView;

undefined write(DOMString... text);
undefined writeln(DOMString... text);
[CEReactions] Document open(optional DOMString unused1, optional DOMString unused2);
// FIXME: implement ExceptionOr<Window> Document::open(...)
// WindowProxy? open(USVString url, DOMString name, DOMString features);
[CEReactions] undefined close();
[CEReactions] undefined write(DOMString... text);
[CEReactions] undefined writeln(DOMString... text);

attribute DOMString cookie;

Expand Down
58 changes: 49 additions & 9 deletions Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,17 +137,24 @@ HTMLParser::HTMLParser(DOM::Document& document, StringView input, const String&
m_document->set_encoding(standardized_encoding.value());
}

// Constructs a parser bound to `document` without any input: m_tokenizer is not given data here,
// so input has to be supplied to the tokenizer afterwards.
HTMLParser::HTMLParser(DOM::Document& document)
: m_document(document)
{
// Register this parser with its tokenizer.
// NOTE(review): the `{}` first argument is presumably a Badge restricting who may call set_parser — confirm.
m_tokenizer.set_parser({}, *this);
}

// On destruction, turns style invalidation on attribute changes back on for the document.
// NOTE(review): presumably this was switched off while the parser was alive — confirm against the other constructor.
HTMLParser::~HTMLParser()
{
m_document->set_should_invalidate_styles_on_attribute_changes(true);
}

void HTMLParser::run(const AK::URL& url)
void HTMLParser::run()
{
m_document->set_url(url);
m_document->set_source(m_tokenizer.source());

for (;;) {
// FIXME: Find a better way to say that we come from Document::close() and want to process EOF.
if (!m_tokenizer.is_eof_inserted() && m_tokenizer.is_insertion_point_reached())
return;

auto optional_token = m_tokenizer.next_token();
if (!optional_token.has_value())
break;
Expand Down Expand Up @@ -186,7 +193,13 @@ void HTMLParser::run(const AK::URL& url)
}

flush_character_insertions();
}

// Parses a complete document: records `url` and the tokenizer's source on the document,
// runs the argument-less run() overload, and then performs the_end().
void HTMLParser::run(const AK::URL& url)
{
m_document->set_url(url);
m_document->set_source(m_tokenizer.source());
// The argument-less overload is called first; the_end() is only invoked from this overload.
run();
the_end();
}

Expand All @@ -197,7 +210,8 @@ void HTMLParser::the_end()

// FIXME: 1. If the active speculative HTML parser is not null, then stop the speculative HTML parser and return.

// FIXME: 2. Set the insertion point to undefined.
// 2. Set the insertion point to undefined.
m_tokenizer.undefine_insertion_point();

// 3. Update the current document readiness to "interactive".
m_document->update_readiness(HTML::DocumentReadyState::Interactive);
Expand Down Expand Up @@ -2003,6 +2017,7 @@ void HTMLParser::decrement_script_nesting_level()
--m_script_nesting_level;
}

// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata
void HTMLParser::handle_text(HTMLToken& token)
{
if (token.is_character()) {
Expand All @@ -2025,13 +2040,18 @@ void HTMLParser::handle_text(HTMLToken& token)
NonnullRefPtr<HTMLScriptElement> script = verify_cast<HTMLScriptElement>(current_node());
(void)m_stack_of_open_elements.pop();
m_insertion_mode = m_original_insertion_mode;
// FIXME: Handle tokenizer insertion point stuff here.
// Let the old insertion point have the same value as the current insertion point.
m_tokenizer.store_insertion_point();
// Let the insertion point be just before the next input character.
m_tokenizer.update_insertion_point();
increment_script_nesting_level();
// FIXME: Check if active speculative HTML parser is null.
script->prepare_script({});
decrement_script_nesting_level();
if (script_nesting_level() == 0)
m_parser_pause_flag = false;
// FIXME: Handle tokenizer insertion point stuff here too.
// Let the insertion point have the value of the old insertion point.
m_tokenizer.restore_insertion_point();

while (document().pending_parsing_blocking_script()) {
if (script_nesting_level() != 0) {
Expand Down Expand Up @@ -2065,7 +2085,8 @@ void HTMLParser::handle_text(HTMLToken& token)

m_tokenizer.set_blocked(false);

// FIXME: Handle tokenizer insertion point stuff here too.
// Let the insertion point be just before the next input character.
m_tokenizer.update_insertion_point();

VERIFY(script_nesting_level() == 0);
increment_script_nesting_level();
Expand All @@ -2076,7 +2097,8 @@ void HTMLParser::handle_text(HTMLToken& token)
VERIFY(script_nesting_level() == 0);
m_parser_pause_flag = false;

// FIXME: Handle tokenizer insertion point stuff here too.
// Let the insertion point be undefined again.
m_tokenizer.undefine_insertion_point();
}
}
return;
Expand Down Expand Up @@ -2986,8 +3008,26 @@ void HTMLParser::process_using_the_rules_for_foreign_content(HTMLToken& token)

if (token.is_end_tag() && current_node().namespace_() == Namespace::SVG && current_node().tag_name() == SVG::TagNames::script) {
ScriptEndTag:
// Pop the current node off the stack of open elements.
(void)m_stack_of_open_elements.pop();
// Let the old insertion point have the same value as the current insertion point.
m_tokenizer.store_insertion_point();
// Let the insertion point be just before the next input character.
m_tokenizer.update_insertion_point();
// Increment the parser's script nesting level by one.
increment_script_nesting_level();
// Set the parser pause flag to true.
m_parser_pause_flag = true;
// FIXME: Implement SVG script parsing.
TODO();
// Decrement the parser's script nesting level by one.
decrement_script_nesting_level();
// If the parser's script nesting level is zero, then set the parser pause flag to false.
if (script_nesting_level() == 0)
m_parser_pause_flag = false;

// Let the insertion point have the value of the old insertion point.
m_tokenizer.restore_insertion_point();
}

if (token.is_end_tag()) {
Expand Down
9 changes: 8 additions & 1 deletion Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@ class HTMLParser {

public:
HTMLParser(DOM::Document&, StringView input, const String& encoding);
HTMLParser(DOM::Document&);
~HTMLParser();

static NonnullOwnPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, const ByteBuffer& input);

void run();
void run(const AK::URL&);

DOM::Document& document();
Expand All @@ -67,6 +69,12 @@ class HTMLParser {

static bool is_special_tag(const FlyString& tag_name, const FlyString& namespace_);

HTMLTokenizer& tokenizer() { return m_tokenizer; }

bool aborted() const { return m_aborted; }

size_t script_nesting_level() const { return m_script_nesting_level; }

private:
const char* insertion_mode_name() const;

Expand Down Expand Up @@ -127,7 +135,6 @@ class HTMLParser {
void parse_generic_raw_text_element(HTMLToken&);
void increment_script_nesting_level();
void decrement_script_nesting_level();
size_t script_nesting_level() const { return m_script_nesting_level; }
void reset_the_insertion_mode_appropriately();

void adjust_mathml_attributes(HTMLToken&);
Expand Down
Loading

0 comments on commit db78981

Please sign in to comment.