From 918bde98b17ad890ba0a83c1de137da86519f9b1 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Wed, 14 Jul 2021 23:53:11 +0200 Subject: [PATCH] LibWeb: Hide implementation details of HTMLToken attribute list Previously, HTMLToken would expose the Vector directly to its users. In preparation for a future change, all users now use implementation-agnostic APIs which do not expose the Vector directly. --- Tests/LibWeb/TestHTMLTokenizer.cpp | 2 +- .../LibWeb/HTML/Parser/HTMLDocumentParser.cpp | 25 +++--- .../LibWeb/HTML/Parser/HTMLToken.cpp | 5 +- .../Libraries/LibWeb/HTML/Parser/HTMLToken.h | 82 +++++++++++++++---- .../LibWeb/HTML/Parser/HTMLTokenizer.cpp | 48 +++++------ .../SyntaxHighlighter/SyntaxHighlighter.cpp | 5 +- 6 files changed, 108 insertions(+), 59 deletions(-) diff --git a/Tests/LibWeb/TestHTMLTokenizer.cpp b/Tests/LibWeb/TestHTMLTokenizer.cpp index c001f00d2bdb99..c3d5c2c195ab94 100644 --- a/Tests/LibWeb/TestHTMLTokenizer.cpp +++ b/Tests/LibWeb/TestHTMLTokenizer.cpp @@ -61,7 +61,7 @@ using Token = Web::HTML::HTMLToken; #define EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(count) \ VERIFY(last_token.has_value()); \ - EXPECT_EQ(last_token->attributes().size(), (size_t)count); + EXPECT_EQ(last_token->attribute_count(), (size_t)(count)); static Vector run_tokenizer(StringView const& input) { diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp index 88c7de4ff34765..10fd18ed0e4afa 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp @@ -436,9 +436,10 @@ HTMLDocumentParser::AdjustedInsertionLocation HTMLDocumentParser::find_appropria NonnullRefPtr HTMLDocumentParser::create_element_for(const HTMLToken& token, const FlyString& namespace_) { auto element = create_element(document(), token.tag_name(), namespace_); - for (auto& attribute : token.m_tag.attributes) { + token.for_each_attribute([&](auto& attribute) { element->set_attribute(attribute.local_name, attribute.value); - } + return IterationDecision::Continue; + }); return element; } @@ -1117,11 +1118,11 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token) log_parse_error(); if (m_stack_of_open_elements.contains(HTML::TagNames::template_)) return; - for (auto& attribute : token.m_tag.attributes) { - if (current_node().has_attribute(attribute.local_name)) - continue; - current_node().set_attribute(attribute.local_name, attribute.value); - } + token.for_each_attribute([&](auto& attribute) { + if (!current_node().has_attribute(attribute.local_name)) + current_node().set_attribute(attribute.local_name, attribute.value); + return IterationDecision::Continue; + }); return; } if (token.is_start_tag() && token.tag_name().is_one_of(HTML::TagNames::base, HTML::TagNames::basefont, HTML::TagNames::bgsound, HTML::TagNames::link, HTML::TagNames::meta, HTML::TagNames::noframes, HTML::TagNames::script, HTML::TagNames::style, HTML::TagNames::template_, HTML::TagNames::title)) { @@ -1144,11 +1145,11 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token) } m_frameset_ok = false; auto& body_element = m_stack_of_open_elements.elements().at(1); - for (auto& attribute : token.m_tag.attributes) { - if (body_element.has_attribute(attribute.local_name)) - continue; - body_element.set_attribute(attribute.local_name, attribute.value); - } + token.for_each_attribute([&](auto& attribute) { + if (!body_element.has_attribute(attribute.local_name)) + body_element.set_attribute(attribute.local_name, attribute.value); + return IterationDecision::Continue; + }); return; } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp index f5c324f85e3e34..39fd79e9ab34df 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp @@ -42,12 +42,13 @@ String HTMLToken::to_string() const builder.append(" { name: '"); builder.append(tag_name()); builder.append("', { "); - for (auto& attribute : m_tag.attributes) { + for_each_attribute([&](auto& attribute) { builder.append(attribute.local_name); builder.append("=\""); builder.append(attribute.value); builder.append("\" "); - } + return IterationDecision::Continue; + }); builder.append("} }"); } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h index 12ffa10999f66b..ea310fe8c92a95 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h @@ -8,6 +8,7 @@ #pragma once #include +#include #include #include #include @@ -150,6 +151,62 @@ class HTMLToken { m_tag.self_closing_acknowledged = true; } + bool has_attributes() const + { + VERIFY(is_start_tag() || is_end_tag()); + return !m_tag.attributes.is_empty(); + } + + size_t attribute_count() const + { + VERIFY(is_start_tag() || is_end_tag()); + return m_tag.attributes.size(); + } + + void add_attribute(Attribute attribute) + { + VERIFY(is_start_tag() || is_end_tag()); + m_tag.attributes.append(move(attribute)); + } + + Attribute const& last_attribute() const + { + VERIFY(is_start_tag() || is_end_tag()); + VERIFY(!m_tag.attributes.is_empty()); + return m_tag.attributes.last(); + } + + Attribute& last_attribute() + { + VERIFY(is_start_tag() || is_end_tag()); + VERIFY(!m_tag.attributes.is_empty()); + return m_tag.attributes.last(); + } + + void drop_attributes() + { + VERIFY(is_start_tag() || is_end_tag()); + m_tag.attributes.clear(); + } + + void for_each_attribute(Function callback) const + { + VERIFY(is_start_tag() || is_end_tag()); + for (auto& attribute : m_tag.attributes) { + if (callback(attribute) == IterationDecision::Break) + break; + } + } + + void for_each_attribute(Function callback) + { + VERIFY(is_start_tag() || is_end_tag()); + for (auto& attribute : m_tag.attributes) { + if (callback(attribute) == IterationDecision::Break) + break; + } + } + StringView attribute(FlyString const& attribute_name) { VERIFY(is_start_tag() || is_end_tag()); @@ -175,29 +232,24 @@ class HTMLToken { void adjust_attribute_name(FlyString const& old_name, FlyString const& new_name) { VERIFY(is_start_tag() || is_end_tag()); - for (auto& attribute : m_tag.attributes) { - if (old_name == attribute.local_name) { + for_each_attribute([&](Attribute& attribute) { + if (old_name == attribute.local_name) attribute.local_name = new_name; - } - } + return IterationDecision::Continue; + }); } void adjust_foreign_attribute(FlyString const& old_name, FlyString const& prefix, FlyString const& local_name, FlyString const& namespace_) { VERIFY(is_start_tag() || is_end_tag()); - for (auto& attribute : m_tag.attributes) { + for_each_attribute([&](Attribute& attribute) { if (old_name == attribute.local_name) { attribute.prefix = prefix; attribute.local_name = local_name; attribute.namespace_ = namespace_; } - } - } - - void drop_attributes() - { - VERIFY(is_start_tag() || is_end_tag()); - m_tag.attributes.clear(); + return IterationDecision::Continue; + }); } Type type() const { return m_type; } @@ -207,12 +259,6 @@ class HTMLToken { Position const& start_position() const { return m_start_position; } Position const& end_position() const { return m_end_position; } - Vector const& attributes() const - { - VERIFY(is_start_tag() || is_end_tag()); - return m_tag.attributes; - } - private: Type m_type { Type::Invalid }; diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index fa2489a90bd115..f43f6bc702f0fe 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -996,8 +996,8 @@ Optional HTMLTokenizer::next_token() } ON('/') { - if (!m_current_token.m_tag.attributes.is_empty()) - m_current_token.m_tag.attributes.last().name_end_position = nth_last_position(1); + if (m_current_token.has_attributes()) + m_current_token.last_attribute().name_end_position = nth_last_position(1); RECONSUME_IN(AfterAttributeName); } ON('>') @@ -1014,14 +1014,14 @@ Optional HTMLTokenizer::next_token() HTMLToken::Attribute new_attribute; new_attribute.name_start_position = nth_last_position(1); m_current_builder.append_code_point(current_input_character.value()); - m_current_token.m_tag.attributes.append(new_attribute); + m_current_token.add_attribute(move(new_attribute)); SWITCH_TO_WITH_UNCLEAN_BUILDER(AttributeName); } ANYTHING_ELSE { HTMLToken::Attribute new_attribute; new_attribute.name_start_position = nth_last_position(1); - m_current_token.m_tag.attributes.append(move(new_attribute)); + m_current_token.add_attribute(move(new_attribute)); RECONSUME_IN(AttributeName); } } @@ -1051,28 +1051,28 @@ Optional HTMLTokenizer::next_token() { ON_WHITESPACE { - m_current_token.m_tag.attributes.last().local_name = consume_current_builder(); + m_current_token.last_attribute().local_name = consume_current_builder(); RECONSUME_IN(AfterAttributeName); } ON('/') { - m_current_token.m_tag.attributes.last().local_name = consume_current_builder(); + m_current_token.last_attribute().local_name = consume_current_builder(); RECONSUME_IN(AfterAttributeName); } ON('>') { - m_current_token.m_tag.attributes.last().local_name = consume_current_builder(); + m_current_token.last_attribute().local_name = consume_current_builder(); RECONSUME_IN(AfterAttributeName); } ON_EOF { - m_current_token.m_tag.attributes.last().local_name = consume_current_builder(); + m_current_token.last_attribute().local_name = consume_current_builder(); RECONSUME_IN(AfterAttributeName); } ON('=') { - m_current_token.m_tag.attributes.last().name_end_position = nth_last_position(1); - m_current_token.m_tag.attributes.last().local_name = consume_current_builder(); + m_current_token.last_attribute().name_end_position = nth_last_position(1); + m_current_token.last_attribute().local_name = consume_current_builder(); SWITCH_TO(BeforeAttributeValue); } ON_ASCII_UPPER_ALPHA @@ -1122,7 +1122,7 @@ Optional HTMLTokenizer::next_token() } ON('=') { - m_current_token.m_tag.attributes.last().name_end_position = nth_last_position(1); + m_current_token.last_attribute().name_end_position = nth_last_position(1); SWITCH_TO(BeforeAttributeValue); } ON('>') @@ -1136,8 +1136,8 @@ Optional HTMLTokenizer::next_token() } ANYTHING_ELSE { - m_current_token.m_tag.attributes.append({}); - m_current_token.m_tag.attributes.last().name_start_position = m_source_positions.last(); + m_current_token.add_attribute({}); + m_current_token.last_attribute().name_start_position = m_source_positions.last(); RECONSUME_IN(AttributeName); } } @@ -1145,7 +1145,7 @@ Optional HTMLTokenizer::next_token() BEGIN_STATE(BeforeAttributeValue) { - m_current_token.m_tag.attributes.last().value_start_position = nth_last_position(1); + m_current_token.last_attribute().value_start_position = nth_last_position(1); ON_WHITESPACE { continue; @@ -1174,12 +1174,12 @@ Optional HTMLTokenizer::next_token() { ON('"') { - m_current_token.m_tag.attributes.last().value = consume_current_builder(); + m_current_token.last_attribute().value = consume_current_builder(); SWITCH_TO(AfterAttributeValueQuoted); } ON('&') { - m_current_token.m_tag.attributes.last().value = consume_current_builder(); + m_current_token.last_attribute().value = consume_current_builder(); m_return_state = State::AttributeValueDoubleQuoted; SWITCH_TO(CharacterReference); } @@ -1206,12 +1206,12 @@ Optional HTMLTokenizer::next_token() { ON('\'') { - m_current_token.m_tag.attributes.last().value = consume_current_builder(); + m_current_token.last_attribute().value = consume_current_builder(); SWITCH_TO(AfterAttributeValueQuoted); } ON('&') { - m_current_token.m_tag.attributes.last().value = consume_current_builder(); + m_current_token.last_attribute().value = consume_current_builder(); m_return_state = State::AttributeValueSingleQuoted; SWITCH_TO(CharacterReference); } @@ -1238,20 +1238,20 @@ Optional HTMLTokenizer::next_token() { ON_WHITESPACE { - m_current_token.m_tag.attributes.last().value = consume_current_builder(); - m_current_token.m_tag.attributes.last().value_end_position = nth_last_position(2); + m_current_token.last_attribute().value = consume_current_builder(); + m_current_token.last_attribute().value_end_position = nth_last_position(2); SWITCH_TO(BeforeAttributeName); } ON('&') { - m_current_token.m_tag.attributes.last().value = consume_current_builder(); + m_current_token.last_attribute().value = consume_current_builder(); m_return_state = State::AttributeValueUnquoted; SWITCH_TO(CharacterReference); } ON('>') { - m_current_token.m_tag.attributes.last().value = consume_current_builder(); - m_current_token.m_tag.attributes.last().value_end_position = nth_last_position(1); + m_current_token.last_attribute().value = consume_current_builder(); + m_current_token.last_attribute().value_end_position = nth_last_position(1); SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON(0) @@ -1301,7 +1301,7 @@ Optional HTMLTokenizer::next_token() BEGIN_STATE(AfterAttributeValueQuoted) { - m_current_token.m_tag.attributes.last().value_end_position = nth_last_position(1); + m_current_token.last_attribute().value_end_position = nth_last_position(1); ON_WHITESPACE { SWITCH_TO(BeforeAttributeName); diff --git a/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp b/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp index d08ea0c11f2c14..cae5183436e4de 100644 --- a/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp +++ b/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp @@ -132,7 +132,7 @@ void SyntaxHighlighter::rehighlight(Palette const& palette) { palette.syntax_keyword(), {}, false, true }, token->is_start_tag() ? AugmentedTokenKind::OpenTag : AugmentedTokenKind::CloseTag); - for (auto& attribute : token->attributes()) { + token->for_each_attribute([&](auto& attribute) { highlight( attribute.name_start_position.line, attribute.name_start_position.column + token_start_offset, @@ -147,7 +147,8 @@ void SyntaxHighlighter::rehighlight(Palette const& palette) attribute.value_end_position.column + token_start_offset, { palette.syntax_string(), {} }, AugmentedTokenKind::AttributeValue); - } + return IterationDecision::Continue; + }); } else if (token->is_doctype()) { highlight( token->start_position().line,