From 3d3084f0881249b01c58985dbad90970e2cc3f99 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Mon, 21 Dec 2020 12:32:27 +0100 Subject: [PATCH] LibGUI: Rework GML parser into a lexer+parser This will make it easier to add GML syntax highlighting. :^) --- Libraries/LibGUI/CMakeLists.txt | 1 + Libraries/LibGUI/GMLLexer.cpp | 180 ++++++++++++++++++++++++++++++++ Libraries/LibGUI/GMLLexer.h | 90 ++++++++++++++++ Libraries/LibGUI/GMLParser.cpp | 101 +++++++++--------- 4 files changed, 323 insertions(+), 49 deletions(-) create mode 100644 Libraries/LibGUI/GMLLexer.cpp create mode 100644 Libraries/LibGUI/GMLLexer.h diff --git a/Libraries/LibGUI/CMakeLists.txt b/Libraries/LibGUI/CMakeLists.txt index ce7dcc7ce3da12..15b93688c5f929 100644 --- a/Libraries/LibGUI/CMakeLists.txt +++ b/Libraries/LibGUI/CMakeLists.txt @@ -30,6 +30,7 @@ set(SOURCES FileSystemModel.cpp FilteringProxyModel.cpp Frame.cpp + GMLLexer.cpp GMLParser.cpp GroupBox.cpp HeaderView.cpp diff --git a/Libraries/LibGUI/GMLLexer.cpp b/Libraries/LibGUI/GMLLexer.cpp new file mode 100644 index 00000000000000..1d999b4301fbb6 --- /dev/null +++ b/Libraries/LibGUI/GMLLexer.cpp @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2020, Andreas Kling + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "GMLLexer.h" +#include +#include + +namespace GUI { + +GMLLexer::GMLLexer(const StringView& input) + : m_input(input) +{ +} + +char GMLLexer::peek(size_t offset) const +{ + if ((m_index + offset) >= m_input.length()) + return 0; + return m_input[m_index + offset]; +} + +char GMLLexer::consume() +{ + ASSERT(m_index < m_input.length()); + char ch = m_input[m_index++]; + m_previous_position = m_position; + if (ch == '\n') { + m_position.line++; + m_position.column = 0; + } else { + m_position.column++; + } + return ch; +} + +static bool is_valid_identifier_start(char ch) +{ + return isalpha(ch) || ch == '_'; +} + +static bool is_valid_identifier_character(char ch) +{ + return isalnum(ch) || ch == '_'; +} + +static bool is_valid_class_start(char ch) +{ + return isalpha(ch) || ch == '_'; +} + +static bool is_valid_class_character(char ch) +{ + return isalnum(ch) || ch == '_' || ch == ':'; +} + +Vector GMLLexer::lex() +{ + Vector tokens; + + size_t token_start_index = 0; + GMLPosition token_start_position; + + auto begin_token = [&] { + token_start_index = m_index; + token_start_position = m_position; + }; + + auto commit_token = [&](auto type) { + GMLToken token; + 
token.m_view = m_input.substring_view(token_start_index, m_index - token_start_index); + token.m_type = type; + token.m_start = token_start_position; + token.m_end = m_previous_position; + tokens.append(token); + }; + + auto consume_class = [&] { + begin_token(); + consume(); + commit_token(GMLToken::Type::ClassMarker); + begin_token(); + while (is_valid_class_character(peek())) + consume(); + commit_token(GMLToken::Type::ClassName); + }; + + while (m_index < m_input.length()) { + if (isspace(peek(0))) { + begin_token(); + while (isspace(peek())) + consume(); + continue; + } + + // C++ style comments + if (peek(0) && peek(0) == '/' && peek(1) == '/') { + begin_token(); + while (peek() && peek() != '\n') + consume(); + commit_token(GMLToken::Type::Comment); + continue; + } + + if (peek(0) == '{') { + begin_token(); + consume(); + commit_token(GMLToken::Type::LeftCurly); + continue; + } + + if (peek(0) == '}') { + begin_token(); + consume(); + commit_token(GMLToken::Type::RightCurly); + continue; + } + + if (peek(0) == '@' && is_valid_class_start(peek(1))) { + consume_class(); + continue; + } + + if (is_valid_identifier_start(peek(0))) { + begin_token(); + consume(); + while (is_valid_identifier_character(peek(0))) + consume(); + commit_token(GMLToken::Type::Identifier); + continue; + } + + if (peek(0) == ':') { + begin_token(); + consume(); + commit_token(GMLToken::Type::Colon); + + while (isspace(peek())) + consume(); + + if (peek(0) == '@' && is_valid_class_start(peek(1))) { + consume_class(); + } else { + begin_token(); + while (peek() && peek() != '\n') + consume(); + commit_token(GMLToken::Type::JsonValue); + } + continue; + } + + consume(); + commit_token(GMLToken::Type::Unknown); + } + return tokens; +} + +} diff --git a/Libraries/LibGUI/GMLLexer.h b/Libraries/LibGUI/GMLLexer.h new file mode 100644 index 00000000000000..0803747dd91e56 --- /dev/null +++ b/Libraries/LibGUI/GMLLexer.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020, Andreas Kling + * All rights 
reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include + +namespace GUI { + +#define FOR_EACH_TOKEN_TYPE \ + __TOKEN(Unknown) \ + __TOKEN(Comment) \ + __TOKEN(ClassMarker) \ + __TOKEN(ClassName) \ + __TOKEN(LeftCurly) \ + __TOKEN(RightCurly) \ + __TOKEN(Identifier) \ + __TOKEN(Colon) \ + __TOKEN(JsonValue) + +struct GMLPosition { + size_t line; + size_t column; +}; + +struct GMLToken { + enum class Type { +#define __TOKEN(x) x, + FOR_EACH_TOKEN_TYPE +#undef __TOKEN + }; + + const char* to_string() const + { + switch (m_type) { +#define __TOKEN(x) \ + case Type::x: \ + return #x; + FOR_EACH_TOKEN_TYPE +#undef __TOKEN + } + ASSERT_NOT_REACHED(); + } + + Type m_type { Type::Unknown }; + StringView m_view; + GMLPosition m_start; + GMLPosition m_end; +}; + +class GMLLexer { +public: + GMLLexer(const StringView&); + + Vector lex(); + +private: + char peek(size_t offset = 0) const; + char consume(); + + StringView m_input; + size_t m_index { 0 }; + GMLPosition m_previous_position { 0, 0 }; + GMLPosition m_position { 0, 0 }; +}; + +} diff --git a/Libraries/LibGUI/GMLParser.cpp b/Libraries/LibGUI/GMLParser.cpp index cc8d8e9901d780..67e7bc8099a3d3 100644 --- a/Libraries/LibGUI/GMLParser.cpp +++ b/Libraries/LibGUI/GMLParser.cpp @@ -27,87 +27,81 @@ #include #include #include +#include +#include #include #include namespace GUI { -static bool is_valid_class_name_character(char ch) -{ - return isalpha(ch) || ch == ':'; -} - -static bool is_valid_property_name_character(char ch) -{ - return isalpha(ch) || ch == '_'; -} - -static void swallow_whitespace(GenericLexer& scanner) -{ - scanner.consume_while([](auto ch) { return isspace(ch); }); -} - -static Optional parse_core_object(GenericLexer& scanner) +static Optional parse_core_object(Queue& tokens) { JsonObject object; JsonArray children; - // '@Foo' means new Core::Object of class Foo - if (!scanner.consume_specific('@')) { - dbgln("Expected '@'"); + auto peek = [&] { + if (tokens.is_empty()) + return GMLToken::Type::Unknown; + return 
tokens.head().m_type; + }; + + if (peek() != GMLToken::Type::ClassMarker) { + dbgln("Expected class marker"); return {}; } - auto class_name = scanner.consume_while([](auto ch) { return is_valid_class_name_character(ch); }); - object.set("class", JsonValue(class_name)); - - swallow_whitespace(scanner); + tokens.dequeue(); - if (!scanner.consume_specific('{')) { - dbgln("Expected '{{'"); + if (peek() != GMLToken::Type::ClassName) { + dbgln("Expected class name"); return {}; } - swallow_whitespace(scanner); + auto class_name = tokens.dequeue(); + object.set("class", JsonValue(class_name.m_view)); - for (;;) { - swallow_whitespace(scanner); + if (peek() != GMLToken::Type::LeftCurly) { + dbgln("Expected {{"); + return {}; + } + tokens.dequeue(); - if (scanner.peek() == '}') { + for (;;) { + if (peek() == GMLToken::Type::RightCurly) { // End of object break; } - if (scanner.peek() == '@') { + if (peek() == GMLToken::Type::ClassMarker) { // It's a child object. - auto value = parse_core_object(scanner); - if (!value.has_value()) + auto value = parse_core_object(tokens); + if (!value.has_value()) { + dbgln("Parsing child object failed"); return {}; + } if (!value.value().is_object()) { dbgln("Expected child to be Core::Object"); return {}; } children.append(value.release_value()); - } else { + } else if (peek() == GMLToken::Type::Identifier) { // It's a property. 
- auto property_name = scanner.consume_while([](auto ch) { return is_valid_property_name_character(ch); }); - swallow_whitespace(scanner); + auto property_name = tokens.dequeue(); - if (property_name.is_empty()) { + if (property_name.m_view.is_empty()) { dbgln("Expected non-empty property name"); return {}; } - if (!scanner.consume_specific(':')) { + if (peek() != GMLToken::Type::Colon) { dbgln("Expected ':'"); return {}; } - - swallow_whitespace(scanner); + tokens.dequeue(); JsonValue value; - if (scanner.peek() == '@') { - auto parsed_value = parse_core_object(scanner); + if (peek() == GMLToken::Type::ClassMarker) { + auto parsed_value = parse_core_object(tokens); if (!parsed_value.has_value()) return {}; if (!parsed_value.value().is_object()) { @@ -115,23 +109,27 @@ static Optional parse_core_object(GenericLexer& scanner) return {}; } value = parsed_value.release_value(); - } else { - auto value_string = scanner.consume_line(); - auto parsed_value = JsonValue::from_string(value_string); + } else if (peek() == GMLToken::Type::JsonValue) { + auto value_string = tokens.dequeue(); + auto parsed_value = JsonValue::from_string(value_string.m_view); if (!parsed_value.has_value()) { dbgln("Expected property to be JSON value"); return {}; } value = parsed_value.release_value(); } - object.set(property_name, move(value)); + object.set(property_name.m_view, move(value)); + } else { + dbgln("Expected child, property, or }}"); + return {}; } } - if (!scanner.consume_specific('}')) { - dbgln("Expected '}'"); + if (peek() != GMLToken::Type::RightCurly) { + dbgln("Expected }}"); return {}; } + tokens.dequeue(); if (!children.is_empty()) object.set("children", move(children)); @@ -141,8 +139,13 @@ static Optional parse_core_object(GenericLexer& scanner) JsonValue parse_gml(const StringView& string) { - GenericLexer scanner(string); - auto root = parse_core_object(scanner); + auto lexer = GMLLexer(string); + + Queue tokens; + for (auto& token : lexer.lex()) + 
tokens.enqueue(token); + + auto root = parse_core_object(tokens); if (!root.has_value()) return JsonValue();