Skip to content

Commit

Permalink
LibRegex: Make parse_disjunction() consume all disjunctions in one frame
Browse files Browse the repository at this point in the history
This helps us not blow up when too many disjunctions are chained together
in the regex we're parsing.
Fixes SerenityOS#12615.
  • Loading branch information
alimpfard authored and awesomekling committed Feb 20, 2022
1 parent 627bbee commit 4be7239
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 21 deletions.
7 changes: 5 additions & 2 deletions Tests/LibRegex/Regex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,8 @@ TEST_CASE(posix_extended_nested_capture_group)
EXPECT_EQ(result.capture_group_matches[0][2].view, "llo"sv);
}

auto parse_test_case_long_disjunction_chain = String::repeated("a|"sv, 10000);

TEST_CASE(ECMA262_parse)
{
struct _test {
Expand All @@ -506,7 +508,7 @@ TEST_CASE(ECMA262_parse)
regex::ECMAScriptFlags flags {};
};

constexpr _test tests[] {
_test const tests[] {
{ "^hello.$"sv },
{ "^(hello.)$"sv },
{ "^h{0,1}ello.$"sv },
Expand Down Expand Up @@ -599,7 +601,8 @@ TEST_CASE(ECMA262_parse)
{ "(?<$$_$$>a)"sv },
{ "(?<ÿ>a)"sv },
{ "(?<𝓑𝓻𝓸𝔀𝓷>a)"sv },
{ "((?=lg)?[vl]k\\-?\\d{3}) bui| 3\\.[-\\w; ]{10}lg?-([06cv9]{3,4})"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended } // #12373, quantifiable assertions.
{ "((?=lg)?[vl]k\\-?\\d{3}) bui| 3\\.[-\\w; ]{10}lg?-([06cv9]{3,4})"sv, regex::Error::NoError, ECMAScriptFlags::BrowserExtended }, // #12373, quantifiable assertions.
{ parse_test_case_long_disjunction_chain.view() }, // A whole lot of disjunctions, should not overflow the stack.
};

for (auto& test : tests) {
Expand Down
47 changes: 28 additions & 19 deletions Userland/Libraries/LibRegex/RegexParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -956,28 +956,37 @@ bool ECMA262Parser::parse_pattern(ByteCode& stack, size_t& match_length_minimum,

bool ECMA262Parser::parse_disjunction(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
{
ByteCode left_alternative_stack;
size_t left_alternative_min_length = 0;
auto alt_ok = parse_alternative(left_alternative_stack, left_alternative_min_length, unicode, named);
if (!alt_ok)
return false;
size_t total_match_length_minimum = NumericLimits<size_t>::max();
Vector<ByteCode> alternatives;
do {
ByteCode alternative_stack;
size_t alternative_minimum_length = 0;
auto alt_ok = parse_alternative(alternative_stack, alternative_minimum_length, unicode, named);
if (!alt_ok)
return false;

if (!match(TokenType::Pipe)) {
stack.extend(left_alternative_stack);
match_length_minimum = left_alternative_min_length;
return alt_ok;
}
alternatives.append(move(alternative_stack));
total_match_length_minimum = min(alternative_minimum_length, total_match_length_minimum);

consume();
ByteCode right_alternative_stack;
size_t right_alternative_min_length = 0;
auto continuation_ok = parse_disjunction(right_alternative_stack, right_alternative_min_length, unicode, named);
if (!continuation_ok)
return false;
if (!match(TokenType::Pipe))
break;
consume();
} while (true);

Optional<ByteCode> alternative_stack {};
for (auto& alternative : alternatives) {
if (alternative_stack.has_value()) {
ByteCode target_stack;
target_stack.insert_bytecode_alternation(alternative_stack.release_value(), move(alternative));
alternative_stack = move(target_stack);
} else {
alternative_stack = move(alternative);
}
}

stack.insert_bytecode_alternation(move(left_alternative_stack), move(right_alternative_stack));
match_length_minimum = min(left_alternative_min_length, right_alternative_min_length);
return continuation_ok;
stack.extend(alternative_stack.release_value());
match_length_minimum = total_match_length_minimum;
return true;
}

bool ECMA262Parser::parse_alternative(ByteCode& stack, size_t& match_length_minimum, bool unicode, bool named)
Expand Down

0 comments on commit 4be7239

Please sign in to comment.