Skip to content

Commit

Permalink
LibRegex: Ensure the GoBack operation decrements the code unit index
Browse files Browse the repository at this point in the history
This was missed in commit 27d555b.
  • Loading branch information
trflynn89 authored and alimpfard committed Aug 18, 2021
1 parent c4ee576 commit 325eabc
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 2 deletions.
4 changes: 4 additions & 0 deletions Tests/LibRegex/Regex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,8 @@ TEST_CASE(ECMA262_match)
{ "(a{4}){2}"sv, "aaaaaaaa"sv },
{ "(a{4}){2}"sv, "aaaaaabaa"sv, false },
{ "\\u{4}"sv, "uuuu" },
{ "(?<=.{3})f"sv, "abcdef"sv, true, (ECMAScriptFlags)regex::AllFlags::Global },
{ "(?<=.{3})f"sv, "abc😀ef"sv, true, (ECMAScriptFlags)regex::AllFlags::Global },
// ECMA262, B.1.4. Regular Expression Pattern extensions for browsers
{ "{"sv, "{"sv, true, ECMAScriptFlags::BrowserExtended },
{ "\\5"sv, "\5"sv, true, ECMAScriptFlags::BrowserExtended },
Expand Down Expand Up @@ -694,6 +696,8 @@ TEST_CASE(ECMA262_unicode_match)
{ "\\u{1f600}"sv, "😀"sv, true, ECMAScriptFlags::Unicode },
{ "\\ud83d\\ud83d"sv, "\xed\xa0\xbd\xed\xa0\xbd"sv, true },
{ "\\ud83d\\ud83d"sv, "\xed\xa0\xbd\xed\xa0\xbd"sv, true, ECMAScriptFlags::Unicode },
{ "(?<=.{3})f"sv, "abcdef"sv, true, ECMAScriptFlags::Unicode },
{ "(?<=.{3})f"sv, "abc😀ef"sv, true, ECMAScriptFlags::Unicode },
};

for (auto& test : tests) {
Expand Down
15 changes: 13 additions & 2 deletions Userland/Libraries/LibRegex/RegexByteCode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,17 @@ static void advance_string_position(MatchState& state, RegexStringView const& vi
}
}

// Steps the match position backwards by `amount` and keeps the code unit index in sync with it.
//
// `state.string_position` is decremented directly. How `state.string_position_in_code_units`
// follows depends on the view:
//   - non-Unicode view: it is decremented by the same `amount`;
//   - Unicode view: it is recomputed from the new position via `view.code_unit_offset_of()`.
//
// The caller must ensure `amount` does not exceed the current position; this is VERIFY'd.
static void reverse_string_position(MatchState& state, RegexStringView const& view, size_t amount)
{
    VERIFY(state.string_position >= amount);
    state.string_position -= amount;

    if (!view.unicode()) {
        // Both counters move in lockstep here, so the same delta applies.
        state.string_position_in_code_units -= amount;
        return;
    }

    // In Unicode mode the code unit offset is derived from the updated position.
    state.string_position_in_code_units = view.code_unit_offset_of(state.string_position);
}

static void save_string_position(MatchInput const& input, MatchState const& state)
{
input.saved_positions.append(state.string_position);
Expand Down Expand Up @@ -226,12 +237,12 @@ ALWAYS_INLINE ExecutionResult OpCode_Restore::execute(MatchInput const& input, M
return ExecutionResult::Continue;
}

// Executes the GoBack opcode: moves the match position backwards by count().
// Returns Failed_ExecuteLowPrioForks when count() exceeds the current string position,
// otherwise Continue.
//
// NOTE(review): this span is a rendered diff hunk. The two signature lines below appear to be
// the removed and the added version respectively; the added one names the MatchInput parameter
// so that input.view can be used in the body — confirm against the actual file.
ALWAYS_INLINE ExecutionResult OpCode_GoBack::execute(MatchInput const&, MatchState& state) const
ALWAYS_INLINE ExecutionResult OpCode_GoBack::execute(MatchInput const& input, MatchState& state) const
{
// Cannot step back further than the current position; report failure for this path.
if (count() > state.string_position)
return ExecutionResult::Failed_ExecuteLowPrioForks;

// NOTE(review): old/new pair again. The first statement (presumably the removed line) only
// adjusts string_position; the second delegates to reverse_string_position(), which also
// updates string_position_in_code_units. Only one of the two should exist in the real file.
state.string_position -= count();
reverse_string_position(state, input.view, count());
return ExecutionResult::Continue;
}

Expand Down
16 changes: 16 additions & 0 deletions Userland/Libraries/LibRegex/RegexMatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,22 @@ class RegexStringView {
});
}

size_t code_unit_offset_of(size_t code_point_index) const
{
return m_view.visit(
[&](StringView const& view) -> u32 {
Utf8View utf8_view { view };
return utf8_view.byte_offset_of(code_point_index);
},
[&](Utf32View const&) -> u32 { return code_point_index; },
[&](Utf16View const& view) -> u32 {
return view.code_unit_offset_of(code_point_index);
},
[&](Utf8View const& view) -> u32 {
return view.byte_offset_of(code_point_index);
});
}

bool operator==(char const* cstring) const
{
return m_view.visit(
Expand Down

0 comments on commit 325eabc

Please sign in to comment.