Skip to content

Commit

Permalink
LibRegex: Implement multiline stateful matches
Browse files Browse the repository at this point in the history
  • Loading branch information
alimpfard authored and linusg committed Apr 23, 2021
1 parent bb40d4d commit bf9c04a
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,53 @@ test("undefined match result", () => {
r.exec = () => ({});
expect(r[Symbol.replace]()).toBe("undefined");
});

// Multiline test
test("multiline match", () => {
let reg = /\s*\/\/.*$/gm;
let string = `
(
(?:[a-fA-F\d]{1,4}:){7}(?:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8
(?:[a-fA-F\d]{1,4}:){6}(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:: 1:2:3:4:5:6::8 1:2:3:4:5:6::8 1:2:3:4:5:6::1.2.3.4
(?:[a-fA-F\d]{1,4}:){5}(?::(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,2}|:)| // 1:2:3:4:5:: 1:2:3:4:5::7:8 1:2:3:4:5::8 1:2:3:4:5::7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){4}(?:(:[a-fA-F\d]{1,4}){0,1}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,3}|:)| // 1:2:3:4:: 1:2:3:4::6:7:8 1:2:3:4::8 1:2:3:4::6:7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){3}(?:(:[a-fA-F\d]{1,4}){0,2}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,4}|:)| // 1:2:3:: 1:2:3::5:6:7:8 1:2:3::8 1:2:3::5:6:7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){2}(?:(:[a-fA-F\d]{1,4}){0,3}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,5}|:)| // 1:2:: 1:2::4:5:6:7:8 1:2::8 1:2::4:5:6:7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){1}(?:(:[a-fA-F\d]{1,4}){0,4}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,6}|:)| // 1:: 1::3:4:5:6:7:8 1::8 1::3:4:5:6:7:1.2.3.4
(?::((?::[a-fA-F\d]{1,4}){0,5}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(?::[a-fA-F\d]{1,4}){1,7}|:)) // ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 ::1.2.3.4
)(%[0-9a-zA-Z]{1,})? // %eth0 %1
`;

let res = reg.exec(string);
expect(res.length).toBe(1);
expect(res[0]).toBe(" // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8");
expect(res.index).toBe(46);
});

test("multiline stateful match", () => {
let reg = /\s*\/\/.*$/gm;
let string = `
(
(?:[a-fA-F\d]{1,4}:){7}(?:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8
(?:[a-fA-F\d]{1,4}:){6}(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:: 1:2:3:4:5:6::8 1:2:3:4:5:6::8 1:2:3:4:5:6::1.2.3.4
(?:[a-fA-F\d]{1,4}:){5}(?::(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,2}|:)| // 1:2:3:4:5:: 1:2:3:4:5::7:8 1:2:3:4:5::8 1:2:3:4:5::7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){4}(?:(:[a-fA-F\d]{1,4}){0,1}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,3}|:)| // 1:2:3:4:: 1:2:3:4::6:7:8 1:2:3:4::8 1:2:3:4::6:7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){3}(?:(:[a-fA-F\d]{1,4}){0,2}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,4}|:)| // 1:2:3:: 1:2:3::5:6:7:8 1:2:3::8 1:2:3::5:6:7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){2}(?:(:[a-fA-F\d]{1,4}){0,3}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,5}|:)| // 1:2:: 1:2::4:5:6:7:8 1:2::8 1:2::4:5:6:7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){1}(?:(:[a-fA-F\d]{1,4}){0,4}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,6}|:)| // 1:: 1::3:4:5:6:7:8 1::8 1::3:4:5:6:7:1.2.3.4
(?::((?::[a-fA-F\d]{1,4}){0,5}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(?::[a-fA-F\d]{1,4}){1,7}|:)) // ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 ::1.2.3.4
)(%[0-9a-zA-Z]{1,})? // %eth0 %1
`;

let res = reg.exec(string);
expect(res.length).toBe(1);
expect(res[0]).toBe(" // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8");
expect(res.index).toBe(46);

res = reg.exec(string);
expect(res.length).toBe(1);
expect(res[0]).toBe(
" // 1:2:3:4:5:6:: 1:2:3:4:5:6::8 1:2:3:4:5:6::8 1:2:3:4:5:6::1.2.3.4"
);
expect(res.index).toBe(231);
});
21 changes: 19 additions & 2 deletions Userland/Libraries/LibRegex/RegexMatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,21 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
input.regex_options = m_regex_options | regex_options.value_or({}).value();
input.start_offset = m_pattern.start_offset;
output.operations = 0;
size_t lines_to_skip = 0;

if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
VERIFY(views.size() == 1);
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) {
if (views.size() > 1 && input.start_offset > views.first().length()) {
dbgln("Started with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip);
for (auto& view : views) {
if (input.start_offset < view.length() + 1)
break;
++lines_to_skip;
input.start_offset -= view.length() + 1;
input.global_offset += view.length() + 1;
}
dbgln("Ended with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip);
}
}

if (c_match_preallocation_count) {
output.matches.ensure_capacity(c_match_preallocation_count);
Expand Down Expand Up @@ -127,6 +139,11 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
continue_search = false;

for (auto& view : views) {
if (lines_to_skip != 0) {
++input.line;
--lines_to_skip;
continue;
}
input.view = view;
dbgln_if(REGEX_DEBUG, "[match] Starting match with view ({}): _{}_", view.length(), view);

Expand Down

0 comments on commit bf9c04a

Please sign in to comment.