From c501ef7a396672f601cceb290aab2af1c34a174a Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 25 Apr 2019 15:46:41 -0400 Subject: [PATCH 1/2] add findall(pattern, string) --- NEWS.md | 1 + base/regex.jl | 24 ++++++++++++++++++++++++ test/regex.jl | 6 ++++++ test/strings/search.jl | 10 ++++++++++ 4 files changed, 41 insertions(+) diff --git a/NEWS.md b/NEWS.md index e5d3619986d08..807eaf14edd52 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,6 +20,7 @@ Build system changes New library functions --------------------- +* New `findall(pattern, string)` method where `pattern` is a string or regex. Standard library changes ------------------------ diff --git a/base/regex.jl b/base/regex.jl index 3bee6dbd64947..3043de4252a6a 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -310,6 +310,30 @@ findnext(r::Regex, s::AbstractString, idx::Integer) = throw(ArgumentError( )) findfirst(r::Regex, s::AbstractString) = findnext(r,s,firstindex(s)) + +""" + findall(pattern::Union{AbstractString,Regex}, string::AbstractString; overlap::Bool=false) + +Return a `Vector{UnitRange{Int}}` of all the matches for `pattern` in `string`. +Each element of the returned vector is a range of indices where the +matching sequence is found, like the return value of [`findnext`](@ref). + +If `overlap=true`, the matching sequences are allowed to overlap indices in the +original string, otherwise they must be from distinct character ranges. +""" +function findall(t::Union{AbstractString,Regex}, s::AbstractString; overlap::Bool=false) + found = UnitRange{Int}[] + i, e = firstindex(s), lastindex(s) + while true + r = findnext(t, s, i) + isnothing(r) && return found + push!(found, r) + j = overlap || isempty(r) ? 
first(r) : last(r) + j > e && return found + @inbounds i = nextind(s, j) + end +end + """ SubstitutionString(substr) diff --git a/test/regex.jl b/test/regex.jl index cb3fa965f8a50..cb9bd6ff5eab8 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -46,6 +46,12 @@ @test_throws ArgumentError match(r"test", GenericString("this is a test")) @test_throws ArgumentError findfirst(r"test", GenericString("this is a test")) + # findall: + @test findall(r"\w+", "foo bar") == [1:3, 5:7] + @test findall(r"\w+", "foo bar", overlap=true) == [1:3, 2:3, 3:3, 5:7, 6:7, 7:7] + @test findall(r"\w*", "foo bar") == [1:3, 4:3, 5:7, 8:7] + @test findall(r"\b", "foo bar") == [1:0, 4:3, 5:4, 8:7] + # Named subpatterns let m = match(r"(?<a>.)(.)(?<b>.)", "xyz") @test (m[:a], m[2], m["b"]) == ("x", "y", "z") diff --git a/test/strings/search.jl b/test/strings/search.jl index e9b6a678baa3a..61aaea463eae8 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -333,3 +333,13 @@ end s_18109 = "fooα🐨βcd3" @test findlast(isequal('o'), s_18109) == 3 @test findfirst(isequal('d'), s_18109) == 13 + +# findall (issue #31788) +@testset "findall" begin + @test findall("fooo", "foo") == UnitRange{Int}[] + @test findall("ing", "Spinning laughing dancing") == [6:8, 15:17, 23:25] + @test findall("", "foo") == [1:0, 2:1, 3:2, 4:3] + @test findall("αβ", "blαh blαβ blαββy") == findall("αβ", "blαh blαβ blαββy", overlap=true) == [9:11, 16:18] + @test findall("aa", "aaaaaa") == [1:2, 3:4, 5:6] + @test findall("aa", "aaaaaa", overlap=true) == [1:2, 2:3, 3:4, 4:5, 5:6] +end From 25a56f6a925d0e47c07e3d8f931ef79e893b9ea5 Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Thu, 25 Apr 2019 15:47:58 -0400 Subject: [PATCH 2/2] add PR # to NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 807eaf14edd52..84adca46a158f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,7 +20,7 @@ Build system changes New library functions --------------------- -* New `findall(pattern, string)` method where `pattern` is a string or regex. +* New `findall(pattern, string)` method where `pattern` is a string or regex ([#31834]). Standard library changes ------------------------