JuliaLang · vtjnash · Oct 11, 2023 · Oct 9, 2023
diff --git a/NEWS.md b/NEWS.md
@@ -41,6 +41,7 @@ New library functions
 * `hardlink(src, dst)` can be used to create hard links. ([#41639])
 * `diskstat(path=pwd())` can be used to return statistics about the disk. ([#42248])
 * `copyuntil(out, io, delim)` and `copyline(out, io)` copy data into an `out::IO` stream ([#48273]).
+* `eachrsplit(string, pattern)` iterates split substrings right to left.
 
 New library features
 --------------------

diff --git a/base/exports.jl b/base/exports.jl
@@ -593,6 +593,7 @@ export
  digits,
  digits!,
  eachsplit,
+ eachrsplit,
  escape_string,
  hex2bytes,
  hex2bytes!,

diff --git a/base/strings/util.jl b/base/strings/util.jl
@@ -592,6 +592,101 @@ eachsplit(str::T, splitter::AbstractChar; limit::Integer=0, keepempty=true) wher
 eachsplit(str::AbstractString; limit::Integer=0, keepempty=false) =
  eachsplit(str, isspace; limit, keepempty)
 
+"""
+ eachrsplit(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
+ eachrsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
+
+Return an iterator over `SubString`s of `str`, produced when splitting on
+the delimiter(s) `dlm`, and yielded in reverse order (from right to left).
+`dlm` can be any of the formats allowed by [`findprev`](@ref)'s first argument
+(i.e. a string, a single character or a function), or a collection of characters.
+
+If `dlm` is omitted, it defaults to [`isspace`](@ref), and `keepempty` defaults to `false`.
+
+The optional keyword arguments are:
+ - If `limit > 0`, the iterator will split at most `limit - 1` times before returning
+ the rest of the string unsplit. `limit < 1` implies no cap to splits (default).
+ - `keepempty`: whether empty fields should be returned when iterating.
+ Default is `false` without a `dlm` argument, `true` with a `dlm` argument.
+
+Note that unlike [`split`](@ref), [`rsplit`](@ref) and [`eachsplit`](@ref), this
+function iterates the substrings right to left as they occur in the input.
+
+See also [`eachsplit`](@ref), [`rsplit`](@ref).
+
+!!! compat "Julia 1.11"
+ This function requires Julia 1.11 or later.
+
+# Examples
+```jldoctest
+julia> a = "Ma.r.ch";
+
+julia> collect(eachrsplit(a, ".")) == ["ch", "r", "Ma"]
+true
+
+julia> collect(eachrsplit(a, "."; limit=2)) == ["ch", "Ma.r"]
+true
+```
+"""
+function eachrsplit end
+
+struct RSplitIterator{S <: AbstractString, F}
+ str::S
+ splitter::F
+ limit::Int
+ keepempty::Bool
+end
+
+eltype(::Type{<:RSplitIterator{T}}) where T = SubString{T}
+eltype(::Type{<:RSplitIterator{<:SubString{T}}}) where T = SubString{T}
+
+IteratorSize(::Type{<:RSplitIterator}) = SizeUnknown()
+
+eachrsplit(str::T, splitter; limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} =
+ RSplitIterator(str, splitter, Int(limit), keepempty) # field is `limit::Int`; the default outer constructor does not convert
+
+eachrsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
+ limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} =
+ eachrsplit(str, in(splitter); limit, keepempty)
+
+eachrsplit(str::T, splitter::AbstractChar; limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} =
+ eachrsplit(str, isequal(splitter); limit, keepempty)
+
+# a bit oddball, but standard behavior in Perl, Ruby & Python:
+eachrsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false) =
+ eachrsplit(str, isspace; limit, keepempty)
+
+function Base.iterate(it::RSplitIterator, (to, remaining_splits)=(lastindex(it.str), it.limit-1))
+ to < 0 && return nothing # a negative `to` is the "exhausted" state produced via next_to below
+ from = 1
+ next_to = -1
+ while !iszero(remaining_splits)
+ pos = findprev(it.splitter, it.str, to)
+ # If there are no matches, emit the rest of the string; the iterator then stops.
+ if pos === nothing
+ from = 1
+ next_to = -1
+ break
+ else
+ from = nextind(it.str, last(pos))
+ # pos can be empty if we search for a zero-width delimiter, in which
+ # case pos is to:to-1.
+ # In this case, next_to must be to - 1, except if to is 0 or 1, in
+ # which case next_to is set to -1 so that the next call ends iteration.
+ next_to = (isempty(pos) & (to < 2)) ? -1 : prevind(it.str, first(pos))
+
+ # If the element we would emit is empty and keepempty is false, skip it
+ if from > to && !(it.keepempty)
+ to = next_to
+ continue
+ end
+ break
+ end
+ end
+ from > to && !(it.keepempty) && return nothing
+ return (SubString(it.str, from, to), (next_to, remaining_splits-1))
+end
+
 """
  split(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
  split(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
@@ -660,37 +755,15 @@ julia> rsplit(a, "."; limit=2)
  "h"
 ```
 """
-function rsplit end
-
 function rsplit(str::T, splitter;
- limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
- _rsplit(str, splitter, limit, keepempty, T <: SubString ? T[] : SubString{T}[])
-end
-function rsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
- limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
- _rsplit(str, in(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
-end
-function rsplit(str::T, splitter::AbstractChar;
- limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
- _rsplit(str, isequal(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
+ limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
+ reverse!(collect(eachrsplit(str, splitter; limit, keepempty)))
 end
 
-function _rsplit(str::AbstractString, splitter, limit::Integer, keepempty::Bool, strs::Array)
- n = lastindex(str)::Int
- r = something(findlast(splitter, str)::Union{Nothing,Int,UnitRange{Int}}, 0)
- j, k = first(r), last(r)
- while j > 0 && k > 0 && length(strs) != limit-1
- (keepempty || k < n) && pushfirst!(strs, @inbounds SubString(str,nextind(str,k)::Int,n))
- n = prevind(str, j)::Int
- r = something(findprev(splitter,str,n)::Union{Nothing,Int,UnitRange{Int}}, 0)
- j, k = first(r), last(r)
- end
- (keepempty || n > 0) && pushfirst!(strs, SubString(str,1,n))
- return strs
-end
+# a bit oddball, but standard behavior in Perl, Ruby & Python:
 rsplit(str::AbstractString;
  limit::Integer=0, keepempty::Bool=false) =
- rsplit(str, isspace; limit=limit, keepempty=keepempty)
+ rsplit(str, isspace; limit, keepempty)
 
 _replace(io, repl, str, r, pattern) = print(io, repl)
 _replace(io, repl::Function, str, r, pattern) =

diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md
@@ -53,6 +53,7 @@ Base.occursin
 Base.reverse(::Union{String,SubString{String}})
 Base.replace(::IO, s::AbstractString, ::Pair...)
 Base.eachsplit
+Base.eachrsplit
 Base.split
 Base.rsplit
 Base.strip

diff --git a/test/strings/util.jl b/test/strings/util.jl
@@ -212,6 +212,28 @@ end
  @test split("α β γ", "β") == rsplit("α β γ", "β") == ["α "," γ"]
 end
 
+@testset "eachrsplit" begin
+ @test collect(eachrsplit("", 'a')) == [""]
+ @test collect(eachrsplit("", isspace; limit=3)) == [""]
+ @test collect(eachrsplit("b c d"; limit=2)) == ["d", "b c "]
+ @test collect(eachrsplit("a.b.c", '.'; limit=1)) == ["a.b.c"]
+ @test collect(eachrsplit("a..b..c", '.')) == ["c", "", "b", "", "a"]
+ @test collect(eachrsplit("ax b c")) == ["c", "b", "ax"]
+ @test collect(eachrsplit(" a 12 4 v ", isnumeric)) == [" v ", " ", "", " a "]
+ @test collect(eachrsplit("ba", 'a')) == ["", "b"]
+ @test collect(eachrsplit(" ")) == []
+ @test collect(eachrsplit("aaaa", 'a'; keepempty=false)) == []
+ @test collect(eachrsplit("aaaa", 'a'; limit=2)) == ["", "aaa"]
+ @test collect(eachrsplit("abcdef", ['b', 'e'])) == ["f", "cd", "a"]
+ @test collect(eachrsplit("abc", isletter)) == ["", "", "", ""]
+
+ # This behaviour is quite surprising, but is consistent with split
+ # See issue 45916
+ @test collect(eachrsplit("a b"; limit=2)) == ["b", "a "] # only one trailing space
+ @test collect(eachrsplit("a "; limit=1)) == ["a "]
+ @test collect(eachrsplit(" a b c d"; limit=3)) == ["d", "c", " a b "]
+end
+
 @testset "replace" begin
  @test replace("\u2202", '*' => '\0') == "\u2202"