Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RFC] Add eachrsplit iterator #51646

Merged
merged 1 commit into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ New library functions
* `hardlink(src, dst)` can be used to create hard links. ([#41639])
* `diskstat(path=pwd())` can be used to return statistics about the disk. ([#42248])
* `copyuntil(out, io, delim)` and `copyline(out, io)` copy data into an `out::IO` stream ([#48273]).
* `eachrsplit(string, pattern)` iterates split substrings right to left ([#51646]).

New library features
--------------------
Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,7 @@ export
digits,
digits!,
eachsplit,
eachrsplit,
escape_string,
hex2bytes,
hex2bytes!,
Expand Down
125 changes: 99 additions & 26 deletions base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,101 @@ eachsplit(str::T, splitter::AbstractChar; limit::Integer=0, keepempty=true) wher
eachsplit(str::AbstractString; limit::Integer=0, keepempty=false) =
eachsplit(str, isspace; limit, keepempty)

"""
    eachrsplit(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
    eachrsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false)

Return an iterator over `SubString`s of `str`, produced when splitting on
the delimiter(s) `dlm`, and yielded in reverse order (from right to left).
`dlm` can be any of the formats allowed by [`findprev`](@ref)'s first argument
(i.e. a string, a single character or a function), or a collection of characters.

If `dlm` is omitted, it defaults to [`isspace`](@ref), and `keepempty` defaults to `false`.

The optional keyword arguments are:
 - `limit`: if `limit > 0`, the iterator will split at most `limit - 1` times before
   returning the rest of the string unsplit. `limit < 1` implies no cap to splits (default).
 - `keepempty`: whether empty fields should be returned when iterating.
   Default is `false` without a `dlm` argument, `true` with a `dlm` argument.

Note that unlike [`split`](@ref), [`rsplit`](@ref) and [`eachsplit`](@ref), this
function iterates the substrings right to left as they occur in the input.

See also [`eachsplit`](@ref), [`rsplit`](@ref).

!!! compat "Julia 1.11"
    This function requires Julia 1.11 or later.

# Examples
```jldoctest
julia> a = "Ma.r.ch";

julia> collect(eachrsplit(a, ".")) == ["ch", "r", "Ma"]
true

julia> collect(eachrsplit(a, "."; limit=2)) == ["ch", "Ma.r"]
true
```
"""
function eachrsplit end

# Lazy iterator that yields the fields of `str` from right to left.
# Instances are created by `eachrsplit`; the fields simply record its arguments.
struct RSplitIterator{S<:AbstractString,F}
    str::S           # the string being split
    splitter::F      # delimiter, handed to `findprev` as its first argument
    limit::Int       # maximum number of fields; iteration starts with `limit - 1` splits left
    keepempty::Bool  # whether empty fields are yielded
end

# Every element is a `SubString` of the parent string type ...
function eltype(::Type{<:RSplitIterator{S}}) where {S}
    return SubString{S}
end

# ... and splitting a `SubString{S}` yields `SubString{S}` again rather than a nested type.
function eltype(::Type{<:RSplitIterator{<:SubString{S}}}) where {S}
    return SubString{S}
end

# The number of fields is only known once the string has been scanned.
function IteratorSize(::Type{<:RSplitIterator})
    return SizeUnknown()
end

# Generic entry point: `splitter` is stored as-is and later passed to `findprev`
# as its first argument, so it may be a string, a predicate function, etc.
eachrsplit(str::T, splitter; limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} =
    RSplitIterator(str, splitter, limit, keepempty)

# A collection of characters splits on any of its members (via the predicate `in(splitter)`).
eachrsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
           limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} =
    eachrsplit(str, in(splitter); limit, keepempty)

# A single character is turned into the predicate `isequal(splitter)`.
eachrsplit(str::T, splitter::AbstractChar; limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} =
    eachrsplit(str, isequal(splitter); limit, keepempty)

# Without a delimiter, split on whitespace and drop empty fields by default.
# a bit oddball, but standard behavior in Perl, Ruby & Python:
eachrsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false) =
    eachrsplit(str, isspace; limit, keepempty)

# Iteration protocol for `RSplitIterator`.
#
# The state is the tuple `(to, remaining_splits)`:
# - `to` is the last index of the part of `it.str` that has not been emitted yet;
#   `-1` is the sentinel meaning "iteration is finished".
# - `remaining_splits` counts how many more splits may be performed. It starts at
#   `it.limit - 1`, so `limit == 1` performs no split at all, while `limit == 0`
#   starts at `-1`, never hits zero, and therefore never caps the number of splits.
#
# Returns `(SubString(it.str, from, to), next_state)`, or `nothing` when done.
function Base.iterate(it::RSplitIterator, (to, remaining_splits)=(lastindex(it.str), it.limit-1))
    to < 0 && return nothing
    # Defaults used when the loop body never runs (no splits left):
    # emit everything up to `to`, then stop.
    from = 1
    next_to = -1
    while !iszero(remaining_splits)
        pos = findprev(it.splitter, it.str, to)
        # If no matches: It returns the rest of the string, then the iterator stops.
        if pos === nothing
            from = 1
            next_to = -1
            break
        else
            # The field to emit starts right after the delimiter match.
            from = nextind(it.str, last(pos))
            # pos can be empty if we search for a zero-width delimiter, in which
            # case pos is to:to-1.
            # In this case, next_to must be to - 1, except if to is 0 or 1, in
            # which case we must stop iteration (next_to = -1).
            # NOTE(review): presumably this prevents the zero-width match from being
            # found again at the start of the string forever -- confirm.
            next_to = (isempty(pos) & (to < 2)) ? -1 : prevind(it.str, first(pos))

            # If the element we emit is empty, discard it based on keepempty
            # (a discarded field does not decrement `remaining_splits`).
            if from > to && !(it.keepempty)
                to = next_to
                continue
            end
            break
        end
    end
    # The final candidate may itself be empty; with `keepempty == false` that
    # ends the iteration instead of yielding "".
    from > to && !(it.keepempty) && return nothing
    return (SubString(it.str, from, to), (next_to, remaining_splits-1))
end

"""
split(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
split(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
Expand Down Expand Up @@ -660,37 +755,15 @@ julia> rsplit(a, "."; limit=2)
"h"
```
"""
function rsplit end

function rsplit(str::T, splitter;
limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
_rsplit(str, splitter, limit, keepempty, T <: SubString ? T[] : SubString{T}[])
end
function rsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
_rsplit(str, in(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
end
function rsplit(str::T, splitter::AbstractChar;
limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
_rsplit(str, isequal(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
reverse!(collect(eachrsplit(str, splitter; limit, keepempty)))
end

# Eager right-to-left split: fills `strs` (mutated and returned) with the fields of
# `str`, scanning from the end but using `pushfirst!` so the result is in
# left-to-right order.
# NOTE(review): the `rsplit` body shown in this diff calls `eachrsplit` instead, so
# this helper looks like the pre-change implementation -- confirm before relying on it.
function _rsplit(str::AbstractString, splitter, limit::Integer, keepempty::Bool, strs::Array)
    # `n` is the right edge of the part of `str` that has not been split off yet.
    n = lastindex(str)::Int
    # `r` is the right-most match of `splitter` (an index or a range), or 0 if none.
    r = something(findlast(splitter, str)::Union{Nothing,Int,UnitRange{Int}}, 0)
    j, k = first(r), last(r)
    # Continue while a match was found and fewer than `limit - 1` fields were collected;
    # with `limit == 0` the length never equals `-1`, so there is no cap.
    while j > 0 && k > 0 && length(strs) != limit-1
        # Field between the end of the match and `n`; `k < n` means it is non-empty.
        (keepempty || k < n) && pushfirst!(strs, @inbounds SubString(str,nextind(str,k)::Int,n))
        # Move the right edge to just before the match and search again from there.
        n = prevind(str, j)::Int
        r = something(findprev(splitter,str,n)::Union{Nothing,Int,UnitRange{Int}}, 0)
        j, k = first(r), last(r)
    end
    # Whatever is left at the front becomes the first field (skipped if empty and
    # `keepempty` is false).
    (keepempty || n > 0) && pushfirst!(strs, SubString(str,1,n))
    return strs
end
# a bit oddball, but standard behavior in Perl, Ruby & Python:
rsplit(str::AbstractString;
limit::Integer=0, keepempty::Bool=false) =
rsplit(str, isspace; limit=limit, keepempty=keepempty)
rsplit(str, isspace; limit, keepempty)

_replace(io, repl, str, r, pattern) = print(io, repl)
_replace(io, repl::Function, str, r, pattern) =
Expand Down
1 change: 1 addition & 0 deletions doc/src/base/strings.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Base.occursin
Base.reverse(::Union{String,SubString{String}})
Base.replace(::IO, s::AbstractString, ::Pair...)
Base.eachsplit
Base.eachrsplit
Base.split
Base.rsplit
Base.strip
Expand Down
22 changes: 22 additions & 0 deletions test/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,28 @@ end
@test split("α β γ", "β") == rsplit("α β γ", "β") == ["α "," γ"]
end

@testset "eachrsplit" begin
    # Empty input still yields one (empty) field when a delimiter is given
    @test collect(eachrsplit("", 'a')) == [""]
    @test collect(eachrsplit("", isspace; limit=3)) == [""]

    # Two spaces before "d": the skipped empty field does not use up the split
    # budget, so one trailing space stays on the unsplit remainder
    @test collect(eachrsplit("b c  d"; limit=2)) == ["d", "b c "]
    @test collect(eachrsplit("a.b.c", '.'; limit=1)) == ["a.b.c"]
    @test collect(eachrsplit("a..b..c", '.')) == ["c", "", "b", "", "a"]
    @test collect(eachrsplit("ax b c")) == ["c", "b", "ax"]
    @test collect(eachrsplit(" a 12 4 v ", isnumeric)) == [" v ", " ", "", " a "]
    @test collect(eachrsplit("ba", 'a')) == ["", "b"]

    # With keepempty=false, input made only of delimiters yields nothing
    @test isempty(collect(eachrsplit(" ")))
    @test isempty(collect(eachrsplit("aaaa", 'a'; keepempty=false)))
    @test collect(eachrsplit("aaaa", 'a'; limit=2)) == ["", "aaa"]
    @test collect(eachrsplit("abcdef", ['b', 'e'])) == ["f", "cd", "a"]
    @test collect(eachrsplit("abc", isletter)) == ["", "", "", ""]

    # Elements are SubStrings of the parent string type, never nested SubStrings
    @test eltype(eachrsplit("a b")) == SubString{String}
    @test eltype(eachrsplit(SubString("a b"), ' ')) == SubString{String}

    # This behaviour is quite surprising, but is consistent with split
    # See issue 45916
    @test collect(eachrsplit("a  b"; limit=2)) == ["b", "a "] # only one trailing space
    @test collect(eachrsplit("a "; limit=1)) == ["a "]
    @test collect(eachrsplit(" a b  c d"; limit=3)) == ["d", "c", " a b "]
end

@testset "replace" begin
@test replace("\u2202", '*' => '\0') == "\u2202"

Expand Down