From 93eef2d08fe34f18936cee358277e831bd7b6b39 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Mon, 20 Nov 2017 22:02:53 +0100 Subject: [PATCH] Merge search into findfirst/findnext and rsearch into findlast/findprev --- NEWS.md | 4 + base/abstractarray.jl | 2 +- base/deprecated.jl | 39 +- base/docs/utils.jl | 2 +- base/exports.jl | 55 +- base/iobuffer.jl | 8 +- base/libgit2/types.jl | 4 +- base/methodshow.jl | 2 +- base/operators.jl | 19 + base/parse.jl | 8 +- base/precompile.jl | 18 +- base/regex.jl | 7 +- base/repl/LineEdit.jl | 12 +- base/repl/REPL.jl | 13 +- base/repl/REPLCompletions.jl | 16 +- base/stream.jl | 6 +- base/strings/search.jl | 199 +++++--- base/strings/util.jl | 64 ++- doc/src/manual/strings.md | 17 +- doc/src/stdlib/strings.md | 6 +- stdlib/Distributed/test/distributed_exec.jl | 4 +- test/arrayops.jl | 6 +- test/bitset.jl | 2 +- test/choosetests.jl | 4 +- test/compile.jl | 4 +- test/reflection.jl | 4 +- test/regex.jl | 2 +- test/repl.jl | 8 +- test/serialize.jl | 4 +- test/sets.jl | 2 +- test/spawn.jl | 4 +- test/strings/search.jl | 525 ++++++++++---------- test/strings/types.jl | 10 +- 33 files changed, 620 insertions(+), 460 deletions(-) diff --git a/NEWS.md b/NEWS.md index c5428bcce5629..5e0fcfebb5f51 100644 --- a/NEWS.md +++ b/NEWS.md @@ -900,6 +900,10 @@ Deprecated or removed in favor of dot overloading (`getproperty`) so factors should now be accessed as e.g. `F.Q` instead of `F[:Q]` ([#25184]). + * `search` and `rsearch` have been deprecated in favor of `findfirst`/`findnext` and + `findlast`/`findprev` respectively, in combination with the new `equalto` and `occursin` + predicates for some methods ([#24673]). + Command-line option changes --------------------------- diff --git a/base/abstractarray.jl b/base/abstractarray.jl index a763308716d8a..2b4aa0909dc8f 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1237,7 +1237,7 @@ _cs(d, a, b) = (a == b ? 
a : throw(DimensionMismatch( "mismatch in dimension $d (expected $a got $b)"))) dims2cat(::Val{n}) where {n} = ntuple(i -> (i == n), Val(n)) -dims2cat(dims) = ntuple(i -> (i in dims), maximum(dims)) +dims2cat(dims) = ntuple(occursin(dims), maximum(dims)) cat(dims, X...) = cat_t(dims, promote_eltypeof(X...), X...) diff --git a/base/deprecated.jl b/base/deprecated.jl index c66718f1cd409..7fbb581804863 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -3806,7 +3806,6 @@ end @deprecate getq(F::Factorization) F.Q end -# issue #5290 @deprecate lexcmp(x::AbstractArray, y::AbstractArray) cmp(x, y) @deprecate lexcmp(x::Real, y::Real) cmp(isless, x, y) @deprecate lexcmp(x::Complex, y::Complex) cmp((real(x),imag(x)), (real(y),imag(y))) @@ -3814,8 +3813,44 @@ end @deprecate lexless isless -# END 0.7 deprecations +@deprecate search(str::Union{String,SubString}, re::Regex, idx::Integer) findnext(re, str, idx) +@deprecate search(s::AbstractString, r::Regex, idx::Integer) findnext(r, s, idx) +@deprecate search(s::AbstractString, r::Regex) findfirst(r, s) +@deprecate search(s::AbstractString, c::Char, i::Integer) findnext(equalto(c), s, i) +@deprecate search(s::AbstractString, c::Char) findfirst(equalto(c), s) +@deprecate search(a::ByteArray, b::Union{Int8,UInt8}, i::Integer) findnext(equalto(b), a, i) +@deprecate search(a::ByteArray, b::Union{Int8,UInt8}) findfirst(equalto(b), a) +@deprecate search(a::String, b::Union{Int8,UInt8}, i::Integer) findnext(equalto(b), unsafe_wrap(Vector{UInt8}, a), i) +@deprecate search(a::String, b::Union{Int8,UInt8}) findfirst(equalto(b), unsafe_wrap(Vector{UInt8}, a)) +@deprecate search(a::ByteArray, b::Char, i::Integer) findnext(equalto(UInt8(b)), a, i) +@deprecate search(a::ByteArray, b::Char) findfirst(equalto(UInt8(b)), a) + +@deprecate search(s::AbstractString, c::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}, i::Integer) findnext(occursin(c), s, i) +@deprecate search(s::AbstractString, 
c::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}) findfirst(occursin(c), s) +@deprecate search(s::AbstractString, t::AbstractString, i::Integer) findnext(t, s, i) +@deprecate search(s::AbstractString, t::AbstractString) findfirst(t, s) + +@deprecate search(buf::IOBuffer, delim::UInt8) findfirst(equalto(delim), buf) +@deprecate search(buf::Base.GenericIOBuffer, delim::UInt8) findfirst(equalto(delim), buf) + +@deprecate rsearch(s::AbstractString, c::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}, i::Integer) findprev(occursin(c), s, i) +@deprecate rsearch(s::AbstractString, c::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}) findlast(occursin(c), s) +@deprecate rsearch(s::AbstractString, t::AbstractString, i::Integer) findprev(t, s, i) +@deprecate rsearch(s::AbstractString, t::AbstractString) findlast(t, s) +@deprecate rsearch(s::ByteArray, t::ByteArray, i::Integer) findprev(t, s, i) +@deprecate rsearch(s::ByteArray, t::ByteArray) findlast(t, s) + +@deprecate rsearch(str::Union{String,SubString}, re::Regex, idx::Integer) findprev(re, str, idx) +@deprecate rsearch(str::Union{String,SubString}, re::Regex) findlast(re, str) +@deprecate rsearch(s::AbstractString, r::Regex, idx::Integer) findprev(r, s, idx) +@deprecate rsearch(s::AbstractString, r::Regex) findlast(r, s) +@deprecate rsearch(s::AbstractString, c::Char, i::Integer) findprev(equalto(c), s, i) +@deprecate rsearch(s::AbstractString, c::Char) findlast(equalto(c), s) +@deprecate rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = endof(a)) findprev(equalto(b), a, i) +@deprecate rsearch(a::String, b::Union{Int8,UInt8}, i::Integer = endof(a)) findprev(equalto(Char(b)), a, i) +@deprecate rsearch(a::ByteArray, b::Char, i::Integer = endof(a)) findprev(equalto(UInt8(b)), a, i) +# END 0.7 deprecations # BEGIN 1.0 deprecations # END 1.0 deprecations diff --git a/base/docs/utils.jl b/base/docs/utils.jl index e7e5ceb5c46cd..8d4cccd1e081d 100644 --- a/base/docs/utils.jl 
+++ b/base/docs/utils.jl @@ -373,7 +373,7 @@ completions(name::Symbol) = completions(string(name)) # Searching and apropos # Docsearch simply returns true or false if an object contains the given needle -docsearch(haystack::AbstractString, needle) = !isempty(search(haystack, needle)) +docsearch(haystack::AbstractString, needle) = !isempty(findfirst(needle, haystack)) docsearch(haystack::Symbol, needle) = docsearch(string(haystack), needle) docsearch(::Nothing, needle) = false function docsearch(haystack::Array, needle) diff --git a/base/exports.jl b/base/exports.jl index 59592751c3d20..4a12f208e2a0e 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -409,18 +409,6 @@ export extrema, fill!, fill, - find, - findfirst, - findlast, - findin, - findmax, - findmin, - findmin!, - findmax!, - findn, - findnext, - findprev, - findnz, first, flipdim, hcat, @@ -476,9 +464,6 @@ export rot180, rotl90, rotr90, - searchsorted, - searchsortedfirst, - searchsortedlast, shuffle, shuffle!, size, @@ -501,6 +486,34 @@ export view, zeros, +# search, find, match and related functions + contains, + eachmatch, + endswith, + equalto, + find, + findfirst, + findlast, + findin, + findmax, + findmin, + findmin!, + findmax!, + findn, + findnext, + findprev, + findnz, + ismatch, + occursin, + match, + matchall, + rsearchindex, + searchindex, + searchsorted, + searchsortedfirst, + searchsortedlast, + startswith, + # linear algebra bkfact!, bkfact, @@ -611,7 +624,6 @@ export any!, any, collect, - contains, count, delete!, deleteat!, @@ -679,7 +691,6 @@ export # strings and text output ascii, base, - startswith, bin, bitstring, bytes2hex, @@ -691,22 +702,17 @@ export digits, digits!, dump, - eachmatch, - endswith, escape_string, hex, hex2bytes, hex2bytes!, info, isascii, - ismatch, isvalid, join, logging, lpad, lstrip, - match, - matchall, ncodeunits, ndigits, nextind, @@ -723,12 +729,8 @@ export repr, reverseind, rpad, - rsearch, - rsearchindex, rsplit, rstrip, - search, - searchindex, show, 
showcompact, showerror, @@ -800,7 +802,6 @@ export identity, isbits, isequal, - equalto, isimmutable, isless, ifelse, diff --git a/base/iobuffer.jl b/base/iobuffer.jl index b94b1f045e895..e91e8c30cd213 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -426,18 +426,18 @@ read(io::GenericIOBuffer) = read!(io,StringVector(nb_available(io))) readavailable(io::GenericIOBuffer) = read(io) read(io::GenericIOBuffer, nb::Integer) = read!(io,StringVector(min(nb, nb_available(io)))) -function search(buf::IOBuffer, delim::UInt8) +function findfirst(delim::EqualTo{UInt8}, buf::IOBuffer) p = pointer(buf.data, buf.ptr) - q = @gc_preserve buf ccall(:memchr,Ptr{UInt8},(Ptr{UInt8},Int32,Csize_t),p,delim,nb_available(buf)) + q = @gc_preserve buf ccall(:memchr,Ptr{UInt8},(Ptr{UInt8},Int32,Csize_t),p,delim.x,nb_available(buf)) nb::Int = (q == C_NULL ? 0 : q-p+1) return nb end -function search(buf::GenericIOBuffer, delim::UInt8) +function findfirst(delim::EqualTo{UInt8}, buf::GenericIOBuffer) data = buf.data for i = buf.ptr : buf.size @inbounds b = data[i] - if b == delim + if b == delim.x return i - buf.ptr + 1 end end diff --git a/base/libgit2/types.jl b/base/libgit2/types.jl index 30cc7fdcf068a..7cd6c36f15917 100644 --- a/base/libgit2/types.jl +++ b/base/libgit2/types.jl @@ -894,8 +894,8 @@ function Base.split(ce::ConfigEntry) key = unsafe_string(ce.name) # Determine the positions of the delimiters - subsection_delim = search(key, '.') - name_delim = rsearch(key, '.') + subsection_delim = findfirst(equalto('.'), key) + name_delim = findlast(equalto('.'), key) section = SubString(key, 1, subsection_delim - 1) subsection = SubString(key, subsection_delim + 1, name_delim - 1) diff --git a/base/methodshow.jl b/base/methodshow.jl index 4ff02dcb77818..a1c14eb7b0057 100644 --- a/base/methodshow.jl +++ b/base/methodshow.jl @@ -11,7 +11,7 @@ function argtype_decl(env, n, sig::DataType, i::Int, nargs, isva::Bool) # -> (ar n = n.args[1] # handle n::T in arg list end s = string(n) - i = 
search(s,'#') + i = findfirst(equalto('#'), s) if i > 0 s = s[1:i-1] end diff --git a/base/operators.jl b/base/operators.jl index 50f5af8a39e4e..3020df9e29e5f 100644 --- a/base/operators.jl +++ b/base/operators.jl @@ -843,3 +843,22 @@ The returned function is of type `Base.EqualTo`. This allows dispatching to specialized methods by using e.g. `f::Base.EqualTo` in a method signature. """ const equalto = EqualTo + +struct OccursIn{T} <: Function + x::T + + OccursIn(x::T) where {T} = new{T}(x) +end + +(f::OccursIn)(y) = y in f.x + +""" + occursin(x) + +Create a function that checks whether its argument is [`in`](@ref) `x`; i.e. returns +`y -> y in x`. + +The returned function is of type `Base.OccursIn`. This allows dispatching to +specialized methods by using e.g. `f::Base.OccursIn` in a method signature. +""" +const occursin = OccursIn diff --git a/base/parse.jl b/base/parse.jl index 303d622035837..e0da3ea303058 100644 --- a/base/parse.jl +++ b/base/parse.jl @@ -281,16 +281,16 @@ function tryparse_internal(::Type{Complex{T}}, s::Union{String,SubString{String} end # find index of ± separating real/imaginary parts (if any) - i₊ = search(s, ('+','-'), i) + i₊ = findnext(occursin(('+','-')), s, i) if i₊ == i # leading ± sign - i₊ = search(s, ('+','-'), i₊+1) + i₊ = findnext(occursin(('+','-')), s, i₊+1) end if i₊ != 0 && s[i₊-1] in ('e','E') # exponent sign - i₊ = search(s, ('+','-'), i₊+1) + i₊ = findnext(occursin(('+','-')), s, i₊+1) end # find trailing im/i/j - iᵢ = rsearch(s, ('m','i','j'), e) + iᵢ = findprev(occursin(('m','i','j')), s, e) if iᵢ > 0 && s[iᵢ] == 'm' # im iᵢ -= 1 if s[iᵢ] != 'i' diff --git a/base/precompile.jl b/base/precompile.jl index dcd614ba8fbf4..e6e23bc2bcfe6 100644 --- a/base/precompile.jl +++ b/base/precompile.jl @@ -181,16 +181,16 @@ precompile(Tuple{typeof(Base.REPL.ip_matches_func), Ptr{Cvoid}, Symbol}) precompile(Tuple{typeof(Base.throw_boundserror), Array{Ptr{Cvoid}, 1}, Tuple{Base.UnitRange{Int64}}}) 
precompile(Tuple{typeof(Base.unsafe_copyto!), Array{Ptr{Cvoid}, 1}, Int64, Array{Ptr{Cvoid}, 1}, Int64, Int64}) precompile(Tuple{Type{Base.Channel{Any}}, Int64}) -precompile(Tuple{typeof(Base.rsearch), String, UInt8, Int64}) -precompile(Tuple{typeof(Base.rsearch), String, Char, Int64}) -precompile(Tuple{typeof(Base.rsearch), Array{UInt8, 1}, UInt8, Int64}) +precompile(Tuple{typeof(Base._rsearch), String, UInt8, Int64}) +precompile(Tuple{typeof(Base._rsearch), String, Char, Int64}) +precompile(Tuple{typeof(Base._rsearch), Array{UInt8, 1}, UInt8, Int64}) precompile(Tuple{typeof(Base._rsearchindex), Array{UInt8, 1}, Array{UInt8, 1}, Int64}) precompile(Tuple{typeof(Base._rsearch), Array{UInt8, 1}, Array{UInt8, 1}, Int64}) -precompile(Tuple{typeof(Base.rsearch), Array{Int8, 1}, UInt8, Int64}) +precompile(Tuple{typeof(Base._rsearch), Array{Int8, 1}, UInt8, Int64}) precompile(Tuple{typeof(Base._rsearchindex), Array{Int8, 1}, Array{UInt8, 1}, Int64}) precompile(Tuple{typeof(Base._rsearch), Array{Int8, 1}, Array{UInt8, 1}, Int64}) -precompile(Tuple{typeof(Base.rsearch), Array{UInt8, 1}, Char, Int64}) -precompile(Tuple{typeof(Base.rsearch), Array{Int8, 1}, Char, Int64}) +precompile(Tuple{typeof(Base._rsearch), Array{UInt8, 1}, Char, Int64}) +precompile(Tuple{typeof(Base._rsearch), Array{Int8, 1}, Char, Int64}) precompile(Tuple{typeof(Base.splice!), Array{Base.Multimedia.AbstractDisplay, 1}, Int64, Array{Any, 1}}) precompile(Tuple{typeof(Core.Inference.isbits), Base.LineEdit.EmptyCompletionProvider}) precompile(Tuple{typeof(Core.Inference.isbits), Base.LineEdit.EmptyHistoryProvider}) @@ -547,7 +547,7 @@ precompile(Tuple{typeof(Base.LineEdit.complete_line), Base.LineEdit.PromptState, precompile(Tuple{typeof(Base.LineEdit.input_string_newlines_aftercursor), Base.LineEdit.PromptState}) precompile(Tuple{typeof(Base.LineEdit.complete_line), Base.REPL.REPLCompletionProvider, Base.LineEdit.PromptState}) precompile(Tuple{getfield(Base, Symbol("#kw##parse")), Array{Any, 1}, 
typeof(Base.parse), String}) -precompile(Tuple{typeof(Base.rsearch), String, Array{Char, 1}, Int64}) +precompile(Tuple{typeof(Base._rsearch), String, Array{Char, 1}, Int64}) precompile(Tuple{getfield(Base.REPLCompletions, Symbol("#kw##find_start_brace")), Array{Any, 1}, typeof(Base.REPLCompletions.find_start_brace), String}) precompile(Tuple{typeof(Core.Inference.isbits), Tuple{Nothing, Nothing, Nothing}}) precompile(Tuple{typeof(Base.isidentifier), Base.SubString{String}}) @@ -625,7 +625,7 @@ precompile(Tuple{typeof(Base.rsearchindex), String, String, Int64}) precompile(Tuple{typeof(Base._rsearch), String, String, Int64}) precompile(Tuple{typeof(Base.pushfirst!), Array{Base.SubString{String}, 1}, Base.SubString{String}}) precompile(Tuple{typeof(Base.startswith), String, Base.SubString{String}}) -precompile(Tuple{typeof(Base.rsearch), String, Array{Char, 1}}) +precompile(Tuple{typeof(Base._rsearch), String, Array{Char, 1}}) precompile(Tuple{getfield(Base, Symbol("#kw##rsplit")), Array{Any, 1}, typeof(Base.rsplit), String, String}) precompile(Tuple{typeof(Base.sort!), Array{String, 1}, Base.Sort.MergeSortAlg, Base.Order.ForwardOrdering}) precompile(Tuple{typeof(Base.sort!), Array{String, 1}, Int64, Int64, Base.Sort.InsertionSortAlg, Base.Order.ForwardOrdering}) @@ -1157,7 +1157,7 @@ precompile(Tuple{typeof(Base.resize!), Array{Base.Semaphore, 1}, Int64}) precompile(Tuple{typeof(Base.acquire), Base.Semaphore}) precompile(Tuple{typeof(Base.release), Base.Semaphore}) precompile(Tuple{typeof(Base.isreadable), Base.PipeEndpoint}) -precompile(Tuple{typeof(Base.search), Base.GenericIOBuffer{Array{UInt8, 1}}, UInt8}) +precompile(Tuple{typeof(Base.findfirst), UInt8, Base.GenericIOBuffer{Array{UInt8, 1}}}) precompile(Tuple{typeof(Base.start_reading), Base.PipeEndpoint}) precompile(Tuple{typeof(Base.wait_readbyte), Base.PipeEndpoint, UInt8}) precompile(Tuple{typeof(Base.readuntil), Base.PipeEndpoint, UInt8}) diff --git a/base/regex.jl b/base/regex.jl index 
726c33f77f02f..0c28617852cca 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -285,7 +285,8 @@ end matchall(re::Regex, str::SubString, overlap::Bool=false) = matchall(re, String(str), overlap) -function search(str::Union{String,SubString}, re::Regex, idx::Integer) +# TODO: return only start index and update deprecation +function findnext(re::Regex, str::Union{String,SubString}, idx::Integer) if idx > nextind(str,endof(str)) throw(BoundsError()) end @@ -294,10 +295,10 @@ function search(str::Union{String,SubString}, re::Regex, idx::Integer) PCRE.exec(re.regex, str, idx-1, opts, re.match_data) ? ((Int(re.ovec[1])+1):prevind(str,Int(re.ovec[2])+1)) : (0:-1) end -search(s::AbstractString, r::Regex, idx::Integer) = throw(ArgumentError( +findnext(r::Regex, s::AbstractString, idx::Integer) = throw(ArgumentError( "regex search is only available for the String type; use String(s) to convert" )) -search(s::AbstractString, r::Regex) = search(s,r,start(s)) +findfirst(r::Regex, s::AbstractString) = findnext(r,s,start(s)) struct SubstitutionString{T<:AbstractString} <: AbstractString string::T diff --git a/base/repl/LineEdit.jl b/base/repl/LineEdit.jl index 0bf91d142daf0..7f92d6d047b4d 100644 --- a/base/repl/LineEdit.jl +++ b/base/repl/LineEdit.jl @@ -580,11 +580,11 @@ end # of the line. 
function edit_move_up(buf::IOBuffer) - npos = rsearch(buf.data, '\n', position(buf)) + npos = findprev(equalto(UInt8('\n')), buf.data, position(buf)) npos == 0 && return false # we're in the first line # We're interested in character count, not byte count offset = length(content(buf, npos => position(buf))) - npos2 = rsearch(buf.data, '\n', npos-1) + npos2 = findprev(equalto(UInt8('\n')), buf.data, npos-1) seek(buf, npos2) for _ = 1:offset pos = position(buf) @@ -603,10 +603,10 @@ function edit_move_up(s) end function edit_move_down(buf::IOBuffer) - npos = rsearch(buf.data[1:buf.size], '\n', position(buf)) + npos = findprev(equalto(UInt8('\n')), buf.data[1:buf.size], position(buf)) # We're interested in character count, not byte count offset = length(String(buf.data[(npos+1):(position(buf))])) - npos2 = search(buf.data[1:buf.size], '\n', position(buf)+1) + npos2 = findnext(equalto(UInt8('\n')), buf.data[1:buf.size], position(buf)+1) if npos2 == 0 #we're in the last line return false end @@ -1849,7 +1849,7 @@ function move_line_start(s::MIState) if s.key_repeats > 0 move_input_start(s) else - seek(buf, rsearch(buf.data, '\n', curpos)) + seek(buf, findprev(equalto(UInt8('\n')), buf.data, curpos)) end end @@ -1862,7 +1862,7 @@ end function move_line_end(buf::IOBuffer) eof(buf) && return - pos = search(buf.data, '\n', position(buf)+1) + pos = findnext(equalto(UInt8('\n')), buf.data, position(buf)+1) if pos == 0 move_input_end(buf) return diff --git a/base/repl/REPL.jl b/base/repl/REPL.jl index 34a6a6de923de..033398e5ecb83 100644 --- a/base/repl/REPL.jl +++ b/base/repl/REPL.jl @@ -617,14 +617,15 @@ function history_search(hist::REPLHistoryProvider, query_buffer::IOBuffer, respo !skip_current && searchdata == response_str[a:b] && return true - searchfunc, searchstart, skipfunc = backwards ? (rsearch, b, prevind) : - (search, a, nextind) + searchfunc1, searchfunc2, searchstart, skipfunc = backwards ? 
+ (findlast, findprev, b, prevind) : + (findfirst, findnext, a, nextind) skip_current && (searchstart = skipfunc(response_str, searchstart)) # Start searching # First the current response buffer if 1 <= searchstart <= endof(response_str) - match = searchfunc(response_str, searchdata, searchstart) + match = searchfunc2(searchdata, response_str, searchstart) if match != 0:-1 seek(response_buffer, first(match) - 1) return true @@ -635,7 +636,7 @@ function history_search(hist::REPLHistoryProvider, query_buffer::IOBuffer, respo idxs = backwards ? ((hist.cur_idx-1):-1:1) : ((hist.cur_idx+1):length(hist.history)) for idx in idxs h = hist.history[idx] - match = searchfunc(h, searchdata) + match = searchfunc1(searchdata, h) if match != 0:-1 && h != response_str && haskey(hist.mode_mapping, hist.modes[idx]) truncate(response_buffer, 0) write(response_buffer, h) @@ -887,7 +888,7 @@ function setup_interface( sbuffer = LineEdit.buffer(s) curspos = position(sbuffer) seek(sbuffer, 0) - shouldeval = (nb_available(sbuffer) == curspos && search(sbuffer, UInt8('\n')) == 0) + shouldeval = (nb_available(sbuffer) == curspos && findfirst(equalto(UInt8('\n')), sbuffer) == 0) seek(sbuffer, curspos) if curspos == 0 # if pasting at the beginning, strip leading whitespace @@ -1049,7 +1050,7 @@ input_color(r::StreamREPL) = r.input_color # heuristic function to decide if the presence of a semicolon # at the end of the expression was intended for suppressing output function ends_with_semicolon(line::AbstractString) - match = rsearch(line, ';') + match = findlast(equalto(';'), line) if match != 0 # state for comment parser, assuming that the `;` isn't in a string or comment # so input like ";#" will still thwart this to give the wrong (anti-conservative) answer diff --git a/base/repl/REPLCompletions.jl b/base/repl/REPLCompletions.jl index d5be637da4380..ae7241aa5b941 100644 --- a/base/repl/REPLCompletions.jl +++ b/base/repl/REPLCompletions.jl @@ -40,7 +40,7 @@ function complete_symbol(sym, ffunc) 
lookup_module = true t = Union{} - if rsearch(sym, non_identifier_chars) < rsearch(sym, '.') + if findlast(occursin(non_identifier_chars), sym) < findlast(equalto('.'), sym) # Find module lookup_name, name = rsplit(sym, ".", limit=2) @@ -258,7 +258,7 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')') end braces != 1 && return 0:-1, -1 method_name_end = reverseind(s, i) - startind = nextind(s, rsearch(s, non_identifier_chars, method_name_end)) + startind = nextind(s, findprev(occursin(non_identifier_chars), s, method_name_end)) return (startind:endof(s), method_name_end) end @@ -406,15 +406,15 @@ function afterusing(string::String, startpos::Int) str = string[1:prevind(string,startpos)] isempty(str) && return false rstr = reverse(str) - r = search(rstr, r"\s(gnisu|tropmi)\b") + r = findfirst(r"\s(gnisu|tropmi)\b", rstr) isempty(r) && return false fr = reverseind(str, last(r)) return ismatch(r"^\b(using|import)\s*((\w+[.])*\w+\s*,\s*)*$", str[fr:end]) end function bslash_completions(string, pos) - slashpos = rsearch(string, '\\', pos) - if (rsearch(string, bslash_separators, pos) < slashpos && + slashpos = findprev(equalto('\\'), string, pos) + if (findprev(occursin(bslash_separators), string, pos) < slashpos && !(1 < slashpos && (string[prevind(string, slashpos)]=='\\'))) # latex / emoji symbol substitution s = string[slashpos:pos] @@ -459,7 +459,7 @@ function dict_identifier_key(str,tag) # Avoid `isdefined(::Array, ::Symbol)` isa(obj, Array) && return (nothing, nothing, nothing) end - begin_of_key = first(search(str, r"\S", nextind(str, end_of_identifier) + 1)) # 1 for [ + begin_of_key = first(findnext(r"\S", str, nextind(str, end_of_identifier) + 1)) # 1 for [ begin_of_key==0 && return (true, nothing, nothing) partial_key = str[begin_of_key:end] (isa(obj, AbstractDict) && length(obj) < 1e6) || return (true, nothing, nothing) @@ -533,8 +533,8 @@ function completions(string, pos) return String[], 0:-1, false end - dotpos = rsearch(string, '.', 
pos) - startpos = nextind(string, rsearch(string, non_identifier_chars, pos)) + dotpos = findprev(equalto('.'), string, pos) + startpos = nextind(string, findprev(occursin(non_identifier_chars), string, pos)) ffunc = (mod,x)->true suggestions = String[] diff --git a/base/stream.jl b/base/stream.jl index 5a0143da2534d..58bf399afb26b 100644 --- a/base/stream.jl +++ b/base/stream.jl @@ -271,13 +271,13 @@ end function wait_readbyte(x::LibuvStream, c::UInt8) if isopen(x) # fast path - search(x.buffer, c) > 0 && return + findfirst(equalto(c), x.buffer) > 0 && return else return end preserve_handle(x) try - while isopen(x) && search(x.buffer, c) <= 0 + while isopen(x) && findfirst(equalto(c), x.buffer) <= 0 start_reading(x) # ensure we are reading wait(x.readnotify) end @@ -1237,7 +1237,7 @@ end show(io::IO, s::BufferStream) = print(io,"BufferStream() bytes waiting:",nb_available(s.buffer),", isopen:", s.is_open) function wait_readbyte(s::BufferStream, c::UInt8) - while isopen(s) && search(s.buffer,c) <= 0 + while isopen(s) && findfirst(equalto(c), s.buffer) <= 0 wait(s.r_c) end end diff --git a/base/strings/search.jl b/base/strings/search.jl index f920b77520252..9481776904188 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -1,20 +1,26 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -function search(s::String, c::Char, i::Integer = 1) +function findnext(pred::EqualTo{Char}, s::String, i::Integer) if i < 1 || i > sizeof(s) i == sizeof(s) + 1 && return 0 throw(BoundsError(s, i)) end @inbounds isvalid(s, i) || string_index_err(s, i) - c ≤ '\x7f' && return search(s, c % UInt8, i) + c = pred.x + c ≤ '\x7f' && return _search(s, c % UInt8, i) while true - i = search(s, first_utf8_byte(c), i) + i = _search(s, first_utf8_byte(c), i) (i == 0 || s[i] == c) && return i i = next(s, i)[2] end end -function search(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = 1) +findfirst(pred::EqualTo{<:Union{Int8,UInt8}}, a::ByteArray) = _search(a, pred.x) + +findnext(pred::EqualTo{<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) = + _search(a, pred.x, i) + +function _search(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = 1) if i < 1 throw(BoundsError(a, i)) end @@ -27,25 +33,31 @@ function search(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = 1 q == C_NULL ? 
0 : Int(q-p+1) end -function search(a::ByteArray, b::Char, i::Integer = 1) +function _search(a::ByteArray, b::Char, i::Integer = 1) if isascii(b) - search(a,UInt8(b),i) + _search(a,UInt8(b),i) else - search(a,unsafe_wrap(Vector{UInt8},string(b)),i).start + _search(a,unsafe_wrap(Vector{UInt8},string(b)),i).start end end -function rsearch(s::String, c::Char, i::Integer = sizeof(s)) - c ≤ '\x7f' && return rsearch(s, c % UInt8, i) +function findprev(pred::EqualTo{Char}, s::String, i::Integer) + c = pred.x + c ≤ '\x7f' && return _rsearch(s, c % UInt8, i) b = first_utf8_byte(c) while true - i = rsearch(s, b, i) + i = _rsearch(s, b, i) (i == 0 || s[i] == c) && return i i = prevind(s, i) end end -function rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = sizeof(s)) +findlast(pred::EqualTo{<:Union{Int8,UInt8}}, a::ByteArray) = _rsearch(a, pred.x) + +findprev(pred::EqualTo{<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) = + _rsearch(a, pred.x, i) + +function _rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = sizeof(a)) if i < 1 return i == 0 ? 0 : throw(BoundsError(a, i)) end @@ -58,65 +70,60 @@ function rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = q == C_NULL ? 0 : Int(q-p+1) end -function rsearch(a::ByteArray, b::Char, i::Integer = length(a)) +function _rsearch(a::ByteArray, b::Char, i::Integer = length(a)) if isascii(b) - rsearch(a,UInt8(b),i) + _rsearch(a,UInt8(b),i) else - rsearch(a,unsafe_wrap(Vector{UInt8},string(b)),i).start + _rsearch(a,unsafe_wrap(Vector{UInt8},string(b)),i).start end end -const Chars = Union{Char,Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}} - """ - search(string::AbstractString, chars::Chars, [start::Integer]) + findfirst(pattern::AbstractString, string::AbstractString) + findfirst(pattern::Regex, string::String) -Search for the first occurrence of the given characters within the given string. 
The second -argument may be a single character, a vector or a set of characters, a string, or a regular -expression (though regular expressions are only allowed on contiguous strings, such as ASCII -or UTF-8 strings). The third argument optionally specifies a starting index. The return -value is a range of indices where the matching sequence is found, such that `s[search(s,x)] == x`: - -`search(string, "substring")` = `start:end` such that `string[start:end] == "substring"`, or -`0:-1` if unmatched. - -`search(string, 'c')` = `index` such that `string[index] == 'c'`, or `0` if unmatched. +Find the first occurrence of `pattern` in `string`. Equivalent to +[`findnext(pattern, string, start(string))`](@ref). # Examples ```jldoctest -julia> search("Hello to the world", "z") +julia> findfirst("z", "Hello to the world") 0:-1 -julia> search("JuliaLang","Julia") +julia> findfirst("Julia", "JuliaLang") 1:5 ``` """ -function search(s::AbstractString, c::Chars, i::Integer) +findfirst(pattern::AbstractString, string::AbstractString) = + findnext(pattern, string, start(string)) + +# AbstractString implementation of the generic findnext interface +function findnext(testf::Function, s::AbstractString, i::Integer) z = ncodeunits(s) + 1 - isempty(c) && return 1 ≤ i ≤ z ? i : throw(BoundsError(s, i)) 1 ≤ i ≤ z || throw(BoundsError(s, i)) @inbounds i == z || isvalid(s, i) || string_index_err(s, i) while !done(s,i) d, j = next(s,i) - if d in c + if testf(d) return i end i = j end return 0 end -search(s::AbstractString, c::Chars) = search(s,c,start(s)) -in(c::Char, s::AbstractString) = (search(s,c)!=0) +in(c::Char, s::AbstractString) = (findfirst(equalto(c),s)!=0) -function _searchindex(s, t, i) +function _searchindex(s::Union{AbstractString,ByteArray}, + t::Union{AbstractString,Char,Int8,UInt8}, + i::Integer) if isempty(t) return 1 <= i <= nextind(s,endof(s)) ? 
i : throw(BoundsError(s, i)) end t1, j2 = next(t,start(t)) while true - i = search(s,t1,i) + i = findnext(equalto(t1),s,i) if i == 0 return 0 end c, ii = next(s,i) j = j2; k = ii @@ -140,7 +147,7 @@ function _searchindex(s, t, i) end end -_searchindex(s, t::Char, i) = search(s, t, i) +_searchindex(s::AbstractString, t::Char, i::Integer) = findnext(equalto(t), s, i) function _search_bloom_mask(c) UInt64(1) << (c & 63) @@ -149,7 +156,10 @@ end _nthbyte(s::String, i) = codeunit(s, i) _nthbyte(a::Union{AbstractVector{UInt8},AbstractVector{Int8}}, i) = a[i] -function _searchindex(s::Union{String,ByteArray}, t::Union{String,ByteArray}, i) +_searchindex(s::String, t::String, i::Integer) = + _searchindex(unsafe_wrap(Vector{UInt8},s), unsafe_wrap(Vector{UInt8},t), i) + +function _searchindex(s::ByteArray, t::ByteArray, i::Integer) n = sizeof(t) m = sizeof(s) @@ -158,7 +168,7 @@ function _searchindex(s::Union{String,ByteArray}, t::Union{String,ByteArray}, i) elseif m == 0 return 0 elseif n == 1 - return search(s, _nthbyte(t,1), i) + return findnext(equalto(_nthbyte(t,1)), s, i) end w = m - n @@ -210,12 +220,12 @@ function _searchindex(s::Union{String,ByteArray}, t::Union{String,ByteArray}, i) 0 end -searchindex(s::ByteArray, t::ByteArray, i) = _searchindex(s,t,i) +searchindex(s::ByteArray, t::ByteArray, i::Integer) = _searchindex(s,t,i) """ searchindex(s::AbstractString, substring, [start::Integer]) -Similar to [`search`](@ref), but return only the start index at which +Similar to `search`, but return only the start index at which the substring is found, or `0` if it is not. 
# Examples @@ -239,7 +249,7 @@ function searchindex(s::String, t::String, i::Integer=1) # Check for fast case of a single byte # (for multi-byte UTF-8 sequences, use searchindex on byte arrays instead) if endof(t) == 1 - search(s, t[1], i) + findnext(equalto(t[1]), s, i) else _searchindex(s, t, i) end @@ -254,22 +264,57 @@ function _search(s, t, i::Integer) end end -search(s::AbstractString, t::AbstractString, i::Integer=start(s)) = _search(s, t, i) -search(s::ByteArray, t::ByteArray, i::Integer=start(s)) = _search(s, t, i) +""" + findnext(pattern::AbstractString, string::AbstractString, start::Integer) + findnext(pattern::Regex, string::String, start::Integer) + +Find the next occurrence of `pattern` in `string` starting at position `start`. +`pattern` can be either a string, or a regular expression, in which case `string` +must be of type `String`. + +The return value is a range of indexes where the matching sequence is found, such that +`s[findnext(x, s, i)] == x`: + +`findnext("substring", string, i)` = `start:end` such that +`string[start:end] == "substring"`, or `0:-1` if unmatched. + +# Examples +```jldoctest +julia> findnext("z", "Hello to the world", 1) +0:-1 + +julia> findnext("o", "Hello to the world", 6) +8:8 +julia> findnext("Julia", "JuliaLang", 2) +1:5 +``` """ - rsearch(s::AbstractString, chars::Chars, [start::Integer]) +findnext(t::AbstractString, s::AbstractString, i::Integer) = _search(s, t, i) +# TODO: remove? +findnext(t::ByteArray, s::ByteArray, i::Integer) = _search(s, t, i) -Similar to [`search`](@ref), but returning the last occurrence of the given characters within the -given string, searching in reverse from `start`. +""" + findlast(pattern::AbstractString, string::AbstractString) + findlast(pattern::Regex, string::String) + +Find the last occurrence of `pattern` in `string`. Equivalent to +[`findlast(pattern, string, endof(s))`](@ref). 
# Examples ```jldoctest -julia> rsearch("aaabbb","b") -6:6 +julia> findlast("o", "Hello to the world") +15:15 + +julia> findlast("Julia", "JuliaLang") +1:5 ``` """ -function rsearch(s::AbstractString, c::Chars, i::Integer=start(s)) +findlast(pattern::AbstractString, string::AbstractString) = + findprev(pattern, string, endof(string)) + +# AbstractString implementation of the generic findprev interface +function findprev(testf::Function, s::AbstractString, i::Integer) if i < 1 return i == 0 ? 0 : throw(BoundsError(s, i)) end @@ -280,21 +325,23 @@ function rsearch(s::AbstractString, c::Chars, i::Integer=start(s)) # r[reverseind(r,i)] == reverse(r)[i] == s[i] # s[reverseind(s,j)] == reverse(s)[j] == r[j] r = reverse(s) - j = search(r, c, reverseind(r, i)) + j = findnext(testf, r, reverseind(r, i)) j == 0 ? 0 : reverseind(s, j) end -function _rsearchindex(s, t, i) +function _rsearchindex(s::AbstractString, + t::Union{AbstractString,Char,Int8,UInt8}, + i::Integer) if isempty(t) return 1 <= i <= nextind(s, endof(s)) ? i : throw(BoundsError(s, i)) end - t = reverse(t) + t = t isa AbstractString ?
reverse(t) : t rs = reverse(s) l = endof(s) t1, j2 = next(t, start(t)) while true - i = rsearch(s, t1, i) + i = findprev(equalto(t1), s, i) i == 0 && return 0 c, ii = next(rs, reverseind(rs, i)) j = j2; k = ii @@ -316,7 +363,10 @@ function _rsearchindex(s, t, i) end end -function _rsearchindex(s::Union{String,ByteArray}, t::Union{String,ByteArray}, k) +_rsearchindex(s::String, t::String, i::Integer) = + _rsearchindex(unsafe_wrap(Vector{UInt8}, s), unsafe_wrap(Vector{UInt8}, t), i) + +function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer) n = sizeof(t) m = sizeof(s) @@ -325,7 +375,7 @@ function _rsearchindex(s::Union{String,ByteArray}, t::Union{String,ByteArray}, k elseif m == 0 return 0 elseif n == 1 - return rsearch(s, _nthbyte(t,1), k) + return findprev(equalto(_nthbyte(t,1)), s, k) end w = m - n @@ -382,7 +432,7 @@ rsearchindex(s::ByteArray, t::ByteArray, i::Integer) = _rsearchindex(s,t,i) """ rsearchindex(s::AbstractString, substring, [start::Integer]) -Similar to [`rsearch`](@ref), but return only the start index at which the substring is found, or `0` if it is not. +Similar to `rsearch`, but return only the start index at which the substring is found, or `0` if it is not. # Examples ```jldoctest @@ -400,7 +450,7 @@ function rsearchindex(s::String, t::String) # Check for fast case of a single byte # (for multi-byte UTF-8 sequences, use rsearchindex instead) if endof(t) == 1 - rsearch(s, t[1]) + findlast(equalto(t[1]), s) else _rsearchindex(s, t, sizeof(s)) end @@ -410,7 +460,7 @@ function rsearchindex(s::String, t::String, i::Integer) # Check for fast case of a single byte # (for multi-byte UTF-8 sequences, use rsearchindex instead) if endof(t) == 1 - rsearch(s, t[1], i) + findprev(equalto(t[1]), s, i) elseif endof(t) != 0 j = i ≤ ncodeunits(s) ?
nextind(s, i)-1 : i _rsearchindex(s, t, j) @@ -432,8 +482,35 @@ function _rsearch(s, t, i::Integer) end end -rsearch(s::AbstractString, t::AbstractString, i::Integer=endof(s)) = _rsearch(s, t, i) -rsearch(s::ByteArray, t::ByteArray, i::Integer=endof(s)) = _rsearch(s, t, i) +""" + findprev(pattern::AbstractString, string::AbstractString, start::Integer) + findprev(pattern::Regex, string::String, start::Integer) + +Find the previous occurrence of `pattern` in `string` starting at position `start`. +`pattern` can be either a string, or a regular expression, in which case `string` +must be of type `String`. + +The return value is a range of indexes where the matching sequence is found, such that +`s[findprev(x, s, i)] == x`: + +`findprev("substring", string, i)` = `start:end` such that +`string[start:end] == "substring"`, or `0:-1` if unmatched. + +# Examples +```jldoctest +julia> findprev("z", "Hello to the world", 18) +0:-1 + +julia> findprev("o", "Hello to the world", 18) +15:15 + +julia> findprev("Julia", "JuliaLang", 6) +1:5 +``` +""" +findprev(t::AbstractString, s::AbstractString, i::Integer) = _rsearch(s, t, i) +# TODO: remove? +findprev(t::ByteArray, s::ByteArray, i::Integer) = _rsearch(s, t, i) """ contains(haystack::AbstractString, needle::Union{AbstractString,Char}) diff --git a/base/strings/util.jl b/base/strings/util.jl index 522ecf1fd7623..80788e9b07e13 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -1,5 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +const Chars = Union{Char,Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}} + # starts with and ends with predicates """ @@ -258,18 +260,15 @@ function rpad( r == 0 ? string(s, p^q) : string(s, p^q, first(p, r)) end -# splitter can be a Char, Vector{Char}, AbstractString, Regex, ... 
-# any splitter that provides search(s::AbstractString, splitter) -split(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:SubString} = - _split(str, splitter, limit, keep, T[]) - """ split(s::AbstractString, [chars]; limit::Integer=0, keep::Bool=true) Return an array of substrings by splitting the given string on occurrences of the given -character delimiters, which may be specified in any of the formats allowed by `search`'s -second argument (i.e. a single character, collection of characters, string, or regular -expression). If `chars` is omitted, it defaults to the set of all space characters, and +character delimiters, which may be specified in any of the formats allowed by +[`findnext`](@ref)'s first argument (i.e. as a string, regular expression or a function), +or as a single character or collection of characters. + +If `chars` is omitted, it defaults to the set of all space characters, and `keep` is taken to be `false`. The two keyword arguments are optional: they are a maximum size for the result and a flag determining whether empty fields should be kept in the result. @@ -285,12 +284,22 @@ julia> split(a,".") "rch" ``` """ -split(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:AbstractString} = - _split(str, splitter, limit, keep, SubString{T}[]) +function split end + +split(str::T, splitter; + limit::Integer=0, keep::Bool=true) where {T<:AbstractString} = + _split(str, splitter, limit, keep, T <: SubString ? T[] : SubString{T}[]) +split(str::T, splitter::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}; + limit::Integer=0, keep::Bool=true) where {T<:AbstractString} = + _split(str, occursin(splitter), limit, keep, T <: SubString ? T[] : SubString{T}[]) +split(str::T, splitter::Char; + limit::Integer=0, keep::Bool=true) where {T<:AbstractString} = + _split(str, equalto(splitter), limit, keep, T <: SubString ? 
T[] : SubString{T}[]) + function _split(str::AbstractString, splitter, limit::Integer, keep_empty::Bool, strs::Array) i = start(str) n = endof(str) - r = search(str,splitter,i) + r = findfirst(splitter,str) if r != 0:-1 j, k = first(r), nextind(str,last(r)) while 0 < j <= n && length(strs) != limit-1 @@ -301,7 +310,7 @@ function _split(str::AbstractString, splitter, limit::Integer, keep_empty::Bool, i = k end (k <= j) && (k = nextind(str,j)) - r = search(str,splitter,k) + r = findnext(splitter,str,k) r == 0:-1 && break j, k = first(r), nextind(str,last(r)) end @@ -315,9 +324,6 @@ end # a bit oddball, but standard behavior in Perl, Ruby & Python: split(str::AbstractString) = split(str, _default_delims; limit=0, keep=false) -rsplit(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:SubString} = - _rsplit(str, splitter, limit, keep, T[]) - """ rsplit(s::AbstractString, [chars]; limit::Integer=0, keep::Bool=true) @@ -346,12 +352,21 @@ julia> rsplit(a,".";limit=2) "h" ``` """ +function rsplit end + rsplit(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:AbstractString} = - _rsplit(str, splitter, limit, keep, SubString{T}[]) + _rsplit(str, splitter, limit, keep, T <: SubString ? T[] : SubString{T}[]) +rsplit(str::T, splitter::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}; + limit::Integer=0, keep::Bool=true) where {T<:AbstractString} = + _rsplit(str, occursin(splitter), limit, keep, T <: SubString ? T[] : SubString{T}[]) +rsplit(str::T, splitter::Char; + limit::Integer=0, keep::Bool=true) where {T<:AbstractString} = + _rsplit(str, equalto(splitter), limit, keep, T <: SubString ? 
T[] : SubString{T}[]) + function _rsplit(str::AbstractString, splitter, limit::Integer, keep_empty::Bool, strs::Array) i = start(str) n = endof(str) - r = rsearch(str,splitter) + r = findlast(splitter, str) j = first(r)-1 k = last(r) while((0 <= j < n) && (length(strs) != limit-1)) @@ -360,7 +375,7 @@ function _rsplit(str::AbstractString, splitter, limit::Integer, keep_empty::Bool n = j end (k <= j) && (j = prevind(str,j)) - r = rsearch(str,splitter,j) + r = findprev(splitter,str,j) j = first(r)-1 k = last(r) end @@ -373,6 +388,11 @@ _replace(io, repl, str, r, pattern) = print(io, repl) _replace(io, repl::Function, str, r, pattern) = print(io, repl(SubString(str, first(r), last(r)))) +replace(str::String, pat_repl::Pair{Char}; count::Integer=typemax(Int)) = + replace(str, equalto(first(pat_repl)) => last(pat_repl); count=count) +replace(str::String, pat_repl::Pair{<:Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}}; + count::Integer=typemax(Int)) = + replace(str, occursin(first(pat_repl)) => last(pat_repl); count=count) function replace(str::String, pat_repl::Pair; count::Integer=typemax(Int)) pattern, repl = pat_repl count == 0 && return str @@ -380,7 +400,7 @@ function replace(str::String, pat_repl::Pair; count::Integer=typemax(Int)) n = 1 e = endof(str) i = a = start(str) - r = search(str,pattern,i) + r = findnext(pattern,str,i) j, k = first(r), last(r) out = IOBuffer(StringVector(floor(Int, 1.2sizeof(str))), true, true) out.size = 0 @@ -397,7 +417,7 @@ function replace(str::String, pat_repl::Pair; count::Integer=typemax(Int)) else i = k = nextind(str, k) end - r = search(str,pattern,k) + r = findnext(pattern,str,k) r == 0:-1 || n == count && break j, k = first(r), last(r) n += 1 @@ -411,8 +431,8 @@ end Search for the given pattern `pat` in `s`, and replace each occurrence with `r`. If `count` is provided, replace at most `count` occurrences.
-As with [`search`](@ref), `pat` may be a -single character, a vector or a set of characters, a string, or a regular expression. If `r` +`pat` may be a single character, a vector or a set of characters, a string, +or a regular expression. If `r` is a function, each occurrence is replaced with `r(s)` where `s` is the matched substring. If `pat` is a regular expression and `r` is a `SubstitutionString`, then capture group references in `r` are replaced with the corresponding matched text. diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md index 37e36f548f01b..017ce693cdc32 100644 --- a/doc/src/manual/strings.md +++ b/doc/src/manual/strings.md @@ -507,29 +507,30 @@ julia> "1 + 2 = 3" == "1 + 2 = $(1 + 2)" true ``` -You can search for the index of a particular character using the [`search`](@ref) function: +You can search for the index of a particular character using the [`findfirst`](@ref) function: ```jldoctest -julia> search("xylophone", 'x') +julia> findfirst(equalto('x'), "xylophone") 1 -julia> search("xylophone", 'p') +julia> findfirst(equalto('p'), "xylophone") 5 -julia> search("xylophone", 'z') +julia> findfirst(equalto('z'), "xylophone") 0 ``` -You can start the search for a character at a given offset by providing a third argument: +You can start the search for a character at a given offset by using [`findnext`](@ref) +with a third argument: ```jldoctest -julia> search("xylophone", 'o') +julia> findnext(equalto('o'), "xylophone", 1) 4 -julia> search("xylophone", 'o', 5) +julia> findnext(equalto('o'), "xylophone", 5) 7 -julia> search("xylophone", 'o', 8) +julia> findnext(equalto('o'), "xylophone", 8) 0 ``` diff --git a/doc/src/stdlib/strings.md b/doc/src/stdlib/strings.md index 79e2b1b9fb448..9a014aee44532 100644 --- a/doc/src/stdlib/strings.md +++ b/doc/src/stdlib/strings.md @@ -33,8 +33,10 @@ Base.:(==)(::AbstractString, ::AbstractString) Base.cmp(::AbstractString, ::AbstractString) Base.lpad Base.rpad -Base.search -Base.rsearch 
+Base.findfirst(::AbstractString, ::AbstractString) +Base.findnext(::AbstractString, ::AbstractString, ::Integer) +Base.findlast(::AbstractString, ::AbstractString) +Base.findprev(::AbstractString, ::AbstractString, ::Integer) Base.searchindex Base.rsearchindex Base.contains(::AbstractString, ::AbstractString) diff --git a/stdlib/Distributed/test/distributed_exec.jl b/stdlib/Distributed/test/distributed_exec.jl index 9f219085b0a54..d4342d1d4336c 100644 --- a/stdlib/Distributed/test/distributed_exec.jl +++ b/stdlib/Distributed/test/distributed_exec.jl @@ -1017,7 +1017,7 @@ if DoFullTest pids=addprocs_with_testenv(4); @test_throws ErrorException rmprocs(pids; waitfor=0.001); # wait for workers to be removed - while any(x -> (x in procs()), pids) + while any(occursin(procs()), pids) sleep(0.1) end end @@ -1482,7 +1482,7 @@ function reuseport_tests() end # Ensure that the code has indeed been successfully executed everywhere - @test all(p -> p in results, procs()) + @test all(occursin(results), procs()) end # Test that the client port is reused. 
SO_REUSEPORT may not be supported on diff --git a/test/arrayops.jl b/test/arrayops.jl index 20ef8b699ccfe..cde448b25052c 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -459,6 +459,10 @@ end @test findprev(equalto(1),a,8) == 6 @test findprev(isodd, [2,4,5,3,9,2,0], 7) == 5 @test findprev(isodd, [2,4,5,3,9,2,0], 2) == 0 + @test findfirst(equalto(0x00), [0x01, 0x00]) == 2 + @test findlast(equalto(0x00), [0x01, 0x00]) == 2 + @test findnext(equalto(0x00), [0x00, 0x01, 0x00], 2) == 3 + @test findprev(equalto(0x00), [0x00, 0x01, 0x00], 2) == 1 end @testset "find with general iterables" begin s = "julia" @@ -1173,7 +1177,7 @@ end # logical indexing a = [1:10;]; acopy = copy(a) - @test deleteat!(a, map(i -> i in idx, 1:length(a))) == [acopy[1:(first(idx)-1)]; acopy[(last(idx)+1):end]] + @test deleteat!(a, map(occursin(idx), 1:length(a))) == [acopy[1:(first(idx)-1)]; acopy[(last(idx)+1):end]] end a = [1:10;] @test deleteat!(a, 11:10) == [1:10;] diff --git a/test/bitset.jl b/test/bitset.jl index b30e19b242e49..c6ab52adc52c1 100644 --- a/test/bitset.jl +++ b/test/bitset.jl @@ -6,7 +6,7 @@ data_in = (1,5,100) s = BitSet(data_in) data_out = collect(s) - @test all(map(d->in(d,data_out), data_in)) + @test all(map(occursin(data_out), data_in)) @test length(data_out) === length(data_in) end diff --git a/test/choosetests.jl b/test/choosetests.jl index 5989e266bbde5..21b8585bcbbd2 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -150,7 +150,7 @@ function choosetests(choices = []) end if !net_on - filter!(x -> !(x in net_required_for), tests) + filter!(!occursin(net_required_for), tests) end if "stdlib" in skip_tests @@ -178,7 +178,7 @@ function choosetests(choices = []) # The shift and invert solvers need SuiteSparse for sparse input Base.USE_GPL_LIBS || filter!(x->x != "IterativeEigensolvers", STDLIBS) - filter!(x -> !(x in skip_tests), tests) + filter!(!occursin(skip_tests), tests) tests, net_on, exit_on_error, seed end diff --git a/test/compile.jl 
b/test/compile.jl index a086045c1cf9e..d5c2562c185da 100644 --- a/test/compile.jl +++ b/test/compile.jl @@ -274,7 +274,7 @@ try error("__precompile__ disabled test failed") catch exc isa(exc, ErrorException) || rethrow(exc) - !isempty(search(exc.msg, "__precompile__(false)")) && rethrow(exc) + contains(exc.msg, "__precompile__(false)") && rethrow(exc) end # Issue #12720 @@ -341,7 +341,7 @@ try error("\"LoadError: break me\" test failed") catch exc isa(exc, ErrorException) || rethrow(exc) - !isempty(search(exc.msg, "ERROR: LoadError: break me")) && rethrow(exc) + contains(exc.msg, "ERROR: LoadError: break me") && rethrow(exc) end # Test transitive dependency for #21266 diff --git a/test/reflection.jl b/test/reflection.jl index 263ad8a3a425b..b80b6e1191356 100644 --- a/test/reflection.jl +++ b/test/reflection.jl @@ -61,7 +61,7 @@ function warntype_hastag(f, types, tag) iob = IOBuffer() code_warntype(iob, f, types) str = String(take!(iob)) - return !isempty(search(str, tag)) + return contains(str, tag) end pos_stable(x) = x > 0 ? 
x : zero(x) @@ -90,7 +90,7 @@ tag = "ANY" iob = IOBuffer() show(iob, Meta.lower(Main, :(x -> x^2))) str = String(take!(iob)) -@test isempty(search(str, tag)) +@test !contains(str, tag) # Make sure non used variables are not emphasized has_unused() = (a = rand(5)) diff --git a/test/regex.jl b/test/regex.jl index 742d4fc8dabb5..050428a8f959d 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -36,7 +36,7 @@ show(buf, r"") # regex match / search string must be a String @test_throws ArgumentError match(r"test", GenericString("this is a test")) -@test_throws ArgumentError search(GenericString("this is a test"), r"test") +@test_throws ArgumentError findfirst(r"test", GenericString("this is a test")) # Named subpatterns let m = match(r"(?<a>.)(.)(?<b>.)", "xyz") diff --git a/test/repl.jl b/test/repl.jl index 7317a01353c3b..ec3748d9bca08 100644 --- a/test/repl.jl +++ b/test/repl.jl @@ -195,10 +195,10 @@ fake_repl() do stdin_write, stdout_read, repl # Issue #10222 # Test ignoring insert key in standard and prefix search modes write(stdin_write, "\e[2h\e[2h\n") # insert (VT100-style) - @test search(readline(stdout_read), "[2h") == 0:-1 + @test findfirst("[2h", readline(stdout_read)) == 0:-1 readline(stdout_read) write(stdin_write, "\e[2~\e[2~\n") # insert (VT220-style) - @test search(readline(stdout_read), "[2~") == 0:-1 + @test findfirst("[2~", readline(stdout_read)) == 0:-1 readline(stdout_read) write(stdin_write, "1+1\n") # populate history with a trivial input readline(stdout_read) @@ -818,8 +818,8 @@ for keys = [altkeys, merge(altkeys...)], # Check that the correct prompt was displayed output = readuntil(stdout_read, "1 * 1;") - @test !isempty(search(output, LineEdit.prompt_string(altprompt))) - @test isempty(search(output, "julia> ")) + @test contains(output, LineEdit.prompt_string(altprompt)) + @test !contains(output, "julia> ") # Check the history file history = read(histfile, String) diff --git a/test/serialize.jl b/test/serialize.jl index 2f68f74fd9dbb..e35720f2982f1
100644 --- a/test/serialize.jl +++ b/test/serialize.jl @@ -441,8 +441,8 @@ using .Shell, .Instance1 io = IOBuffer() serialize(io, foo) str = String(take!(io)) -@test isempty(search(str, "Instance1")) -@test !isempty(search(str, "Shell")) +@test !contains(str, "Instance1") +@test contains(str, "Shell") end # module Test13452 diff --git a/test/sets.jl b/test/sets.jl index 1d802e0b63f62..dd9f10a7cd9a2 100644 --- a/test/sets.jl +++ b/test/sets.jl @@ -11,7 +11,7 @@ using Main.TestHelpers.OAs s = Set(data_in) data_out = collect(s) @test ===(typeof(data_out), Array{Any,1}) - @test all(map(d->in(d,data_out), data_in)) + @test all(map(occursin(data_out), data_in)) @test length(data_out) == length(data_in) let f17741 = x -> x < 0 ? false : 1 @test isa(Set(x for x = 1:3), Set{Int}) diff --git a/test/spawn.jl b/test/spawn.jl index 90d9cc66a3566..dd33d032a7309 100644 --- a/test/spawn.jl +++ b/test/spawn.jl @@ -46,8 +46,8 @@ end @test length(spawn(pipeline(`$echocmd hello`, sortcmd)).processes) == 2 out = read(`$echocmd hello` & `$echocmd world`, String) -@test search(out,"world") != 0:-1 -@test search(out,"hello") != 0:-1 +@test contains(out,"world") +@test contains(out,"hello") @test read(pipeline(`$echocmd hello` & `$echocmd world`, sortcmd), String) == "hello\nworld\n" @test (run(`$printfcmd " \033[34m[stdio passthrough ok]\033[0m\n"`); true) diff --git a/test/strings/search.jl b/test/strings/search.jl index c609066c4f05c..78546cf6b59ee 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -7,19 +7,19 @@ u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε" # I think these should give error on 4 also, and "" is not treated # consistently with SubString("",1,1), nor with Char[] for ind in (0, 5) - @test_throws BoundsError search("foo", SubString("",1,1), ind) - @test_throws BoundsError rsearch("foo", SubString("",1,1), ind) + @test_throws BoundsError findnext(SubString("",1,1), "foo", ind) + @test_throws BoundsError findprev(SubString("",1,1), "foo", ind) 
@test_throws BoundsError searchindex("foo", SubString("",1,1), ind) @test_throws BoundsError rsearchindex("foo", SubString("",1,1), ind) end # Note: the commented out tests will be enabled after fixes to make -# sure that search/rsearch/searchindex/rsearchindex are consistent +# sure that findnext/findprev/searchindex/rsearchindex are consistent # no matter what type of AbstractString the second argument is -@test_throws BoundsError search("foo", Char[], 0) -@test_throws BoundsError search("foo", Char[], 5) -# @test_throws BoundsError rsearch("foo", Char[], 0) -@test_throws BoundsError rsearch("foo", Char[], 5) +@test_throws BoundsError findnext(equalto('a'), "foo", 0) +@test_throws BoundsError findnext(occursin(Char[]), "foo", 5) +# @test_throws BoundsError findprev(occursin(Char[]), "foo", 0) +@test_throws BoundsError findprev(occursin(Char[]), "foo", 5) # @test_throws BoundsError searchindex("foo", Char[], 0) # @test_throws BoundsError searchindex("foo", Char[], 5) @@ -27,309 +27,304 @@ end # @test_throws BoundsError rsearchindex("foo", Char[], 5) # @test_throws ErrorException in("foobar","bar") -@test_throws BoundsError search(b"\x1\x2",0x1,0) +@test_throws BoundsError findnext(equalto(0x1),b"\x1\x2",0) @test rsearchindex(b"foo",b"o",0) == 0 @test rsearchindex(SubString("",1,0),SubString("",1,0)) == 1 -@test search(b"foo",'o') == 2 -@test rsearch(b"foo",'o') == 3 -@test search(b"foó",'ó') == 3 -@test rsearch(b"foó",'ó') == 3 - -# ascii search +# ascii forward search for str in [astr, GenericString(astr)] - @test_throws BoundsError search(str, 'z', 0) - @test_throws BoundsError search(str, '∀', 0) - @test search(str, 'x') == 0 - @test search(str, '\0') == 0 - @test search(str, '\u80') == 0 - @test search(str, '∀') == 0 - @test search(str, 'H') == 1 - @test search(str, 'l') == 3 - @test search(str, 'l', 4) == 4 - @test search(str, 'l', 5) == 11 - @test search(str, 'l', 12) == 0 - @test search(str, ',') == 6 - @test search(str, ',', 7) == 0 - @test search(str, 
'\n') == 14 - @test search(str, '\n', 15) == 0 - @test_throws BoundsError search(str, 'ε', nextind(str,endof(str))+1) - @test_throws BoundsError search(str, 'a', nextind(str,endof(str))+1) + @test_throws BoundsError findnext(equalto('z'), str, 0) + @test_throws BoundsError findnext(equalto('∀'), str, 0) + @test findfirst(equalto('x'), str) == 0 + @test findfirst(equalto('\0'), str) == 0 + @test findfirst(equalto('\u80'), str) == 0 + @test findfirst(equalto('∀'), str) == 0 + @test findfirst(equalto('H'), str) == 1 + @test findfirst(equalto('l'), str) == 3 + @test findnext(equalto('l'), str, 4) == 4 + @test findnext(equalto('l'), str, 5) == 11 + @test findnext(equalto('l'), str, 12) == 0 + @test findfirst(equalto(','), str) == 6 + @test findnext(equalto(','), str, 7) == 0 + @test findfirst(equalto('\n'), str) == 14 + @test findnext(equalto('\n'), str, 15) == 0 + @test_throws BoundsError findnext(equalto('ε'), str, nextind(str,endof(str))+1) + @test_throws BoundsError findnext(equalto('a'), str, nextind(str,endof(str))+1) end -# ascii rsearch +# ascii backward search for str in [astr] - @test rsearch(str, 'x') == 0 - @test rsearch(str, '\0') == 0 - @test rsearch(str, '\u80') == 0 - @test rsearch(str, '∀') == 0 - @test rsearch(str, 'H') == 1 - @test rsearch(str, 'H', 0) == 0 - @test rsearch(str, 'l') == 11 - @test rsearch(str, 'l', 5) == 4 - @test rsearch(str, 'l', 4) == 4 - @test rsearch(str, 'l', 3) == 3 - @test rsearch(str, 'l', 2) == 0 - @test rsearch(str, ',') == 6 - @test rsearch(str, ',', 5) == 0 - @test rsearch(str, '\n') == 14 + @test findlast(equalto('x'), str) == 0 + @test findlast(equalto('\0'), str) == 0 + @test findlast(equalto('\u80'), str) == 0 + @test findlast(equalto('∀'), str) == 0 + @test findlast(equalto('H'), str) == 1 + @test findprev(equalto('H'), str, 0) == 0 + @test findlast(equalto('l'), str) == 11 + @test findprev(equalto('l'), str, 5) == 4 + @test findprev(equalto('l'), str, 4) == 4 + @test findprev(equalto('l'), str, 3) == 3 + @test 
findprev(equalto('l'), str, 2) == 0 + @test findlast(equalto(','), str) == 6 + @test findprev(equalto(','), str, 5) == 0 + @test findlast(equalto('\n'), str) == 14 end -# utf-8 search +# utf-8 forward search for str in (u8str, GenericString(u8str)) - @test_throws BoundsError search(str, 'z', 0) - @test_throws BoundsError search(str, '∀', 0) - @test search(str, 'z') == 0 - @test search(str, '\0') == 0 - @test search(str, '\u80') == 0 - @test search(str, '∄') == 0 - @test search(str, '∀') == 1 - @test_throws StringIndexError search(str, '∀', 2) - @test search(str, '∀', 4) == 0 - @test search(str, '∃') == 13 - @test_throws StringIndexError search(str, '∃', 15) - @test search(str, '∃', 16) == 0 - @test search(str, 'x') == 26 - @test search(str, 'x', 27) == 43 - @test search(str, 'x', 44) == 0 - @test search(str, 'δ') == 17 - @test_throws StringIndexError search(str, 'δ', 18) - @test search(str, 'δ', nextind(str,17)) == 33 - @test search(str, 'δ', nextind(str,33)) == 0 - @test search(str, 'ε') == 5 - @test search(str, 'ε', nextind(str,5)) == 54 - @test search(str, 'ε', nextind(str,54)) == 0 - @test search(str, 'ε', nextind(str,endof(str))) == 0 - @test search(str, 'a', nextind(str,endof(str))) == 0 - @test_throws BoundsError search(str, 'ε', nextind(str,endof(str))+1) - @test_throws BoundsError search(str, 'a', nextind(str,endof(str))+1) + @test_throws BoundsError findnext(equalto('z'), str, 0) + @test_throws BoundsError findnext(equalto('∀'), str, 0) + @test findfirst(equalto('z'), str) == 0 + @test findfirst(equalto('\0'), str) == 0 + @test findfirst(equalto('\u80'), str) == 0 + @test findfirst(equalto('∄'), str) == 0 + @test findfirst(equalto('∀'), str) == 1 + @test_throws StringIndexError findnext(equalto('∀'), str, 2) + @test findnext(equalto('∀'), str, 4) == 0 + @test findfirst(equalto('∃'), str) == 13 + @test_throws StringIndexError findnext(equalto('∃'), str, 15) + @test findnext(equalto('∃'), str, 16) == 0 + @test findfirst(equalto('x'), str) == 26 + @test 
findnext(equalto('x'), str, 27) == 43 + @test findnext(equalto('x'), str, 44) == 0 + @test findfirst(equalto('δ'), str) == 17 + @test_throws StringIndexError findnext(equalto('δ'), str, 18) + @test findnext(equalto('δ'), str, nextind(str,17)) == 33 + @test findnext(equalto('δ'), str, nextind(str,33)) == 0 + @test findfirst(equalto('ε'), str) == 5 + @test findnext(equalto('ε'), str, nextind(str,5)) == 54 + @test findnext(equalto('ε'), str, nextind(str,54)) == 0 + @test findnext(equalto('ε'), str, nextind(str,endof(str))) == 0 + @test findnext(equalto('a'), str, nextind(str,endof(str))) == 0 + @test_throws BoundsError findnext(equalto('ε'), str, nextind(str,endof(str))+1) + @test_throws BoundsError findnext(equalto('a'), str, nextind(str,endof(str))+1) end -# utf-8 rsearch +# utf-8 backward search for str in [u8str] - @test rsearch(str, 'z') == 0 - @test rsearch(str, '\0') == 0 - @test rsearch(str, '\u80') == 0 - @test rsearch(str, '∄') == 0 - @test rsearch(str, '∀') == 1 - @test rsearch(str, '∀', 0) == 0 - @test rsearch(str, '∃') == 13 - @test rsearch(str, '∃', 14) == 13 - @test rsearch(str, '∃', 13) == 13 - @test rsearch(str, '∃', 12) == 0 - @test rsearch(str, 'x') == 43 - @test rsearch(str, 'x', 42) == 26 - @test rsearch(str, 'x', 25) == 0 - @test rsearch(str, 'δ') == 33 - @test rsearch(str, 'δ', 32) == 17 - @test rsearch(str, 'δ', 16) == 0 - @test rsearch(str, 'ε') == 54 - @test rsearch(str, 'ε', 53) == 5 - @test rsearch(str, 'ε', 4) == 0 + @test findlast(equalto('z'), str) == 0 + @test findlast(equalto('\0'), str) == 0 + @test findlast(equalto('\u80'), str) == 0 + @test findlast(equalto('∄'), str) == 0 + @test findlast(equalto('∀'), str) == 1 + @test findprev(equalto('∀'), str, 0) == 0 + @test findlast(equalto('∃'), str) == 13 + @test findprev(equalto('∃'), str, 14) == 13 + @test findprev(equalto('∃'), str, 13) == 13 + @test findprev(equalto('∃'), str, 12) == 0 + @test findlast(equalto('x'), str) == 43 + @test findprev(equalto('x'), str, 42) == 26 + @test 
findprev(equalto('x'), str, 25) == 0 + @test findlast(equalto('δ'), str) == 33 + @test findprev(equalto('δ'), str, 32) == 17 + @test findprev(equalto('δ'), str, 16) == 0 + @test findlast(equalto('ε'), str) == 54 + @test findprev(equalto('ε'), str, 53) == 5 + @test findprev(equalto('ε'), str, 4) == 0 end -# string search with a single-char string -@test search(astr, "x") == 0:-1 -@test search(astr, "H") == 1:1 -@test search(astr, "H", 2) == 0:-1 -@test search(astr, "l") == 3:3 -@test search(astr, "l", 4) == 4:4 -@test search(astr, "l", 5) == 11:11 -@test search(astr, "l", 12) == 0:-1 -@test search(astr, "\n") == 14:14 -@test search(astr, "\n", 15) == 0:-1 - -@test search(u8str, "z") == 0:-1 -@test search(u8str, "∄") == 0:-1 -@test search(u8str, "∀") == 1:1 -@test search(u8str, "∀", 4) == 0:-1 -@test search(u8str, "∃") == 13:13 -@test search(u8str, "∃", 16) == 0:-1 -@test search(u8str, "x") == 26:26 -@test search(u8str, "x", 27) == 43:43 -@test search(u8str, "x", 44) == 0:-1 -@test search(u8str, "ε") == 5:5 -@test search(u8str, "ε", 7) == 54:54 -@test search(u8str, "ε", 56) == 0:-1 - -# string rsearch with a single-char string -@test rsearch(astr, "x") == 0:-1 -@test rsearch(astr, "H") == 1:1 -@test rsearch(astr, "H", 2) == 1:1 -@test rsearch(astr, "H", 0) == 0:-1 -@test rsearch(astr, "l") == 11:11 -@test rsearch(astr, "l", 10) == 4:4 -@test rsearch(astr, "l", 4) == 4:4 -@test rsearch(astr, "l", 3) == 3:3 -@test rsearch(astr, "l", 2) == 0:-1 -@test rsearch(astr, "\n") == 14:14 -@test rsearch(astr, "\n", 13) == 0:-1 - -@test rsearch(u8str, "z") == 0:-1 -@test rsearch(u8str, "∄") == 0:-1 -@test rsearch(u8str, "∀") == 1:1 -@test rsearch(u8str, "∀", 0) == 0:-1 +# string forward search with a single-char string +@test findfirst("x", astr) == 0:-1 +@test findfirst("H", astr) == 1:1 +@test findnext("H", astr, 2) == 0:-1 +@test findfirst("l", astr) == 3:3 +@test findnext("l", astr, 4) == 4:4 +@test findnext("l", astr, 5) == 11:11 +@test findnext("l", astr, 12) == 0:-1 +@test 
findfirst("\n", astr) == 14:14 +@test findnext("\n", astr, 15) == 0:-1 + +@test findfirst("z", u8str) == 0:-1 +@test findfirst("∄", u8str) == 0:-1 +@test findfirst("∀", u8str) == 1:1 +@test findnext("∀", u8str, 4) == 0:-1 +@test findfirst("∃", u8str) == 13:13 +@test findnext("∃", u8str, 16) == 0:-1 +@test findfirst("x", u8str) == 26:26 +@test findnext("x", u8str, 27) == 43:43 +@test findnext("x", u8str, 44) == 0:-1 +@test findfirst("ε", u8str) == 5:5 +@test findnext("ε", u8str, 7) == 54:54 +@test findnext("ε", u8str, 56) == 0:-1 + +# string backward search with a single-char string +@test findlast("x", astr) == 0:-1 +@test findlast("H", astr) == 1:1 +@test findprev("H", astr, 2) == 1:1 +@test findprev("H", astr, 0) == 0:-1 +@test findlast("l", astr) == 11:11 +@test findprev("l", astr, 10) == 4:4 +@test findprev("l", astr, 4) == 4:4 +@test findprev("l", astr, 3) == 3:3 +@test findprev("l", astr, 2) == 0:-1 +@test findlast("\n", astr) == 14:14 +@test findprev("\n", astr, 13) == 0:-1 + +@test findlast("z", u8str) == 0:-1 +@test findlast("∄", u8str) == 0:-1 +@test findlast("∀", u8str) == 1:1 +@test findprev("∀", u8str, 0) == 0:-1 #TODO: setting the limit in the middle of a wide char -# makes search fail but rsearch succeed. -# Should rsearch fail as well?
-#@test rsearch(u8str, "∀", 2) == 0:-1 # gives 1:3 -@test rsearch(u8str, "∃") == 13:13 -@test rsearch(u8str, "∃", 12) == 0:-1 -@test rsearch(u8str, "x") == 43:43 -@test rsearch(u8str, "x", 42) == 26:26 -@test rsearch(u8str, "x", 25) == 0:-1 -@test rsearch(u8str, "ε") == 54:54 -@test rsearch(u8str, "ε", 53) == 5:5 -@test rsearch(u8str, "ε", 4) == 0:-1 - -# string search with a single-char regex -@test search(astr, r"x") == 0:-1 -@test search(astr, r"H") == 1:1 -@test search(astr, r"H", 2) == 0:-1 -@test search(astr, r"l") == 3:3 -@test search(astr, r"l", 4) == 4:4 -@test search(astr, r"l", 5) == 11:11 -@test search(astr, r"l", 12) == 0:-1 -@test search(astr, r"\n") == 14:14 -@test search(astr, r"\n", 15) == 0:-1 -@test search(u8str, r"z") == 0:-1 -@test search(u8str, r"∄") == 0:-1 -@test search(u8str, r"∀") == 1:1 -@test search(u8str, r"∀", 4) == 0:-1 -@test search(u8str, r"∀") == search(u8str, r"\u2200") -@test search(u8str, r"∀", 4) == search(u8str, r"\u2200", 4) -@test search(u8str, r"∃") == 13:13 -@test search(u8str, r"∃", 16) == 0:-1 -@test search(u8str, r"x") == 26:26 -@test search(u8str, r"x", 27) == 43:43 -@test search(u8str, r"x", 44) == 0:-1 -@test search(u8str, r"ε") == 5:5 -@test search(u8str, r"ε", 7) == 54:54 -@test search(u8str, r"ε", 56) == 0:-1 +# makes findnext fail but findprev succeed. +# Should findprev fail as well? 
+#@test findprev("∀", u8str, 2) == 0:-1 # gives 1:3 +@test findlast("∃", u8str) == 13:13 +@test findprev("∃", u8str, 12) == 0:-1 +@test findlast("x", u8str) == 43:43 +@test findprev("x", u8str, 42) == 26:26 +@test findprev("x", u8str, 25) == 0:-1 +@test findlast("ε", u8str) == 54:54 +@test findprev("ε", u8str, 53) == 5:5 +@test findprev("ε", u8str, 4) == 0:-1 + +# string forward search with a single-char regex +@test findfirst(r"x", astr) == 0:-1 +@test findfirst(r"H", astr) == 1:1 +@test findnext(r"H", astr, 2) == 0:-1 +@test findfirst(r"l", astr) == 3:3 +@test findnext(r"l", astr, 4) == 4:4 +@test findnext(r"l", astr, 5) == 11:11 +@test findnext(r"l", astr, 12) == 0:-1 +@test findfirst(r"\n", astr) == 14:14 +@test findnext(r"\n", astr, 15) == 0:-1 +@test findfirst(r"z", u8str) == 0:-1 +@test findfirst(r"∄", u8str) == 0:-1 +@test findfirst(r"∀", u8str) == 1:1 +@test findnext(r"∀", u8str, 4) == 0:-1 +@test findfirst(r"∀", u8str) == findfirst(r"\u2200", u8str) +@test findnext(r"∀", u8str, 4) == findnext(r"\u2200", u8str, 4) +@test findfirst(r"∃", u8str) == 13:13 +@test findnext(r"∃", u8str, 16) == 0:-1 +@test findfirst(r"x", u8str) == 26:26 +@test findnext(r"x", u8str, 27) == 43:43 +@test findnext(r"x", u8str, 44) == 0:-1 +@test findfirst(r"ε", u8str) == 5:5 +@test findnext(r"ε", u8str, 7) == 54:54 +@test findnext(r"ε", u8str, 56) == 0:-1 for i = 1:endof(astr) - @test search(astr, r"."s, i) == i:i + @test findnext(r"."s, astr, i) == i:i end for i = 1:endof(u8str) if isvalid(u8str,i) - @test search(u8str, r"."s, i) == i:i + @test findnext(r"."s, u8str, i) == i:i end end -# string search with a zero-char string +# string forward search with a zero-char string for i = 1:endof(astr) - @test search(astr, "", i) == i:i-1 + @test findnext("", astr, i) == i:i-1 end for i = 1:endof(u8str) - @test search(u8str, "", i) == i:i-1 + @test findnext("", u8str, i) == i:i-1 end -@test search("", "") == 1:0 +@test findfirst("", "") == 1:0 -# string rsearch with a zero-char string +# 
string backward search with a zero-char string for i = 1:endof(astr) - @test rsearch(astr, "", i) == i:i-1 + @test findprev("", astr, i) == i:i-1 end for i = 1:endof(u8str) - @test rsearch(u8str, "", i) == i:i-1 + @test findprev("", u8str, i) == i:i-1 end -@test rsearch("", "") == 1:0 +@test findlast("", "") == 1:0 -# string search with a zero-char regex +# string forward search with a zero-char regex for i = 1:endof(astr) - @test search(astr, r"", i) == i:i-1 + @test findnext(r"", astr, i) == i:i-1 end for i = 1:endof(u8str) # TODO: should regex search fast-forward invalid indices? if isvalid(u8str,i) - @test search(u8str, r""s, i) == i:i-1 + @test findnext(r"", u8str, i) == i:i-1 end end -# string search with a two-char string literal -@test search("foo,bar,baz", "xx") == 0:-1 -@test search("foo,bar,baz", "fo") == 1:2 -@test search("foo,bar,baz", "fo", 3) == 0:-1 -@test search("foo,bar,baz", "oo") == 2:3 -@test search("foo,bar,baz", "oo", 4) == 0:-1 -@test search("foo,bar,baz", "o,") == 3:4 -@test search("foo,bar,baz", "o,", 5) == 0:-1 -@test search("foo,bar,baz", ",b") == 4:5 -@test search("foo,bar,baz", ",b", 6) == 8:9 -@test search("foo,bar,baz", ",b", 10) == 0:-1 -@test search("foo,bar,baz", "az") == 10:11 -@test search("foo,bar,baz", "az", 12) == 0:-1 +# string forward search with a two-char string literal +@test findfirst("xx", "foo,bar,baz") == 0:-1 +@test findfirst("fo", "foo,bar,baz") == 1:2 +@test findnext("fo", "foo,bar,baz", 3) == 0:-1 +@test findfirst("oo", "foo,bar,baz") == 2:3 +@test findnext("oo", "foo,bar,baz", 4) == 0:-1 +@test findfirst("o,", "foo,bar,baz") == 3:4 +@test findnext("o,", "foo,bar,baz", 5) == 0:-1 +@test findfirst(",b", "foo,bar,baz") == 4:5 +@test findnext(",b", "foo,bar,baz", 6) == 8:9 +@test findnext(",b", "foo,bar,baz", 10) == 0:-1 +@test findfirst("az", "foo,bar,baz") == 10:11 +@test findnext("az", "foo,bar,baz", 12) == 0:-1 # issue #9365 -# string search with a two-char UTF-8 (2 byte) string literal -@test search("ééé", 
"éé") == 1:3 -@test search("ééé", "éé", 1) == 1:3 -# string search with a two-char UTF-8 (3 byte) string literal -@test search("€€€", "€€") == 1:4 -@test search("€€€", "€€", 1) == 1:4 -# string search with a two-char UTF-8 (4 byte) string literal -@test search("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 1:5 -@test search("\U1f596\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5 - -# string search with a two-char UTF-8 (2 byte) string literal -@test search("éé", "éé") == 1:3 -@test search("éé", "éé", 1) == 1:3 -# string search with a two-char UTF-8 (3 byte) string literal -@test search("€€", "€€") == 1:4 -@test search("€€", "€€", 1) == 1:4 -# string search with a two-char UTF-8 (4 byte) string literal -@test search("\U1f596\U1f596", "\U1f596\U1f596") == 1:5 -@test search("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5 - -# string rsearch with a two-char UTF-8 (2 byte) string literal -@test rsearch("ééé", "éé") == 3:5 -@test rsearch("ééé", "éé", endof("ééé")) == 3:5 -# string rsearch with a two-char UTF-8 (3 byte) string literal -@test rsearch("€€€", "€€") == 4:7 -@test rsearch("€€€", "€€", endof("€€€")) == 4:7 -# string rsearch with a two-char UTF-8 (4 byte) string literal -@test rsearch("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 5:9 -@test rsearch("\U1f596\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 5:9 - -# string rsearch with a two-char UTF-8 (2 byte) string literal -@test rsearch("éé", "éé") == 1:3 # should really be 1:4! -@test rsearch("éé", "éé", endof("ééé")) == 1:3 -# string search with a two-char UTF-8 (3 byte) string literal -@test rsearch("€€", "€€") == 1:4 # should really be 1:6! -@test rsearch("€€", "€€", endof("€€€")) == 1:4 -# string search with a two-char UTF-8 (4 byte) string literal -@test rsearch("\U1f596\U1f596", "\U1f596\U1f596") == 1:5 # should really be 1:8! 
-@test rsearch("\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 1:5 - -# string rsearch with a two-char string literal -@test rsearch("foo,bar,baz", "xx") == 0:-1 -@test rsearch("foo,bar,baz", "fo") == 1:2 -@test rsearch("foo,bar,baz", "fo", 1) == 0:-1 -@test rsearch("foo,bar,baz", "oo") == 2:3 -@test rsearch("foo,bar,baz", "oo", 2) == 0:-1 -@test rsearch("foo,bar,baz", "o,") == 3:4 -@test rsearch("foo,bar,baz", "o,", 1) == 0:-1 -@test rsearch("foo,bar,baz", ",b") == 8:9 -@test rsearch("foo,bar,baz", ",b", 6) == 4:5 -@test rsearch("foo,bar,baz", ",b", 3) == 0:-1 -@test rsearch("foo,bar,baz", "az") == 10:11 -@test rsearch("foo,bar,baz", "az", 10) == 0:-1 - -# array rsearch -@test rsearch(UInt8[1,2,3],UInt8[2,3],3) == 2:3 -@test rsearch(UInt8[1,2,3],UInt8[2,3],1) == 0:-1 +# string forward search with a two-char UTF-8 (2 byte) string literal +@test findfirst("éé", "ééé") == 1:3 +@test findnext("éé", "ééé", 1) == 1:3 +# string forward search with a two-char UTF-8 (3 byte) string literal +@test findfirst("€€", "€€€") == 1:4 +@test findnext("€€", "€€€", 1) == 1:4 +# string forward search with a two-char UTF-8 (4 byte) string literal +@test findfirst("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 1:5 +@test findnext("\U1f596\U1f596", "\U1f596\U1f596\U1f596", 1) == 1:5 + +# string forward search with a two-char UTF-8 (2 byte) string literal +@test findfirst("éé", "éé") == 1:3 +@test findnext("éé", "éé", 1) == 1:3 +# string forward search with a two-char UTF-8 (3 byte) string literal +@test findfirst("€€", "€€") == 1:4 +@test findnext("€€", "€€", 1) == 1:4 +# string forward search with a two-char UTF-8 (4 byte) string literal +@test findfirst("\U1f596\U1f596", "\U1f596\U1f596") == 1:5 +@test findnext("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5 + +# string backward search with a two-char UTF-8 (2 byte) string literal +@test findlast("éé", "ééé") == 3:5 +@test findprev("éé", "ééé", endof("ééé")) == 3:5 +# string backward search with a two-char UTF-8 (3 
byte) string literal +@test findlast("€€", "€€€") == 4:7 +@test findprev("€€", "€€€", endof("€€€")) == 4:7 +# string backward search with a two-char UTF-8 (4 byte) string literal +@test findlast("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 5:9 +@test findprev("\U1f596\U1f596", "\U1f596\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 5:9 + +# string backward search with a two-char UTF-8 (2 byte) string literal +@test findlast("éé", "éé") == 1:3 # should really be 1:4! +@test findprev("éé", "éé", endof("ééé")) == 1:3 +# string backward search with a two-char UTF-8 (3 byte) string literal +@test findlast("€€", "€€") == 1:4 # should really be 1:6! +@test findprev("€€", "€€", endof("€€€")) == 1:4 +# string backward search with a two-char UTF-8 (4 byte) string literal +@test findlast("\U1f596\U1f596", "\U1f596\U1f596") == 1:5 # should really be 1:8! +@test findprev("\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 1:5 + +# string backward search with a two-char string literal +@test findlast("xx", "foo,bar,baz") == 0:-1 +@test findlast("fo", "foo,bar,baz") == 1:2 +@test findprev("fo", "foo,bar,baz", 1) == 0:-1 +@test findlast("oo", "foo,bar,baz") == 2:3 +@test findprev("oo", "foo,bar,baz", 2) == 0:-1 +@test findlast("o,", "foo,bar,baz") == 3:4 +@test findprev("o,", "foo,bar,baz", 1) == 0:-1 +@test findlast(",b", "foo,bar,baz") == 8:9 +@test findprev(",b", "foo,bar,baz", 6) == 4:5 +@test findprev(",b", "foo,bar,baz", 3) == 0:-1 +@test findlast("az", "foo,bar,baz") == 10:11 +@test findprev("az", "foo,bar,baz", 10) == 0:-1 + +# array backward search +@test findprev(UInt8[2,3],UInt8[1,2,3],3) == 2:3 +@test findprev(UInt8[2,3],UInt8[1,2,3],1) == 0:-1 # string search with a two-char regex -@test search("foo,bar,baz", r"xx") == 0:-1 -@test search("foo,bar,baz", r"fo") == 1:2 -@test search("foo,bar,baz", r"fo", 3) == 0:-1 -@test search("foo,bar,baz", r"oo") == 2:3 -@test search("foo,bar,baz", r"oo", 4) == 0:-1 -@test search("foo,bar,baz", r"o,") == 3:4 
-@test search("foo,bar,baz", r"o,", 5) == 0:-1 -@test search("foo,bar,baz", r",b") == 4:5 -@test search("foo,bar,baz", r",b", 6) == 8:9 -@test search("foo,bar,baz", r",b", 10) == 0:-1 -@test search("foo,bar,baz", r"az") == 10:11 -@test search("foo,bar,baz", r"az", 12) == 0:-1 +@test findfirst(r"xx", "foo,bar,baz") == 0:-1 +@test findfirst(r"fo", "foo,bar,baz") == 1:2 +@test findnext(r"fo", "foo,bar,baz", 3) == 0:-1 +@test findfirst(r"oo", "foo,bar,baz") == 2:3 +@test findnext(r"oo", "foo,bar,baz", 4) == 0:-1 +@test findfirst(r"o,", "foo,bar,baz") == 3:4 +@test findnext(r"o,", "foo,bar,baz", 5) == 0:-1 +@test findfirst(r",b", "foo,bar,baz") == 4:5 +@test findnext(r",b", "foo,bar,baz", 6) == 8:9 +@test findnext(r",b", "foo,bar,baz", 10) == 0:-1 +@test findfirst(r"az", "foo,bar,baz") == 10:11 +@test findnext(r"az", "foo,bar,baz", 12) == 0:-1 @test searchindex("foo", 'o') == 2 @test searchindex("foo", 'o', 3) == 3 diff --git a/test/strings/types.jl b/test/strings/types.jl index 7db39722a5349..22125b52c7463 100644 --- a/test/strings/types.jl +++ b/test/strings/types.jl @@ -118,9 +118,9 @@ end # search and SubString (issue #5679) let str = "Hello, world!" u = SubString(str, 1, 5) - @test rsearch(u, "World") == 0:-1 - @test rsearch(u, 'z') == 0 - @test rsearch(u, "ll") == 3:4 + @test findlast("World", u) == 0:-1 + @test findlast(equalto('z'), u) == 0 + @test findlast("ll", u) == 3:4 end # SubString created from SubString @@ -237,14 +237,14 @@ end for c in ('X', 'δ', '\U0001d6a5') s = convert(T, string(prefix, c, suffix)) r = reverse(s) - ri = search(r, c) + ri = findfirst(equalto(c), r) @test c == s[reverseind(s, ri)] == r[ri] s = convert(T, string(prefix, prefix, c, suffix, suffix)) pre = convert(T, prefix) sb = SubString(s, nextind(pre, endof(pre)), endof(convert(T, string(prefix, prefix, c, suffix)))) r = reverse(sb) - ri = search(r, c) + ri = findfirst(equalto(c), r) @test c == sb[reverseind(sb, ri)] == r[ri] end end