Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up search and find API #24673

Merged
merged 4 commits into from
Jan 4, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Deprecate (r)searchindex(...) in favor of first(findnext/findprev(...))
  • Loading branch information
nalimilan committed Jan 4, 2018
commit d5f74cddc3d367baafa49077f931281fc61e3c3f
10 changes: 10 additions & 0 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3850,6 +3850,16 @@ end
@deprecate rsearch(a::String, b::Union{Int8,UInt8}, i::Integer = endof(a)) findprev(equalto(Char(b)), a, i)
@deprecate rsearch(a::ByteArray, b::Char, i::Integer = endof(a)) findprev(equalto(UInt8(b)), a, i)

# Substring needle: the replacement `find*` methods take the pattern first and the
# searched string second, so the arguments are swapped. They return an index range,
# and `first` recovers the start index (`first(0:-1) == 0` for "not found").
@deprecate searchindex(s::AbstractString, t::AbstractString) first(findfirst(t, s))
@deprecate searchindex(s::AbstractString, t::AbstractString, i::Integer) first(findnext(t, s, i))
@deprecate rsearchindex(s::AbstractString, t::AbstractString) first(findlast(t, s))
@deprecate rsearchindex(s::AbstractString, t::AbstractString, i::Integer) first(findprev(t, s, i))

# `Char` needle: the character is wrapped in an `equalto(c)` predicate and the same
# `first(find*(...))` shape is used as for the substring methods above.
@deprecate searchindex(s::AbstractString, c::Char) first(findfirst(equalto(c), s))
@deprecate searchindex(s::AbstractString, c::Char, i::Integer) first(findnext(equalto(c), s, i))
@deprecate rsearchindex(s::AbstractString, c::Char) first(findlast(equalto(c), s))
@deprecate rsearchindex(s::AbstractString, c::Char, i::Integer) first(findprev(equalto(c), s, i))

# END 0.7 deprecations
# BEGIN 1.0 deprecations

Expand Down
2 changes: 0 additions & 2 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -507,8 +507,6 @@ export
occursin,
match,
matchall,
rsearchindex,
searchindex,
searchsorted,
searchsortedfirst,
searchsortedlast,
Expand Down
2 changes: 0 additions & 2 deletions base/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,6 @@ precompile(Tuple{typeof(Base.unique), Array{String, 1}})
precompile(Tuple{typeof(Base.REPL.beforecursor), Base.GenericIOBuffer{Array{UInt8, 1}}})
precompile(Tuple{typeof(Base.REPLCompletions.completions), String, Int64})
precompile(Tuple{typeof(Base.incomplete_tag), Symbol})
precompile(Tuple{typeof(Base.rsearchindex), String, String, Int64})
precompile(Tuple{typeof(Base._rsearch), String, String, Int64})
precompile(Tuple{typeof(Base.pushfirst!), Array{Base.SubString{String}, 1}, Base.SubString{String}})
precompile(Tuple{typeof(Base.startswith), String, Base.SubString{String}})
Expand Down Expand Up @@ -918,7 +917,6 @@ precompile(Tuple{typeof(Base.Markdown.parseinline), Base.GenericIOBuffer{Array{U
precompile(Tuple{typeof(Base.Markdown.config), Base.Markdown.MD})
precompile(Tuple{typeof(Base.Markdown.parseinline), Base.GenericIOBuffer{Array{UInt8, 1}}, Base.Markdown.MD, Base.Markdown.Config})
precompile(Tuple{typeof(Base.Markdown.list), Base.GenericIOBuffer{Array{UInt8, 1}}, Base.Markdown.MD})
precompile(Tuple{typeof(Base.searchindex), String, String})
precompile(Tuple{typeof(Base._searchindex), Base.SubString{String}, String, Int64})
precompile(Tuple{getfield(Base.Markdown, Symbol("#kw##skipwhitespace")), Array{Any, 1}, typeof(Base.Markdown.skipwhitespace), Base.GenericIOBuffer{Array{UInt8, 1}}})
precompile(Tuple{typeof(Base.ht_keyindex), Base.Dict{Symbol, Base.Markdown.Config}, Symbol})
Expand Down
122 changes: 29 additions & 93 deletions base/strings/search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,11 @@ end
# Uniform accessor for the `i`-th byte-like element of the searched object:
# a `String` is read through `codeunit`, a `UInt8`/`Int8` vector by plain indexing.
_nthbyte(s::String, i) = codeunit(s, i)
_nthbyte(a::Union{AbstractVector{UInt8},AbstractVector{Int8}}, i) = a[i]

_searchindex(s::String, t::String, i::Integer) =
function _searchindex(s::String, t::String, i::Integer)
    if endof(t) == 1
        # Fast path: when the needle's last index is 1, look for its first character
        # with the predicate-based `findnext` instead of the byte-vector search.
        return findnext(equalto(t[1]), s, i)
    end
    # General case: run the byte-vector search over both strings' underlying bytes.
    haystack_bytes = unsafe_wrap(Vector{UInt8}, s)
    needle_bytes = unsafe_wrap(Vector{UInt8}, t)
    return _searchindex(haystack_bytes, needle_bytes, i)
end

function _searchindex(s::ByteArray, t::ByteArray, i::Integer)
n = sizeof(t)
Expand Down Expand Up @@ -220,43 +223,10 @@ function _searchindex(s::ByteArray, t::ByteArray, i::Integer)
0
end

searchindex(s::ByteArray, t::ByteArray, i::Integer) = _searchindex(s,t,i)

"""
searchindex(s::AbstractString, substring, [start::Integer])

Similar to `search`, but return only the start index at which
the substring is found, or `0` if it is not.

# Examples
```jldoctest
julia> searchindex("Hello to the world", "z")
0

julia> searchindex("JuliaLang","Julia")
1

julia> searchindex("JuliaLang","Lang")
6
```
"""
searchindex(s::AbstractString, t::AbstractString, i::Integer) = _searchindex(s,t,i)
searchindex(s::AbstractString, t::AbstractString) = searchindex(s,t,start(s))
searchindex(s::AbstractString, c::Char, i::Integer) = _searchindex(s,c,i)
searchindex(s::AbstractString, c::Char) = searchindex(s,c,start(s))

function searchindex(s::String, t::String, i::Integer=1)
# Check for fast case of a single byte
# (for multi-byte UTF-8 sequences, use searchindex on byte arrays instead)
if endof(t) == 1
findnext(equalto(t[1]), s, i)
else
_searchindex(s, t, i)
end
end

function _search(s, t, i::Integer)
idx = searchindex(s,t,i)
function _search(s::Union{AbstractString,ByteArray},
t::Union{AbstractString,Char,Int8,UInt8},
i::Integer)
idx = _searchindex(s,t,i)
if isempty(t)
idx:idx-1
else
Expand Down Expand Up @@ -291,8 +261,6 @@ julia> findnext("Julia", "JuliaLang", 2)
```
"""
findnext(t::AbstractString, s::AbstractString, i::Integer) = _search(s, t, i)
# TODO: remove?
findnext(t::ByteArray, s::ByteArray, i::Integer) = _search(s, t, i)

"""
findlast(pattern::AbstractString, string::AbstractString)
Expand Down Expand Up @@ -363,8 +331,21 @@ function _rsearchindex(s::AbstractString,
end
end

_rsearchindex(s::String, t::String, i::Integer) =
_rsearchindex(unsafe_wrap(Vector{UInt8}, s), unsafe_wrap(Vector{UInt8}, t), i)
function _rsearchindex(s::String, t::String, i::Integer)
    needle_last = endof(t)

    # Fast path: a needle whose last index is 1 is matched through the predicate-based
    # backward search on its first character.
    needle_last == 1 && return findprev(equalto(t[1]), s, i)

    if needle_last != 0
        # When `i` lies within `s`, move the limit to just before the index that
        # follows `i` (`nextind(s, i) - 1`) before handing over to the byte-vector
        # search; otherwise pass `i` through unchanged.
        limit = i ≤ ncodeunits(s) ? nextind(s, i) - 1 : i
        return _rsearchindex(unsafe_wrap(Vector{UInt8}, s),
                             unsafe_wrap(Vector{UInt8}, t), limit)
    end

    # Remaining case: `endof(t) == 0` (presumably an empty needle — confirm).
    # Past the end of `s` nothing is found; index 0 is reported as 1; any other
    # index is returned as-is.
    i > sizeof(s) && return 0
    return i == 0 ? 1 : i
end

function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer)
n = sizeof(t)
Expand Down Expand Up @@ -427,54 +408,10 @@ function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer)
0
end

rsearchindex(s::ByteArray, t::ByteArray, i::Integer) = _rsearchindex(s,t,i)

"""
rsearchindex(s::AbstractString, substring, [start::Integer])

Similar to `rsearch`, but return only the start index at which the substring is found, or `0` if it is not.

# Examples
```jldoctest
julia> rsearchindex("aaabbb","b")
6

julia> rsearchindex("aaabbb","a")
3
```
"""
rsearchindex(s::AbstractString, t::AbstractString, i::Integer) = _rsearchindex(s,t,i)
rsearchindex(s::AbstractString, t::AbstractString) = (isempty(s) && isempty(t)) ? 1 : rsearchindex(s,t,endof(s))

function rsearchindex(s::String, t::String)
# Check for fast case of a single byte
# (for multi-byte UTF-8 sequences, use rsearchindex instead)
if endof(t) == 1
findprev(equalto(t[1]), s)
else
_rsearchindex(s, t, sizeof(s))
end
end

function rsearchindex(s::String, t::String, i::Integer)
# Check for fast case of a single byte
# (for multi-byte UTF-8 sequences, use rsearchindex instead)
if endof(t) == 1
findprev(equalto(t[1]), s, i)
elseif endof(t) != 0
j = i ≤ ncodeunits(s) ? nextind(s, i)-1 : i
_rsearchindex(s, t, j)
elseif i > sizeof(s)
return 0
elseif i == 0
return 1
else
return i
end
end

function _rsearch(s, t, i::Integer)
idx = rsearchindex(s,t,i)
function _rsearch(s::Union{AbstractString,ByteArray},
t::Union{AbstractString,Char,Int8,UInt8},
i::Integer)
idx = _rsearchindex(s,t,i)
if isempty(t)
idx:idx-1
else
Expand Down Expand Up @@ -509,8 +446,6 @@ julia> findprev("Julia", "JuliaLang", 6)
```
"""
findprev(t::AbstractString, s::AbstractString, i::Integer) = _rsearch(s, t, i)
# TODO: remove?
findprev(t::ByteArray, s::ByteArray, i::Integer) = _rsearch(s, t, i)

"""
contains(haystack::AbstractString, needle::Union{AbstractString,Char})
Expand All @@ -523,6 +458,7 @@ julia> contains("JuliaLang is pretty cool!", "Julia")
true
```
"""
contains(haystack::AbstractString, needle::Union{AbstractString,Char}) = searchindex(haystack,needle)!=0
contains(haystack::AbstractString, needle::Union{AbstractString,Char}) =
_searchindex(haystack, needle, start(haystack)) != 0

# String-in-string membership is rejected on purpose: it raises an error that points
# callers to `contains(x,y)` for substring tests.
in(::AbstractString, ::AbstractString) = error("use contains(x,y) for string containment")
2 changes: 0 additions & 2 deletions doc/src/stdlib/strings.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ Base.findfirst(::AbstractString, ::AbstractString)
Base.findnext(::AbstractString, ::AbstractString, ::Integer)
Base.findlast(::AbstractString, ::AbstractString)
Base.findprev(::AbstractString, ::AbstractString, ::Integer)
Base.searchindex
Base.rsearchindex
Base.contains(::AbstractString, ::AbstractString)
Base.reverse(::Union{String,SubString{String}})
Base.replace(s::AbstractString, ::Pair)
Expand Down
60 changes: 2 additions & 58 deletions test/strings/search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,18 @@ u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
for ind in (0, 5)
@test_throws BoundsError findnext(SubString("",1,1), "foo", ind)
@test_throws BoundsError findprev(SubString("",1,1), "foo", ind)
@test_throws BoundsError searchindex("foo", SubString("",1,1), ind)
@test_throws BoundsError rsearchindex("foo", SubString("",1,1), ind)
end

# Note: the commented out tests will be enabled after fixes to make
# sure that findnext/findprev/searchindex/rsearchindex are consistent
# Note: the commented out test will be enabled after fixes to make
# sure that findnext/findprev are consistent
# no matter what type of AbstractString the second argument is
@test_throws BoundsError findnext(equalto('a'), "foo", 0)
@test_throws BoundsError findnext(occursin(Char[]), "foo", 5)
# @test_throws BoundsError findprev(occursin(Char[]), "foo", 0)
@test_throws BoundsError findprev(occursin(Char[]), "foo", 5)

# @test_throws BoundsError searchindex("foo", Char[], 0)
# @test_throws BoundsError searchindex("foo", Char[], 5)
# @test_throws BoundsError rsearchindex("foo", Char[], 0)
# @test_throws BoundsError rsearchindex("foo", Char[], 5)

# @test_throws ErrorException in("foobar","bar")
@test_throws BoundsError findnext(equalto(0x1),b"\x1\x2",0)
@test rsearchindex(b"foo",b"o",0) == 0
@test rsearchindex(SubString("",1,0),SubString("",1,0)) == 1

# ascii forward search
for str in [astr, GenericString(astr)]
Expand Down Expand Up @@ -308,10 +299,6 @@ end
@test findlast("az", "foo,bar,baz") == 10:11
@test findprev("az", "foo,bar,baz", 10) == 0:-1

# array backward search
@test findprev(UInt8[2,3],UInt8[1,2,3],3) == 2:3
@test findprev(UInt8[2,3],UInt8[1,2,3],1) == 0:-1

# string search with a two-char regex
@test findfirst(r"xx", "foo,bar,baz") == 0:-1
@test findfirst(r"fo", "foo,bar,baz") == 1:2
Expand All @@ -326,53 +313,10 @@ end
@test findfirst(r"az", "foo,bar,baz") == 10:11
@test findnext(r"az", "foo,bar,baz", 12) == 0:-1

@test searchindex("foo", 'o') == 2
@test searchindex("foo", 'o', 3) == 3

# string searchindex with a two-char UTF-8 (2 byte) string literal
@test searchindex("ééé", "éé") == 1
@test searchindex("ééé", "éé", 1) == 1
# string searchindex with a two-char UTF-8 (3 byte) string literal
@test searchindex("€€€", "€€") == 1
@test searchindex("€€€", "€€", 1) == 1
# string searchindex with a two-char UTF-8 (4 byte) string literal
@test searchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 1
@test searchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596", 1) == 1

# string searchindex with a two-char UTF-8 (2 byte) string literal
@test searchindex("éé", "éé") == 1
@test searchindex("éé", "éé", 1) == 1
# string searchindex with a two-char UTF-8 (3 byte) string literal
@test searchindex("€€", "€€") == 1
@test searchindex("€€", "€€", 1) == 1
# string searchindex with a two-char UTF-8 (4 byte) string literal
@test searchindex("\U1f596\U1f596", "\U1f596\U1f596") == 1
@test searchindex("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1

# contains with a String and Char needle
@test contains("foo", "o")
@test contains("foo", 'o')

# string rsearchindex with a two-char UTF-8 (2 byte) string literal
@test rsearchindex("ééé", "éé") == 3
@test rsearchindex("ééé", "éé", endof("ééé")) == 3
# string rsearchindex with a two-char UTF-8 (3 byte) string literal
@test rsearchindex("€€€", "€€") == 4
@test rsearchindex("€€€", "€€", endof("€€€")) == 4
# string rsearchindex with a two-char UTF-8 (4 byte) string literal
@test rsearchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596") == 5
@test rsearchindex("\U1f596\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 5

# string rsearchindex with a two-char UTF-8 (2 byte) string literal
@test rsearchindex("éé", "éé") == 1
@test rsearchindex("éé", "éé", endof("ééé")) == 1
# string searchindex with a two-char UTF-8 (3 byte) string literal
@test rsearchindex("€€", "€€") == 1
@test rsearchindex("€€", "€€", endof("€€€")) == 1
# string searchindex with a two-char UTF-8 (4 byte) string literal
@test rsearchindex("\U1f596\U1f596", "\U1f596\U1f596") == 1
@test rsearchindex("\U1f596\U1f596", "\U1f596\U1f596", endof("\U1f596\U1f596\U1f596")) == 1

@test_throws ErrorException "ab" ∈ "abc"

# issue #15723
Expand Down