Deprecate matchall (JuliaLang#26071)

It is not obvious that returning matching SubStrings rather than RegexMatch objects is a good idea.
tekverkp · Feb 22, 2018 · da98033 · da98033
1 parent f82e845
commit da98033
Show file tree

Hide file tree

Showing 8 changed files with 26 additions and 82 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1001,6 +1001,8 @@ Deprecated or removed
 
  * `ismatch(regex, str)` has been deprecated in favor of `contains(str, regex)` ([#24673]).
 
+ * `matchall` has been deprecated in favor of `collect(m.match for m in eachmatch(r, s))` ([#26071]).
+
  * `similar(::Associative)` has been deprecated in favor of `empty(::Associative)`, and
  `similar(::Associative, ::Pair{K, V})` has been deprecated in favour of
  `empty(::Associative, K, V)` ([#24390]).
@@ -1359,3 +1361,4 @@ Command-line option changes
 [#25990]: https://github.com/JuliaLang/julia/issues/25990
 [#25998]: https://github.com/JuliaLang/julia/issues/25998
 [#26009]: https://github.com/JuliaLang/julia/issues/26009
+[#26071]: https://github.com/JuliaLang/julia/issues/26071
diff --git a/base/deprecated.jl b/base/deprecated.jl
@@ -1345,7 +1345,7 @@ end
 @deprecate names(m, all) names(m, all = all)
 @deprecate names(m, all, imported) names(m, all = all, imported = imported)
 @deprecate eachmatch(re, str, overlap) eachmatch(re, str, overlap = overlap)
-@deprecate matchall(re, str, overlap) matchall(re, str, overlap = overlap)
+@deprecate matchall(re, str, overlap) collect(m.match for m in eachmatch(re, str, overlap = overlap))
 @deprecate chop(s, head) chop(s, head = head)
 @deprecate chop(s, head, tail) chop(s, head = head, tail = tail)
 @deprecate tryparse(T::Type{<:Integer}, s, base) tryparse(T, s, base = base)
@@ -1405,6 +1405,10 @@ end
  Base.@deprecate log Base.log
 end
 
+# PR 26071
+@deprecate(matchall(r::Regex, s::AbstractString; overlap::Bool = false),
+ collect(m.match for m in eachmatch(r, s, overlap = overlap)))
+
 # END 0.7 deprecations
 
 # BEGIN 1.0 deprecations

diff --git a/base/exports.jl b/base/exports.jl
@@ -462,7 +462,6 @@ export
  findprev,
  occursin,
  match,
- matchall,
  searchsorted,
  searchsortedfirst,
  searchsortedlast,

diff --git a/base/regex.jl b/base/regex.jl
@@ -205,66 +205,6 @@ match(r::Regex, s::AbstractString, i::Integer) = throw(ArgumentError(
  "regex matching is only available for the String type; use String(s) to convert"
 ))
 
-"""
- matchall(r::Regex, s::AbstractString; overlap::Bool = false]) -> Vector{AbstractString}
-
-Return a vector of the matching substrings from [`eachmatch`](@ref).
-
-
-# Examples
-```jldoctest
-julia> rx = r"a.a"
-r"a.a"
-
-julia> matchall(rx, "a1a2a3a")
-2-element Array{SubString{String},1}:
- "a1a"
- "a3a"
-
-julia> matchall(rx, "a1a2a3a", overlap = true)
-3-element Array{SubString{String},1}:
- "a1a"
- "a2a"
- "a3a"
-```
-"""
-function matchall(re::Regex, str::String; overlap::Bool = false)
- regex = compile(re).regex
- n = sizeof(str)
- matches = SubString{String}[]
- offset = UInt32(0)
- opts = re.match_options
- opts_nonempty = opts | PCRE.ANCHORED | PCRE.NOTEMPTY_ATSTART
- prevempty = false
- ovec = re.ovec
- while true
- result = PCRE.exec(regex, str, offset, prevempty ? opts_nonempty : opts, re.match_data)
- if !result
- if prevempty && offset < n
- offset = UInt32(nextind(str, offset + 1) - 1)
- prevempty = false
- continue
- else
- break
- end
- end
-
- push!(matches, SubString(str, ovec[1]+1, ovec[2]))
- prevempty = offset == ovec[2]
- if overlap
- if !prevempty
- offset = UInt32(ovec[1]+1)
- end
- else
- offset = ovec[2]
- end
- end
- matches
-end
-
-matchall(re::Regex, str::SubString; overlap::Bool = false) =
- matchall(re, String(str), overlap = overlap)
-
 # TODO: return only start index and update deprecation
 function findnext(re::Regex, str::Union{String,SubString}, idx::Integer)
  if idx > nextind(str,lastindex(str))

diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md
@@ -26,7 +26,6 @@ Base.isvalid(::Any, ::Any)
 Base.isvalid(::AbstractString, ::Integer)
 Base.match
 Base.eachmatch
-Base.matchall
 Base.isless(::AbstractString, ::AbstractString)
 Base.:(==)(::AbstractString, ::AbstractString)
 Base.cmp(::AbstractString, ::AbstractString)

diff --git a/stdlib/Markdown/src/render/plain.jl b/stdlib/Markdown/src/render/plain.jl
@@ -22,7 +22,7 @@ end
 function plain(io::IO, code::Code)
  # If the code includes a fenced block this will break parsing,
  # so it must be enclosed by a longer ````-sequence.
- n = mapreduce(length, max, 2, matchall(r"^`+"m, code.code)) + 1
+ n = mapreduce(m -> length(m.match), max, 2, eachmatch(r"^`+"m, code.code)) + 1
  println(io, "`" ^ n, code.language)
  println(io, code.code)
  println(io, "`" ^ n)
@@ -121,7 +121,7 @@ plaininline(io::IO, md::Italic) = plaininline(io, "*", md.text, "*")
 
 function plaininline(io::IO, md::Code)
  if contains(md.code, "`")
- n = maximum(length(m) for m in matchall(r"(`+)", md.code))
+ n = maximum(length(m.match) for m in eachmatch(r"(`+)", md.code))
  s = "`"^((iseven(n) ? 1 : 2) + n)
  print(io, s, Base.startswith(md.code, "`") ? " " : "")
  print(io, md.code, endswith(md.code, "`") ? " " : "", s)

diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl
@@ -201,7 +201,7 @@ function complete_path(path::AbstractString, pos; use_envpath=false)
  end
 
  matchList = String[replace(s, r"\s" => "\\ ") for s in matches]
- startpos = pos - lastindex(prefix) + 1 - length(matchall(r" ", prefix))
+ startpos = pos - lastindex(prefix) + 1 - count(equalto(' '), prefix)
  # The pos - lastindex(prefix) + 1 is correct due to `lastindex(prefix)-lastindex(prefix)==0`,
  # hence we need to add one to get the first index. This is also correct when considering
  # pos, because pos is the `lastindex` a larger string which `endswith(path)==true`.

diff --git a/test/regex.jl b/test/regex.jl
@@ -4,22 +4,21 @@ function collect_eachmatch(re, str; overlap=false)
  [m.match for m in collect(eachmatch(re, str, overlap = overlap))]
 end
 
-for f in [matchall, collect_eachmatch]
- @test f(r"a?b?", "asbd") == ["a","","b","",""] == f(r"""a?b?""", "asbd")
- @test f(r"a?b?", "asbd", overlap=true) == ["a","","b","",""]
- @test f(r"\w+", "hello", overlap=true) == ["hello","ello","llo","lo","o"]
- @test f(r".\s", "x \u2200 x \u2203 y") == ["x ", "∀ ", "x ", "∃ "]
- @test f(r"(\w+)(\s*)", "The dark side of the moon") ==
- ["The ", "dark ", "side ", "of ", "the ", "moon"]
- @test f(r"", "") == [""]
- @test f(r"", "", overlap=true) == [""]
- @test f(r"aa", "aaaa") == ["aa", "aa"]
- @test f(r"aa", "aaaa", overlap=true) == ["aa", "aa", "aa"]
- @test f(r"", "aaa") == ["", "", "", ""]
- @test f(r"", "aaa", overlap=true) == ["", "", "", ""]
- @test f(r"GCG","GCGCG") == ["GCG"]
- @test f(r"GCG","GCGCG",overlap=true) == ["GCG","GCG"]
-end
+@test collect_eachmatch(r"a?b?", "asbd") == ["a","","b","",""] ==
+ collect_eachmatch(r"""a?b?""", "asbd")
+@test collect_eachmatch(r"a?b?", "asbd", overlap=true) == ["a","","b","",""]
+@test collect_eachmatch(r"\w+", "hello", overlap=true) == ["hello","ello","llo","lo","o"]
+@test collect_eachmatch(r".\s", "x \u2200 x \u2203 y") == ["x ", "∀ ", "x ", "∃ "]
+@test collect_eachmatch(r"(\w+)(\s*)", "The dark side of the moon") ==
+ ["The ", "dark ", "side ", "of ", "the ", "moon"]
+@test collect_eachmatch(r"", "") == [""]
+@test collect_eachmatch(r"", "", overlap=true) == [""]
+@test collect_eachmatch(r"aa", "aaaa") == ["aa", "aa"]
+@test collect_eachmatch(r"aa", "aaaa", overlap=true) == ["aa", "aa", "aa"]
+@test collect_eachmatch(r"", "aaa") == ["", "", "", ""]
+@test collect_eachmatch(r"", "aaa", overlap=true) == ["", "", "", ""]
+@test collect_eachmatch(r"GCG","GCGCG") == ["GCG"]
+@test collect_eachmatch(r"GCG","GCGCG",overlap=true) == ["GCG","GCG"]
 
 # Issue 8278
 target = """71.163.72.113 - - [30/Jul/2014:16:40:55 -0700] "GET emptymind.org/thevacantwall/wp-content/uploads/2013/02/DSC_006421.jpg HTTP/1.1" 200 492513 "http:https://images.search.yahoo.com/images/view;_ylt=AwrB8py9gdlTGEwADcSjzbkF;_ylu=X3oDMTI2cGZrZTA5BHNlYwNmcC1leHAEc2xrA2V4cARvaWQDNTA3NTRiMzYzY2E5OTEwNjBiMjc2YWJhMjkxMTEzY2MEZ3BvcwM0BGl0A2Jpbmc-?back=http%3A%2F%2Fus.yhs4.search.yahoo.com%2Fyhs%2Fsearch%3Fei%3DUTF-8%26p%3Dapartheid%2Bwall%2Bin%2Bpalestine%26type%3Dgrvydef%26param1%3D1%26param2%3Dsid%253Db01676f9c26355f014f8a9db87545d61%2526b%253DChrome%2526ip%253D71.163.72.113%2526p%253Dgroovorio%2526x%253DAC811262A746D3CD%2526dt%253DS940%2526f%253D7%2526a%253Dgrv_tuto1_14_30%26hsimp%3Dyhs-fullyhosted_003%26hspart%3Dironsource&w=588&h=387&imgurl=occupiedpalestine.files.wordpress.com%2F2012%2F08%2F5-peeking-through-the-wall.jpg%3Fw%3D588%26h%3D387&rurl=http%3A%2F%2Fwww.stopdebezetting.com%2Fwereldpers%2Fcompare-the-berlin-wall-vs-israel-s-apartheid-wall-in-palestine.html&size=49.0KB&name=...+%3Cb%3EApartheid+wall+in+Palestine%3C%2Fb%3E...+%7C+Or+you+go+peeking+through+the+%3Cb%3Ewall%3C%2Fb%3E&p=apartheid+wall+in+palestine&oid=50754b363ca991060b276aba291113cc&fr2=&fr=&tt=...+%3Cb%3EApartheid+wall+in+Palestine%3C%2Fb%3E...+%7C+Or+you+go+peeking+through+the+%3Cb%3Ewall%3C%2Fb%3E&b=0&ni=21&no=4&ts=&tab=organic&sigr=13evdtqdq&sigb=19k7nsjvb&sigi=12o2la1db&sigt=12lia2m0j&sign=12lia2m0j&.crumb=.yUtKgFI6DE&hsimp=yhs-fullyhosted_003&hspart=ironsource" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36"""