allow convert from RegexMatch to Dict/NamedTuple (JuliaLang#50988)

Added implementation + tests, will add to docs if ok --------- Co-authored-by: Dilum Aluthge <[email protected]> Co-authored-by: Jeff Bezanson <[email protected]>
language-core · Mar 4, 2024 · bc2212c · bc2212c
1 parent 7179050
commit bc2212c
Show file tree

Hide file tree

Showing 3 changed files with 58 additions and 3 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -41,8 +41,26 @@ New library functions
 New library features
 --------------------
 
+* `invmod(n, T)` where `T` is a native integer type now computes the modular inverse of `n` in the modular integer ring that `T` defines ([#52180]).
+* `invmod(n)` is an abbreviation for `invmod(n, typeof(n))` for native integer types ([#52180]).
+* `replace(string, pattern...)` now supports an optional `IO` argument to
+ write the output to a stream rather than returning a string ([#48625]).
+* `sizehint!(s, n)` now supports an optional `shrink` argument to disable shrinking ([#51929]).
+* New function `Docs.hasdoc(module, symbol)` tells whether a name has a docstring ([#52139]).
+* New function `Docs.undocumented_names(module)` returns a module's undocumented public names ([#52413]).
+* Passing an `IOBuffer` as a stdout argument for `Process` spawn now works as
+ expected, synchronized with `wait` or `success`, so a `Base.BufferStream` is
+ no longer required there for correctness to avoid data races ([#52461]).
+* After a process exits, `closewrite` will no longer be automatically called on
+ the stream passed to it. Call `wait` on the process instead to ensure the
+ content is fully written, then call `closewrite` manually to avoid
+ data-races. Or use the callback form of `open` to have all that handled
+ automatically.
+* `@timed` now additionally returns the elapsed compilation and recompilation time ([#52889]).
+* `filter` can now act on a `NamedTuple` ([#50795]).
 * `tempname` can now take a suffix string to allow the file name to include a suffix and include that suffix in
  the uniquing checking ([#53474])
+* `RegexMatch` objects can now be used to construct `NamedTuple`s and `Dict`s ([#50988]).
 
 Standard library changes
 ------------------------

diff --git a/base/regex.jl b/base/regex.jl
@@ -188,6 +188,11 @@ Methods that accept a `RegexMatch` object are defined for [`iterate`](@ref),
 [`getindex`](@ref), where keys are the names or numbers of a capture group.
 See [`keys`](@ref keys(::RegexMatch)) for more information.
 
+`Tuple(m)`, `NamedTuple(m)`, and `Dict(m)` can be used to construct more flexible collection types from `RegexMatch` objects.
+
+!!! compat "Julia 1.11"
+ Constructing `NamedTuple`s and `Dict`s from `RegexMatch` objects requires at least Julia 1.11.
+
 # Examples
 ```jldoctest
 julia> m = match(r"(?<hour>\\d+):(?<minute>\\d+)(am|pm)?", "11:30 in the morning")
@@ -210,6 +215,12 @@ julia> hr, min, ampm = m; # destructure capture groups by iteration
 
 julia> hr
 "11"
+
+julia> Dict(m)
+Dict{Any, Union{Nothing, SubString{String}}} with 3 entries:
+ "hour" => "11"
+ 3 => nothing
+ "minute" => "30"
 ```
 """
 struct RegexMatch{S<:AbstractString} <: AbstractMatch
@@ -289,6 +300,9 @@ iterate(m::RegexMatch, args...) = iterate(m.captures, args...)
 length(m::RegexMatch) = length(m.captures)
 eltype(m::RegexMatch) = eltype(m.captures)
 
+NamedTuple(m::RegexMatch) = NamedTuple{Symbol.(Tuple(keys(m)))}(values(m))
+Dict(m::RegexMatch) = Dict(pairs(m))
+
 function occursin(r::Regex, s::AbstractString; offset::Integer=0)
  compile(r)
  return PCRE.exec_r(r.regex, String(s), offset, r.match_options)
@@ -381,9 +395,13 @@ end
  match(r::Regex, s::AbstractString[, idx::Integer[, addopts]])
 
 Search for the first match of the regular expression `r` in `s` and return a [`RegexMatch`](@ref)
-object containing the match, or nothing if the match failed. The matching substring can be
-retrieved by accessing `m.match` and the captured sequences can be retrieved by accessing
-`m.captures` The optional `idx` argument specifies an index at which to start the search.
+object containing the match, or nothing if the match failed.
+The optional `idx` argument specifies an index at which to start the search.
+The matching substring can be retrieved by accessing `m.match`, and the captured sequences can be retrieved by accessing `m.captures`.
+The resulting [`RegexMatch`](@ref) object can be used to construct other collections, for example `Tuple(m)`, `NamedTuple(m)`, or `Dict(m)`.
+
+!!! compat "Julia 1.11"
+ Constructing `NamedTuple`s and `Dict`s from a `RegexMatch` requires at least Julia 1.11.
 
 # Examples
 ```jldoctest

diff --git a/test/regex.jl b/test/regex.jl
@@ -101,15 +101,34 @@
  @test haskey(m, 3)
  @test !haskey(m, 44)
  @test (m[1], m[2], m[3]) == ("x", "y", "z")
+ @test Tuple(m) == ("x", "y", "z")
+ @test NamedTuple(m) == (var"1"="x", var"2"="y", var"3"="z")
+ @test Dict(m) == Dict([1=>"x", 2=>"y", 3=>"z"])
  @test sprint(show, m) == "RegexMatch(\"xyz\", 1=\"x\", 2=\"y\", 3=\"z\")"
  end
 
  # Named subpatterns
+ let m = match(r"(?<a>.)(?<c>.)(?<b>.)", "xyz")
+ @test haskey(m, :a)
+ @test haskey(m, "b")
+ @test !haskey(m, "foo")
+ @test (m[:a], m[:c], m["b"]) == ("x", "y", "z")
+ @test Tuple(m) == ("x", "y", "z")
+ @test NamedTuple(m) == (a="x", c="y", b="z")
+ @test Dict(m) == Dict(["a"=>"x", "c"=>"y", "b"=>"z"])
+ @test sprint(show, m) == "RegexMatch(\"xyz\", a=\"x\", c=\"y\", b=\"z\")"
+ @test keys(m) == ["a", "c", "b"]
+ end
+
+ # Named and unnamed subpatterns
  let m = match(r"(?<a>.)(.)(?<b>.)", "xyz")
  @test haskey(m, :a)
  @test haskey(m, "b")
  @test !haskey(m, "foo")
  @test (m[:a], m[2], m["b"]) == ("x", "y", "z")
+ @test Tuple(m) == ("x", "y", "z")
+ @test NamedTuple(m) == (a="x", var"2"="y", b="z")
+ @test Dict(m) == Dict(["a"=>"x", 2=>"y", "b"=>"z"])
  @test sprint(show, m) == "RegexMatch(\"xyz\", a=\"x\", 2=\"y\", b=\"z\")"
  @test keys(m) == ["a", 2, "b"]
  end