Skip to content

Commit

Permalink
add *(::Union{Regex, AbstractString, AbstractChar}...) (#23422)
Browse files Browse the repository at this point in the history
  • Loading branch information
rfourquet authored and StefanKarpinski committed Apr 29, 2019
1 parent 458380c commit 0140ce8
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 4 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ New library functions
Standard library changes
------------------------

* Cmd interpolation (``` `$(x::Cmd) a b c` ```) now propagates `x`'s process flags (environment, flags, working directory, etc.) if `x` is the first interpolant and errors otherwise ([#24353]).
* `Regex` can now be multiplied (`*`) and exponentiated (`^`), like strings ([#23422]).

#### LinearAlgebra

Expand Down
104 changes: 104 additions & 0 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -541,3 +541,107 @@ function hash(r::Regex, h::UInt)
h = hash(r.compile_options, h)
h = hash(r.match_options, h)
end

## String operations ##

"""
    *(s::Regex, t::Union{Regex,AbstractString,AbstractChar}) -> Regex
    *(s::Union{Regex,AbstractString,AbstractChar}, t::Regex) -> Regex

Concatenate regexes, strings and/or characters, producing a [`Regex`](@ref).
String and character arguments must be matched exactly in the resulting regex,
meaning that the contained characters are devoid of any special meaning
(they are quoted with "\\Q" and "\\E").

All regex arguments must have the same match options, and the same compile
options apart from `i`, `m`, `s` and `x`; otherwise an `ArgumentError` is thrown.
Those of the `i`, `m`, `s`, `x` flags that are set on every regex argument are
kept on the result; the remaining ones are applied only to the sub-pattern of
the regex that carried them.

!!! compat "Julia 1.3"
    This method requires at least Julia 1.3.

# Examples
```jldoctest
julia> match(r"Hello|Good bye" * ' ' * "world", "Hello world")
RegexMatch("Hello world")

julia> r = r"a|b" * "c|d"
r"(?:a|b)\\Qc|d\\E"

julia> match(r, "ac") === nothing
true

julia> match(r, "ac|d")
RegexMatch("ac|d")
```
"""
function *(r1::Union{Regex,AbstractString,AbstractChar}, rs::Union{Regex,AbstractString,AbstractChar}...)
    mask = PCRE.CASELESS | PCRE.MULTILINE | PCRE.DOTALL | PCRE.EXTENDED # imsx
    match_opts   = nothing # all args must agree on this
    compile_opts = nothing # all args must agree on this
    shared = mask # imsx flags set on every regex seen so far
    for r in (r1, rs...)
        r isa Regex || continue
        # `===` is an identity check against the `nothing` sentinel; it cannot be
        # affected by `==` overloads and is resolved by the compiler.
        if match_opts === nothing
            # first regex encountered: it sets the reference options
            match_opts = r.match_options
            compile_opts = r.compile_options & ~mask
        else
            r.match_options == match_opts &&
                r.compile_options & ~mask == compile_opts ||
                throw(ArgumentError("cannot multiply regexes: incompatible options"))
        end
        shared &= r.compile_options
    end
    # imsx flags not common to all regexes are pushed down into each sub-pattern
    unshared = mask & ~shared
    return Regex(string(wrap_string(r1, unshared), wrap_string.(rs, Ref(unshared))...),
                 compile_opts | shared, match_opts)
end

# A lone regex is its own product: return it untouched so that it does not get
# wrapped in a useless subpattern.
function *(r::Regex)
    return r
end

# Turn one operand of a regex product into a self-contained pattern fragment.

# A regex becomes a non-capturing group that carries those of its flags selected
# by `unshared`.
function wrap_string(r::Regex, unshared::UInt32)
    flags = regex_opts_str(r.compile_options & unshared)
    return string("(?", flags, ':', r.pattern, ')')
end

# A string is quoted with \Q...\E; if it contains raw"\E", the '\' and the 'E'
# are split across two distinct \Q...\E groups.
function wrap_string(s::AbstractString, ::UInt32)
    quoted = replace(s, raw"\E" => raw"\\E\QE")
    return string("\\Q", quoted, "\\E")
end

# A character is quoted with \Q...\E as well.
function wrap_string(s::AbstractChar, ::UInt32)
    return string("\\Q", s, "\\E")
end

# UInt32 to String mapping for some compile options (every combination of the
# i, m, s and x flags); left unassigned until the first lookup.
const _regex_opts_str = Ref{ImmutableDict{UInt32,String}}()

# Return the flag letters for the compile-option combination `opts`, building
# the lookup table on first use.
function regex_opts_str(opts)
    table = isassigned(_regex_opts_str) ? _regex_opts_str[] : init_regex()
    return table[opts]
end

# Build the table, store it in `_regex_opts_str` and return it.
function init_regex()
    flags = ((1, PCRE.CASELESS, 'i'),
             (2, PCRE.MULTILINE, 'm'),
             (4, PCRE.DOTALL, 's'),
             (8, PCRE.EXTENDED, 'x'))
    table = ImmutableDict{UInt32,String}()
    for combo in 0:15 # each bit of `combo` selects one of the four flags
        key = UInt32(0)
        letters = ""
        for (bit, flag, letter) in flags
            if combo & bit != 0
                key |= flag
                letters *= letter
            end
        end
        table = ImmutableDict(table, key => letters)
    end
    _regex_opts_str[] = table
    return table
end


"""
    ^(s::Regex, n::Integer) -> Regex

Repeat a regex `n` times. The result keeps the compile and match options of `s`.

Throw an `ArgumentError` if `n` is negative.

!!! compat "Julia 1.3"
    This method requires at least Julia 1.3.

# Examples
```jldoctest
julia> r"Test "^2
r"(?:Test ){2}"

julia> match(r"Test "^2, "Test Test ")
RegexMatch("Test Test ")
```
"""
function ^(r::Regex, i::Integer)
    # A negative count cannot be expressed as a `{n}` quantifier; reject it up
    # front (as string repetition does) rather than build a pattern that does
    # not mean "repeat".
    i < 0 && throw(ArgumentError("can't repeat a regex $i times"))
    return Regex(string("(?:", r.pattern, "){$i}"), r.compile_options, r.match_options)
end
6 changes: 3 additions & 3 deletions stdlib/REPL/src/REPL.jl
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,7 @@ function setup_interface(
oldpos = firstindex(input)
firstline = true
isprompt_paste = false
jl_prompt_len = 7 # "julia> "
while oldpos <= lastindex(input) # loop until all lines have been executed
if JL_PROMPT_PASTE[]
# Check if the next statement starts with "julia> ", in that case
Expand All @@ -934,7 +935,6 @@ function setup_interface(
oldpos >= sizeof(input) && return
end
# Check if input line starts with "julia> ", remove it if we are in prompt paste mode
jl_prompt_len = 7
if (firstline || isprompt_paste) && startswith(SubString(input, oldpos), JULIA_PROMPT)
isprompt_paste = true
oldpos += jl_prompt_len
Expand All @@ -959,7 +959,7 @@ function setup_interface(
tail = lstrip(tail)
end
if isprompt_paste # remove indentation spaces corresponding to the prompt
tail = replace(tail, r"^ {7}"m => "") # 7: jl_prompt_len
tail = replace(tail, r"^"m * ' '^jl_prompt_len => "")
end
LineEdit.replace_line(s, tail, true)
LineEdit.refresh_line(s)
Expand All @@ -969,7 +969,7 @@ function setup_interface(
line = strip(input[oldpos:prevind(input, pos)])
if !isempty(line)
if isprompt_paste # remove indentation spaces corresponding to the prompt
line = replace(line, r"^ {7}"m => "") # 7: jl_prompt_len
line = replace(line, r"^"m * ' '^jl_prompt_len => "")
end
# put the line on the screen and history
LineEdit.replace_line(s, line)
Expand Down
40 changes: 40 additions & 0 deletions test/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,46 @@
@test !endswith("abc", r"C")
@test endswith("abc", r"C"i)

@testset "multiplication & exponentiation" begin
    # a lone regex is returned as is
    @test *(r"a") == r"a"

    # regexes are wrapped in non-capturing groups, strings and chars are quoted
    @test r"a" * r"b" == r"(?:a)(?:b)"
    @test r"a" * "b"  == r"(?:a)\Qb\E"
    @test r"a" * 'b'  == r"(?:a)\Qb\E"
    @test "a"  * r"b" == r"\Qa\E(?:b)"
    @test 'a'  * r"b" == r"\Qa\E(?:b)"
    # every 3-operand mix containing at least one regex
    for a = (r"a", "a", 'a'),
        b = (r"b", "b", 'b'),
        c = (r"c", "c", 'c')
        a isa Regex || b isa Regex || c isa Regex || continue
        @test match(a * b * c, "abc") !== nothing
    end
    # alternations stay confined to their own operand
    for s = ["thiscat", "thishat", "thatcat", "thathat"]
        @test match(r"this|that" * r"cat|hat", s) !== nothing
    end

    # a flag carried by every regex operand is kept on the result
    @test r"a"i * r"b"i == r"(?:a)(?:b)"i
    @test r"a"i * "b"   == r"(?:a)\Qb\E"i
    @test r"a"i * 'b'   == r"(?:a)\Qb\E"i
    @test "a"   * r"b"i == r"\Qa\E(?:b)"i
    @test 'a'   * r"b"i == r"\Qa\E(?:b)"i

    # flags that are not shared are pushed into the sub-patterns
    @test r"a"i  * r"b"m  == r"(?i:a)(?m:b)"
    @test r"a"im * r"b"m  == r"(?i:a)(?:b)"m
    @test r"a"im * r"b"im == r"(?:a)(?:b)"im
    @test r"a"im * r"b"i  == r"(?m:a)(?:b)"i

    # a string containing raw"\E" must still be matched literally
    r = r"" * raw"a\Eb|c"
    @test match(r, raw"a\Eb|c").match == raw"a\Eb|c"
    @test match(r, raw"c") === nothing

    # error for really incompatible options
    @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS & ~Base.PCRE.UCP, Base.DEFAULT_MATCH_OPTS)
    @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS, Base.DEFAULT_MATCH_OPTS & ~Base.PCRE.NO_UTF_CHECK)

    @test r"this|that"^2 == r"(?:this|that){2}"
end

# Test that PCRE throws the correct kind of error
# TODO: Uncomment this once the corresponding change has propagated to CI
#@test_throws ErrorException Base.PCRE.info(C_NULL, Base.PCRE.INFO_NAMECOUNT, UInt32)
Expand Down

0 comments on commit 0140ce8

Please sign in to comment.