Skip to content

Commit

Permalink
Revert "Enable PCRE UTF-8 validity string checks (JuliaLang#26731)" (JuliaLang#28259)
Browse files Browse the repository at this point in the history

This reverts commit 627173b.
  • Loading branch information
KristofferC authored and StefanKarpinski committed Jul 25, 2018
1 parent 68744d9 commit 1033b88
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 18 deletions.
4 changes: 2 additions & 2 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

include("pcre.jl")

-const DEFAULT_COMPILER_OPTS = PCRE.UTF | PCRE.ALT_BSUX | PCRE.UCP
-const DEFAULT_MATCH_OPTS = zero(UInt32)
+const DEFAULT_COMPILER_OPTS = PCRE.UTF | PCRE.NO_UTF_CHECK | PCRE.ALT_BSUX | PCRE.UCP
+const DEFAULT_MATCH_OPTS = PCRE.NO_UTF_CHECK

mutable struct Regex
pattern::String
Expand Down
16 changes: 0 additions & 16 deletions test/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,22 +58,6 @@ end
# Proper unicode handling
@test match(r"∀∀", "∀x∀∀∀").match == "∀∀"

-@test_throws ErrorException match(r"a", "\xe2\x88") # 1 byte missing at end
-@test_throws ErrorException match(r"a", "\xe2\x08\x80") # byte 2 top bits not 0x80
-@test_throws ErrorException match(r"a", "\xf8\x89\x89\x80\x80") # 5-byte character is not allowed (RFC 3629)
-@test_throws ErrorException match(r"a", "\xf4\x9f\xbf\xbf") # code points greater than 0x10ffff are not defined
-@test_throws ErrorException match(r"a", "\Udfff") # code points 0xd800-0xdfff are not defined
-@test_throws ErrorException match(r"a", "\xc0\x80") # overlong 2-byte sequence
-@test_throws ErrorException match(r"a", "\xff") # illegal byte (0xfe or 0xff)
-
-@test_throws ErrorException Regex("\xe2\x88") # 1 byte missing at end
-@test_throws ErrorException Regex("\xe2\x08\x80") # byte 2 top bits not 0x80
-@test_throws ErrorException Regex("\xf8\x89\x89\x80\x80") # 5-byte character is not allowed (RFC 3629)
-@test_throws ErrorException Regex("\xf4\x9f\xbf\xbf") # code points greater than 0x10ffff are not defined
-@test_throws ErrorException Regex("\Udfff") # code points 0xd800-0xdfff are not defined
-@test_throws ErrorException Regex("\xc0\x80") # overlong 2-byte sequence
-@test_throws ErrorException Regex("\xff") # illegal byte (0xfe or 0xff)
-
# 'a' flag to disable UCP
@test match(r"\w+", "Düsseldorf").match == "Düsseldorf"
@test match(r"\w+"a, "Düsseldorf").match == "D"

0 comments on commit 1033b88

Please sign in to comment.