Skip to content

Commit

Permalink
Unicode-compliant islower/uppercase (JuliaLang#38574)
Browse files Browse the repository at this point in the history
* Unicode-compliant islower/uppercase

* don't test isletter for non-L* letters

* include titlecase in alphas test

* add news
  • Loading branch information
stevengj authored and ElOceanografo committed May 4, 2021
1 parent e2cc689 commit e729741
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 15 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ New library features
Standard library changes
------------------------

* `islowercase` and `isuppercase` are now compliant with the Unicode lower/uppercase categories ([#38574]).

#### Package Manager

Expand Down
17 changes: 6 additions & 11 deletions base/strings/unicode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -280,9 +280,8 @@ isassigned(c) = UTF8PROC_CATEGORY_CN < category_code(c) <= UTF8PROC_CATEGORY_CO
"""
islowercase(c::AbstractChar) -> Bool
Tests whether a character is a lowercase letter.
A character is classified as lowercase if it belongs to Unicode category Ll,
Letter: Lowercase.
Tests whether a character is a lowercase letter (according to the Unicode
standard's `Lowercase` derived property).
See also: [`isuppercase`](@ref).
Expand All @@ -298,16 +297,15 @@ julia> islowercase('❤')
false
```
"""
function islowercase(c::AbstractChar)
    # Category-based definition: lowercase means general category Ll
    # ("Letter: Lowercase") and nothing else.
    return category_code(c) == UTF8PROC_CATEGORY_LL
end
function islowercase(c::AbstractChar)
    # Malformed data is never lowercase; returning early also skips the
    # code-point conversion below for such characters.
    ismalformed(c) && return false
    # Ask utf8proc, which reports the Unicode `Lowercase` derived property.
    return Bool(ccall(:utf8proc_islower, Cint, (UInt32,), UInt32(c)))
end

# true for Unicode uppercase only; titlecase characters are not uppercase (see `iscased`)

"""
isuppercase(c::AbstractChar) -> Bool
Tests whether a character is an uppercase letter.
A character is classified as uppercase if it belongs to Unicode category Lu,
Letter: Uppercase, or Lt, Letter: Titlecase.
Tests whether a character is an uppercase letter (according to the Unicode
standard's `Uppercase` derived property).
See also: [`islowercase`](@ref).
Expand All @@ -323,10 +321,7 @@ julia> isuppercase('❤')
false
```
"""
function isuppercase(c::AbstractChar)
    # Category-based definition: uppercase means general category Lu
    # ("Letter: Uppercase") or Lt ("Letter: Titlecase").
    return category_code(c) in (UTF8PROC_CATEGORY_LU, UTF8PROC_CATEGORY_LT)
end
function isuppercase(c::AbstractChar)
    # Malformed data is never uppercase; returning early also skips the
    # code-point conversion below for such characters.
    ismalformed(c) && return false
    # Ask utf8proc, which reports the Unicode `Uppercase` derived property.
    return Bool(ccall(:utf8proc_isupper, Cint, (UInt32,), UInt32(c)))
end

"""
iscased(c::AbstractChar) -> Bool
Expand Down
11 changes: 7 additions & 4 deletions stdlib/Unicode/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -93,25 +93,28 @@ end
@testset "#5939 uft8proc character predicates" begin
alower=['a', 'd', 'j', 'y', 'z']
ulower=['α', 'β', 'γ', 'δ', 'ф', 'я']
for c in vcat(alower,ulower)
for c in vcat(alower,ulower,['ª'])
@test islowercase(c) == true
@test isuppercase(c) == false
@test isdigit(c) == false
@test isnumeric(c) == false
end

aupper=['A', 'D', 'J', 'Y', 'Z']
uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'Dž', 'Ж', 'Д']
uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'Ж', 'Д']

for c in vcat(aupper,uupper)
for c in vcat(aupper,uupper,[''])
@test islowercase(c) == false
@test isuppercase(c) == true
@test isdigit(c) == false
@test isnumeric(c) == false
end

@test !isuppercase('Dž') # titlecase is not uppercase
@test Base.Unicode.iscased('Dž') # but is "cased"

nocase=['א','']
alphas=vcat(alower,ulower,aupper,uupper,nocase)
alphas=vcat(alower,ulower,aupper,uupper,nocase,['Dž'])

for c in alphas
@test isletter(c) == true
Expand Down

0 comments on commit e729741

Please sign in to comment.