Skip to content

Commit

Permalink
base/char.jl: tweak doc strings
Browse files Browse the repository at this point in the history
  • Loading branch information
StefanKarpinski committed Sep 14, 2018
1 parent b9c7a72 commit fc04d73
Showing 1 changed file with 21 additions and 21 deletions.
42 changes: 21 additions & 21 deletions base/char.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

"""
The `AbstractChar` type is the supertype of all character implementations
in Julia. A character represents a Unicode code point, and can be converted
in Julia. A character represents a Unicode code point, and can be converted
to an integer via the [`codepoint`](@ref) function in order to obtain the
numerical value of the code point, or constructed from the same integer.
These numerical values determine how characters are compared with `<` and `==`,
Expand All @@ -11,7 +11,7 @@ method and a `T(::UInt32)` constructor, at minimum.
A given `AbstractChar` subtype may be capable of representing only a subset
of Unicode, in which case conversion from an unsupported `UInt32` value
may throw an error. Conversely, the built-in [`Char`](@ref) type represents
may throw an error. Conversely, the built-in [`Char`](@ref) type represents
a *superset* of Unicode (in order to losslessly encode invalid byte streams),
in which case conversion of a non-Unicode value *to* `UInt32` throws an error.
The [`isvalid`](@ref) function can be used to check which codepoints are
Expand All @@ -34,7 +34,7 @@ AbstractChar
Char(c::Union{Number,AbstractChar})
`Char` is a 32-bit [`AbstractChar`](@ref) type that is the default representation
of characters in Julia. `Char` is the type used for character literals like `'x'`
of characters in Julia. `Char` is the type used for character literals like `'x'`
and it is also the element type of [`String`](@ref).
In order to losslessly represent arbitrary byte streams stored in a `String`,
Expand All @@ -50,18 +50,18 @@ Char
# Generic conversion of any `AbstractChar` to a `Number` or `AbstractChar` type `T`:
# obtain the character's code point and pass it to `T`'s constructor.
(::Type{T})(x::AbstractChar) where {T<:Union{Number,AbstractChar}} = T(codepoint(x))
# Converting a character to its own type is the identity: `x` is returned unchanged.
(::Type{T})(x::T) where {T<:AbstractChar} = x

codepoint(c::Char) = UInt32(c)

"""
codepoint(c::AbstractChar)
codepoint(c::AbstractChar) -> Integer
Return the Unicode codepoint (an unsigned integer) corresponding
to the character `c` (or throw an exception if `c` does not represent
a valid character). For `Char`, this is a `UInt32` value, but
a valid character). For `Char`, this is a `UInt32` value, but
`AbstractChar` types that represent only a subset of Unicode may
return a different-sized integer (e.g. `UInt8`).
"""
codepoint # defined for Char in boot.jl
function codepoint end

codepoint(c::Char) = UInt32(c)

struct InvalidCharError{T<:AbstractChar} <: Exception
char::T
Expand Down Expand Up @@ -91,7 +91,7 @@ end
# not to support malformed or overlong encodings.

"""
ismalformed(c::AbstractChar)
ismalformed(c::AbstractChar) -> Bool
Return `true` if `c` represents malformed (non-Unicode) data according to the
encoding used by `c`. Defaults to `false` for non-`Char` types. See also
Expand All @@ -100,9 +100,9 @@ encoding used by `c`. Defaults to `false` for non-`Char` types. See also
# Generic fallback for every `AbstractChar`: always reports "not malformed".
# NOTE(review): a more specific `Char` method is presumably defined elsewhere — not visible here.
ismalformed(c::AbstractChar) = false

"""
isoverlong(c::AbstractChar)
isoverlong(c::AbstractChar) -> Bool
Return `true` if `c` represents an overlong UTF-8 sequence. Defaults
Return `true` if `c` represents an overlong UTF-8 sequence. Defaults
to `false` for non-`Char` types. See also [`decode_overlong`](@ref)
and [`show_invalid`](@ref).
"""
Expand All @@ -123,6 +123,15 @@ function UInt32(c::Char)
((u & 0x007f0000) >> 4) | ((u & 0x7f000000) >> 6)
end

"""
    decode_overlong(c::AbstractChar) -> Integer

When [`isoverlong(c)`](@ref) is `true`, `decode_overlong(c)` returns
the Unicode codepoint value of `c`. `AbstractChar` implementations
that support overlong encodings should implement `Base.decode_overlong`.
"""
function decode_overlong end

function decode_overlong(c::Char)
u = reinterpret(UInt32, c)
l1 = leading_ones(u)
Expand All @@ -133,15 +142,6 @@ function decode_overlong(c::Char)
((u & 0x007f0000) >> 4) | ((u & 0x7f000000) >> 6)
end

"""
decode_overlong(c::AbstractChar)
When [`isoverlong(c)`](@ref) is `true`, `decode_overlong(c)` returns
the Unicode codepoint value of `c`. `AbstractChar` implementations
that support overlong encodings should implement `Base.decode_overlong`.
"""
decode_overlong

function Char(u::UInt32)
u < 0x80 && return reinterpret(Char, u << 24)
u < 0x00200000 || code_point_err(u)::Union{}
Expand Down Expand Up @@ -270,7 +270,7 @@ function show(io::IO, c::AbstractChar)
write(io, 0x27)
else # unprintable, well-formed, non-overlong Unicode
u = codepoint(c)
write(io, 0x27, 0x5c, c <= '\x7f' ? 0x78 : c <= '\uffff' ? 0x75 : 0x55)
write(io, 0x27, 0x5c, u <= 0x7f ? 0x78 : u <= 0xffff ? 0x75 : 0x55)
d = max(2, 8 - (leading_zeros(u) >> 2))
while 0 < d
write(io, hex_chars[((u >> ((d -= 1) << 2)) & 0xf) + 1])
Expand Down

0 comments on commit fc04d73

Please sign in to comment.