Skip to content

Commit

Permalink
Better TwicePrecision utilities
Browse files Browse the repository at this point in the history
  • Loading branch information
timholy committed Aug 20, 2017
1 parent e701561 commit 67a8605
Show file tree
Hide file tree
Showing 7 changed files with 452 additions and 125 deletions.
41 changes: 11 additions & 30 deletions base/float.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

const IEEEFloat = Union{Float16, Float32, Float64}

## floating point traits ##

"""
Expand Down Expand Up @@ -605,7 +607,7 @@ uabs(x::Signed) = unsigned(abs(x))
The result of `n` iterative applications of `nextfloat` to `x` if `n >= 0`, or `-n`
applications of `prevfloat` if `n < 0`.
"""
function nextfloat(f::Union{Float16,Float32,Float64}, d::Integer)
function nextfloat(f::IEEEFloat, d::Integer)
F = typeof(f)
fumax = reinterpret(Unsigned, F(Inf))
U = typeof(fumax)
Expand Down Expand Up @@ -711,12 +713,12 @@ end
Test whether a floating point number is subnormal.
"""
function issubnormal end
function issubnormal(x::T) where {T<:IEEEFloat}
y = reinterpret(Unsigned, x)
(y & exponent_mask(T) == 0) & (y & significand_mask(T) != 0)
end

@eval begin
issubnormal(x::Float32) = (abs(x) < $(bitcast(Float32, 0x00800000))) & (x!=0)
issubnormal(x::Float64) = (abs(x) < $(bitcast(Float64, 0x0010000000000000))) & (x!=0)

typemin(::Type{Float16}) = $(bitcast(Float16, 0xfc00))
typemax(::Type{Float16}) = $(Inf16)
typemin(::Type{Float32}) = $(-Inf32)
Expand Down Expand Up @@ -864,32 +866,11 @@ exponent_half(::Type{Float16}) = 0x3800
significand_mask(::Type{Float16}) = 0x03ff

# integer size of float
fpinttype(::Type{Float64}) = UInt64
fpinttype(::Type{Float32}) = UInt32
fpinttype(::Type{Float16}) = UInt16

## TwicePrecision utilities
# The numeric constants are half the number of bits in the mantissa
for (F, T, n) in ((Float16, UInt16, 5), (Float32, UInt32, 12), (Float64, UInt64, 26))
@eval begin
function truncbits(x::$F, nb)
@_inline_meta
truncmask(x, typemax($T) << nb)
end
function truncmask(x::$F, mask)
@_inline_meta
reinterpret($F, mask & reinterpret($T, x))
end
function splitprec(x::$F)
@_inline_meta
hi = truncmask(x, typemax($T) << $n)
hi, x-hi
end
end
end
uinttype(::Type{Float64}) = UInt64
uinttype(::Type{Float32}) = UInt32
uinttype(::Type{Float16}) = UInt16

truncbits(x, nb) = x
truncmask(x, mask) = x
Base.iszero(x::Float16) = reinterpret(UInt16, x) & ~sign_mask(Float16) == 0x0000

## Array operations on floating point numbers ##

Expand Down
8 changes: 4 additions & 4 deletions base/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ import Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
exp10, expm1, log1p

using Base: sign_mask, exponent_mask, exponent_one,
exponent_half, fpinttype, significand_mask
exponent_half, uinttype, significand_mask

using Core.Intrinsics: sqrt_llvm

const IEEEFloat = Union{Float16, Float32, Float64}
using Base.IEEEFloat

@noinline function throw_complex_domainerror(f, x)
throw(DomainError(x, string("$f will only return a complex result if called with a ",
Expand Down Expand Up @@ -588,7 +588,7 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat
return flipsign(T(Inf), x)
end
if k > 0 # normal case
xu = (xu & ~exponent_mask(T)) | (rem(k, fpinttype(T)) << significand_bits(T))
xu = (xu & ~exponent_mask(T)) | (rem(k, uinttype(T)) << significand_bits(T))
return reinterpret(T, xu)
else # subnormal case
if k <= -significand_bits(T) # underflow
Expand All @@ -598,7 +598,7 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat
end
k += significand_bits(T)
z = T(2.0)^-significand_bits(T)
xu = (xu & ~exponent_mask(T)) | (rem(k, fpinttype(T)) << significand_bits(T))
xu = (xu & ~exponent_mask(T)) | (rem(k, uinttype(T)) << significand_bits(T))
return z*reinterpret(T, xu)
end
end
Expand Down
4 changes: 2 additions & 2 deletions base/special/exp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,11 @@ function exp(x::T) where T<:Union{Float32,Float64}
if k > -significand_bits(T)
# multiply by 2.0 first to prevent overflow, which helps extends the range
k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1)
twopk = reinterpret(T, rem(exponent_bias(T) + k, fpinttype(T)) << significand_bits(T))
twopk = reinterpret(T, rem(exponent_bias(T) + k, uinttype(T)) << significand_bits(T))
return y*twopk
else
# add significand_bits(T) + 1 to lift the range outside the subnormals
twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, fpinttype(T)) << significand_bits(T))
twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, uinttype(T)) << significand_bits(T))
return y * twopk * T(2.0)^(-significand_bits(T) - 1)
end
elseif xa < reinterpret(Unsigned, exp_small_thres(T)) # |x| < exp_small_thres
Expand Down
4 changes: 2 additions & 2 deletions base/special/exp10.jl
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,11 @@ function exp10(x::T) where T<:Union{Float32,Float64}
if k > -significand_bits(T)
# multiply by 2.0 first to prevent overflow, extending the range
k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1)
twopk = reinterpret(T, rem(exponent_bias(T) + k, fpinttype(T)) << significand_bits(T))
twopk = reinterpret(T, rem(exponent_bias(T) + k, uinttype(T)) << significand_bits(T))
return y*twopk
else
# add significand_bits(T) + 1 to lift the range outside the subnormals
twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, fpinttype(T)) << significand_bits(T))
twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, uinttype(T)) << significand_bits(T))
return y * twopk * T(2.0)^(-significand_bits(T) - 1)
end
elseif xa < reinterpret(Unsigned, exp10_small_thres(T)) # |x| < exp10_small_thres
Expand Down
2 changes: 1 addition & 1 deletion base/sysimg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ include("tuple.jl")
include("pair.jl")
include("traits.jl")
include("range.jl")
include("twiceprecision.jl")
include("expr.jl")
include("error.jl")

Expand Down Expand Up @@ -135,6 +134,7 @@ include("hashing.jl")
include("rounding.jl")
importall .Rounding
include("float.jl")
include("twiceprecision.jl")
include("complex.jl")
include("rational.jl")
include("multinverses.jl")
Expand Down
Loading

0 comments on commit 67a8605

Please sign in to comment.