Better TwicePrecision utilities

halleysfifthinc · Aug 20, 2017 · 67a8605 · 67a8605
1 parent e701561
commit 67a8605
Show file tree

Hide file tree

Showing 7 changed files with 452 additions and 125 deletions.
diff --git a/base/float.jl b/base/float.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+const IEEEFloat = Union{Float16, Float32, Float64}
+
 ## floating point traits ##
 
 """
@@ -605,7 +607,7 @@ uabs(x::Signed) = unsigned(abs(x))
 The result of `n` iterative applications of `nextfloat` to `x` if `n >= 0`, or `-n`
 applications of `prevfloat` if `n < 0`.
 """
-function nextfloat(f::Union{Float16,Float32,Float64}, d::Integer)
+function nextfloat(f::IEEEFloat, d::Integer)
  F = typeof(f)
  fumax = reinterpret(Unsigned, F(Inf))
  U = typeof(fumax)
@@ -711,12 +713,12 @@ end
 
 Test whether a floating point number is subnormal.
 """
-function issubnormal end
+function issubnormal(x::T) where {T<:IEEEFloat}
+ y = reinterpret(Unsigned, x)
+ (y & exponent_mask(T) == 0) & (y & significand_mask(T) != 0)
+end
 
 @eval begin
- issubnormal(x::Float32) = (abs(x) < $(bitcast(Float32, 0x00800000))) & (x!=0)
- issubnormal(x::Float64) = (abs(x) < $(bitcast(Float64, 0x0010000000000000))) & (x!=0)
-
  typemin(::Type{Float16}) = $(bitcast(Float16, 0xfc00))
  typemax(::Type{Float16}) = $(Inf16)
  typemin(::Type{Float32}) = $(-Inf32)
@@ -864,32 +866,11 @@ exponent_half(::Type{Float16}) = 0x3800
 significand_mask(::Type{Float16}) = 0x03ff
 
 # integer size of float
-fpinttype(::Type{Float64}) = UInt64
-fpinttype(::Type{Float32}) = UInt32
-fpinttype(::Type{Float16}) = UInt16
-
-## TwicePrecision utilities
-# The numeric constants are half the number of bits in the mantissa
-for (F, T, n) in ((Float16, UInt16, 5), (Float32, UInt32, 12), (Float64, UInt64, 26))
- @eval begin
- function truncbits(x::$F, nb)
- @_inline_meta
- truncmask(x, typemax($T) << nb)
- end
- function truncmask(x::$F, mask)
- @_inline_meta
- reinterpret($F, mask & reinterpret($T, x))
- end
- function splitprec(x::$F)
- @_inline_meta
- hi = truncmask(x, typemax($T) << $n)
- hi, x-hi
- end
- end
-end
+uinttype(::Type{Float64}) = UInt64
+uinttype(::Type{Float32}) = UInt32
+uinttype(::Type{Float16}) = UInt16
 
-truncbits(x, nb) = x
-truncmask(x, mask) = x
+Base.iszero(x::Float16) = reinterpret(UInt16, x) & ~sign_mask(Float16) == 0x0000
 
 ## Array operations on floating point numbers ##
 

diff --git a/base/math.jl b/base/math.jl
@@ -21,11 +21,11 @@ import Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
  exp10, expm1, log1p
 
 using Base: sign_mask, exponent_mask, exponent_one,
- exponent_half, fpinttype, significand_mask
+ exponent_half, uinttype, significand_mask
 
 using Core.Intrinsics: sqrt_llvm
 
-const IEEEFloat = Union{Float16, Float32, Float64}
+using Base.IEEEFloat
 
 @noinline function throw_complex_domainerror(f, x)
  throw(DomainError(x, string("$f will only return a complex result if called with a ",
@@ -588,7 +588,7 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat
  return flipsign(T(Inf), x)
  end
  if k > 0 # normal case
- xu = (xu & ~exponent_mask(T)) | (rem(k, fpinttype(T)) << significand_bits(T))
+ xu = (xu & ~exponent_mask(T)) | (rem(k, uinttype(T)) << significand_bits(T))
  return reinterpret(T, xu)
  else # subnormal case
  if k <= -significand_bits(T) # underflow
@@ -598,7 +598,7 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat
  end
  k += significand_bits(T)
  z = T(2.0)^-significand_bits(T)
- xu = (xu & ~exponent_mask(T)) | (rem(k, fpinttype(T)) << significand_bits(T))
+ xu = (xu & ~exponent_mask(T)) | (rem(k, uinttype(T)) << significand_bits(T))
  return z*reinterpret(T, xu)
  end
 end

diff --git a/base/special/exp.jl b/base/special/exp.jl
@@ -117,11 +117,11 @@ function exp(x::T) where T<:Union{Float32,Float64}
  if k > -significand_bits(T)
  # multiply by 2.0 first to prevent overflow, which helps extends the range
  k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1)
- twopk = reinterpret(T, rem(exponent_bias(T) + k, fpinttype(T)) << significand_bits(T))
+ twopk = reinterpret(T, rem(exponent_bias(T) + k, uinttype(T)) << significand_bits(T))
  return y*twopk
  else
  # add significand_bits(T) + 1 to lift the range outside the subnormals
- twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, fpinttype(T)) << significand_bits(T))
+ twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, uinttype(T)) << significand_bits(T))
  return y * twopk * T(2.0)^(-significand_bits(T) - 1)
  end
  elseif xa < reinterpret(Unsigned, exp_small_thres(T)) # |x| < exp_small_thres

diff --git a/base/special/exp10.jl b/base/special/exp10.jl
@@ -119,11 +119,11 @@ function exp10(x::T) where T<:Union{Float32,Float64}
  if k > -significand_bits(T)
  # multiply by 2.0 first to prevent overflow, extending the range
  k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1)
- twopk = reinterpret(T, rem(exponent_bias(T) + k, fpinttype(T)) << significand_bits(T))
+ twopk = reinterpret(T, rem(exponent_bias(T) + k, uinttype(T)) << significand_bits(T))
  return y*twopk
  else
  # add significand_bits(T) + 1 to lift the range outside the subnormals
- twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, fpinttype(T)) << significand_bits(T))
+ twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, uinttype(T)) << significand_bits(T))
  return y * twopk * T(2.0)^(-significand_bits(T) - 1)
  end
  elseif xa < reinterpret(Unsigned, exp10_small_thres(T)) # |x| < exp10_small_thres

diff --git a/base/sysimg.jl b/base/sysimg.jl
@@ -84,7 +84,6 @@ include("tuple.jl")
 include("pair.jl")
 include("traits.jl")
 include("range.jl")
-include("twiceprecision.jl")
 include("expr.jl")
 include("error.jl")
 
@@ -135,6 +134,7 @@ include("hashing.jl")
 include("rounding.jl")
 importall .Rounding
 include("float.jl")
+include("twiceprecision.jl")
 include("complex.jl")
 include("rational.jl")
 include("multinverses.jl")