Faster implementation of Float64(::BigInt), Float32(::BigInt) and Flo…

…at16(::BigInt) (#31502)
JuliaLang · Apr 22, 2019 · 85daeab · 85daeab
1 parent 87e93b2
commit 85daeab
Show file tree

Hide file tree

Showing 2 changed files with 93 additions and 17 deletions.
diff --git a/base/gmp.jl b/base/gmp.jl
@@ -374,26 +374,72 @@ function (::Type{T})(n::BigInt, ::RoundingMode{:Up}) where T<:CdoubleMax
  x < n ? nextfloat(x) : x
 end
 
-function (::Type{T})(n::BigInt, ::RoundingMode{:Nearest}) where T<:CdoubleMax
- x = T(n,RoundToZero)
- if maxintfloat(T) <= abs(x) < T(Inf)
- r = n-BigInt(x)
- h = eps(x)/2
- if iseven(reinterpret(Unsigned,x)) # check if last bit is odd/even
- if r < -h
- return prevfloat(x)
- elseif r > h
- return nextfloat(x)
- end
+function Float64(x::BigInt, ::RoundingMode{:Nearest})
+ x == 0 && return 0.0
+ xsize = abs(x.size)
+ if xsize*BITS_PER_LIMB > 1024
+ z = Inf64
+ elseif xsize == 1
+ z = Float64(unsafe_load(x.d))
+ elseif Limb == UInt32 && xsize == 2
+ z = Float64((unsafe_load(x.d, 2) % UInt64) << BITS_PER_LIMB + unsafe_load(x.d))
+ else
+ y1 = unsafe_load(x.d, xsize) % UInt64
+ n = 64 - leading_zeros(y1)
+ # load first 54(1 + 52 bits of fraction + 1 for rounding)
+ y = y1 >> (n - (precision(Float64)+1))
+ if Limb == UInt64
+ y += n > precision(Float64) ? 0 : (unsafe_load(x.d, xsize-1) >> (10+n))
  else
- if r <= -h
- return prevfloat(x)
- elseif r >= h
- return nextfloat(x)
- end
+ y += (unsafe_load(x.d, xsize-1) % UInt64) >> (n-22)
+ y += n > (precision(Float64) - 32) ? 0 : (unsafe_load(x.d, xsize-2) >> (10+n))
  end
+ y = (y + 1) >> 1 # round, ties up
+ y &= ~UInt64(trailing_zeros(x) == (n-54 + (xsize-1)*BITS_PER_LIMB)) # fix last bit to round to even
+ d = ((n+1021) % UInt64) << 52
+ z = reinterpret(Float64, d+y)
+ z = ldexp(z, (xsize-1)*BITS_PER_LIMB)
+ end
+ return flipsign(z, x.size)
+end
+
+function Float32(x::BigInt, ::RoundingMode{:Nearest})
+ x == 0 && return 0f0
+ xsize = abs(x.size)
+ if xsize*BITS_PER_LIMB > 128
+ z = Inf32
+ elseif xsize == 1
+ z = Float32(unsafe_load(x.d))
+ else
+ y1 = unsafe_load(x.d, xsize)
+ n = BITS_PER_LIMB - leading_zeros(y1)
+ # load first 25(1 + 23 bits of fraction + 1 for rounding)
+ y = (y1 >> (n - (precision(Float32)+1))) % UInt32
+ y += (n > precision(Float32) ? 0 : unsafe_load(x.d, xsize-1) >> (BITS_PER_LIMB - (25-n))) % UInt32
+ y = (y + one(UInt32)) >> 1 # round, ties up
+ y &= ~UInt32(trailing_zeros(x) == (n-25 + (xsize-1)*BITS_PER_LIMB)) # fix last bit to round to even
+ d = ((n+125) % UInt32) << 23
+ z = reinterpret(Float32, d+y)
+ z = ldexp(z, (xsize-1)*BITS_PER_LIMB)
+ end
+ return flipsign(z, x.size)
+end
+
+function Float16(x::BigInt, ::RoundingMode{:Nearest})
+ x == 0 && return Float16(0.0)
+ y1 = unsafe_load(x.d)
+ n = BITS_PER_LIMB - leading_zeros(y1)
+ if n > 16 || abs(x.size) > 1
+ z = Inf16
+ else
+ # load first 12(1 + 10 bits for fraction + 1 for rounding)
+ y = (y1 >> (n - (precision(Float16)+1))) % UInt16
+ y = (y + one(UInt16)) >> 1 # round, ties up
+ y &= ~UInt16(trailing_zeros(x) == (n-12)) # fix last bit to round to even
+ d = ((n+13) % UInt16) << 10
+ z = reinterpret(Float16, d+y)
  end
- x
+ return flipsign(z, x.size)
 end
 
 Float64(n::BigInt) = Float64(n, RoundNearest)

diff --git a/test/bigint.jl b/test/bigint.jl
@@ -440,3 +440,33 @@ end
  @test big(Int64(-9223372036854775808)) == big"-9223372036854775808"
  @test big(Int128(-170141183460469231731687303715884105728)) == big"-170141183460469231731687303715884105728"
 end
+
+@testset "conversion to Float" begin
+ x = big"2"^256 + big"2"^(256-53) + 1
+ @test Float64(x) == reinterpret(Float64, 0x4ff0000000000001)
+ @test Float64(-x) == -Float64(x)
+ x = (x >> 1) + 1
+ @test Float64(x) == reinterpret(Float64, 0x4fe0000000000001)
+ @test Float64(-x) == -Float64(x)
+
+ x = big"2"^64 + big"2"^(64-24) + 1
+ @test Float32(x) == reinterpret(Float32, 0x5f800001)
+ @test Float32(-x) == -Float32(x)
+ x = (x >> 1) + 1
+ @test Float32(x) == reinterpret(Float32, 0x5f000001)
+ @test Float32(-x) == -Float32(x)
+
+ x = big"2"^15 + big"2"^(15-11) + 1
+ @test Float16(x) == reinterpret(Float16, 0x7801)
+ @test Float16(-x) == -Float16(x)
+ x = (x >> 1) + 1
+ @test Float16(x) == reinterpret(Float16, 0x7401)
+ @test Float16(-x) == -Float16(x)
+
+ for T in (Float16, Float32, Float64)
+ n = exponent(floatmax(T))
+ @test T(big"2"^(n+1)) === T(Inf)
+ @test T(big"2"^(n+1) - big"2"^(n-precision(T))) === T(Inf)
+ @test T(big"2"^(n+1) - big"2"^(n-precision(T)) - 1) === floatmax(T)
+ end
+end