Skip to content

Commit

Permalink
Faster implementation of Float64(::BigInt), Float32(::BigInt) and Flo…
Browse files Browse the repository at this point in the history
…at16(::BigInt) (#31502)
  • Loading branch information
narendrakpatel authored and simonbyrne committed Apr 22, 2019
1 parent 87e93b2 commit 85daeab
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 17 deletions.
80 changes: 63 additions & 17 deletions base/gmp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -374,26 +374,72 @@ function (::Type{T})(n::BigInt, ::RoundingMode{:Up}) where T<:CdoubleMax
x < n ? nextfloat(x) : x
end

function (::Type{T})(n::BigInt, ::RoundingMode{:Nearest}) where T<:CdoubleMax
x = T(n,RoundToZero)
if maxintfloat(T) <= abs(x) < T(Inf)
r = n-BigInt(x)
h = eps(x)/2
if iseven(reinterpret(Unsigned,x)) # check if last bit is odd/even
if r < -h
return prevfloat(x)
elseif r > h
return nextfloat(x)
end
function Float64(x::BigInt, ::RoundingMode{:Nearest})
x == 0 && return 0.0
xsize = abs(x.size)
if xsize*BITS_PER_LIMB > 1024
z = Inf64
elseif xsize == 1
z = Float64(unsafe_load(x.d))
elseif Limb == UInt32 && xsize == 2
z = Float64((unsafe_load(x.d, 2) % UInt64) << BITS_PER_LIMB + unsafe_load(x.d))
else
y1 = unsafe_load(x.d, xsize) % UInt64
n = 64 - leading_zeros(y1)
# load first 54(1 + 52 bits of fraction + 1 for rounding)
y = y1 >> (n - (precision(Float64)+1))
if Limb == UInt64
y += n > precision(Float64) ? 0 : (unsafe_load(x.d, xsize-1) >> (10+n))
else
if r <= -h
return prevfloat(x)
elseif r >= h
return nextfloat(x)
end
y += (unsafe_load(x.d, xsize-1) % UInt64) >> (n-22)
y += n > (precision(Float64) - 32) ? 0 : (unsafe_load(x.d, xsize-2) >> (10+n))
end
y = (y + 1) >> 1 # round, ties up
y &= ~UInt64(trailing_zeros(x) == (n-54 + (xsize-1)*BITS_PER_LIMB)) # fix last bit to round to even
d = ((n+1021) % UInt64) << 52
z = reinterpret(Float64, d+y)
z = ldexp(z, (xsize-1)*BITS_PER_LIMB)
end
return flipsign(z, x.size)
end

function Float32(x::BigInt, ::RoundingMode{:Nearest})
x == 0 && return 0f0
xsize = abs(x.size)
if xsize*BITS_PER_LIMB > 128
z = Inf32
elseif xsize == 1
z = Float32(unsafe_load(x.d))
else
y1 = unsafe_load(x.d, xsize)
n = BITS_PER_LIMB - leading_zeros(y1)
# load first 25(1 + 23 bits of fraction + 1 for rounding)
y = (y1 >> (n - (precision(Float32)+1))) % UInt32
y += (n > precision(Float32) ? 0 : unsafe_load(x.d, xsize-1) >> (BITS_PER_LIMB - (25-n))) % UInt32
y = (y + one(UInt32)) >> 1 # round, ties up
y &= ~UInt32(trailing_zeros(x) == (n-25 + (xsize-1)*BITS_PER_LIMB)) # fix last bit to round to even
d = ((n+125) % UInt32) << 23
z = reinterpret(Float32, d+y)
z = ldexp(z, (xsize-1)*BITS_PER_LIMB)
end
return flipsign(z, x.size)
end

function Float16(x::BigInt, ::RoundingMode{:Nearest})
x == 0 && return Float16(0.0)
y1 = unsafe_load(x.d)
n = BITS_PER_LIMB - leading_zeros(y1)
if n > 16 || abs(x.size) > 1
z = Inf16
else
# load first 12(1 + 10 bits for fraction + 1 for rounding)
y = (y1 >> (n - (precision(Float16)+1))) % UInt16
y = (y + one(UInt16)) >> 1 # round, ties up
y &= ~UInt16(trailing_zeros(x) == (n-12)) # fix last bit to round to even
d = ((n+13) % UInt16) << 10
z = reinterpret(Float16, d+y)
end
x
return flipsign(z, x.size)
end

Float64(n::BigInt) = Float64(n, RoundNearest)
Expand Down
30 changes: 30 additions & 0 deletions test/bigint.jl
Original file line number Diff line number Diff line change
Expand Up @@ -440,3 +440,33 @@ end
@test big(Int64(-9223372036854775808)) == big"-9223372036854775808"
@test big(Int128(-170141183460469231731687303715884105728)) == big"-170141183460469231731687303715884105728"
end

@testset "conversion to Float" begin
x = big"2"^256 + big"2"^(256-53) + 1
@test Float64(x) == reinterpret(Float64, 0x4ff0000000000001)
@test Float64(-x) == -Float64(x)
x = (x >> 1) + 1
@test Float64(x) == reinterpret(Float64, 0x4fe0000000000001)
@test Float64(-x) == -Float64(x)

x = big"2"^64 + big"2"^(64-24) + 1
@test Float32(x) == reinterpret(Float32, 0x5f800001)
@test Float32(-x) == -Float32(x)
x = (x >> 1) + 1
@test Float32(x) == reinterpret(Float32, 0x5f000001)
@test Float32(-x) == -Float32(x)

x = big"2"^15 + big"2"^(15-11) + 1
@test Float16(x) == reinterpret(Float16, 0x7801)
@test Float16(-x) == -Float16(x)
x = (x >> 1) + 1
@test Float16(x) == reinterpret(Float16, 0x7401)
@test Float16(-x) == -Float16(x)

for T in (Float16, Float32, Float64)
n = exponent(floatmax(T))
@test T(big"2"^(n+1)) === T(Inf)
@test T(big"2"^(n+1) - big"2"^(n-precision(T))) === T(Inf)
@test T(big"2"^(n+1) - big"2"^(n-precision(T)) - 1) === floatmax(T)
end
end

0 comments on commit 85daeab

Please sign in to comment.