Skip to content

Commit

Permalink
fix spurious overflow for Float16(::Rational) (#52395)
Browse files Browse the repository at this point in the history
Fixes #52394.

Also fixes `Float32` for `UInt128`, since currently
`Float32((typemax(UInt128)-0x01) // typemax(UInt128))` gives `NaN32`.
  • Loading branch information
stevengj committed Feb 7, 2024
1 parent d765ad1 commit bead1d3
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 0 deletions.
8 changes: 8 additions & 0 deletions base/rational.jl
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,14 @@ function (::Type{T})(x::Rational{S}) where T<:AbstractFloat where S
P = promote_type(T,S)
convert(T, convert(P,x.num)/convert(P,x.den))::T
end
# Avoid spurious overflow (#52394): the generic Rational -> AbstractFloat
# method converts the numerator and denominator to the promoted type before
# dividing, and for these integer types that intermediate conversion can
# overflow Float16 even when the quotient itself is representable.
# Compute the quotient in Float32 first, then narrow the result.
# (Needed for UInt16 or larger; Int16 is included for consistency of accuracy.)
function Float16(x::Rational{<:Union{Int16,Int32,Int64,UInt16,UInt32,UInt64}})
    widened = Float32(x)
    return Float16(widened)
end
# 128-bit rationals take the detour through Float64 rather than Float32,
# because UInt128 overflows Float32; Int128 is included for consistency.
function Float16(x::Rational{<:Union{Int128,UInt128}})
    widened = Float64(x)
    return Float16(widened)
end
# UInt128 overflows Float32, so form the quotient in Float64 and only then
# narrow it to Float32; Int128 is included for consistency.
function Float32(x::Rational{<:Union{Int128,UInt128}})
    widened = Float64(x)
    return Float32(widened)
end

function Rational{T}(x::AbstractFloat) where T<:Integer
r = rationalize(T, x, tol=0)
Expand Down
5 changes: 5 additions & 0 deletions test/float16.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,11 @@ const minsubf16_32 = Float32(minsubf16)
# issues #33076
@test Float16(1f5) == Inf16

# issue #52394: converting a Rational whose numerator/denominator exceed the
# Float16 (or Float32) range must not overflow when the quotient is representable
let r = 10^8 // (10^9 + 1)
    @test Float16(r) == convert(Float16, r) == Float16(0.1)
end
let r = (typemax(UInt128) - 0x01) // typemax(UInt128)
    @test Float16(r) == Float16(1.0)
    @test Float32(r) == Float32(1.0)
end

@testset "conversion to Float16 from" begin
for T in (Float32, Float64, BigFloat)
@testset "conversion from $T" begin
Expand Down

0 comments on commit bead1d3

Please sign in to comment.