Skip to content

Commit

Permalink
add RoundingMode argument to Float32/64 ctors
Browse files Browse the repository at this point in the history
  • Loading branch information
simonbyrne authored and nolta committed Nov 4, 2014
1 parent 55fe2a9 commit 3fe2da2
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 8 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ Library improvements

* `String` has been renamed to `AbstractString`.

* Added an optional `RoundingMode` argument to the `Float32` and `Float64` constructors ([#8845])

Deprecated or removed
---------------------

Expand Down
5 changes: 5 additions & 0 deletions base/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ convert(::Type{Float16}, x::MathConst) = float16(float32(x))
convert{T<:Real}(::Type{Complex{T}}, x::MathConst) = convert(Complex{T}, convert(T,x))
convert{T<:Integer}(::Type{Rational{T}}, x::MathConst) = convert(Rational{T}, float64(x))

# Float32/Float64 constructor taking a MathConst and a RoundingMode.
# The body builds `T(big(c()), r())` and returns that value as the generated
# expression, so the rounded constant is computed when the staged method is
# generated rather than on every call.
# NOTE(review): `c()` and `r()` are invoked as constructors, which presumes the
# staged body receives the argument *types* rather than the values - confirm
# against the stagedfunction semantics of this Julia version.
stagedfunction call{T<:Union(Float32,Float64),s}(t::Type{T},c::MathConst{s},r::RoundingMode)
    rounded = T(big(c()),r())
    return :($rounded)
end

=={s}(::MathConst{s}, ::MathConst{s}) = true
==(::MathConst, ::MathConst) = false

Expand Down
13 changes: 11 additions & 2 deletions base/mpfr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import
realmin, realmax, get_rounding, set_rounding, maxintfloat, widen,
significand, frexp

import Base.Rounding: get_rounding_raw, set_rounding_raw

import Base.GMP: ClongMax, CulongMax, CdoubleMax

import Base.Math.lgamma_r
Expand Down Expand Up @@ -118,6 +120,11 @@ convert(::Type{Float64}, x::BigFloat) =
convert(::Type{Float32}, x::BigFloat) =
ccall((:mpfr_get_flt,:libmpfr), Float32, (Ptr{BigFloat},Int32), &x, ROUNDING_MODE[end])

# Float64 constructor from a BigFloat with an explicit rounding mode.
# `r` is translated with `to_mpfr` and handed to libmpfr's `mpfr_get_d`, so the
# mode given here is used instead of the one stored in ROUNDING_MODE.
function call(::Type{Float64}, x::BigFloat, r::RoundingMode)
    mode = to_mpfr(r)
    return ccall((:mpfr_get_d,:libmpfr), Float64, (Ptr{BigFloat},Int32), &x, mode)
end
# Float32 constructor from a BigFloat with an explicit rounding mode.
# `r` is translated with `to_mpfr` and handed to libmpfr's `mpfr_get_flt`, so
# the mode given here is used instead of the one stored in ROUNDING_MODE.
function call(::Type{Float32}, x::BigFloat, r::RoundingMode)
    mode = to_mpfr(r)
    return ccall((:mpfr_get_flt,:libmpfr), Float32, (Ptr{BigFloat},Int32), &x, mode)
end

convert(::Type{Integer}, x::BigFloat) = convert(BigInt, x)

promote_rule{T<:Real}(::Type{BigFloat}, ::Type{T}) = BigFloat
Expand Down Expand Up @@ -597,8 +604,10 @@ function from_mpfr(c::Integer)
RoundingMode(c)
end

# Raw accessors for the BigFloat rounding mode: read or write the integer kept
# in the last slot of ROUNDING_MODE, with no conversion to or from RoundingMode.
get_rounding_raw(::Type{BigFloat}) = ROUNDING_MODE[end]
set_rounding_raw(::Type{BigFloat},i::Integer) = ROUNDING_MODE[end] = i

# RoundingMode-level accessors, defined once on top of the raw ones:
# `from_mpfr` / `to_mpfr` translate between the stored integer and RoundingMode.
# (The earlier direct definitions with identical signatures were overwritten by
# these and have been dropped.)
get_rounding(::Type{BigFloat}) = from_mpfr(get_rounding_raw(BigFloat))
set_rounding(::Type{BigFloat},r::RoundingMode) = set_rounding_raw(BigFloat,to_mpfr(r))

function copysign(x::BigFloat, y::BigFloat)
z = BigFloat()
Expand Down
38 changes: 34 additions & 4 deletions base/rounding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,46 @@ function from_fenv(r::Integer)
end
end

# Raw accessors for the Float32/Float64 rounding mode: thin wrappers around the
# C library's `fesetround` / `fegetround`, passing the integer through as is.
# `T` only selects the method; the ccall itself does not depend on it.
set_rounding_raw{T<:Union(Float32,Float64)}(::Type{T},i::Integer) = ccall(:fesetround, Cint, (Cint,), i)
get_rounding_raw{T<:Union(Float32,Float64)}(::Type{T}) = ccall(:fegetround, Cint, ())

# RoundingMode-level accessors, defined once on top of the raw ones:
# `to_fenv` / `from_fenv` translate between RoundingMode and the C integer.
# (The earlier direct-ccall definitions with identical signatures were
# overwritten by these and have been dropped.)
set_rounding{T<:Union(Float32,Float64)}(::Type{T},r::RoundingMode) = set_rounding_raw(T,to_fenv(r))
get_rounding{T<:Union(Float32,Float64)}(::Type{T}) = from_fenv(get_rounding_raw(T))

# Call `f()` with the rounding mode for `T` set to `rounding`, then reinstate
# the mode that was in effect beforehand, whether `f` returns or throws.
# Returns the value of `f()`.
function with_rounding{T}(f::Function, ::Type{T}, rounding::RoundingMode)
    # Save the previous mode in its raw form only: it is restored exactly as it
    # was read, without a round trip through RoundingMode, and without saving
    # and restoring the same state twice.
    old_rounding_raw = get_rounding_raw(T)
    set_rounding(T,rounding)
    try
        return f()
    finally
        set_rounding_raw(T,old_rounding_raw)
    end
end


# Should be equivalent to:
# call(::Type{Float32},x::Float64,r::RoundingMode) = with_rounding(Float64,r) do
# convert(Float32,x)
# end
# but the explicit comparisons below are currently much quicker (~20x).
# They assume that the current rounding mode is RoundNearest.

# Float64 -> Float32 with ties-to-even rounding: this is simply the plain
# `convert`, with no adjustment afterwards.
function call(::Type{Float32},x::Float64,r::RoundingMode{:TiesToEven})
    return convert(Float32,x)
end

# Float64 -> Float32 rounding toward negative infinity.
# Convert first, then step down one Float32 if the converted value ended up
# above `x`; otherwise the converted value is returned unchanged.
function call(::Type{Float32},x::Float64,r::RoundingMode{:TowardNegative})
    nearest = convert(Float32,x)
    if nearest > x
        return prevfloat(nearest)
    end
    return nearest
end
# Float64 -> Float32 rounding toward positive infinity.
# Convert first, then step up one Float32 if the converted value ended up
# below `x`; otherwise the converted value is returned unchanged.
function call(::Type{Float32},x::Float64,r::RoundingMode{:TowardPositive})
    nearest = convert(Float32,x)
    if nearest < x
        return nextfloat(nearest)
    end
    return nearest
end
# Float64 -> Float32 rounding toward zero.
# For x > 0.0 the converted value is stepped down if it overshot `x`; for every
# other input (negative values, zeros, NaN) it is stepped up if it fell
# below `x`. A converted value that did not move away from zero is returned
# unchanged.
function call(::Type{Float32},x::Float64,r::RoundingMode{:TowardZero})
    nearest = convert(Float32,x)
    if x > 0.0
        return nearest > x ? prevfloat(nearest) : nearest
    end
    return nearest < x ? nextfloat(nearest) : nearest
end

Expand Down
64 changes: 62 additions & 2 deletions doc/stdlib/base.rst
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,39 @@ All Objects
With a single symbol argument, tests whether a global variable with that
name is defined in ``current_module()``.

.. function:: convert(type, x)
.. function:: convert(T, x)

Try to convert ``x`` to the given type. Conversion to a different numeric type will raise an ``InexactError`` if ``x`` cannot be represented exactly in the new type.
Convert ``x`` to a value of type ``T``.

If ``T`` is an ``Integer`` type, an ``InexactError`` will be raised if
``x`` is not representable by ``T``, for example if ``x`` is not
integer-valued, or is outside the range supported by ``T``.

.. doctest::

julia> convert(Int, 3.0)
3

julia> convert(Int, 3.5)
ERROR: InexactError()
in convert at int.jl:185

If ``T`` is a ``FloatingPoint`` or ``Rational`` type, then it will return
the closest value to ``x`` representable by ``T``.

.. doctest::

julia> x = 1/3
0.3333333333333333

julia> convert(Float32, x)
0.33333334f0

julia> convert(Rational{Int32}, x)
1//3

julia> convert(Rational{Int64}, x)
6004799503160661//18014398509481984

.. function:: promote(xs...)

Expand Down Expand Up @@ -3769,6 +3799,36 @@ Numbers

Test whether ``x`` or all its elements are numerically equal to some real number

.. function:: Float32(x [, mode::RoundingMode])

Create a ``Float32`` from ``x``. If ``x`` is not exactly representable as a
``Float32``, then ``mode`` determines how ``x`` is rounded.

.. doctest::

julia> Float32(1/3, RoundDown)
0.3333333f0

julia> Float32(1/3, RoundUp)
0.33333334f0

See ``get_rounding`` for available rounding modes.

.. function:: Float64(x [, mode::RoundingMode])

Create a ``Float64`` from ``x``. If ``x`` is not exactly representable as a
``Float64``, then ``mode`` determines how ``x`` is rounded.

.. doctest::

julia> Float64(pi, RoundDown)
3.141592653589793

julia> Float64(pi, RoundUp)
3.1415926535897936

See ``get_rounding`` for available rounding modes.

.. function:: BigInt(x)

Create an arbitrary precision integer. ``x`` may be an ``Int`` (or anything that can be converted to an ``Int``) or an ``AbstractString``.
Expand Down
36 changes: 36 additions & 0 deletions test/rounding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,39 @@ with_rounding(Float32,RoundDown) do
@test a32 - b32 === -c32
@test b32 - a32 === c32
end

# Float64 -> Float32 construction with an explicit rounding mode.
# Each directed result is compared against a plain `convert` executed under
# `with_rounding` for Float64, then the results are checked against each other.
for v in [sqrt(2),-1/3,nextfloat(1.0),prevfloat(1.0),nextfloat(-1.0),
          prevfloat(-1.0),nextfloat(0.0),prevfloat(0.0)]
    nearest = Float32(v,RoundNearest)
    @test nearest == convert(Float32,v)
    tozero = Float32(v,RoundToZero)
    @test tozero == with_rounding(()->convert(Float32,v), Float64, RoundToZero)
    down = Float32(v,RoundDown)
    @test down == with_rounding(()->convert(Float32,v), Float64, RoundDown)
    up = Float32(v,RoundUp)
    @test up == with_rounding(()->convert(Float32,v), Float64, RoundUp)

    # round-to-nearest must coincide with one of the two directed results
    @test nearest == down || nearest == up
    # toward zero is "down" for positive inputs and "up" otherwise
    @test v > 0 ? tozero == down : tozero == up
    # the two directed results are exactly one spacing apart
    @test up - down == eps(tozero)
end

# BigFloat / MathConst -> Float32 and Float64 construction with an explicit
# rounding mode. Each directed result is compared against a conversion run
# under `with_rounding` for BigFloat, then the results are checked against
# each other.
for T in [Float32,Float64]
    for v in [sqrt(big(2.0)),-big(1.0)/big(3.0),nextfloat(big(1.0)),
              prevfloat(big(1.0)),nextfloat(big(0.0)),prevfloat(big(0.0)),
              pi,e,eulergamma,catalan,golden,]
        pn = T(v,RoundNearest)
        @test pn == convert(T,v)
        # The reference conversions go through `big(v)` so that they really
        # are BigFloat -> T conversions and therefore follow the BigFloat
        # rounding mode set by `with_rounding`. For the BigFloat entries this
        # changes nothing; for the MathConst entries a direct `convert(T,v)`
        # is not guaranteed to consult the BigFloat rounding mode at all.
        pz = T(v,RoundToZero)
        @test pz == with_rounding(()->convert(T,big(v)), BigFloat, RoundToZero)
        pd = T(v,RoundDown)
        @test pd == with_rounding(()->convert(T,big(v)), BigFloat, RoundDown)
        pu = T(v,RoundUp)
        @test pu == with_rounding(()->convert(T,big(v)), BigFloat, RoundUp)

        # round-to-nearest must coincide with one of the two directed results
        @test pn == pd || pn == pu
        # toward zero is "down" for positive inputs and "up" otherwise
        @test v > 0 ? pz == pd : pz == pu
        # the two directed results are exactly one spacing apart
        @test pu - pd == eps(pz)
    end
end

0 comments on commit 3fe2da2

Please sign in to comment.