add RoundingMode argument to Float32/64 ctors

Closes JuliaLang#8845.
jiahao · Nov 4, 2014 · 3fe2da2 · 3fe2da2
1 parent 55fe2a9
commit 3fe2da2
Show file tree

Hide file tree

Showing 6 changed files with 150 additions and 8 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -101,6 +101,8 @@ Library improvements
 
  * `String` has been renamed to `AbstractString`.
 
+ * Added an optional `RoundingMode` argument to the `Float32` and `Float64` constructors ([#8845])
+
 Deprecated or removed
 ---------------------
 

diff --git a/base/constants.jl b/base/constants.jl
@@ -13,6 +13,11 @@ convert(::Type{Float16}, x::MathConst) = float16(float32(x))
 convert{T<:Real}(::Type{Complex{T}}, x::MathConst) = convert(Complex{T}, convert(T,x))
 convert{T<:Integer}(::Type{Rational{T}}, x::MathConst) = convert(Rational{T}, float64(x))
 
+stagedfunction call{T<:Union(Float32,Float64),s}(t::Type{T},c::MathConst{s},r::RoundingMode)
+ f = T(big(c()),r())
+ :($f)
+end
+
 =={s}(::MathConst{s}, ::MathConst{s}) = true
 ==(::MathConst, ::MathConst) = false
 

diff --git a/base/mpfr.jl b/base/mpfr.jl
@@ -20,6 +20,8 @@ import
  realmin, realmax, get_rounding, set_rounding, maxintfloat, widen,
  significand, frexp
 
+import Base.Rounding: get_rounding_raw, set_rounding_raw
+
 import Base.GMP: ClongMax, CulongMax, CdoubleMax
 
 import Base.Math.lgamma_r
@@ -118,6 +120,11 @@ convert(::Type{Float64}, x::BigFloat) =
 convert(::Type{Float32}, x::BigFloat) =
  ccall((:mpfr_get_flt,:libmpfr), Float32, (Ptr{BigFloat},Int32), &x, ROUNDING_MODE[end])
 
+call(::Type{Float64}, x::BigFloat, r::RoundingMode) =
+ ccall((:mpfr_get_d,:libmpfr), Float64, (Ptr{BigFloat},Int32), &x, to_mpfr(r))
+call(::Type{Float32}, x::BigFloat, r::RoundingMode) =
+ ccall((:mpfr_get_flt,:libmpfr), Float32, (Ptr{BigFloat},Int32), &x, to_mpfr(r))
+
 convert(::Type{Integer}, x::BigFloat) = convert(BigInt, x)
 
 promote_rule{T<:Real}(::Type{BigFloat}, ::Type{T}) = BigFloat
@@ -597,8 +604,10 @@ function from_mpfr(c::Integer)
  RoundingMode(c)
 end
 
-get_rounding(::Type{BigFloat}) = from_mpfr(ROUNDING_MODE[end])
-set_rounding(::Type{BigFloat},r::RoundingMode) = ROUNDING_MODE[end] = to_mpfr(r)
+get_rounding_raw(::Type{BigFloat}) = ROUNDING_MODE[end]
+set_rounding_raw(::Type{BigFloat},i::Integer) = ROUNDING_MODE[end] = i
+get_rounding(::Type{BigFloat}) = from_mpfr(get_rounding_raw(BigFloat))
+set_rounding(::Type{BigFloat},r::RoundingMode) = set_rounding_raw(BigFloat,to_mpfr(r))
 
 function copysign(x::BigFloat, y::BigFloat)
  z = BigFloat()

diff --git a/base/rounding.jl b/base/rounding.jl
@@ -34,16 +34,46 @@ function from_fenv(r::Integer)
  end
 end
 
-set_rounding{T<:Union(Float32,Float64)}(::Type{T},r::RoundingMode) = ccall(:fesetround, Cint, (Cint,), to_fenv(r))
-get_rounding{T<:Union(Float32,Float64)}(::Type{T}) = from_fenv(ccall(:fegetround, Cint, ()))
+set_rounding_raw{T<:Union(Float32,Float64)}(::Type{T},i::Integer) = ccall(:fesetround, Cint, (Cint,), i)
+get_rounding_raw{T<:Union(Float32,Float64)}(::Type{T}) = ccall(:fegetround, Cint, ())
+
+set_rounding{T<:Union(Float32,Float64)}(::Type{T},r::RoundingMode) = set_rounding_raw(T,to_fenv(r))
+get_rounding{T<:Union(Float32,Float64)}(::Type{T}) = from_fenv(get_rounding_raw(T))
 
 function with_rounding{T}(f::Function, ::Type{T}, rounding::RoundingMode)
- old_rounding = get_rounding(T)
+ old_rounding_raw = get_rounding_raw(T)
  set_rounding(T,rounding)
  try
  return f()
  finally
- set_rounding(T,old_rounding)
+ set_rounding_raw(T,old_rounding_raw)
+ end
+end
+
+
+# Should be equivalent to:
+# call(::Type{Float32},x::Float64,r::RoundingMode) = with_rounding(Float64,r) do
+# convert(Float32,x)
+# end
+# but the explicit checks below are currently quicker (~20x) than changing the rounding mode.
+# These methods assume that the current rounding mode is RoundToNearest.
+
+call(::Type{Float32},x::Float64,r::RoundingMode{:TiesToEven}) = convert(Float32,x)
+
+function call(::Type{Float32},x::Float64,r::RoundingMode{:TowardNegative})
+ y = convert(Float32,x)
+ y > x ? prevfloat(y) : y
+end
+function call(::Type{Float32},x::Float64,r::RoundingMode{:TowardPositive})
+ y = convert(Float32,x)
+ y < x ? nextfloat(y) : y
+end
+function call(::Type{Float32},x::Float64,r::RoundingMode{:TowardZero})
+ y = convert(Float32,x)
+ if x > 0.0
+ y > x ? prevfloat(y) : y
+ else
+ y < x ? nextfloat(y) : y
  end
 end
 

diff --git a/doc/stdlib/base.rst b/doc/stdlib/base.rst
@@ -233,9 +233,39 @@ All Objects
  With a single symbol argument, tests whether a global variable with that
  name is defined in ``current_module()``.
 
-.. function:: convert(type, x)
+.. function:: convert(T, x)
 
- Try to convert ``x`` to the given type. Conversion to a different numeric type will raise an ``InexactError`` if ``x`` cannot be represented exactly in the new type.
+ Convert ``x`` to a value of type ``T``.
+
+ If ``T`` is an ``Integer`` type, an ``InexactError`` will be raised if
+ ``x`` is not representable by ``T``, for example if ``x`` is not
+ integer-valued, or is outside the range supported by ``T``.
+
+ .. doctest::
+
+ julia> convert(Int, 3.0)
+ 3
+
+ julia> convert(Int, 3.5)
+ ERROR: InexactError()
+ in convert at int.jl:185
+
+ If ``T`` is a ``FloatingPoint`` or ``Rational`` type, then ``convert`` returns
+ the value representable by ``T`` that is closest to ``x``.
+
+ .. doctest::
+
+ julia> x = 1/3
+ 0.3333333333333333
+
+ julia> convert(Float32, x)
+ 0.33333334f0
+
+ julia> convert(Rational{Int32}, x)
+ 1//3
+
+ julia> convert(Rational{Int64}, x)
+ 6004799503160661//18014398509481984
 
 .. function:: promote(xs...)
 
@@ -3769,6 +3799,36 @@ Numbers
 
  Test whether ``x`` or all its elements are numerically equal to some real number
 
+.. function:: Float32(x [, mode::RoundingMode])
+
+ Create a ``Float32`` from ``x``. If ``x`` is not exactly representable, then
+ ``mode`` determines how ``x`` is rounded.
+
+ .. doctest::
+
+ julia> Float32(1/3, RoundDown)
+ 0.3333333f0
+
+ julia> Float32(1/3, RoundUp)
+ 0.33333334f0
+
+ See ``get_rounding`` for available rounding modes.
+
+.. function:: Float64(x [, mode::RoundingMode])
+
+ Create a ``Float64`` from ``x``. If ``x`` is not exactly representable, then
+ ``mode`` determines how ``x`` is rounded.
+
+ .. doctest::
+
+ julia> Float64(pi, RoundDown)
+ 3.141592653589793
+
+ julia> Float64(pi, RoundUp)
+ 3.1415926535897936
+
+ See ``get_rounding`` for available rounding modes.
+
 .. function:: BigInt(x)
 
  Create an arbitrary precision integer. ``x`` may be an ``Int`` (or anything that can be converted to an ``Int``) or an ``AbstractString``.

diff --git a/test/rounding.jl b/test/rounding.jl
@@ -87,3 +87,39 @@ with_rounding(Float32,RoundDown) do
  @test a32 - b32 === -c32
  @test b32 - a32 === c32
 end
+
+# convert with rounding
+for v = [sqrt(2),-1/3,nextfloat(1.0),prevfloat(1.0),nextfloat(-1.0),
+ prevfloat(-1.0),nextfloat(0.0),prevfloat(0.0)]
+ pn = Float32(v,RoundNearest)
+ @test pn == convert(Float32,v)
+ pz = Float32(v,RoundToZero)
+ @test pz == with_rounding(()->convert(Float32,v), Float64, RoundToZero)
+ pd = Float32(v,RoundDown)
+ @test pd == with_rounding(()->convert(Float32,v), Float64, RoundDown)
+ pu = Float32(v,RoundUp)
+ @test pu == with_rounding(()->convert(Float32,v), Float64, RoundUp)
+
+ @test pn == pd || pn == pu
+ @test v > 0 ? pz == pd : pz == pu
+ @test pu - pd == eps(pz)
+end
+
+for T in [Float32,Float64]
+ for v in [sqrt(big(2.0)),-big(1.0)/big(3.0),nextfloat(big(1.0)),
+ prevfloat(big(1.0)),nextfloat(big(0.0)),prevfloat(big(0.0)),
+ pi,e,eulergamma,catalan,golden,]
+ pn = T(v,RoundNearest)
+ @test pn == convert(T,v)
+ pz = T(v,RoundToZero)
+ @test pz == with_rounding(()->convert(T,v), BigFloat, RoundToZero)
+ pd = T(v,RoundDown)
+ @test pd == with_rounding(()->convert(T,v), BigFloat, RoundDown)
+ pu = T(v,RoundUp)
+ @test pu == with_rounding(()->convert(T,v), BigFloat, RoundUp)
+
+ @test pn == pd || pn == pu
+ @test v > 0 ? pz == pd : pz == pu
+ @test pu - pd == eps(pz)
+ end
+end