fix nextfloat 0.0: set the implicit mantissa bit before shifting when the Float16 result is subnormal

JuliaLang · oscardssmith · Nov 4, 2021 · Oct 28, 2021 · Oct 29, 2021 · Oct 29, 2021
commit 41533ef17cde7e618327399446d5cb90d2b3acad
diff --git a/base/mpfr.jl b/base/mpfr.jl
@@ -344,7 +344,10 @@ function Float16(x::BigFloat) :: Float16
  if (resi&0x7fffffff) < 0x38800000 # if Float16(res) is subnormal
  #shift so that the mantissa lines up where it would for normal Float16
  shift = 113-((resi & 0x7f800000)>>23)
- shift<23 && (resi >>= shift)
+ if shift<23
+ resi |= 0x0080_0000 # set implicit bit
+ resi >>= shift
+ end
  end
  if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values
  # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer

diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
@@ -211,8 +211,10 @@ JL_DLLEXPORT uint16_t __truncdfhf2(double param)
  if ((resi&0x7fffffffu) < 0x38800000u){ // if Float16(res) is subnormal
  // shift so that the mantissa lines up where it would for normal Float16
  uint32_t shift = 113u-((resi & 0x7f800000u)>>23u);
- if (shift<23u)
+ if (shift<23u) {
+ resi |= 0x00800000; // set implicit bit
  resi >>= shift;
+ }
  }
  if ((resi & 0x1fffu) == 0x1000u) { // if we are halfway between 2 Float16 values
  memcpy(&resi, &res, sizeof(res));

diff --git a/test/float16.jl b/test/float16.jl
@@ -209,16 +209,15 @@ const minsubf16_32 = Float32(minsubf16)
  for i in 1:2^16
  f = reinterpret(Float16, UInt16(i-1))
  isfinite(f) || continue
- abs(f)<=eps(f) && continue
  if f < 0
  epsdown = T(eps(f))/2
  epsup = issubnormal(f) ? epsdown : T(eps(nextfloat(f)))/2
  else
  epsup = T(eps(f))/2
  epsdown = issubnormal(f) ? epsup : T(eps(prevfloat(f)))/2
  end
- @test isequal(f, Float16(nextfloat(T(f) - epsdown)))
- @test isequal(f, Float16(prevfloat(T(f) + epsup)))
+ @test isequal(f*(-1)^(f === Float16(0)), Float16(nextfloat(T(f) - epsdown)))
+ @test isequal(f*(-1)^(f === -Float16(0)), Float16(prevfloat(T(f) + epsup)))
  @test isequal(prevfloat(f), Float16(prevfloat(T(f) - epsdown)))
  @test isequal(nextfloat(f), Float16(nextfloat(T(f) + epsup)))
  end