Fix overflows in quantile (#145)

The `a + γ*(b-a)` introduced by JuliaLang/julia#16572 has the advantage that it increases with `γ` even when `a` and `b` are very close, but it has the drawback that it is not robust to overflow. This is likely to happen in practice with small integer and floating point types. Conversely, the `(1-γ)*a + γ*b` which is currently used only for non-finite quantities is robust to overflow but may not always increase with `γ` as when `a` and `b` are very close or (more frequently) equal since precision loss can give a slightly smaller value for a larger `γ`. This can be problematic as it breaks an expected invariant. So keep using the `a + γ*(b-a)` formula when `a ≈ b`, in which case it's almost like returning either `a` or `b` but less arbitrary.
JuliaStats · Jul 29, 2023 · 35ca0a0 · 35ca0a0
1 parent bb7063d
commit 35ca0a0
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 1 deletion.
diff --git a/src/Statistics.jl b/src/Statistics.jl
@@ -1020,7 +1020,10 @@ end
  b = v[j + 1]
  end
 
- if isfinite(a) && isfinite(b)
+ # When a ≉ b, b-a may overflow
+ # When a ≈ b, (1-γ)*a + γ*b may not be increasing with γ due to rounding
+ # Call to float is to work around JuliaLang/julia#50380
+ if isfinite(a) && isfinite(b) && float(a) ≈ float(b)
  return a + γ*(b-a)
  else
  return (1-γ)*a + γ*b

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -753,6 +753,23 @@ let y = [0.40003674665581906, 0.4085630862624367, 0.41662034698690303, 0.4166203
  @test issorted(quantile(y, range(0.01, stop=0.99, length=17)))
 end
 
+@testset "issue #144: no overflow with quantile" begin
+ @test quantile(Float16[-9000, 100], 1.0) ≈ 100
+ @test quantile(Float16[-9000, 100], 0.999999999) ≈ 99.99999
+ @test quantile(Float32[-1e9, 100], 1) ≈ 100
+ @test quantile(Float32[-1e9, 100], 0.9999999999) ≈ 99.89999998
+ @test quantile(Float64[-1e20, 100], 1) ≈ 100
+ @test quantile(Float32[-1e15, -1e14, -1e13, -1e12, -1e11, -1e10, -1e9, 100], 1) ≈ 100
+ @test quantile(Int8[-68, 60], 0.5) ≈ -4
+ @test quantile(Int32[-1e9, 2e9], 1.0) ≈ 2.0e9
+ @test quantile(Int64[-5e18, -2e18, 9e18], 1.0) ≈ 9.0e18
+
+ # check that quantiles are increasing with a, b and p even in corner cases
+ @test issorted(quantile([1.0, 1.0, 1.0+eps(), 1.0+eps()], range(0, 1, length=100)))
+ @test issorted(quantile([1.0, 1.0+1eps(), 1.0+2eps(), 1.0+3eps()], range(0, 1, length=100)))
+ @test issorted(quantile([1.0, 1.0+2eps(), 1.0+4eps(), 1.0+6eps()], range(0, 1, length=100)))
+end
+
 @testset "variance of complex arrays (#13309)" begin
  z = rand(ComplexF64, 10)
  @test var(z) ≈ invoke(var, Tuple{Any}, z) ≈ cov(z) ≈ var(z,dims=1)[1] ≈ sum(abs2, z .- mean(z))/9