deprecate unsafe_length for simply length (JuliaLang#40382)

Having `length` throw on integer overflow seems to be a fairly arbitrary place to check, since the user will often use the resulting value in further arithmetic afterwards, which is not checked. The separate `unsafe_length` function also adds awkward complexity to the API: it may be unclear which function to reach for, and there is no particular justification for why a particular usage is "safe". Finally, the additional checks and error cases inhibit optimization and hurt performance (and the checked implementation is not even entirely type-stable).
shirodkara · Jul 2, 2021 · 3eefaf0 · 3eefaf0
1 parent 4270d3b
commit 3eefaf0
Show file tree

Hide file tree

Showing 15 changed files with 308 additions and 140 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -39,6 +39,11 @@ New library features
 Standard library changes
 ------------------------
 
+* The `length` function on certain ranges of certain specific element types no longer checks for integer
+ overflow in most cases. The new function `checked_length` is now available, which will try to use checked
+ arithmetic to throw an error if the result may wrap around. Alternatively, use a package such as
+ SaferIntegers.jl when constructing the range. ([#40382])
+
 #### Package Manager
 
 #### LinearAlgebra

diff --git a/base/abstractarray.jl b/base/abstractarray.jl
@@ -116,9 +116,6 @@ axes1(A::AbstractArray{<:Any,0}) = OneTo(1)
 axes1(A::AbstractArray) = (@_inline_meta; axes(A)[1])
 axes1(iter) = oneto(length(iter))
 
-unsafe_indices(A) = axes(A)
-unsafe_indices(r::AbstractRange) = (oneto(unsafe_length(r)),) # Ranges use checked_sub for size
-
 """
  keys(a::AbstractArray)
 
@@ -580,14 +577,14 @@ end
 function trailingsize(inds::Indices, n)
  s = 1
  for i=n:length(inds)
- s *= unsafe_length(inds[i])
+ s *= length(inds[i])
  end
  return s
 end
 # This version is type-stable even if inds is heterogeneous
 function trailingsize(inds::Indices)
  @_inline_meta
- prod(map(unsafe_length, inds))
+ prod(map(length, inds))
 end
 
 ## Bounds checking ##
@@ -688,7 +685,7 @@ function checkbounds_indices(::Type{Bool}, ::Tuple{}, I::Tuple)
  @_inline_meta
  checkindex(Bool, OneTo(1), I[1])::Bool & checkbounds_indices(Bool, (), tail(I))
 end
-checkbounds_indices(::Type{Bool}, IA::Tuple, ::Tuple{}) = (@_inline_meta; all(x->unsafe_length(x)==1, IA))
+checkbounds_indices(::Type{Bool}, IA::Tuple, ::Tuple{}) = (@_inline_meta; all(x->length(x)==1, IA))
 checkbounds_indices(::Type{Bool}, ::Tuple{}, ::Tuple{}) = true
 
 throw_boundserror(A, I) = (@_noinline_meta; throw(BoundsError(A, I)))
@@ -2499,8 +2496,8 @@ function _sub2ind_recurse(inds, L, ind, i::Integer, I::Integer...)
 end
 
 nextL(L, l::Integer) = L*l
-nextL(L, r::AbstractUnitRange) = L*unsafe_length(r)
-nextL(L, r::Slice) = L*unsafe_length(r.indices)
+nextL(L, r::AbstractUnitRange) = L*length(r)
+nextL(L, r::Slice) = L*length(r.indices)
 offsetin(i, l::Integer) = i-1
 offsetin(i, r::AbstractUnitRange) = i-first(r)
 
@@ -2526,7 +2523,7 @@ end
 _lookup(ind, d::Integer) = ind+1
 _lookup(ind, r::AbstractUnitRange) = ind+first(r)
 _div(ind, d::Integer) = div(ind, d), 1, d
-_div(ind, r::AbstractUnitRange) = (d = unsafe_length(r); (div(ind, d), first(r), d))
+_div(ind, r::AbstractUnitRange) = (d = length(r); (div(ind, d), first(r), d))
 
 # Vectorized forms
 function _sub2ind(inds::Indices{1}, I1::AbstractVector{T}, I::AbstractVector{T}...) where T<:Integer

diff --git a/base/broadcast.jl b/base/broadcast.jl
@@ -566,7 +566,7 @@ an `Int`.
 """
 Base.@propagate_inbounds newindex(arg, I::CartesianIndex) = CartesianIndex(_newindex(axes(arg), I.I))
 Base.@propagate_inbounds newindex(arg, I::Integer) = CartesianIndex(_newindex(axes(arg), (I,)))
-Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple) = (ifelse(Base.unsafe_length(ax[1])==1, ax[1][1], I[1]), _newindex(tail(ax), tail(I))...)
+Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple) = (ifelse(length(ax[1]) == 1, ax[1][1], I[1]), _newindex(tail(ax), tail(I))...)
 Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple) = ()
 Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple{}) = (ax[1][1], _newindex(tail(ax), ())...)
 Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple{}) = ()

diff --git a/base/checked.jl b/base/checked.jl
@@ -6,14 +6,14 @@ module Checked
 
 export checked_neg, checked_abs, checked_add, checked_sub, checked_mul,
  checked_div, checked_rem, checked_fld, checked_mod, checked_cld,
- add_with_overflow, sub_with_overflow, mul_with_overflow
+ checked_length, add_with_overflow, sub_with_overflow, mul_with_overflow
 
 import Core.Intrinsics:
  checked_sadd_int, checked_ssub_int, checked_smul_int, checked_sdiv_int,
  checked_srem_int,
  checked_uadd_int, checked_usub_int, checked_umul_int, checked_udiv_int,
  checked_urem_int
-import ..no_op_err, ..@_inline_meta, ..@_noinline_meta
+import ..no_op_err, ..@_inline_meta, ..@_noinline_meta, ..checked_length
 
 # define promotion behavior for checked operations
 checked_add(x::Integer, y::Integer) = checked_add(promote(x,y)...)
@@ -349,4 +349,12 @@ The overflow protection may impose a perceptible performance penalty.
 """
 checked_cld(x::T, y::T) where {T<:Integer} = cld(x, y) # Base.cld already checks
 
+"""
+ Base.checked_length(r)
+
+Calculates `length(r)`, but may check for overflow errors where applicable when
+the result doesn't fit into `Union{Integer(eltype(r)),Int}`.
+"""
+checked_length(r) = length(r) # for most things, length doesn't error
+
 end
diff --git a/base/deprecated.jl b/base/deprecated.jl
@@ -240,6 +240,9 @@ end
 @deprecate cat_shape(dims, shape::Tuple{}, shapes::Tuple...) cat_shape(dims, shapes) false
 cat_shape(dims, shape::Tuple{}) = () # make sure `cat_shape(dims, ())` do not recursively calls itself
 
+@deprecate unsafe_indices(A) axes(A) false
+@deprecate unsafe_length(r) length(r) false
+
 # END 1.6 deprecations
 
 # BEGIN 1.7 deprecations

diff --git a/base/indices.jl b/base/indices.jl
@@ -352,17 +352,14 @@ struct Slice{T<:AbstractUnitRange} <: AbstractUnitRange{Int}
 end
 Slice(S::Slice) = S
 axes(S::Slice) = (IdentityUnitRange(S.indices),)
-unsafe_indices(S::Slice) = (IdentityUnitRange(S.indices),)
 axes1(S::Slice) = IdentityUnitRange(S.indices)
 axes(S::Slice{<:OneTo}) = (S.indices,)
-unsafe_indices(S::Slice{<:OneTo}) = (S.indices,)
 axes1(S::Slice{<:OneTo}) = S.indices
 
 first(S::Slice) = first(S.indices)
 last(S::Slice) = last(S.indices)
 size(S::Slice) = (length(S.indices),)
 length(S::Slice) = length(S.indices)
-unsafe_length(S::Slice) = unsafe_length(S.indices)
 getindex(S::Slice, i::Int) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
 getindex(S::Slice, i::AbstractUnitRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
 getindex(S::Slice, i::StepRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
@@ -383,17 +380,14 @@ end
 IdentityUnitRange(S::IdentityUnitRange) = S
 # IdentityUnitRanges are offset and thus have offset axes, so they are their own axes
 axes(S::IdentityUnitRange) = (S,)
-unsafe_indices(S::IdentityUnitRange) = (S,)
 axes1(S::IdentityUnitRange) = S
 axes(S::IdentityUnitRange{<:OneTo}) = (S.indices,)
-unsafe_indices(S::IdentityUnitRange{<:OneTo}) = (S.indices,)
 axes1(S::IdentityUnitRange{<:OneTo}) = S.indices
 
 first(S::IdentityUnitRange) = first(S.indices)
 last(S::IdentityUnitRange) = last(S.indices)
 size(S::IdentityUnitRange) = (length(S.indices),)
 length(S::IdentityUnitRange) = length(S.indices)
-unsafe_length(S::IdentityUnitRange) = unsafe_length(S.indices)
 getindex(S::IdentityUnitRange, i::Int) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
 getindex(S::IdentityUnitRange, i::AbstractUnitRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
 getindex(S::IdentityUnitRange, i::StepRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
@@ -479,7 +473,7 @@ convert(::Type{LinearIndices{N,R}}, inds::LinearIndices{N}) where {N,R} =
 # AbstractArray implementation
 IndexStyle(::Type{<:LinearIndices}) = IndexLinear()
 axes(iter::LinearIndices) = map(axes1, iter.indices)
-size(iter::LinearIndices) = map(unsafe_length, iter.indices)
+size(iter::LinearIndices) = map(length, iter.indices)
 function getindex(iter::LinearIndices, i::Int)
  @_inline_meta
  @boundscheck checkbounds(iter, i)

diff --git a/base/multidimensional.jl b/base/multidimensional.jl
@@ -849,7 +849,7 @@ function _unsafe_getindex(::IndexStyle, A::AbstractArray, I::Vararg{Union{Real,
  # This is specifically not inlined to prevent excessive allocations in type unstable code
  shape = index_shape(I...)
  dest = similar(A, shape)
- map(unsafe_length, axes(dest)) == map(unsafe_length, shape) || throw_checksize_error(dest, shape)
+ map(length, axes(dest)) == map(length, shape) || throw_checksize_error(dest, shape)
  _unsafe_getindex!(dest, A, I...) # usually a generated function, don't allow it to impact inference result
  return dest
 end

diff --git a/base/range.jl b/base/range.jl
@@ -585,9 +585,11 @@ end
 
 ## interface implementations
 
+length(r::AbstractRange) = error("length implementation missing") # catch mistakes
 size(r::AbstractRange) = (length(r),)
 
 isempty(r::StepRange) =
+ # steprange_last_empty(r.start, r.step, r.stop) == r.stop
  (r.start != r.stop) & ((r.step > zero(r.step)) != (r.stop > r.start))
 isempty(r::AbstractUnitRange) = first(r) > last(r)
 isempty(r::StepRangeLen) = length(r) == 0
@@ -614,68 +616,135 @@ julia> step(range(2.5, stop=10.9, length=85))
 ```
 """
 step(r::StepRange) = r.step
-step(r::AbstractUnitRange{T}) where{T} = oneunit(T) - zero(T)
+step(r::AbstractUnitRange{T}) where {T} = oneunit(T) - zero(T)
 step(r::StepRangeLen) = r.step
 step(r::StepRangeLen{T}) where {T<:AbstractFloat} = T(r.step)
 step(r::LinRange) = (last(r)-first(r))/r.lendiv
 
 step_hp(r::StepRangeLen) = r.step
 step_hp(r::AbstractRange) = step(r)
 
-unsafe_length(r::AbstractRange) = length(r) # generic fallback
-
-function unsafe_length(r::StepRange)
- n = Integer(div((r.stop - r.start) + r.step, r.step))
- isempty(r) ? zero(n) : n
-end
-length(r::StepRange) = unsafe_length(r)
-unsafe_length(r::AbstractUnitRange) = Integer(last(r) - first(r) + step(r))
-unsafe_length(r::OneTo) = Integer(r.stop - zero(r.stop))
-length(r::AbstractUnitRange) = unsafe_length(r)
-length(r::OneTo) = unsafe_length(r)
-length(r::StepRangeLen) = r.len
-length(r::LinRange) = r.len
+axes(r::AbstractRange) = (oneto(length(r)),)
 
 # Needed to fold the `firstindex` call in SimdLoop.simd_index
 firstindex(::UnitRange) = 1
 firstindex(::StepRange) = 1
 firstindex(::LinRange) = 1
 
-function length(r::StepRange{T}) where T<:Union{Int,UInt,Int64,UInt64,Int128,UInt128}
- isempty(r) && return zero(T)
- if r.step > 1
- return checked_add(convert(T, div(unsigned(r.stop - r.start), r.step)), one(T))
- elseif r.step < -1
- return checked_add(convert(T, div(unsigned(r.start - r.stop), -r.step)), one(T))
- elseif r.step > 0
- return checked_add(div(checked_sub(r.stop, r.start), r.step), one(T))
+# n.b. checked_length for these is defined iff checked_add and checked_sub are
+# defined between the relevant types
+function checked_length(r::OrdinalRange{T}) where T
+ s = step(r)
+ # s != 0, by construction, but avoids the division error later
+ start = first(r)
+ if s == zero(s) || isempty(r)
+ return Integer(start - start + zero(s))
+ end
+ stop = last(r)
+ if isless(s, zero(s))
+ diff = checked_sub(start, stop)
+ s = -s
  else
- return checked_add(div(checked_sub(r.start, r.stop), -r.step), one(T))
+ diff = checked_sub(stop, start)
  end
+ a = Integer(div(diff, s))
+ return checked_add(a, one(a))
 end
 
-function length(r::AbstractUnitRange{T}) where T<:Union{Int,Int64,Int128}
+function checked_length(r::AbstractUnitRange{T}) where T
+ # compiler optimization: remove dead cases from above
+ if isempty(r)
+ return Integer(first(r) - first(r))
+ end
+ a = Integer(checked_add(checked_sub(last(r), first(r))))
+ return checked_add(a, one(a))
+end
+
+function length(r::OrdinalRange{T}) where T
+ s = step(r)
+ # s != 0, by construction, but avoids the division error later
+ start = first(r)
+ if s == zero(s) || isempty(r)
+ return Integer(start - start + zero(s))
+ end
+ stop = last(r)
+ if isless(s, zero(s))
+ diff = start - stop
+ s = -s
+ else
+ diff = stop - start
+ end
+ a = Integer(div(diff, s))
+ return a + one(a)
+end
+
+
+function length(r::AbstractUnitRange{T}) where T
  @_inline_meta
- checked_add(checked_sub(last(r), first(r)), one(T))
+ a = Integer(last(r) - first(r)) # even when isempty, by construction (with overflow)
+ return a + one(a)
 end
-length(r::OneTo{T}) where {T<:Union{Int,Int64}} = T(r.stop)
 
-length(r::AbstractUnitRange{T}) where {T<:Union{UInt,UInt64,UInt128}} =
- r.stop < r.start ? zero(T) : checked_add(last(r) - first(r), one(T))
+length(r::OneTo) = Integer(r.stop - zero(r.stop))
+length(r::StepRangeLen) = r.len
+length(r::LinRange) = r.len
 
-# some special cases to favor default Int type
-let smallint = (Int === Int64 ?
- Union{Int8,UInt8,Int16,UInt16,Int32,UInt32} :
- Union{Int8,UInt8,Int16,UInt16})
- global length
-
- function length(r::StepRange{<:smallint})
- isempty(r) && return Int(0)
- div(Int(r.stop)+Int(r.step) - Int(r.start), Int(r.step))
+let bigints = Union{Int, UInt, Int64, UInt64, Int128, UInt128}
+ global length, checked_length
+ # compile optimization for types T for which promote_type(T, Int) == T
+ length(r::OneTo{T}) where {T<:bigints} = r.stop
+ # slightly more accurate length and checked_length in extreme cases
+ # (near typemax) for types with known `unsigned` functions
+ function length(r::OrdinalRange{T}) where T<:bigints
+ s = step(r)
+ s == zero(s) && return zero(T) # unreachable, by construction, but avoids the error case here later
+ isempty(r) && return zero(T)
+ diff = last(r) - first(r)
+ # if |s| > 1, diff might have overflowed, but unsigned(diff)÷s should
+ # therefore still be valid (if the result is representable at all)
+ # n.b. !(s isa T)
+ if s isa Unsigned || -1 <= s <= 1 || s == -s
+ a = div(diff, s)
+ elseif s < 0
+ a = div(unsigned(-diff), -s) % typeof(diff)
+ else
+ a = div(unsigned(diff), s) % typeof(diff)
+ end
+ return Integer(a) + one(a)
+ end
+ function checked_length(r::OrdinalRange{T}) where T<:bigints
+ s = step(r)
+ s == zero(s) && return zero(T) # unreachable, by construction, but avoids the error case here later
+ isempty(r) && return zero(T)
+ stop, start = last(r), first(r)
+ # n.b. !(s isa T)
+ if s > 1
+ diff = stop - start
+ a = convert(T, div(unsigned(diff), s))
+ elseif s < -1
+ diff = start - stop
+ a = convert(T, div(unsigned(diff), -s))
+ elseif s > 0
+ a = div(checked_sub(stop, start), s)
+ else
+ a = div(checked_sub(start, stop), -s)
+ end
+ return checked_add(a, one(a))
  end
+end
 
- length(r::AbstractUnitRange{<:smallint}) = Int(last(r)) - Int(first(r)) + 1
- length(r::OneTo{<:smallint}) = Int(r.stop)
+# some special cases to favor default Int type
+let smallints = (Int === Int64 ?
+ Union{Int8, UInt8, Int16, UInt16, Int32, UInt32} :
+ Union{Int8, UInt8, Int16, UInt16})
+ global length, checked_length
+ # n.b. !(step isa T)
+ length(r::OrdinalRange{<:smallints}) = div(Int(last(r)) - Int(first(r)), step(r)) + 1
+ length(r::AbstractUnitRange{<:smallints}) = Int(last(r)) - Int(first(r)) + 1
+ length(r::OneTo{<:smallints}) = Int(r.stop)
+ checked_length(r::OrdinalRange{<:smallints}) = length(r)
+ checked_length(r::AbstractUnitRange{<:smallints}) = length(r)
+ checked_length(r::OneTo{<:smallints}) = length(r)
 end
 
 first(r::OrdinalRange{T}) where {T} = convert(T, r.start)

diff --git a/base/subarray.jl b/base/subarray.jl
@@ -60,7 +60,7 @@ viewindexing(I::Tuple{Vararg{Any}}) = IndexCartesian()
 viewindexing(I::Tuple{AbstractArray, Vararg{Any}}) = IndexCartesian()
 
 # Simple utilities
-size(V::SubArray) = (@_inline_meta; map(unsafe_length, axes(V)))
+size(V::SubArray) = (@_inline_meta; map(length, axes(V)))
 
 similar(V::SubArray, T::Type, dims::Dims) = similar(V.parent, T, dims)
 
@@ -362,7 +362,7 @@ compute_stride1(parent::AbstractArray, I::NTuple{N,Any}) where {N} =
 compute_stride1(s, inds, I::Tuple{}) = s
 compute_stride1(s, inds, I::Tuple{Vararg{ScalarIndex}}) = s
 compute_stride1(s, inds, I::Tuple{ScalarIndex, Vararg{Any}}) =
- (@_inline_meta; compute_stride1(s*unsafe_length(inds[1]), tail(inds), tail(I)))
+ (@_inline_meta; compute_stride1(s*length(inds[1]), tail(inds), tail(I)))
 compute_stride1(s, inds, I::Tuple{AbstractRange, Vararg{Any}}) = s*step(I[1])
 compute_stride1(s, inds, I::Tuple{Slice, Vararg{Any}}) = s
 compute_stride1(s, inds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError("invalid strided index type $(typeof(I[1]))"))
@@ -407,12 +407,12 @@ end
 function compute_linindex(f, s, IP::Tuple, I::Tuple{ScalarIndex, Vararg{Any}})
  @_inline_meta
  Δi = I[1]-first(IP[1])
- compute_linindex(f + Δi*s, s*unsafe_length(IP[1]), tail(IP), tail(I))
+ compute_linindex(f + Δi*s, s*length(IP[1]), tail(IP), tail(I))
 end
 function compute_linindex(f, s, IP::Tuple, I::Tuple{Any, Vararg{Any}})
  @_inline_meta
  Δi = first(I[1])-first(IP[1])
- compute_linindex(f + Δi*s, s*unsafe_length(IP[1]), tail(IP), tail(I))
+ compute_linindex(f + Δi*s, s*length(IP[1]), tail(IP), tail(I))
 end
 compute_linindex(f, s, IP::Tuple, I::Tuple{}) = f
 
@@ -447,5 +447,5 @@ _indices_sub(::Real, I...) = (@_inline_meta; _indices_sub(I...))
 _indices_sub() = ()
 function _indices_sub(i1::AbstractArray, I...)
  @_inline_meta
- (unsafe_indices(i1)..., _indices_sub(I...)...)
+ (axes(i1)..., _indices_sub(I...)...)
 end