Merge pull request #22907 from JuliaLang/jb/keyvalue

make indexable collection API more uniform (keys, values, pairs)
JuliaLang · Sep 5, 2017 · 54b99d0 · 54b99d0
2 parents ac37603 + ef2fd5f
commit 54b99d0
Show file tree

Hide file tree

Showing 21 changed files with 217 additions and 153 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -194,6 +194,10 @@ This section lists changes that do not have deprecation warnings.
  This avoids stack overflows in the common case of definitions like
  `f(x, y) = f(promote(x, y)...)` ([#22801]).
 
+ * `findmin`, `findmax`, `indmin`, and `indmax` used to always return linear indices.
+ They now return `CartesianIndex`es for all but 1-d arrays, and in general return
+ the `keys` of indexed collections (e.g. dictionaries) ([#22907]).
+
 Library improvements
 --------------------
 

diff --git a/base/abstractarray.jl b/base/abstractarray.jl
@@ -69,9 +69,9 @@ function indices(A)
  map(OneTo, size(A))
 end
 
-# Performance optimization: get rid of a branch on `d` in `indices(A,
-# d)` for d=1. 1d arrays are heavily used, and the first dimension
-# comes up in other applications.
+# Performance optimization: get rid of a branch on `d` in `indices(A, d)`
+# for d=1. 1d arrays are heavily used, and the first dimension comes up
+# in other applications.
 indices1(A::AbstractArray{<:Any,0}) = OneTo(1)
 indices1(A::AbstractArray) = (@_inline_meta; indices(A)[1])
 indices1(iter) = OneTo(length(iter))
@@ -103,6 +103,10 @@ julia> extrema(b)
 """
 linearindices(A) = (@_inline_meta; OneTo(_length(A)))
 linearindices(A::AbstractVector) = (@_inline_meta; indices1(A))
+
+keys(a::AbstractArray) = CartesianRange(indices(a))
+keys(a::AbstractVector) = linearindices(a)
+
 eltype(::Type{<:AbstractArray{E}}) where {E} = E
 elsize(::AbstractArray{T}) where {T} = sizeof(T)
 
@@ -756,8 +760,11 @@ start(A::AbstractArray) = (@_inline_meta; itr = eachindex(A); (itr, start(itr)))
 next(A::AbstractArray, i) = (@_propagate_inbounds_meta; (idx, s) = next(i[1], i[2]); (A[idx], (i[1], s)))
 done(A::AbstractArray, i) = (@_propagate_inbounds_meta; done(i[1], i[2]))
 
+# `eachindex` is mostly an optimization of `keys`
+eachindex(itrs...) = keys(itrs...)
+
 # eachindex iterates over all indices. IndexCartesian definitions are later.
-eachindex(A::Union{Number,AbstractVector}) = (@_inline_meta(); indices1(A))
+eachindex(A::AbstractVector) = (@_inline_meta(); indices1(A))
 
 """
  eachindex(A...)
@@ -826,6 +833,9 @@ end
 
 isempty(a::AbstractArray) = (_length(a) == 0)
 
+# keys with an IndexStyle
+keys(s::IndexStyle, A::AbstractArray, B::AbstractArray...) = eachindex(s, A, B...)
+
 ## Conversions ##
 
 convert(::Type{AbstractArray{T,N}}, A::AbstractArray{T,N}) where {T,N } = A
@@ -1739,7 +1749,7 @@ _sub2ind_vec(i) = ()
 function ind2sub(inds::Union{DimsInteger{N},Indices{N}}, ind::AbstractVector{<:Integer}) where N
  M = length(ind)
  t = ntuple(n->similar(ind),Val(N))
- for (i,idx) in enumerate(IndexLinear(), ind)
+ for (i,idx) in pairs(IndexLinear(), ind)
  sub = ind2sub(inds, idx)
  for j = 1:N
  t[j][i] = sub[j]

diff --git a/base/array.jl b/base/array.jl
@@ -2090,13 +2090,13 @@ function findmax(a)
  if isempty(a)
  throw(ArgumentError("collection must be non-empty"))
  end
- s = start(a)
- mi = i = 1
- m, s = next(a, s)
- while !done(a, s)
+ p = pairs(a)
+ s = start(p)
+ (mi, m), s = next(p, s)
+ i = mi
+ while !done(p, s)
  m != m && break
- ai, s = next(a, s)
- i += 1
+ (i, ai), s = next(p, s)
  if ai != ai || isless(m, ai)
  m = ai
  mi = i
@@ -2131,13 +2131,13 @@ function findmin(a)
  if isempty(a)
  throw(ArgumentError("collection must be non-empty"))
  end
- s = start(a)
- mi = i = 1
- m, s = next(a, s)
- while !done(a, s)
+ p = pairs(a)
+ s = start(p)
+ (mi, m), s = next(p, s)
+ i = mi
+ while !done(p, s)
  m != m && break
- ai, s = next(a, s)
- i += 1
+ (i, ai), s = next(p, s)
  if ai != ai || isless(ai, m)
  m = ai
  mi = i

diff --git a/base/associative.jl b/base/associative.jl
@@ -69,11 +69,18 @@ end
 
 in(k, v::KeyIterator) = get(v.dict, k, secret_table_token) !== secret_table_token
 
+"""
+ keys(iterator)
+
+For an iterator or collection that has keys and values (e.g. arrays and dictionaries),
+return an iterator over the keys.
+"""
+function keys end
 
 """
  keys(a::Associative)
 
-Return an iterator over all keys in a collection.
+Return an iterator over all keys in an associative collection.
 `collect(keys(a))` returns an array of keys.
 Since the keys are stored internally in a hash table,
 the order in which they are returned may vary.
@@ -94,7 +101,6 @@ julia> collect(keys(a))
 ```
 """
 keys(a::Associative) = KeyIterator(a)
-eachindex(a::Associative) = KeyIterator(a)
 
 """
  values(a::Associative)
@@ -121,6 +127,15 @@ julia> collect(values(a))
 """
 values(a::Associative) = ValueIterator(a)
 
+"""
+ pairs(collection)
+
+Return an iterator over `key => value` pairs for any
+collection that maps a set of keys to a set of values.
+This includes arrays, where the keys are the array indices.
+"""
+pairs(collection) = Generator(=>, keys(collection), values(collection))
+
 function copy(a::Associative)
  b = similar(a)
  for (k,v) in a

diff --git a/base/deprecated.jl b/base/deprecated.jl
@@ -1733,6 +1733,11 @@ end
 
 @deprecate promote_noncircular promote false
 
+import .Iterators.enumerate
+
+@deprecate enumerate(i::IndexLinear, A::AbstractArray) pairs(i, A)
+@deprecate enumerate(i::IndexCartesian, A::AbstractArray) pairs(i, A)
+
 # END 0.7 deprecations
 
 # BEGIN 1.0 deprecations

diff --git a/base/essentials.jl b/base/essentials.jl
@@ -676,3 +676,13 @@ false
 ```
 """
 isempty(itr) = done(itr, start(itr))
+
+"""
+ values(iterator)
+
+For an iterator or collection that has keys and values, return an iterator
+over the values.
+This function simply returns its argument by default, since the elements
+of a general iterator are normally considered its "values".
+"""
+values(itr) = itr
diff --git a/base/exports.jl b/base/exports.jl
@@ -688,6 +688,7 @@ export
  mapreducedim,
  merge!,
  merge,
+ pairs,
  #pop!,
  #push!,
  reduce,

diff --git a/base/iterators.jl b/base/iterators.jl
@@ -2,7 +2,7 @@
 
 module Iterators
 
-import Base: start, done, next, isempty, length, size, eltype, iteratorsize, iteratoreltype, indices, ndims
+import Base: start, done, next, isempty, length, size, eltype, iteratorsize, iteratoreltype, indices, ndims, pairs
 
 using Base: tail, tuple_type_head, tuple_type_tail, tuple_type_cons, SizeUnknown, HasLength, HasShape,
  IsInfinite, EltypeUnknown, HasEltype, OneTo, @propagate_inbounds
@@ -78,14 +78,16 @@ struct IndexValue{I,A<:AbstractArray}
 end
 
 """
- enumerate(IndexLinear(), A)
- enumerate(IndexCartesian(), A)
- enumerate(IndexStyle(A), A)
+ pairs(IndexLinear(), A)
+ pairs(IndexCartesian(), A)
+ pairs(IndexStyle(A), A)
 
 An iterator that accesses each element of the array `A`, returning
-`(i, x)`, where `i` is the index for the element and `x = A[i]`. This
-is similar to `enumerate(A)`, except `i` will always be a valid index
-for `A`.
+`i => x`, where `i` is the index for the element and `x = A[i]`.
+Identical to `pairs(A)`, except that the style of index can be selected.
+Also similar to `enumerate(A)`, except `i` will be a valid index
+for `A`, while `enumerate` always counts from 1 regardless of the indices
+of `A`.
 
 Specifying `IndexLinear()` ensures that `i` will be an integer;
 specifying `IndexCartesian()` ensures that `i` will be a
@@ -96,7 +98,7 @@ been defined as the native indexing style for array `A`.
 ```jldoctest
 julia> A = ["a" "d"; "b" "e"; "c" "f"];
 
-julia> for (index, value) in enumerate(IndexStyle(A), A)
+julia> for (index, value) in pairs(IndexStyle(A), A)
  println("\$index \$value")
  end
 1 a
@@ -108,7 +110,7 @@ julia> for (index, value) in enumerate(IndexStyle(A), A)
 
 julia> S = view(A, 1:2, :);
 
-julia> for (index, value) in enumerate(IndexStyle(S), S)
+julia> for (index, value) in pairs(IndexStyle(S), S)
  println("\$index \$value")
  end
 CartesianIndex{2}((1, 1)) a
@@ -117,15 +119,14 @@ CartesianIndex{2}((1, 2)) d
 CartesianIndex{2}((2, 2)) e
 ```
 
-Note that `enumerate(A)` returns `i` as a *counter* (always starting
-at 1), whereas `enumerate(IndexLinear(), A)` returns `i` as an *index*
-(starting at the first linear index of `A`, which may or may not be
-1).
-
 See also: [`IndexStyle`](@ref), [`indices`](@ref).
 """
-enumerate(::IndexLinear, A::AbstractArray) = IndexValue(A, linearindices(A))
-enumerate(::IndexCartesian, A::AbstractArray) = IndexValue(A, CartesianRange(indices(A)))
+pairs(::IndexLinear, A::AbstractArray) = IndexValue(A, linearindices(A))
+pairs(::IndexCartesian, A::AbstractArray) = IndexValue(A, CartesianRange(indices(A)))
+
+# faster than zip(keys(a), values(a)) for arrays
+pairs(A::AbstractArray) = pairs(IndexCartesian(), A)
+pairs(A::AbstractVector) = pairs(IndexLinear(), A)
 
 length(v::IndexValue) = length(v.itr)
 indices(v::IndexValue) = indices(v.itr)
@@ -134,11 +135,11 @@ size(v::IndexValue) = size(v.itr)
 @propagate_inbounds function next(v::IndexValue, state)
  indx, n = next(v.itr, state)
  item = v.data[indx]
- (indx, item), n
+ (indx => item), n
 end
 @inline done(v::IndexValue, state) = done(v.itr, state)
 
-eltype(::Type{IndexValue{I,A}}) where {I,A} = Tuple{eltype(I), eltype(A)}
+eltype(::Type{IndexValue{I,A}}) where {I,A} = Pair{eltype(I), eltype(A)}
 
 iteratorsize(::Type{IndexValue{I}}) where {I} = iteratorsize(I)
 iteratoreltype(::Type{IndexValue{I}}) where {I} = iteratoreltype(I)

diff --git a/base/multidimensional.jl b/base/multidimensional.jl
@@ -4,7 +4,7 @@
 module IteratorsMD
  import Base: eltype, length, size, start, done, next, first, last, in, getindex,
  setindex!, IndexStyle, min, max, zero, one, isless, eachindex,
- ndims, iteratorsize, convert
+ ndims, iteratorsize, convert, show
 
  import Base: +, -, *
  import Base: simd_outer_range, simd_inner_length, simd_index
@@ -80,6 +80,7 @@ module IteratorsMD
  @inline _flatten(i, I...) = (i, _flatten(I...)...)
  @inline _flatten(i::CartesianIndex, I...) = (i.I..., _flatten(I...)...)
  CartesianIndex(index::Tuple{Vararg{Union{Integer, CartesianIndex}}}) = CartesianIndex(index...)
+ show(io::IO, i::CartesianIndex) = (print(io, "CartesianIndex"); show(io, i.I))
 
  # length
  length(::CartesianIndex{N}) where {N} = N

diff --git a/base/number.jl b/base/number.jl
@@ -49,6 +49,7 @@ ndims(::Type{<:Number}) = 0
 length(x::Number) = 1
 endof(x::Number) = 1
 iteratorsize(::Type{<:Number}) = HasShape()
+keys(::Number) = OneTo(1)
 
 getindex(x::Number) = x
 function getindex(x::Number, i::Integer)

diff --git a/base/process.jl b/base/process.jl
@@ -822,7 +822,7 @@ wait(x::ProcessChain) = for p in x.processes; wait(p); end
 show(io::IO, p::Process) = print(io, "Process(", p.cmd, ", ", process_status(p), ")")
 
 # allow the elements of the Cmd to be accessed as an array or iterator
-for f in (:length, :endof, :start, :eachindex, :eltype, :first, :last)
+for f in (:length, :endof, :start, :keys, :eltype, :first, :last)
  @eval $f(cmd::Cmd) = $f(cmd.exec)
 end
 for f in (:next, :done, :getindex)

diff --git a/base/reducedim.jl b/base/reducedim.jl
@@ -635,20 +635,22 @@ function findminmax!(f, Rval, Rind, A::AbstractArray{T,N}) where {T,N}
  # Otherwise, keep the result in Rval/Rind so that we traverse A in storage order.
  indsAt, indsRt = safe_tail(indices(A)), safe_tail(indices(Rval))
  keep, Idefault = Broadcast.shapeindexer(indsAt, indsRt)
- k = 0
+ ks = keys(A)
+ k, kss = next(ks, start(ks))
+ zi = zero(eltype(ks))
  if reducedim1(Rval, A)
  i1 = first(indices1(Rval))
  @inbounds for IA in CartesianRange(indsAt)
  IR = Broadcast.newindex(IA, keep, Idefault)
  tmpRv = Rval[i1,IR]
  tmpRi = Rind[i1,IR]
  for i in indices(A,1)
- k += 1
  tmpAv = A[i,IA]
- if tmpRi == 0 || (tmpRv == tmpRv && (tmpAv != tmpAv || f(tmpAv, tmpRv)))
+ if tmpRi == zi || (tmpRv == tmpRv && (tmpAv != tmpAv || f(tmpAv, tmpRv)))
  tmpRv = tmpAv
  tmpRi = k
  end
+ k, kss = next(ks, kss)
  end
  Rval[i1,IR] = tmpRv
  Rind[i1,IR] = tmpRi
@@ -657,14 +659,14 @@ function findminmax!(f, Rval, Rind, A::AbstractArray{T,N}) where {T,N}
  @inbounds for IA in CartesianRange(indsAt)
  IR = Broadcast.newindex(IA, keep, Idefault)
  for i in indices(A, 1)
- k += 1
  tmpAv = A[i,IA]
  tmpRv = Rval[i,IR]
  tmpRi = Rind[i,IR]
- if tmpRi == 0 || (tmpRv == tmpRv && (tmpAv != tmpAv || f(tmpAv, tmpRv)))
+ if tmpRi == zi || (tmpRv == tmpRv && (tmpAv != tmpAv || f(tmpAv, tmpRv)))
  Rval[i,IR] = tmpAv
  Rind[i,IR] = k
  end
+ k, kss = next(ks, kss)
  end
  end
  end
@@ -680,7 +682,7 @@ dimensions of `rval` and `rind`, and store the results in `rval` and `rind`.
 """
 function findmin!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
  init::Bool=true)
- findminmax!(isless, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,0), A)
+ findminmax!(isless, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,zero(eltype(keys(A)))), A)
 end
 
 """
@@ -709,10 +711,10 @@ function findmin(A::AbstractArray{T}, region) where T
  if prod(map(length, reduced_indices(A, region))) != 0
  throw(ArgumentError("collection slices must be non-empty"))
  end
- (similar(A, ri), similar(dims->zeros(Int, dims), ri))
+ (similar(A, ri), similar(dims->zeros(eltype(keys(A)), dims), ri))
  else
  findminmax!(isless, fill!(similar(A, ri), first(A)),
- similar(dims->zeros(Int, dims), ri), A)
+ similar(dims->zeros(eltype(keys(A)), dims), ri), A)
  end
 end
 
@@ -727,7 +729,7 @@ dimensions of `rval` and `rind`, and store the results in `rval` and `rind`.
 """
 function findmax!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
  init::Bool=true)
- findminmax!(isgreater, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,0), A)
+ findminmax!(isgreater, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,zero(eltype(keys(A)))), A)
 end
 
 """
@@ -756,10 +758,10 @@ function findmax(A::AbstractArray{T}, region) where T
  if prod(map(length, reduced_indices(A, region))) != 0
  throw(ArgumentError("collection slices must be non-empty"))
  end
- similar(A, ri), similar(dims->zeros(Int, dims), ri)
+ similar(A, ri), similar(dims->zeros(eltype(keys(A)), dims), ri)
  else
  findminmax!(isgreater, fill!(similar(A, ri), first(A)),
- similar(dims->zeros(Int, dims), ri), A)
+ similar(dims->zeros(eltype(keys(A)), dims), ri), A)
  end
 end