Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IdSet and fix IdDict performance problem #26054

Merged
merged 3 commits into from
Feb 24, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add IdSet
which is as Set is to Dict. To be used in the new optimizer.
Currently unexported.
  • Loading branch information
Keno authored and Keno Fischer committed Feb 24, 2018
commit 84da967df4aa38b6b29de11dcf861ddf64139463
49 changes: 49 additions & 0 deletions base/abstractdict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,21 @@ IdDict(ps::Pair{K}...) where {K} = IdDict{K,Any}(ps)
IdDict(ps::(Pair{K,V} where K)...) where {V} = IdDict{Any,V}(ps)
IdDict(ps::Pair...) = IdDict{Any,Any}(ps)

TP{K,V} = Union{Type{Tuple{K,V}},Type{Pair{K,V}}}

dict_with_eltype(DT_apply, kv, ::TP{K,V}) where {K,V} = DT_apply(K, V)(kv)
dict_with_eltype(DT_apply, kv::Generator, ::TP{K,V}) where {K,V} = DT_apply(K, V)(kv)
dict_with_eltype(DT_apply, ::Type{Pair{K,V}}) where {K,V} = DT_apply(K, V)()
dict_with_eltype(DT_apply, ::Type) = DT_apply(Any, Any)()
dict_with_eltype(DT_apply::F, kv, t) where {F} = grow_to!(dict_with_eltype(DT_apply, @default_eltype(typeof(kv))), kv)
function dict_with_eltype(DT_apply::F, kv::Generator, t) where F
T = @default_eltype(kv)
if T <: Union{Pair, Tuple{Any, Any}} && isconcretetype(T)
return dict_with_eltype(DT_apply, kv, T)
end
return grow_to!(dict_with_eltype(DT_apply, T), kv)
end

function IdDict(kv)
try
dict_with_eltype((K, V) -> IdDict{K, V}, kv, eltype(kv))
Expand Down Expand Up @@ -668,3 +683,37 @@ get!(d::IdDict{K,V}, @nospecialize(key), @nospecialize(default)) where {K, V} =
# For some AbstractDict types, it is safe to implement filter!
# by deleting keys during iteration.
filter!(f, d::IdDict) = filter_in_one_pass!(f, d)

# Like Set, but using IdDict
mutable struct IdSet{T} <: AbstractSet{T}
dict::IdDict{T,Nothing}

IdSet{T}() where {T} = new(IdDict{T,Nothing}())
IdSet{T}(s::IdSet{T}) where {T} = new(IdDict{T,Nothing}(s.dict))
end

IdSet{T}(itr) where {T} = union!(IdSet{T}(), itr)
IdSet() = IdSet{Any}()

copy(s::IdSet{T}) where {T} = IdSet{T}(s)
copymutable(s::IdSet{T}) where {T} = IdSet{T}(s)

isempty(s::IdSet) = isempty(s.dict)
length(s::IdSet) = length(s.dict)
in(x, s::IdSet) = haskey(s.dict, x)
push!(s::IdSet, x) = (s.dict[x] = nothing; s)
pop!(s::IdSet, x) = (pop!(s.dict, x); x)
pop!(s::IdSet, x, deflt) = x in s ? pop!(s, x) : deflt
delete!(s::IdSet, x) = (delete!(s.dict, x); s)

sizehint!(s::IdSet, newsz) = (sizehint!(s.dict, newsz); s)
empty!(s::IdSet) = (empty!(s.dict); s)

filter!(f, d::IdSet) = unsafe_filter!(f, d)

start(s::IdSet) = start(s.dict)
done(s::IdSet, state) = done(s.dict, state)
function next(s::IdSet, state)
((k, _), i) = next(s.dict, state)
return (k, i)
end
266 changes: 266 additions & 0 deletions base/abstractset.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
eltype(::Type{<:AbstractSet{T}}) where {T} = @isdefined(T) ? T : Any
sizehint!(s::AbstractSet, n) = nothing

"""
union(s, itrs...)
∪(s, itrs...)

Construct the union of sets. Maintain order with arrays.

# Examples
```jldoctest
julia> union([1, 2], [3, 4])
4-element Array{Int64,1}:
1
2
3
4

julia> union([1, 2], [2, 4])
3-element Array{Int64,1}:
1
2
4

julia> union([4, 2], 1:2)
3-element Array{Int64,1}:
4
2
1

julia> union(Set([1, 2]), 2:3)
Set([2, 3, 1])
```
"""
function union end

_in(itr) = x -> x in itr

union(s, sets...) = union!(emptymutable(s, promote_eltype(s, sets...)), s, sets...)
union(s::AbstractSet) = copy(s)

const ∪ = union

"""
union!(s::Union{AbstractSet,AbstractVector}, itrs...)

Construct the union of passed in sets and overwrite `s` with the result.
Maintain order with arrays.

# Examples
```jldoctest
julia> a = Set([1, 3, 4, 5]);

julia> union!(a, 1:2:8);

julia> a
Set([7, 4, 3, 5, 1])
```
"""
union!(s::AbstractSet, sets...) = foldl(union!, s, sets)

max_values(::Type) = typemax(Int)
max_values(T::Type{<:Union{Nothing,BitIntegerSmall}}) = 1 << (8*sizeof(T))
max_values(T::Union) = max(max_values(T.a), max_values(T.b))
max_values(::Type{Bool}) = 2

function union!(s::AbstractSet{T}, itr) where T
haslength(itr) && sizehint!(s, length(s) + length(itr))
for x=itr
push!(s, x)
length(s) == max_values(T) && break
end
s
end

"""
intersect(s, itrs...)
∩(s, itrs...)

Construct the intersection of sets.
Maintain order with arrays.

# Examples
```jldoctest
julia> intersect([1, 2, 3], [3, 4, 5])
1-element Array{Int64,1}:
3

julia> intersect([1, 4, 4, 5, 6], [4, 6, 6, 7, 8])
2-element Array{Int64,1}:
4
6

julia> intersect(Set([1, 2]), BitSet([2, 3]))
Set([2])
```
"""
intersect(s::AbstractSet, itr, itrs...) = intersect!(intersect(s, itr), itrs...)
intersect(s) = union(s)
intersect(s::AbstractSet, itr) = mapfilter(_in(s), push!, itr, emptymutable(s))

const ∩ = intersect

"""
intersect!(s::Union{AbstractSet,AbstractVector}, itrs...)

Intersect all passed in sets and overwrite `s` with the result.
Maintain order with arrays.
"""
intersect!(s::AbstractSet, itrs...) = foldl(intersect!, s, itrs)
intersect!(s::AbstractSet, s2::AbstractSet) = filter!(_in(s2), s)
intersect!(s::AbstractSet, itr) = intersect!(s, union!(emptymutable(s), itr))

"""
setdiff(s, itrs...)

Construct the set of elements in `s` but not in any of the iterables in `itrs`.
Maintain order with arrays.

# Examples
```jldoctest
julia> setdiff([1,2,3], [3,4,5])
2-element Array{Int64,1}:
1
2
```
"""
setdiff(s::AbstractSet, itrs...) = setdiff!(copymutable(s), itrs...)
setdiff(s) = union(s)

"""
setdiff!(s, itrs...)

Remove from set `s` (in-place) each element of each iterable from `itrs`.
Maintain order with arrays.

# Examples
```jldoctest
julia> a = Set([1, 3, 4, 5]);

julia> setdiff!(a, 1:2:6);

julia> a
Set([4])
```
"""
setdiff!(s::AbstractSet, itrs...) = foldl(setdiff!, s, itrs)
setdiff!(s::AbstractSet, itr) = foldl(delete!, s, itr)


"""
symdiff(s, itrs...)

Construct the symmetric difference of elements in the passed in sets.
When `s` is not an `AbstractSet`, the order is maintained.
Note that in this case the multiplicity of elements matters.

# Examples
```jldoctest
julia> symdiff([1,2,3], [3,4,5], [4,5,6])
3-element Array{Int64,1}:
1
2
6

julia> symdiff([1,2,1], [2, 1, 2])
2-element Array{Int64,1}:
1
2

julia> symdiff(unique([1,2,1]), unique([2, 1, 2]))
0-element Array{Int64,1}
```
"""
symdiff(s, sets...) = symdiff!(emptymutable(s, promote_eltype(s, sets...)), s, sets...)
symdiff(s) = symdiff!(copy(s))

"""
symdiff!(s::Union{AbstractSet,AbstractVector}, itrs...)

Construct the symmetric difference of the passed in sets, and overwrite `s` with the result.
When `s` is an array, the order is maintained.
Note that in this case the multiplicity of elements matters.
"""
symdiff!(s::AbstractSet, itrs...) = foldl(symdiff!, s, itrs)

function symdiff!(s::AbstractSet, itr)
for x in itr
x in s ? delete!(s, x) : push!(s, x)
end
s
end

==(l::AbstractSet, r::AbstractSet) = length(l) == length(r) && l ⊆ r
# convenience functions for AbstractSet
# (if needed, only their synonyms ⊊ and ⊆ must be specialized)
<( l::AbstractSet, r::AbstractSet) = l ⊊ r
<=(l::AbstractSet, r::AbstractSet) = l ⊆ r

"""
issubset(a, b)
⊆(a,b) -> Bool
⊈(a,b) -> Bool
⊊(a,b) -> Bool

Determine whether every element of `a` is also in `b`, using [`in`](@ref).

# Examples
```jldoctest
julia> issubset([1, 2], [1, 2, 3])
true

julia> issubset([1, 2, 3], [1, 2])
false
```
"""
function issubset(l, r)
for elt in l
if !in(elt, r)
return false
end
end
return true
end
# use the implementation below when it becoms as efficient
# issubset(l, r) = all(_in(r), l)

const ⊆ = issubset

"""
issetequal(a, b)

Determine whether `a` and `b` have the same elements. Equivalent
to `a ⊆ b && b ⊆ a`.

# Examples
```jldoctest
julia> issetequal([1, 2], [1, 2, 3])
false

julia> issetequal([1, 2], [2, 1])
true
```
"""
issetequal(l, r) = length(l) == length(r) && l ⊆ r
issetequal(l::AbstractSet, r::AbstractSet) = l == r

⊊(l, r) = length(l) < length(r) && l ⊆ r
⊈(l, r) = !⊆(l, r)

⊇(l, r) = r ⊆ l
⊉(l, r) = r ⊈ l
⊋(l, r) = r ⊊ l

filter(pred, s::AbstractSet) = mapfilter(pred, push!, s, emptymutable(s))

# it must be safe to delete the current element while iterating over s:
unsafe_filter!(pred, s::AbstractSet) = mapfilter(!pred, delete!, s, s)

# TODO: delete mapfilter in favor of comprehensions/foldl/filter when competitive
function mapfilter(pred, f, itr, res)
for x in itr
pred(x) && f(res, x)
end
res
end
20 changes: 0 additions & 20 deletions base/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -143,21 +143,6 @@ function Dict(kv)
end
end

TP{K,V} = Union{Type{Tuple{K,V}},Type{Pair{K,V}}}

dict_with_eltype(DT_apply, kv, ::TP{K,V}) where {K,V} = DT_apply(K, V)(kv)
dict_with_eltype(DT_apply, kv::Generator, ::TP{K,V}) where {K,V} = DT_apply(K, V)(kv)
dict_with_eltype(DT_apply, ::Type{Pair{K,V}}) where {K,V} = DT_apply(K, V)()
dict_with_eltype(DT_apply, ::Type) = DT_apply(Any, Any)()
dict_with_eltype(DT_apply::F, kv, t) where {F} = grow_to!(dict_with_eltype(DT_apply, @default_eltype(typeof(kv))), kv)
function dict_with_eltype(DT_apply::F, kv::Generator, t) where F
T = @default_eltype(kv)
if T <: Union{Pair, Tuple{Any, Any}} && isconcretetype(T)
return dict_with_eltype(DT_apply, kv, T)
end
return grow_to!(dict_with_eltype(DT_apply, T), kv)
end

# this is a special case due to (1) allowing both Pairs and Tuples as elements,
# and (2) Pair being invariant. a bit annoying.
function grow_to!(dest::AbstractDict, itr)
Expand Down Expand Up @@ -245,11 +230,6 @@ function rehash!(h::Dict{K,V}, newsz = length(h.keys)) where V where K
return h
end

max_values(::Type) = typemax(Int)
max_values(T::Type{<:Union{Nothing,BitIntegerSmall}}) = 1 << (8*sizeof(T))
max_values(T::Union) = max(max_values(T.a), max_values(T.b))
max_values(::Type{Bool}) = 2

function sizehint!(d::Dict{T}, newsz) where T
oldsz = length(d.slots)
if newsz <= oldsz
Expand Down
Loading