From e9a17a6c3dc202153bbce7e56e0bd86d8d7eb941 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@alumni.harvard.edu>
Date: Wed, 14 Feb 2018 14:41:47 -0500
Subject: [PATCH 1/3] Add a `count` field to IdDict

To provide an efficient `length` method.

Fixes #26043
---
 Makefile                  |  1 +
 base/abstractdict.jl      | 27 ++++++++--------
 base/compiler/compiler.jl |  3 ++
 base/refpointer.jl        | 67 ++++++++++++++++++++-------------------
 src/gf.c                  |  2 +-
 src/julia.h               |  2 +-
 src/table.c               | 24 ++++++++++----
 7 files changed, 71 insertions(+), 55 deletions(-)

diff --git a/Makefile b/Makefile
index 33b8717d1b1a4..edabdf8ccf699 100644
--- a/Makefile
+++ b/Makefile
@@ -200,6 +200,7 @@ CORE_SRCS := $(addprefix $(JULIAHOME)/, \
 		base/reduce.jl \
 		base/reflection.jl \
 		base/traits.jl \
+		base/refpointer.jl \
 		base/tuple.jl)
 COMPILER_SRCS = $(sort $(shell find $(JULIAHOME)/base/compiler -name \*.jl))
 BASE_SRCS := $(sort $(shell find $(JULIAHOME)/base -name \*.jl) $(shell find $(BUILDROOT)/base -name \*.jl))
diff --git a/base/abstractdict.jl b/base/abstractdict.jl
index 8e2b8a4269367..5a7d9d7f82e6c 100644
--- a/base/abstractdict.jl
+++ b/base/abstractdict.jl
@@ -541,8 +541,9 @@ See [`Dict`](@ref) for further help.
 """
 mutable struct IdDict{K,V} <: AbstractDict{K,V}
     ht::Vector{Any}
+    count::Int
     ndel::Int
-    IdDict{K,V}() where {K, V} = new{K,V}(Vector{Any}(uninitialized, 32), 0)
+    IdDict{K,V}() where {K, V} = new{K,V}(Vector{Any}(uninitialized, 32), 0, 0)
 
     function IdDict{K,V}(itr) where {K, V}
         d = IdDict{K,V}()
@@ -557,7 +558,7 @@ mutable struct IdDict{K,V} <: AbstractDict{K,V}
         d
     end
 
-    IdDict{K,V}(d::IdDict{K,V}) where {K, V} = new{K,V}(copy(d.ht))
+    IdDict{K,V}(d::IdDict{K,V}) where {K, V} = new{K,V}(copy(d.ht), d.count, d.ndel)
 end
 
 IdDict() = IdDict{Any,Any}()
@@ -605,7 +606,9 @@ function setindex!(d::IdDict{K,V}, @nospecialize(val), @nospecialize(key)) where
         rehash!(d, max(length(d.ht)>>1, 32))
         d.ndel = 0
     end
-    d.ht = ccall(:jl_eqtable_put, Array{Any,1}, (Any, Any, Any), d.ht, key, val)
+    inserted = RefValue{Cint}(0)
+    d.ht = ccall(:jl_eqtable_put, Array{Any,1}, (Any, Any, Any, Ptr{Cint}), d.ht, key, val, inserted)
+    d.count += inserted[]
     return d
 end
 
@@ -620,12 +623,13 @@ function getindex(d::IdDict{K,V}, @nospecialize(key)) where {K, V}
 end
 
 function pop!(d::IdDict{K,V}, @nospecialize(key), @nospecialize(default)) where {K, V}
-    val = ccall(:jl_eqtable_pop, Any, (Any, Any, Any), d.ht, key, default)
-    # TODO: this can underestimate `ndel`
-    if val === default
+    found = RefValue{Cint}(0)
+    val = ccall(:jl_eqtable_pop, Any, (Any, Any, Any, Ptr{Cint}), d.ht, key, default, found)
+    if found[] === Cint(0)
         return default
     else
-        (d.ndel += 1)
+        d.count -= 1
+        d.ndel += 1
         return val::V
     end
 end
@@ -645,6 +649,7 @@ function empty!(d::IdDict)
     resize!(d.ht, 32)
     ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), d.ht, 0, sizeof(d.ht))
     d.ndel = 0
+    d.count = 0
     return d
 end
 
@@ -654,13 +659,7 @@ start(d::IdDict) = _oidd_nextind(d.ht, 0)
 done(d::IdDict, i) = (i == -1)
 next(d::IdDict{K,V}, i) where {K, V} = (Pair{K,V}(d.ht[i+1], d.ht[i+2]), _oidd_nextind(d.ht, i+2))
 
-function length(d::IdDict)
-    n = 0
-    for pair in d
-        n+=1
-    end
-    n
-end
+length(d::IdDict) = d.count
 
 copy(d::IdDict) = IdDict(d)
 
diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl
index c4d09ca941e52..f22a026da2f70 100644
--- a/base/compiler/compiler.jl
+++ b/base/compiler/compiler.jl
@@ -45,6 +45,9 @@ include("number.jl")
 include("int.jl")
 include("operators.jl")
 include("pointer.jl")
+include("refpointer.jl")
+
+# checked arithmetic
 const checked_add = +
 const checked_sub = -
 
diff --git a/base/refpointer.jl b/base/refpointer.jl
index 68200db4061e4..a22371bbe2e9a 100644
--- a/base/refpointer.jl
+++ b/base/refpointer.jl
@@ -98,38 +98,6 @@ function unsafe_convert(P::Type{Ptr{Any}}, b::RefArray{Any})
 end
 unsafe_convert(::Type{Ptr{Cvoid}}, b::RefArray{T}) where {T} = convert(Ptr{Cvoid}, unsafe_convert(Ptr{T}, b))
 
-###
-if is_primary_base_module
-    Ref(x::Any) = RefValue(x)
-    Ref{T}() where {T} = RefValue{T}() # Ref{T}()
-    Ref{T}(x) where {T} = RefValue{T}(x) # Ref{T}(x)
-
-    Ref(x::Ref, i::Integer) = (i != 1 && error("Ref only has one element"); x)
-    Ref(x::Ptr{T}, i::Integer) where {T} = x + (i - 1) * Core.sizeof(T)
-
-    # convert Arrays to pointer arrays for ccall
-    function Ref{P}(a::Array{<:Union{Ptr,Cwstring,Cstring}}) where P<:Union{Ptr,Cwstring,Cstring}
-        return RefArray(a) # effectively a no-op
-    end
-    function Ref{P}(a::Array{T}) where P<:Union{Ptr,Cwstring,Cstring} where T
-        if (!isbits(T) && T <: eltype(P))
-            # this Array already has the right memory layout for the requested Ref
-            return RefArray(a,1,false) # root something, so that this function is type-stable
-        else
-            ptrs = Vector{P}(uninitialized, length(a)+1)
-            roots = Vector{Any}(uninitialized, length(a))
-            for i = 1:length(a)
-                root = cconvert(P, a[i])
-                ptrs[i] = unsafe_convert(P, root)::P
-                roots[i] = root
-            end
-            ptrs[length(a)+1] = C_NULL
-            return RefArray(ptrs,1,roots)
-        end
-    end
-    Ref(x::AbstractArray, i::Integer) = RefArray(x, i)
-end
-
 cconvert(::Type{Ptr{P}}, a::Array{<:Ptr}) where {P<:Ptr} = a
 cconvert(::Type{Ref{P}}, a::Array{<:Ptr}) where {P<:Ptr} = a
 cconvert(::Type{Ptr{P}}, a::Array) where {P<:Union{Ptr,Cwstring,Cstring}} = Ref{P}(a)
@@ -144,3 +112,38 @@ setindex!(b::RefValue, x) = (b.x = x; b)
 setindex!(b::RefArray, x) = (b.x[b.i] = x; b)
 
 ###
+
+# Base-only constructors on the shared abstract Ref type
+if nameof(@__MODULE__) === :Base && is_primary_base_module
+
+Ref(x::Any) = RefValue(x)
+Ref{T}() where {T} = RefValue{T}() # Ref{T}()
+Ref{T}(x) where {T} = RefValue{T}(x) # Ref{T}(x)
+convert(::Type{Ref{T}}, x) where {T} = RefValue{T}(x)
+
+Ref(x::Ref, i::Integer) = (i != 1 && error("Ref only has one element"); x)
+Ref(x::Ptr{T}, i::Integer) where {T} = x + (i - 1) * Core.sizeof(T)
+
+Ref(x::AbstractArray, i::Integer) = RefArray(x, i)
+
+# convert Arrays to pointer arrays for ccall
+function Ref{P}(a::Array{<:Union{Ptr,Cwstring,Cstring}}) where P<:Union{Ptr,Cwstring,Cstring}
+    return RefArray(a) # effectively a no-op
+end
+function Ref{P}(a::Array{T}) where P<:Union{Ptr,Cwstring,Cstring} where T
+    if (!isbits(T) && T <: eltype(P))
+        # this Array already has the right memory layout for the requested Ref
+        return RefArray(a,1,false) # root something, so that this function is type-stable
+    else
+        ptrs = Vector{P}(uninitialized, length(a)+1) # one extra slot for the trailing C_NULL
+        roots = Vector{Any}(uninitialized, length(a)) # converted objects, handed to RefArray as roots
+        for i = 1:length(a)
+            root = cconvert(P, a[i])
+            ptrs[i] = unsafe_convert(P, root)::P
+            roots[i] = root
+        end
+        ptrs[length(a)+1] = C_NULL # NULL-terminate the pointer array
+        return RefArray(ptrs,1,roots)
+    end
+end
+end
diff --git a/src/gf.c b/src/gf.c
index 62806ce569820..912cf1b863aca 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -443,7 +443,7 @@ static void foreach_mtable_in_module(
 {
     size_t i;
     void **table = m->bindings.table;
-    jl_eqtable_put(visited, m, jl_true);
+    jl_eqtable_put(visited, m, jl_true, NULL);
     for (i = 1; i < m->bindings.size; i += 2) {
         if (table[i] != HT_NOTFOUND) {
             jl_binding_t *b = (jl_binding_t*)table[i];
diff --git a/src/julia.h b/src/julia.h
index 688f303818589..e174ee6fb1751 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -1342,7 +1342,7 @@ STATIC_INLINE jl_function_t *jl_get_function(jl_module_t *m, const char *name)
 int jl_is_submodule(jl_module_t *child, jl_module_t *parent);
 
 // eq hash tables
-JL_DLLEXPORT jl_array_t *jl_eqtable_put(jl_array_t *h, void *key, void *val);
+JL_DLLEXPORT jl_array_t *jl_eqtable_put(jl_array_t *h, void *key, void *val, int *inserted);
 JL_DLLEXPORT jl_value_t *jl_eqtable_get(jl_array_t *h, void *key,
                                         jl_value_t *deflt);
 
diff --git a/src/table.c b/src/table.c
index fc136403ad93f..3cea45d6aea2c 100644
--- a/src/table.c
+++ b/src/table.c
@@ -8,7 +8,7 @@
 #define keyhash(k)     jl_object_id(k)
 #define h2index(hv,sz) (size_t)(((hv) & ((sz)-1))*2)
 
-static void **jl_table_lookup_bp(jl_array_t **pa, void *key);
+static void **jl_table_lookup_bp(jl_array_t **pa, void *key, int *inserted);
 
 JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz)
 {
@@ -23,7 +23,7 @@ JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz)
     JL_GC_PUSH1(&newa);
     for(i=0; i < sz; i+=2) {
         if (ol[i+1] != NULL) {
-            (*jl_table_lookup_bp(&newa, ol[i])) = ol[i+1];
+            (*jl_table_lookup_bp(&newa, ol[i], NULL)) = ol[i+1];
             jl_gc_wb(newa, ol[i+1]);
             // it is however necessary here because allocation
             // can (and will) occur in a recursive call inside table_lookup_bp
@@ -37,7 +37,7 @@ JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz)
     return newa;
 }
 
-static void **jl_table_lookup_bp(jl_array_t **pa, void *key)
+static void **jl_table_lookup_bp(jl_array_t **pa, void *key, int *inserted)
 {
     // pa points to a **rooted** gc frame slot
     uint_t hv;
@@ -48,6 +48,9 @@ static void **jl_table_lookup_bp(jl_array_t **pa, void *key)
     size_t maxprobe = max_probe(sz);
     void **tab = (void**)a->data;
 
+    if (inserted)
+        *inserted = 0;
+
     hv = keyhash((jl_value_t*)key);
  retry_bp:
     iter = 0;
@@ -58,6 +61,8 @@ static void **jl_table_lookup_bp(jl_array_t **pa, void *key)
     do {
         if (tab[index+1] == NULL) {
             tab[index] = key;
+            if (inserted)
+                *inserted = 1;
             jl_gc_wb(a, key);
             return &tab[index+1];
         }
@@ -124,11 +129,11 @@ static void **jl_table_peek_bp(jl_array_t *a, void *key)
 }
 
 JL_DLLEXPORT
-jl_array_t *jl_eqtable_put(jl_array_t *h, void *key, void *val)
+jl_array_t *jl_eqtable_put(jl_array_t *h, void *key, void *val, int *inserted)
 {
     JL_GC_PUSH1(&h);
     // &h may be assigned to in jl_idtable_rehash so it need to be rooted
-    void **bp = jl_table_lookup_bp(&h, key);
+    void **bp = jl_table_lookup_bp(&h, key, inserted);
     *bp = val;
     jl_gc_wb(h, val);
     JL_GC_POP();
@@ -145,11 +150,16 @@ jl_value_t *jl_eqtable_get(jl_array_t *h, void *key, jl_value_t *deflt)
 }
 
 JL_DLLEXPORT
-jl_value_t *jl_eqtable_pop(jl_array_t *h, void *key, jl_value_t *deflt)
+jl_value_t *jl_eqtable_pop(jl_array_t *h, void *key, jl_value_t *deflt, int *found)
 {
     void **bp = jl_table_peek_bp(h, key);
-    if (bp == NULL || *bp == NULL)
+    if (bp == NULL || *bp == NULL) {
+        if (found)
+            *found = 0;
         return deflt;
+    }
+    if (found)
+        *found = 1;
     jl_value_t *val = (jl_value_t*)*bp;
     *(bp-1) = jl_nothing; // clear the key
     *bp = NULL;

From 84da967df4aa38b6b29de11dcf861ddf64139463 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@alumni.harvard.edu>
Date: Wed, 14 Feb 2018 14:47:32 -0500
Subject: [PATCH 2/3] Add IdSet

IdSet is to IdDict as Set is to Dict. To be used in the new optimizer.
Currently unexported.
---
 base/abstractdict.jl |  49 ++++++++
 base/abstractset.jl  | 266 +++++++++++++++++++++++++++++++++++++++++++
 base/dict.jl         |  20 ----
 base/set.jl          | 264 ------------------------------------------
 base/sysimg.jl       |   1 +
 test/sets.jl         |   2 +-
 6 files changed, 317 insertions(+), 285 deletions(-)
 create mode 100644 base/abstractset.jl

diff --git a/base/abstractdict.jl b/base/abstractdict.jl
index 5a7d9d7f82e6c..3b538817809ff 100644
--- a/base/abstractdict.jl
+++ b/base/abstractdict.jl
@@ -569,6 +569,21 @@ IdDict(ps::Pair{K}...)             where {K}   = IdDict{K,Any}(ps)
 IdDict(ps::(Pair{K,V} where K)...) where {V}   = IdDict{Any,V}(ps)
 IdDict(ps::Pair...)                            = IdDict{Any,Any}(ps)
 
+TP{K,V} = Union{Type{Tuple{K,V}},Type{Pair{K,V}}}
+
+dict_with_eltype(DT_apply, kv, ::TP{K,V}) where {K,V} = DT_apply(K, V)(kv)
+dict_with_eltype(DT_apply, kv::Generator, ::TP{K,V}) where {K,V} = DT_apply(K, V)(kv)
+dict_with_eltype(DT_apply, ::Type{Pair{K,V}}) where {K,V} = DT_apply(K, V)()
+dict_with_eltype(DT_apply, ::Type) = DT_apply(Any, Any)()
+dict_with_eltype(DT_apply::F, kv, t) where {F} = grow_to!(dict_with_eltype(DT_apply, @default_eltype(typeof(kv))), kv)
+function dict_with_eltype(DT_apply::F, kv::Generator, t) where F
+    T = @default_eltype(kv)
+    if T <: Union{Pair, Tuple{Any, Any}} && isconcretetype(T)
+        return dict_with_eltype(DT_apply, kv, T)
+    end
+    return grow_to!(dict_with_eltype(DT_apply, T), kv)
+end
+
 function IdDict(kv)
     try
         dict_with_eltype((K, V) -> IdDict{K, V}, kv, eltype(kv))
@@ -668,3 +683,37 @@ get!(d::IdDict{K,V}, @nospecialize(key), @nospecialize(default)) where {K, V} =
 # For some AbstractDict types, it is safe to implement filter!
 # by deleting keys during iteration.
 filter!(f, d::IdDict) = filter_in_one_pass!(f, d)
+
+# Like Set, but using IdDict
+mutable struct IdSet{T} <: AbstractSet{T}
+    dict::IdDict{T,Nothing}
+
+    IdSet{T}() where {T} = new(IdDict{T,Nothing}())
+    IdSet{T}(s::IdSet{T}) where {T} = new(IdDict{T,Nothing}(s.dict))
+end
+
+IdSet{T}(itr) where {T} = union!(IdSet{T}(), itr)
+IdSet() = IdSet{Any}()
+
+copy(s::IdSet{T}) where {T} = IdSet{T}(s)
+copymutable(s::IdSet{T}) where {T} = IdSet{T}(s)
+
+isempty(s::IdSet) = isempty(s.dict)
+length(s::IdSet)  = length(s.dict)
+in(x, s::IdSet) = haskey(s.dict, x)
+push!(s::IdSet, x) = (s.dict[x] = nothing; s)
+pop!(s::IdSet, x) = (pop!(s.dict, x); x)
+pop!(s::IdSet, x, deflt) = x in s ? pop!(s, x) : deflt
+delete!(s::IdSet, x) = (delete!(s.dict, x); s)
+
+sizehint!(s::IdSet, newsz) = (sizehint!(s.dict, newsz); s)
+empty!(s::IdSet) = (empty!(s.dict); s)
+
+filter!(f, d::IdSet) = unsafe_filter!(f, d)
+
+start(s::IdSet)       = start(s.dict)
+done(s::IdSet, state) = done(s.dict, state)
+function next(s::IdSet, state)
+    ((k, _), i) = next(s.dict, state)
+    return (k, i)
+end
diff --git a/base/abstractset.jl b/base/abstractset.jl
new file mode 100644
index 0000000000000..8291adf517922
--- /dev/null
+++ b/base/abstractset.jl
@@ -0,0 +1,266 @@
+eltype(::Type{<:AbstractSet{T}}) where {T} = @isdefined(T) ? T : Any
+sizehint!(s::AbstractSet, n) = nothing
+
+"""
+    union(s, itrs...)
+    ∪(s, itrs...)
+
+Construct the union of sets. Maintain order with arrays.
+
+# Examples
+```jldoctest
+julia> union([1, 2], [3, 4])
+4-element Array{Int64,1}:
+ 1
+ 2
+ 3
+ 4
+
+julia> union([1, 2], [2, 4])
+3-element Array{Int64,1}:
+ 1
+ 2
+ 4
+
+julia> union([4, 2], 1:2)
+3-element Array{Int64,1}:
+ 4
+ 2
+ 1
+
+julia> union(Set([1, 2]), 2:3)
+Set([2, 3, 1])
+```
+"""
+function union end
+
+_in(itr) = x -> x in itr
+
+union(s, sets...) = union!(emptymutable(s, promote_eltype(s, sets...)), s, sets...)
+union(s::AbstractSet) = copy(s)
+
+const ∪ = union
+
+"""
+    union!(s::Union{AbstractSet,AbstractVector}, itrs...)
+
+Construct the union of passed in sets and overwrite `s` with the result.
+Maintain order with arrays.
+
+# Examples
+```jldoctest
+julia> a = Set([1, 3, 4, 5]);
+
+julia> union!(a, 1:2:8);
+
+julia> a
+Set([7, 4, 3, 5, 1])
+```
+"""
+union!(s::AbstractSet, sets...) = foldl(union!, s, sets)
+
+max_values(::Type) = typemax(Int)
+max_values(T::Type{<:Union{Nothing,BitIntegerSmall}}) = 1 << (8*sizeof(T))
+max_values(T::Union) = max(max_values(T.a), max_values(T.b))
+max_values(::Type{Bool}) = 2
+
+function union!(s::AbstractSet{T}, itr) where T
+    haslength(itr) && sizehint!(s, length(s) + length(itr))
+    for x=itr
+        push!(s, x)
+        length(s) == max_values(T) && break
+    end
+    s
+end
+
+"""
+    intersect(s, itrs...)
+    ∩(s, itrs...)
+
+Construct the intersection of sets.
+Maintain order with arrays.
+
+# Examples
+```jldoctest
+julia> intersect([1, 2, 3], [3, 4, 5])
+1-element Array{Int64,1}:
+ 3
+
+julia> intersect([1, 4, 4, 5, 6], [4, 6, 6, 7, 8])
+2-element Array{Int64,1}:
+ 4
+ 6
+
+julia> intersect(Set([1, 2]), BitSet([2, 3]))
+Set([2])
+```
+"""
+intersect(s::AbstractSet, itr, itrs...) = intersect!(intersect(s, itr), itrs...)
+intersect(s) = union(s)
+intersect(s::AbstractSet, itr) = mapfilter(_in(s), push!, itr, emptymutable(s))
+
+const ∩ = intersect
+
+"""
+    intersect!(s::Union{AbstractSet,AbstractVector}, itrs...)
+
+Intersect all passed in sets and overwrite `s` with the result.
+Maintain order with arrays.
+"""
+intersect!(s::AbstractSet, itrs...) = foldl(intersect!, s, itrs)
+intersect!(s::AbstractSet, s2::AbstractSet) = filter!(_in(s2), s)
+intersect!(s::AbstractSet, itr) = intersect!(s, union!(emptymutable(s), itr))
+
+"""
+    setdiff(s, itrs...)
+
+Construct the set of elements in `s` but not in any of the iterables in `itrs`.
+Maintain order with arrays.
+
+# Examples
+```jldoctest
+julia> setdiff([1,2,3], [3,4,5])
+2-element Array{Int64,1}:
+ 1
+ 2
+```
+"""
+setdiff(s::AbstractSet, itrs...) = setdiff!(copymutable(s), itrs...)
+setdiff(s) = union(s)
+
+"""
+    setdiff!(s, itrs...)
+
+Remove from set `s` (in-place) each element of each iterable from `itrs`.
+Maintain order with arrays.
+
+# Examples
+```jldoctest
+julia> a = Set([1, 3, 4, 5]);
+
+julia> setdiff!(a, 1:2:6);
+
+julia> a
+Set([4])
+```
+"""
+setdiff!(s::AbstractSet, itrs...) = foldl(setdiff!, s, itrs)
+setdiff!(s::AbstractSet, itr) = foldl(delete!, s, itr)
+
+
+"""
+    symdiff(s, itrs...)
+
+Construct the symmetric difference of elements in the passed in sets.
+When `s` is not an `AbstractSet`, the order is maintained.
+Note that in this case the multiplicity of elements matters.
+
+# Examples
+```jldoctest
+julia> symdiff([1,2,3], [3,4,5], [4,5,6])
+3-element Array{Int64,1}:
+ 1
+ 2
+ 6
+
+julia> symdiff([1,2,1], [2, 1, 2])
+2-element Array{Int64,1}:
+ 1
+ 2
+
+julia> symdiff(unique([1,2,1]), unique([2, 1, 2]))
+0-element Array{Int64,1}
+```
+"""
+symdiff(s, sets...) = symdiff!(emptymutable(s, promote_eltype(s, sets...)), s, sets...)
+symdiff(s) = symdiff!(copy(s))
+
+"""
+    symdiff!(s::Union{AbstractSet,AbstractVector}, itrs...)
+
+Construct the symmetric difference of the passed in sets, and overwrite `s` with the result.
+When `s` is an array, the order is maintained.
+Note that in this case the multiplicity of elements matters.
+"""
+symdiff!(s::AbstractSet, itrs...) = foldl(symdiff!, s, itrs)
+
+function symdiff!(s::AbstractSet, itr)
+    for x in itr
+        x in s ? delete!(s, x) : push!(s, x)
+    end
+    s
+end
+
+==(l::AbstractSet, r::AbstractSet) = length(l) == length(r) && l ⊆ r
+# convenience functions for AbstractSet
+# (if needed, only their synonyms ⊊ and ⊆ must be specialized)
+<( l::AbstractSet, r::AbstractSet) = l ⊊ r
+<=(l::AbstractSet, r::AbstractSet) = l ⊆ r
+
+"""
+    issubset(a, b)
+    ⊆(a,b) -> Bool
+    ⊈(a,b) -> Bool
+    ⊊(a,b) -> Bool
+
+Determine whether every element of `a` is also in `b`, using [`in`](@ref).
+
+# Examples
+```jldoctest
+julia> issubset([1, 2], [1, 2, 3])
+true
+
+julia> issubset([1, 2, 3], [1, 2])
+false
+```
+"""
+function issubset(l, r)
+    for elt in l
+        if !in(elt, r)
+            return false
+        end
+    end
+    return true
+end
+# use the implementation below when it becomes as efficient
+# issubset(l, r) = all(_in(r), l)
+
+const ⊆ = issubset
+
+"""
+    issetequal(a, b)
+
+Determine whether `a` and `b` have the same elements. Equivalent
+to `a ⊆ b && b ⊆ a`.
+
+# Examples
+```jldoctest
+julia> issetequal([1, 2], [1, 2, 3])
+false
+
+julia> issetequal([1, 2], [2, 1])
+true
+```
+"""
+issetequal(l, r) = length(l) == length(r) && l ⊆ r
+issetequal(l::AbstractSet, r::AbstractSet) = l == r
+
+⊊(l, r) = length(l) < length(r) && l ⊆ r
+⊈(l, r) = !⊆(l, r)
+
+⊇(l, r) = r ⊆ l
+⊉(l, r) = r ⊈ l
+⊋(l, r) = r ⊊ l
+
+filter(pred, s::AbstractSet) = mapfilter(pred, push!, s, emptymutable(s))
+
+# it must be safe to delete the current element while iterating over s:
+unsafe_filter!(pred, s::AbstractSet) = mapfilter(!pred, delete!, s, s)
+
+# TODO: delete mapfilter in favor of comprehensions/foldl/filter when competitive
+function mapfilter(pred, f, itr, res)
+    for x in itr
+        pred(x) && f(res, x)
+    end
+    res
+end
diff --git a/base/dict.jl b/base/dict.jl
index e1d74f59c0a6c..b967e957baaf4 100644
--- a/base/dict.jl
+++ b/base/dict.jl
@@ -143,21 +143,6 @@ function Dict(kv)
     end
 end
 
-TP{K,V} = Union{Type{Tuple{K,V}},Type{Pair{K,V}}}
-
-dict_with_eltype(DT_apply, kv, ::TP{K,V}) where {K,V} = DT_apply(K, V)(kv)
-dict_with_eltype(DT_apply, kv::Generator, ::TP{K,V}) where {K,V} = DT_apply(K, V)(kv)
-dict_with_eltype(DT_apply, ::Type{Pair{K,V}}) where {K,V} = DT_apply(K, V)()
-dict_with_eltype(DT_apply, ::Type) = DT_apply(Any, Any)()
-dict_with_eltype(DT_apply::F, kv, t) where {F} = grow_to!(dict_with_eltype(DT_apply, @default_eltype(typeof(kv))), kv)
-function dict_with_eltype(DT_apply::F, kv::Generator, t) where F
-    T = @default_eltype(kv)
-    if T <: Union{Pair, Tuple{Any, Any}} && isconcretetype(T)
-        return dict_with_eltype(DT_apply, kv, T)
-    end
-    return grow_to!(dict_with_eltype(DT_apply, T), kv)
-end
-
 # this is a special case due to (1) allowing both Pairs and Tuples as elements,
 # and (2) Pair being invariant. a bit annoying.
 function grow_to!(dest::AbstractDict, itr)
@@ -245,11 +230,6 @@ function rehash!(h::Dict{K,V}, newsz = length(h.keys)) where V where K
     return h
 end
 
-max_values(::Type) = typemax(Int)
-max_values(T::Type{<:Union{Nothing,BitIntegerSmall}}) = 1 << (8*sizeof(T))
-max_values(T::Union) = max(max_values(T.a), max_values(T.b))
-max_values(::Type{Bool}) = 2
-
 function sizehint!(d::Dict{T}, newsz) where T
     oldsz = length(d.slots)
     if newsz <= oldsz
diff --git a/base/set.jl b/base/set.jl
index 5775af5f3fca4..41238f14dd4a4 100644
--- a/base/set.jl
+++ b/base/set.jl
@@ -1,7 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-eltype(::Type{<:AbstractSet{T}}) where {T} = @isdefined(T) ? T : Any
-
 struct Set{T} <: AbstractSet{T}
     dict::Dict{T,Nothing}
 
@@ -72,256 +70,6 @@ done(s::Set, state) = done(s.dict, state)
 # NOTE: manually optimized to take advantage of Dict representation
 next(s::Set, i)     = (s.dict.keys[i], skip_deleted(s.dict, i+1))
 
-"""
-    union(s, itrs...)
-    ∪(s, itrs...)
-
-Construct the union of sets. Maintain order with arrays.
-
-# Examples
-```jldoctest
-julia> union([1, 2], [3, 4])
-4-element Array{Int64,1}:
- 1
- 2
- 3
- 4
-
-julia> union([1, 2], [2, 4])
-3-element Array{Int64,1}:
- 1
- 2
- 4
-
-julia> union([4, 2], 1:2)
-3-element Array{Int64,1}:
- 4
- 2
- 1
-
-julia> union(Set([1, 2]), 2:3)
-Set([2, 3, 1])
-```
-"""
-function union end
-
-_in(itr) = x -> x in itr
-
-union(s, sets...) = union!(emptymutable(s, promote_eltype(s, sets...)), s, sets...)
-union(s::AbstractSet) = copy(s)
-
-const ∪ = union
-
-"""
-    union!(s::Union{AbstractSet,AbstractVector}, itrs...)
-
-Construct the union of passed in sets and overwrite `s` with the result.
-Maintain order with arrays.
-
-# Examples
-```jldoctest
-julia> a = Set([1, 3, 4, 5]);
-
-julia> union!(a, 1:2:8);
-
-julia> a
-Set([7, 4, 3, 5, 1])
-```
-"""
-union!(s::AbstractSet, sets...) = foldl(union!, s, sets)
-
-# default generic 2-args implementation with push!
-union!(s::AbstractSet, itr) = foldl(push!, s, itr)
-
-function union!(s::Set{T}, itr) where T
-    haslength(itr) && sizehint!(s, length(itr))
-    for x=itr
-        push!(s, x)
-        length(s) == max_values(T) && break
-    end
-    s
-end
-
-
-"""
-    intersect(s, itrs...)
-    ∩(s, itrs...)
-
-Construct the intersection of sets.
-Maintain order with arrays.
-
-# Examples
-```jldoctest
-julia> intersect([1, 2, 3], [3, 4, 5])
-1-element Array{Int64,1}:
- 3
-
-julia> intersect([1, 4, 4, 5, 6], [4, 6, 6, 7, 8])
-2-element Array{Int64,1}:
- 4
- 6
-
-julia> intersect(Set([1, 2]), BitSet([2, 3]))
-Set([2])
-```
-"""
-intersect(s::AbstractSet, itr, itrs...) = intersect!(intersect(s, itr), itrs...)
-intersect(s) = union(s)
-intersect(s::AbstractSet, itr) = mapfilter(_in(s), push!, itr, emptymutable(s))
-
-const ∩ = intersect
-
-"""
-    intersect!(s::Union{AbstractSet,AbstractVector}, itrs...)
-
-Intersect all passed in sets and overwrite `s` with the result.
-Maintain order with arrays.
-"""
-intersect!(s::AbstractSet, itrs...) = foldl(intersect!, s, itrs)
-intersect!(s::AbstractSet, s2::AbstractSet) = filter!(_in(s2), s)
-intersect!(s::AbstractSet, itr) = intersect!(s, union!(emptymutable(s), itr))
-
-"""
-    setdiff(s, itrs...)
-
-Construct the set of elements in `s` but not in any of the iterables in `itrs`.
-Maintain order with arrays.
-
-# Examples
-```jldoctest
-julia> setdiff([1,2,3], [3,4,5])
-2-element Array{Int64,1}:
- 1
- 2
-```
-"""
-setdiff(s::AbstractSet, itrs...) = setdiff!(copymutable(s), itrs...)
-setdiff(s) = union(s)
-
-"""
-    setdiff!(s, itrs...)
-
-Remove from set `s` (in-place) each element of each iterable from `itrs`.
-Maintain order with arrays.
-
-# Examples
-```jldoctest
-julia> a = Set([1, 3, 4, 5]);
-
-julia> setdiff!(a, 1:2:6);
-
-julia> a
-Set([4])
-```
-"""
-setdiff!(s::AbstractSet, itrs...) = foldl(setdiff!, s, itrs)
-setdiff!(s::AbstractSet, itr) = foldl(delete!, s, itr)
-
-
-"""
-    symdiff(s, itrs...)
-
-Construct the symmetric difference of elements in the passed in sets.
-When `s` is not an `AbstractSet`, the order is maintained.
-Note that in this case the multiplicity of elements matters.
-
-# Examples
-```jldoctest
-julia> symdiff([1,2,3], [3,4,5], [4,5,6])
-3-element Array{Int64,1}:
- 1
- 2
- 6
-
-julia> symdiff([1,2,1], [2, 1, 2])
-2-element Array{Int64,1}:
- 1
- 2
-
-julia> symdiff(unique([1,2,1]), unique([2, 1, 2]))
-0-element Array{Int64,1}
-```
-"""
-symdiff(s, sets...) = symdiff!(emptymutable(s, promote_eltype(s, sets...)), s, sets...)
-symdiff(s) = symdiff!(copy(s))
-
-"""
-    symdiff!(s::Union{AbstractSet,AbstractVector}, itrs...)
-
-Construct the symmetric difference of the passed in sets, and overwrite `s` with the result.
-When `s` is an array, the order is maintained.
-Note that in this case the multiplicity of elements matters.
-"""
-symdiff!(s::AbstractSet, itrs...) = foldl(symdiff!, s, itrs)
-
-function symdiff!(s::AbstractSet, itr)
-    for x in itr
-        x in s ? delete!(s, x) : push!(s, x)
-    end
-    s
-end
-
-==(l::AbstractSet, r::AbstractSet) = length(l) == length(r) && l ⊆ r
-# convenience functions for AbstractSet
-# (if needed, only their synonyms ⊊ and ⊆ must be specialized)
-<( l::AbstractSet, r::AbstractSet) = l ⊊ r
-<=(l::AbstractSet, r::AbstractSet) = l ⊆ r
-
-"""
-    issubset(a, b)
-    ⊆(a,b) -> Bool
-    ⊈(a,b) -> Bool
-    ⊊(a,b) -> Bool
-
-Determine whether every element of `a` is also in `b`, using [`in`](@ref).
-
-# Examples
-```jldoctest
-julia> issubset([1, 2], [1, 2, 3])
-true
-
-julia> issubset([1, 2, 3], [1, 2])
-false
-```
-"""
-function issubset(l, r)
-    for elt in l
-        if !in(elt, r)
-            return false
-        end
-    end
-    return true
-end
-# use the implementation below when it becoms as efficient
-# issubset(l, r) = all(_in(r), l)
-
-const ⊆ = issubset
-
-"""
-    issetequal(a, b)
-
-Determine whether `a` and `b` have the same elements. Equivalent
-to `a ⊆ b && b ⊆ a`.
-
-# Examples
-```jldoctest
-julia> issetequal([1, 2], [1, 2, 3])
-false
-
-julia> issetequal([1, 2], [2, 1])
-true
-```
-"""
-issetequal(l, r) = length(l) == length(r) && l ⊆ r
-issetequal(l::AbstractSet, r::AbstractSet) = l == r
-
-⊊(l, r) = length(l) < length(r) && l ⊆ r
-⊈(l, r) = !⊆(l, r)
-
-⊇(l, r) = r ⊆ l
-⊉(l, r) = r ⊈ l
-⊋(l, r) = r ⊊ l
-
 """
     unique(itr)
 
@@ -535,20 +283,8 @@ allunique(::Set) = true
 
 allunique(r::AbstractRange{T}) where {T} = (step(r) != zero(T)) || (length(r) <= 1)
 
-filter(pred, s::AbstractSet) = mapfilter(pred, push!, s, emptymutable(s))
 filter!(f, s::Set) = unsafe_filter!(f, s)
 
-# it must be safe to delete the current element while iterating over s:
-unsafe_filter!(pred, s::AbstractSet) = mapfilter(!pred, delete!, s, s)
-
-# TODO: delete mapfilter in favor of comprehensions/foldl/filter when competitive
-function mapfilter(pred, f, itr, res)
-    for x in itr
-        pred(x) && f(res, x)
-    end
-    res
-end
-
 const hashs_seed = UInt === UInt64 ? 0x852ada37cfe8e0ce : 0xcfe8e0ce
 function hash(s::AbstractSet, h::UInt)
     hv = hashs_seed
diff --git a/base/sysimg.jl b/base/sysimg.jl
index 8bb43331d632c..452fecec0ed9f 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -222,6 +222,7 @@ end
 include("some.jl")
 
 include("dict.jl")
+include("abstractset.jl")
 include("set.jl")
 
 include("char.jl")
diff --git a/test/sets.jl b/test/sets.jl
index 554cb4c2d03dc..8e1dc64a86269 100644
--- a/test/sets.jl
+++ b/test/sets.jl
@@ -556,7 +556,7 @@ end
 @testset "⊆, ⊊, ⊈, ⊇, ⊋, ⊉, <, <=, issetequal" begin
     a = [1, 2]
     b = [2, 1, 3]
-    for C = (Tuple, identity, Set, BitSet)
+    for C = (Tuple, identity, Set, BitSet, Base.IdSet{Int})
         A = C(a)
         B = C(b)
         @test A ⊆ B

From d4929d78acad2606131b332989b8e699fe894bb1 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliacomputing.com>
Date: Sat, 24 Feb 2018 15:46:57 -0500
Subject: [PATCH 3/3] Split refpointer into RefValue parts and everything else

---
 Makefile                  |  2 +-
 base/compiler/compiler.jl |  2 +-
 base/refpointer.jl        | 99 +++++++++++++--------------------------
 base/refvalue.jl          | 31 ++++++++++++
 base/sysimg.jl            |  1 +
 5 files changed, 66 insertions(+), 69 deletions(-)
 create mode 100644 base/refvalue.jl

diff --git a/Makefile b/Makefile
index edabdf8ccf699..6fb3c46f21938 100644
--- a/Makefile
+++ b/Makefile
@@ -200,7 +200,7 @@ CORE_SRCS := $(addprefix $(JULIAHOME)/, \
 		base/reduce.jl \
 		base/reflection.jl \
 		base/traits.jl \
-		base/refpointer.jl \
+		base/refvalue.jl \
 		base/tuple.jl)
 COMPILER_SRCS = $(sort $(shell find $(JULIAHOME)/base/compiler -name \*.jl))
 BASE_SRCS := $(sort $(shell find $(JULIAHOME)/base -name \*.jl) $(shell find $(BUILDROOT)/base -name \*.jl))
diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl
index f22a026da2f70..d25dae6884d01 100644
--- a/base/compiler/compiler.jl
+++ b/base/compiler/compiler.jl
@@ -45,7 +45,7 @@ include("number.jl")
 include("int.jl")
 include("operators.jl")
 include("pointer.jl")
-include("refpointer.jl")
+include("refvalue.jl")
 
 # checked arithmetic
 const checked_add = +
diff --git a/base/refpointer.jl b/base/refpointer.jl
index a22371bbe2e9a..bddaaaeb886a0 100644
--- a/base/refpointer.jl
+++ b/base/refpointer.jl
@@ -40,37 +40,8 @@ convert(::Type{Ref{T}}, x::Ref{T}) where {T} = x
 unsafe_convert(::Type{Ref{T}}, x::Ref{T}) where {T} = unsafe_convert(Ptr{T}, x)
 unsafe_convert(::Type{Ref{T}}, x) where {T} = unsafe_convert(Ptr{T}, x)
 
-### Methods for a Ref object that can store a single value of any type
-
-mutable struct RefValue{T} <: Ref{T}
-    x::T
-    RefValue{T}() where {T} = new()
-    RefValue{T}(x) where {T} = new(x)
-end
-RefValue(x::T) where {T} = RefValue{T}(x)
-isassigned(x::RefValue) = isdefined(x, :x)
-
 convert(::Type{Ref{T}}, x) where {T} = RefValue{T}(x)
 
-function unsafe_convert(P::Type{Ptr{T}}, b::RefValue{T}) where T
-    if datatype_pointerfree(RefValue{T})
-        p = pointer_from_objref(b)
-    elseif isconcretetype(T) && T.mutable
-        p = pointer_from_objref(b.x)
-    else
-        # If the slot is not leaf type, it could be either immutable or not.
-        # If it is actually an immutable, then we can't take it's pointer directly
-        # Instead, explicitly load the pointer from the `RefValue`,
-        # which also ensures this returns same pointer as the one rooted in the `RefValue` object.
-        p = pointerref(Ptr{Ptr{Cvoid}}(pointer_from_objref(b)), 1, Core.sizeof(Ptr{Cvoid}))
-    end
-    return convert(P, p)
-end
-function unsafe_convert(P::Type{Ptr{Any}}, b::RefValue{Any})
-    return convert(P, pointer_from_objref(b))
-end
-unsafe_convert(::Type{Ptr{Cvoid}}, b::RefValue{T}) where {T} = convert(Ptr{Cvoid}, unsafe_convert(Ptr{T}, b))
-
 ### Methods for a Ref object that is backed by an array at index i
 struct RefArray{T,A<:AbstractArray{T},R} <: Ref{T}
     x::A
@@ -98,6 +69,38 @@ function unsafe_convert(P::Type{Ptr{Any}}, b::RefArray{Any})
 end
 unsafe_convert(::Type{Ptr{Cvoid}}, b::RefArray{T}) where {T} = convert(Ptr{Cvoid}, unsafe_convert(Ptr{T}, b))
 
+### Constructors on the abstract `Ref` type (guarded by `is_primary_base_module`)
+if is_primary_base_module
+    Ref(x::Any) = RefValue(x) # wrap a single value in a RefValue
+    Ref{T}() where {T} = RefValue{T}() # Ref{T}()
+    Ref{T}(x) where {T} = RefValue{T}(x) # Ref{T}(x)
+
+    Ref(x::Ref, i::Integer) = (i != 1 && error("Ref only has one element"); x) # only i == 1 is valid
+    Ref(x::Ptr{T}, i::Integer) where {T} = x + (i - 1) * Core.sizeof(T) # advance by i-1 elements
+
+    # convert Arrays to pointer arrays for ccall
+    function Ref{P}(a::Array{<:Union{Ptr,Cwstring,Cstring}}) where P<:Union{Ptr,Cwstring,Cstring}
+        return RefArray(a) # effectively a no-op
+    end
+    function Ref{P}(a::Array{T}) where P<:Union{Ptr,Cwstring,Cstring} where T
+        if (!isbits(T) && T <: eltype(P))
+            # this Array already has the right memory layout for the requested Ref
+            return RefArray(a,1,false) # root something, so that this function is type-stable
+        else
+            ptrs = Vector{P}(uninitialized, length(a)+1) # one extra slot for the trailing C_NULL
+            roots = Vector{Any}(uninitialized, length(a)) # holds each cconvert result next to ptrs
+            for i = 1:length(a)
+                root = cconvert(P, a[i])
+                ptrs[i] = unsafe_convert(P, root)::P
+                roots[i] = root
+            end
+            ptrs[length(a)+1] = C_NULL # terminate the pointer array
+            return RefArray(ptrs,1,roots) # roots is passed along as the third RefArray argument
+        end
+    end
+    Ref(x::AbstractArray, i::Integer) = RefArray(x, i) # reference to element i of x
+end
+
 cconvert(::Type{Ptr{P}}, a::Array{<:Ptr}) where {P<:Ptr} = a
 cconvert(::Type{Ref{P}}, a::Array{<:Ptr}) where {P<:Ptr} = a
 cconvert(::Type{Ptr{P}}, a::Array) where {P<:Union{Ptr,Cwstring,Cstring}} = Ref{P}(a)
@@ -105,45 +108,7 @@ cconvert(::Type{Ref{P}}, a::Array) where {P<:Union{Ptr,Cwstring,Cstring}} = Ref{
 
 ###
 
-getindex(b::RefValue) = b.x
 getindex(b::RefArray) = b.x[b.i]
-
-setindex!(b::RefValue, x) = (b.x = x; b)
 setindex!(b::RefArray, x) = (b.x[b.i] = x; b)
 
 ###
-
-# Base-only constructors on the shared abstract Ref type
-if nameof(@__MODULE__) === :Base && is_primary_base_module
-
-Ref(x::Any) = RefValue(x)
-Ref{T}() where {T} = RefValue{T}() # Ref{T}()
-Ref{T}(x) where {T} = RefValue{T}(x) # Ref{T}(x)
-convert(::Type{Ref{T}}, x) where {T} = RefValue{T}(x)
-
-Ref(x::Ref, i::Integer) = (i != 1 && error("Ref only has one element"); x)
-Ref(x::Ptr{T}, i::Integer) where {T} = x + (i - 1) * Core.sizeof(T)
-
-Ref(x::AbstractArray, i::Integer) = RefArray(x, i)
-
-# convert Arrays to pointer arrays for ccall
-function Ref{P}(a::Array{<:Union{Ptr,Cwstring,Cstring}}) where P<:Union{Ptr,Cwstring,Cstring}
-    return RefArray(a) # effectively a no-op
-end
-function Ref{P}(a::Array{T}) where P<:Union{Ptr,Cwstring,Cstring} where T
-    if (!isbits(T) && T <: eltype(P))
-        # this Array already has the right memory layout for the requested Ref
-        return RefArray(a,1,false) # root something, so that this function is type-stable
-    else
-        ptrs = Vector{P}(uninitialized, length(a)+1)
-        roots = Vector{Any}(uninitialized, length(a))
-        for i = 1:length(a)
-            root = cconvert(P, a[i])
-            ptrs[i] = unsafe_convert(P, root)::P
-            roots[i] = root
-        end
-    end
-    Ref(x::AbstractArray, i::Integer) = RefArray(x, i)
-end
-
-end
diff --git a/base/refvalue.jl b/base/refvalue.jl
new file mode 100644
index 0000000000000..fd815e2b38193
--- /dev/null
+++ b/base/refvalue.jl
@@ -0,0 +1,31 @@
+### Methods for a Ref object that can store a single value of any type
+
+mutable struct RefValue{T} <: Ref{T}
+    x::T # the single stored value
+    RefValue{T}() where {T} = new() # `x` is not initialized by this constructor
+    RefValue{T}(x) where {T} = new(x) # store `x` in the slot
+end
+RefValue(x::T) where {T} = RefValue{T}(x) # element type is taken from the type of `x`
+isassigned(x::RefValue) = isdefined(x, :x) # true once the `x` field is defined
+
+function unsafe_convert(P::Type{Ptr{T}}, b::RefValue{T}) where T
+    if datatype_pointerfree(RefValue{T})
+        p = pointer_from_objref(b) # use the address of the RefValue object itself
+    elseif isconcretetype(T) && T.mutable
+        p = pointer_from_objref(b.x) # use the address of the mutable object held in `x`
+    else
+        # If the slot is not a leaf type, the stored value could be either immutable or not.
+        # If it is actually an immutable, then we can't take its pointer directly.
+        # Instead, explicitly load the pointer from the `RefValue`,
+        # which also ensures this returns the same pointer as the one rooted in the `RefValue` object.
+        p = pointerref(Ptr{Ptr{Cvoid}}(pointer_from_objref(b)), 1, Core.sizeof(Ptr{Cvoid}))
+    end
+    return convert(P, p)
+end
+function unsafe_convert(P::Type{Ptr{Any}}, b::RefValue{Any})
+    return convert(P, pointer_from_objref(b)) # pointer to the RefValue object, not to `b.x`
+end
+unsafe_convert(::Type{Ptr{Cvoid}}, b::RefValue{T}) where {T} = convert(Ptr{Cvoid}, unsafe_convert(Ptr{T}, b)) # delegate to the Ptr{T} method
+
+getindex(b::RefValue) = b.x # read the stored value
+setindex!(b::RefValue, x) = (b.x = x; b) # store `x`, then return the ref itself
diff --git a/base/sysimg.jl b/base/sysimg.jl
index 452fecec0ed9f..8750bfb0dd366 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -127,6 +127,7 @@ include("number.jl")
 include("int.jl")
 include("operators.jl")
 include("pointer.jl")
+include("refvalue.jl")
 include("refpointer.jl")
 include("checked.jl")
 using .Checked