ml-matches: update and improve ambiguity computation

JuliaLang · Aug 28, 2020 · d83a051 · d83a051
1 parent 6fafd6b
commit d83a051
Show file tree

Hide file tree

Showing 6 changed files with 448 additions and 199 deletions.
diff --git a/base/reflection.jl b/base/reflection.jl
@@ -1331,6 +1331,8 @@ Determine whether two methods `m1` and `m2` may be ambiguous for some call
 signature. This test is performed in the context of other methods of the same
 function; in isolation, `m1` and `m2` might be ambiguous, but if a third method
 resolving the ambiguity has been defined, this returns `false`.
+Alternatively, in isolation `m1` and `m2` might be ordered, but if a third
+method cannot be sorted with them, they may cause an ambiguity together.
 
 For parametric types, the `ambiguous_bottom` keyword argument controls whether
 `Union{}` counts as an ambiguous intersection of type parameters – when `true`,
@@ -1357,27 +1359,80 @@ false
 ```
 """
 function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false)
- # TODO: eagerly returning `morespecific` is wrong, and fails to consider
- # the possibility of an ambiguity caused by a third method:
- # see the precise algorithm in ml_matches for a more correct computation
- if m1 === m2 || morespecific(m1.sig, m2.sig) || morespecific(m2.sig, m1.sig)
- return false
- end
+ m1 === m2 && return false
  ti = typeintersect(m1.sig, m2.sig)
- (ti <: m1.sig && ti <: m2.sig) || return false # XXX: completely wrong, obviously
  ti === Bottom && return false
- if !ambiguous_bottom
- has_bottom_parameter(ti) && return false
- end
- matches = _methods_by_ftype(ti, -1, typemax(UInt))
- for match in matches
- m = match.method
- m === m1 && continue
- m === m2 && continue
- if ti <: m.sig && morespecific(m.sig, m1.sig) && morespecific(m.sig, m2.sig)
+ function inner(ti)
+ ti === Bottom && return false
+ if !ambiguous_bottom
+ has_bottom_parameter(ti) && return false
+ end
+ min = UInt[typemin(UInt)]
+ max = UInt[typemax(UInt)]
+ has_ambig = Int32[0]
+ ms = _methods_by_ftype(ti, -1, typemax(UInt), true, min, max, has_ambig)
+ has_ambig[] == 0 && return false
+ if !ambiguous_bottom
+ filter!(ms) do m
+ return !has_bottom_parameter(m.spec_types)
+ end
+ end
+ # if ml-matches reported the existence of an ambiguity over their
+ # intersection, see if both m1 and m2 may be involved in it
+ have_m1 = have_m2 = false
+ for match in ms
+ m = match.method
+ m === m1 && (have_m1 = true)
+ m === m2 && (have_m2 = true)
+ end
+ if !have_m1 || !have_m2
+ # ml-matches did not need both methods to expose the reported ambiguity
  return false
  end
+ if !ambiguous_bottom
+ # since we're intentionally ignoring certain ambiguities (via the
+ # filter call above), see if we can now declare the intersection fully
+ # covered even though it is partially ambiguous over Union{} as a type
+ # parameter somewhere
+ minmax = nothing
+ for match in ms
+ m = match.method
+ match.fully_covers || continue
+ if minmax === nothing || morespecific(m.sig, minmax.sig)
+ minmax = m
+ end
+ end
+ if minmax === nothing
+ return true
+ end
+ for match in ms
+ m = match.method
+ m === minmax && continue
+ if match.fully_covers
+ if !morespecific(minmax.sig, m.sig)
+ return true
+ end
+ else
+ if morespecific(m.sig, minmax.sig)
+ return true
+ end
+ end
+ end
+ return false
+ end
+ return true
+ end
+ if !(ti <: m1.sig && ti <: m2.sig)
+ # When type-intersection fails, it's often also not commutative. Thus
+ # checking the reverse may allow detecting ambiguity solutions
+ # correctly in more cases (and faster).
+ ti2 = typeintersect(m2.sig, m1.sig)
+ if ti != ti2
+ inner(ti2) || return false
+ end
  end
+ inner(ti) || return false
+ # otherwise type-intersection reported an ambiguity we couldn't solve
  return true
 end
 

diff --git a/src/gf.c b/src/gf.c
@@ -1456,7 +1456,7 @@ static int invalidate_mt_cache(jl_typemap_entry_t *oldentry, void *closure0)
  }
  if (intersects) {
  if (_jl_debug_method_invalidation) {
- jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)oldentry);
+ jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi);
  jl_value_t *loctag = jl_cstr_to_string("invalidate_mt_cache");
  JL_GC_PUSH1(&loctag);
  jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
@@ -2676,84 +2676,76 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
  int minmax_ambig = 0;
  int all_subtypes = 1;
  if (len > 1) {
- // first try to pre-process the results to find the most specific result that fully covers the input
- // (since we can do this in linear time, and the rest is O(n^2)
- // - first see if this might even be profitable, given the requested output we need to compute
+ // first try to pre-process the results to find the most specific
+ // result that fully covers the input, since we can do this in linear
+ // time, and the rest is O(n^2)
+ // - first find a candidate for the best of these method results
  for (i = 0; i < len; i++) {
  jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
- if (matc->fully_covers != FULLY_COVERS) {
+ if (matc->fully_covers == FULLY_COVERS) {
+ jl_method_t *m = matc->method;
+ if (minmax != NULL) {
+ jl_method_t *minmaxm = minmax->method;
+ if (jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig))
+ continue;
+ }
+ minmax = matc;
+ }
+ else {
  all_subtypes = 0;
- break;
  }
  }
- if (all_subtypes || !include_ambiguous) {
- // - then find a candidate for the best of these method results
- // (If we have a reason to compute this. There's no point in
- // finding the minmax now, if we still need to examine all
- // methods for ambiguities later.)
+ // - then see if it dominated all of the other choices
+ if (minmax != NULL) {
  for (i = 0; i < len; i++) {
  jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
+ if (matc == minmax)
+ break;
  if (matc->fully_covers == FULLY_COVERS) {
  jl_method_t *m = matc->method;
- if (minmax != NULL) {
- jl_method_t *minmaxm = minmax->method;
- if (jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig))
- continue;
- }
- minmax = matc;
- }
- }
- // - then see if it dominated all of the other choices
- if (minmax != NULL) {
- for (i = 0; i < len; i++) {
- jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
- if (matc == minmax)
+ jl_method_t *minmaxm = minmax->method;
+ if (!jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig)) {
+ minmax_ambig = 1;
+ minmax = NULL;
+ *has_ambiguity = 1;
  break;
- if (matc->fully_covers == FULLY_COVERS) {
- jl_method_t *m = matc->method;
- jl_method_t *minmaxm = minmax->method;
- if (!jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig)) {
- minmax_ambig = 1;
- *has_ambiguity = 1;
- if (include_ambiguous)
- minmax = NULL;
- break;
- }
  }
  }
  }
- // - it may even dominate some choices that are not subtypes!
- // move those into the subtype group, where we're filter them out shortly after
- if (!all_subtypes && minmax) {
- jl_method_t *minmaxm = minmax->method;
- all_subtypes = 1;
- for (i = 0; i < len; i++) {
- jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
- if (matc->fully_covers != FULLY_COVERS) {
- jl_method_t *m = matc->method;
- if (jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig))
- matc->fully_covers = SENTINEL; // put a sentinel value here for sorting
- else
- all_subtypes = 0;
- }
+ }
+ // - it may even dominate some choices that are not subtypes!
+ // move those into the subtype group, where we're filter them out shortly after
+ // (and avoid potentially reporting these as an ambiguity)
+ if (!all_subtypes && minmax != NULL && !minmax_ambig) {
+ jl_method_t *minmaxm = minmax->method;
+ if (!include_ambiguous)
+ all_subtypes = 0;
+ for (i = 0; i < len; i++) {
+ jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
+ if (matc->fully_covers != FULLY_COVERS) {
+ jl_method_t *m = matc->method;
+ if (jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig))
+ matc->fully_covers = SENTINEL; // put a sentinel value here for sorting
+ else
+ all_subtypes = 0;
  }
  }
- // - now we might have a fast-return here, if we see that
- // we've already processed all of the possible outputs
- if (all_subtypes) {
- if (minmax_ambig) {
- if (!include_ambiguous) {
- len = 0;
- env.t = jl_an_empty_vec_any;
- }
- }
- else {
- assert(minmax != NULL);
- jl_array_ptr_set(env.t, 0, minmax);
- jl_array_del_end((jl_array_t*)env.t, len - 1);
- len = 1;
+ }
+ // - now we might have a fast-return here, if we see that
+ // we've already processed all of the possible outputs
+ if (all_subtypes) {
+ if (minmax_ambig) {
+ if (!include_ambiguous) {
+ len = 0;
+ env.t = jl_an_empty_vec_any;
  }
  }
+ else {
+ assert(minmax != NULL);
+ jl_array_ptr_set(env.t, 0, minmax);
+ jl_array_del_end((jl_array_t*)env.t, len - 1);
+ len = 1;
+ }
  }
  }
  if (len > 1) {
@@ -2765,7 +2757,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
  env.matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
  jl_method_t *m = env.matc->method;
  int subt = env.matc->fully_covers != NOT_FULLY_COVERS;
- if (minmax != NULL && subt) {
+ if ((minmax != NULL || minmax_ambig) && !include_ambiguous && subt) {
  continue; // already the biggest
  }
  for (j = 0; j < i; j++) {
@@ -2786,7 +2778,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
  // we stopped early with just having all non-subtypes before all
  // subtypes, but the case on the boundary might be wrongly placed:
  // check for that now
- if (!minmax) {
+ if (minmax == NULL && !minmax_ambig) {
  for (i = 0; i < len; i++) {
  jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
  if (matc->fully_covers == FULLY_COVERS)
@@ -2812,17 +2804,9 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
  }
  char *skip = (char*)alloca(len);
  memset(skip, 0, len);
- // since we had a minmax method, now may now be able to cleanup some of our sort result
- if (minmax_ambig && !include_ambiguous) {
- for (i = 0; i < len; i++) {
- jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
- if (matc->fully_covers != NOT_FULLY_COVERS) {
- skip[i] = 1;
- }
- }
- }
- else if (minmax != NULL) {
- assert(all_subtypes || !include_ambiguous);
+ // if we had a minmax method (any subtypes), now may now be able to
+ // quickly cleanup some of our sort result
+ if (minmax != NULL || (minmax_ambig && !include_ambiguous)) {
  for (i = 0; i < len; i++) {
  jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
  if (minmax != matc && matc->fully_covers != NOT_FULLY_COVERS) {
@@ -2855,9 +2839,29 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
  disjoint = 0;
  ambig_groupid[i] = j - 1; // ambiguity covering range [i:j)
  }
- else if (subt || subt2 || !jl_has_empty_intersection(m->sig, m2->sig)) {
- if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig))
- ambig_groupid[i] = j - 1; // ambiguity covering range [i:j)
+ else {
+ jl_value_t *ti;
+ if (subt)
+ ti = (jl_value_t*)matc2->spec_types;
+ else if (subt2)
+ ti = (jl_value_t*)matc->spec_types;
+ else
+ ti = env.match.ti = jl_type_intersection((jl_value_t*)matc->spec_types, (jl_value_t*)matc2->spec_types);
+ if (ti != jl_bottom_type && !jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) {
+ // m and m2 are ambiguous, but let's see if we can find another method (m3)
+ // that dominates their intersection, and means we can ignore this
+ size_t k;
+ for (k = j; k > 0; k--) {
+ jl_method_match_t *matc3 = (jl_method_match_t*)jl_array_ptr_ref(env.t, k - 1);
+ jl_method_t *m3 = matc3->method;
+ if (jl_subtype((jl_value_t*)ti, m3->sig)
+ && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m->sig)
+ && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m2->sig))
+ break;
+ }
+ if (k == 0)
+ ambig_groupid[i] = j - 1; // ambiguity covering range [i:j)
+ }
  disjoint = 0;
  }
  }
@@ -2933,20 +2937,17 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
  }
  }
  // Compute whether anything could be ambiguous by seeing if any two
- // methods in the result are in the same ambiguity group.
- for (i = 0; i < len; i++) {
- if (!skip[i]) {
- uint32_t agid = ambig_groupid[i];
- for (; i < len; i++) {
- if (!skip[i]) {
- if (agid == ambig_groupid[i]) {
- *has_ambiguity = 1;
- break;
- }
- agid = ambig_groupid[i];
+ // remaining methods in the result are in the same ambiguity group.
+ if (len > 0) {
+ uint32_t agid = ambig_groupid[0];
+ for (i = 1; i < len; i++) {
+ if (!skip[i]) {
+ if (agid == ambig_groupid[i]) {
+ *has_ambiguity = 1;
+ break;
  }
+ agid = ambig_groupid[i];
  }
- break;
  }
  }
  // If we're only returning possible matches, now filter out any method