Skip to content

Commit

Permalink
IdDict: support deletion, and support nothing used as a key
Browse files Browse the repository at this point in the history
previously, if we deleted one key or added `nothing` as a key,
we might lose some of the other entries too (until rehash)

fix #26833
  • Loading branch information
vtjnash committed Apr 23, 2018
1 parent 55b3fc2 commit 5e57c21
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 71 deletions.
16 changes: 14 additions & 2 deletions base/libuv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,20 @@ disassociate_julia_struct(handle::Ptr{Cvoid}) =
# A dict of all libuv handles that are being waited on somewhere in the system
# and should thus not be garbage collected
const uvhandles = IdDict()
preserve_handle(x) = uvhandles[x] = get(uvhandles,x,0)::Int+1
unpreserve_handle(x) = (v = uvhandles[x]::Int; v == 1 ? pop!(uvhandles,x) : (uvhandles[x] = v-1); nothing)
function preserve_handle(x)
v = get(uvhandles, x, 0)::Int
uvhandles[x] = v + 1
nothing
end
function unpreserve_handle(x)
v = uvhandles[x]::Int
if v == 1
pop!(uvhandles, x)
else
uvhandles[x] = v - 1
end
nothing
end

## Libuv error handling ##

Expand Down
144 changes: 75 additions & 69 deletions src/table.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#define keyhash(k) jl_object_id(k)
#define h2index(hv, sz) (size_t)(((hv) & ((sz)-1)) * 2)

static void **jl_table_lookup_bp(jl_array_t **pa, void *key, int *inserted);
static int jl_table_assign_bp(jl_array_t **pa, void *key, void *val);

JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz)
{
Expand All @@ -23,8 +23,7 @@ JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz)
JL_GC_PUSH1(&newa);
for (i = 0; i < sz; i += 2) {
if (ol[i + 1] != NULL) {
(*jl_table_lookup_bp(&newa, ol[i], NULL)) = ol[i + 1];
jl_gc_wb(newa, ol[i + 1]);
jl_table_assign_bp(&newa, ol[i], ol[i + 1]);
// it is however necessary here because allocation
// can (and will) occur in a recursive call inside table_lookup_bp
}
Expand All @@ -37,70 +36,80 @@ JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz)
return newa;
}

static void **jl_table_lookup_bp(jl_array_t **pa, void *key, int *inserted)
static int jl_table_assign_bp(jl_array_t **pa, void *key, void *val)
{
// pa points to a **rooted** gc frame slot
uint_t hv;
jl_array_t *a = *pa;
size_t orig, index, iter;
size_t orig, index, iter, empty_slot;
size_t newsz, sz = hash_size(a);
assert(sz >= 1);
size_t maxprobe = max_probe(sz);
void **tab = (void **)a->data;

if (inserted)
*inserted = 0;

hv = keyhash((jl_value_t *)key);
retry_bp:
iter = 0;
index = h2index(hv, sz);
sz *= 2;
orig = index;

do {
if (tab[index + 1] == NULL) {
tab[index] = key;
if (inserted)
*inserted = 1;
while (1) {
iter = 0;
index = h2index(hv, sz);
sz *= 2;
orig = index;
empty_slot = -1;

do {
if (tab[index] == NULL) {
if (empty_slot == -1)
empty_slot = index;
break;
}
if (jl_egal((jl_value_t *)key, (jl_value_t *)tab[index])) {
if (tab[index + 1] != NULL) {
tab[index + 1] = val;
jl_gc_wb(a, val);
return 0;
}
// `nothing` is our sentinel value for deletion, so need to keep searching if it's also our search key
assert(key == jl_nothing);
if (empty_slot == -1)
empty_slot = index;
}
if (empty_slot == -1 && tab[index + 1] == NULL) {
assert(tab[index] == jl_nothing);
empty_slot = index;
}

index = (index + 2) & (sz - 1);
iter++;
} while (iter <= maxprobe && index != orig);

if (empty_slot != -1) {
tab[empty_slot] = key;
jl_gc_wb(a, key);
return &tab[index + 1];
tab[empty_slot + 1] = val;
jl_gc_wb(a, val);
return 1;
}

if (jl_egal((jl_value_t *)key, (jl_value_t *)tab[index]))
return &tab[index + 1];

index = (index + 2) & (sz - 1);
iter++;
if (iter > maxprobe)
break;
} while (index != orig);

/* table full */
/* quadruple size, rehash, retry the insert */
/* it's important to grow the table really fast; otherwise we waste */
/* lots of time rehashing all the keys over and over. */
sz = jl_array_len(a);
if (sz >= (1 << 19) || (sz <= (1 << 8)))
newsz = sz << 1;
else if (sz <= HT_N_INLINE)
newsz = HT_N_INLINE;
else
newsz = sz << 2;
*pa = jl_idtable_rehash(*pa, newsz);

a = *pa;
tab = (void **)a->data;
sz = hash_size(a);
maxprobe = max_probe(sz);

goto retry_bp;

return NULL;
/* table full */
/* quadruple size, rehash, retry the insert */
/* it's important to grow the table really fast; otherwise we waste */
/* lots of time rehashing all the keys over and over. */
sz = jl_array_len(a);
if (sz >= (1 << 19) || (sz <= (1 << 8)))
newsz = sz << 1;
else if (sz <= HT_N_INLINE)
newsz = HT_N_INLINE;
else
newsz = sz << 2;
*pa = jl_idtable_rehash(*pa, newsz);

a = *pa;
tab = (void **)a->data;
sz = hash_size(a);
maxprobe = max_probe(sz);
}
}

/* returns bp if key is in hash, otherwise NULL */
/* if return is non-NULL and *bp == NULL then key was deleted */
static void **jl_table_peek_bp(jl_array_t *a, void *key)
{
size_t sz = hash_size(a);
Expand All @@ -116,26 +125,28 @@ static void **jl_table_peek_bp(jl_array_t *a, void *key)
do {
if (tab[index] == NULL)
return NULL;
if (jl_egal((jl_value_t *)key, (jl_value_t *)tab[index]))
return &tab[index + 1];
if (jl_egal((jl_value_t *)key, (jl_value_t *)tab[index])) {
if (tab[index + 1] != NULL)
return &tab[index + 1];
// `nothing` is our sentinel value for deletion, so need to keep searching if it's also our search key
assert(key == jl_nothing);
}

index = (index + 2) & (sz - 1);
iter++;
if (iter > maxprobe)
break;
} while (index != orig);
} while (iter <= maxprobe && index != orig);

return NULL;
}

JL_DLLEXPORT
jl_array_t *jl_eqtable_put(jl_array_t *h, void *key, void *val, int *inserted)
jl_array_t *jl_eqtable_put(jl_array_t *h, void *key, void *val, int *p_inserted)
{
JL_GC_PUSH1(&h);
// &h may be assigned to in jl_idtable_rehash so it need to be rooted
void **bp = jl_table_lookup_bp(&h, key, inserted);
*bp = val;
jl_gc_wb(h, val);
int inserted = jl_table_assign_bp(&h, key, val);
if (p_inserted)
*p_inserted = inserted;
JL_GC_POP();
return h;
}
Expand All @@ -144,22 +155,17 @@ JL_DLLEXPORT
jl_value_t *jl_eqtable_get(jl_array_t *h, void *key, jl_value_t *deflt)
{
void **bp = jl_table_peek_bp(h, key);
if (bp == NULL || *bp == NULL)
return deflt;
return (jl_value_t *)*bp;
return (bp == NULL) ? deflt : (jl_value_t *)*bp;
}

JL_DLLEXPORT
jl_value_t *jl_eqtable_pop(jl_array_t *h, void *key, jl_value_t *deflt, int *found)
{
void **bp = jl_table_peek_bp(h, key);
if (bp == NULL || *bp == NULL) {
if (found)
*found = 0;
return deflt;
}
if (found)
*found = 1;
*found = (bp != NULL);
if (bp == NULL)
return deflt;
jl_value_t *val = (jl_value_t *)*bp;
*(bp - 1) = jl_nothing; // clear the key
*bp = NULL;
Expand Down
24 changes: 24 additions & 0 deletions test/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,30 @@ end
@test_throws DomainError IdDict((sqrt(p[1]), sqrt(p[2])) for p in zip(-1:2, -1:2))
end

@testset "issue #26833, deletion from IdDict" begin
d = IdDict()
i = 1
# generate many hash collisions
while length(d) < 32 # expected to occur at i <≈ 2^16 * 2^5
if objectid(i) % UInt16 == 0x1111
push!(d, i => true)
end
i += 1
end
k = collect(keys(d))
@test haskey(d, k[1])
delete!(d, k[1])
@test length(d) == 31
@test !haskey(d, k[1])
@test haskey(d, k[end])
push!(d, k[end] => false)
@test length(d) == 31
@test haskey(d, k[end])
@test !pop!(d, k[end])
@test !haskey(d, k[end])
@test length(d) == 30
end


@testset "Issue #7944" begin
d = Dict{Int,Int}()
Expand Down

0 comments on commit 5e57c21

Please sign in to comment.