Skip to content

Commit

Permalink
Merge pull request #26839 from JuliaLang/jn/iddict-delete
Browse files Browse the repository at this point in the history
 IdDict: support deletion, and support `nothing` used as a key
  • Loading branch information
vtjnash committed Apr 23, 2018
2 parents 692408c + 5e57c21 commit c0a278c
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 90 deletions.
16 changes: 14 additions & 2 deletions base/libuv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,20 @@ disassociate_julia_struct(handle::Ptr{Cvoid}) =
# A dict of all libuv handles that are being waited on somewhere in the system
# and should thus not be garbage collected
const uvhandles = IdDict()
preserve_handle(x) = uvhandles[x] = get(uvhandles,x,0)::Int+1
unpreserve_handle(x) = (v = uvhandles[x]::Int; v == 1 ? pop!(uvhandles,x) : (uvhandles[x] = v-1); nothing)
function preserve_handle(x)
v = get(uvhandles, x, 0)::Int
uvhandles[x] = v + 1
nothing
end
function unpreserve_handle(x)
v = uvhandles[x]::Int
if v == 1
pop!(uvhandles, x)
else
uvhandles[x] = v - 1
end
nothing
end

## Libuv error handling ##

Expand Down
184 changes: 96 additions & 88 deletions src/table.c
Original file line number Diff line number Diff line change
@@ -1,30 +1,29 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#define hash_size(h) (jl_array_len(h)/2)
#define hash_size(h) (jl_array_len(h) / 2)

// compute empirical max-probe for a given size
#define max_probe(size) ((size)<=1024 ? 16 : (size)>>6)
#define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6)

#define keyhash(k) jl_object_id(k)
#define h2index(hv,sz) (size_t)(((hv) & ((sz)-1))*2)
#define keyhash(k) jl_object_id(k)
#define h2index(hv, sz) (size_t)(((hv) & ((sz)-1)) * 2)

static void **jl_table_lookup_bp(jl_array_t **pa, void *key, int *inserted);
static int jl_table_assign_bp(jl_array_t **pa, void *key, void *val);

JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz)
{
// Assume *pa don't need a write barrier
// pa doesn't have to be a GC slot but *pa needs to be rooted
size_t sz = jl_array_len(a);
size_t i;
void **ol = (void**)a->data;
void **ol = (void **)a->data;
jl_array_t *newa = jl_alloc_vec_any(newsz);
// keep the original array in the original slot since we need `ol`
// to be valid in the loop below.
JL_GC_PUSH1(&newa);
for(i=0; i < sz; i+=2) {
if (ol[i+1] != NULL) {
(*jl_table_lookup_bp(&newa, ol[i], NULL)) = ol[i+1];
jl_gc_wb(newa, ol[i+1]);
for (i = 0; i < sz; i += 2) {
if (ol[i + 1] != NULL) {
jl_table_assign_bp(&newa, ol[i], ol[i + 1]);
// it is however necessary here because allocation
// can (and will) occur in a recursive call inside table_lookup_bp
}
Expand All @@ -37,77 +36,87 @@ JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz)
return newa;
}

static void **jl_table_lookup_bp(jl_array_t **pa, void *key, int *inserted)
static int jl_table_assign_bp(jl_array_t **pa, void *key, void *val)
{
// pa points to a **rooted** gc frame slot
uint_t hv;
jl_array_t *a = *pa;
size_t orig, index, iter;
size_t orig, index, iter, empty_slot;
size_t newsz, sz = hash_size(a);
assert(sz >= 1);
size_t maxprobe = max_probe(sz);
void **tab = (void**)a->data;

if (inserted)
*inserted = 0;

hv = keyhash((jl_value_t*)key);
retry_bp:
iter = 0;
index = h2index(hv,sz);
sz *= 2;
orig = index;

do {
if (tab[index+1] == NULL) {
tab[index] = key;
if (inserted)
*inserted = 1;
void **tab = (void **)a->data;

hv = keyhash((jl_value_t *)key);
while (1) {
iter = 0;
index = h2index(hv, sz);
sz *= 2;
orig = index;
empty_slot = -1;

do {
if (tab[index] == NULL) {
if (empty_slot == -1)
empty_slot = index;
break;
}
if (jl_egal((jl_value_t *)key, (jl_value_t *)tab[index])) {
if (tab[index + 1] != NULL) {
tab[index + 1] = val;
jl_gc_wb(a, val);
return 0;
}
// `nothing` is our sentinel value for deletion, so need to keep searching if it's also our search key
assert(key == jl_nothing);
if (empty_slot == -1)
empty_slot = index;
}
if (empty_slot == -1 && tab[index + 1] == NULL) {
assert(tab[index] == jl_nothing);
empty_slot = index;
}

index = (index + 2) & (sz - 1);
iter++;
} while (iter <= maxprobe && index != orig);

if (empty_slot != -1) {
tab[empty_slot] = key;
jl_gc_wb(a, key);
return &tab[index+1];
tab[empty_slot + 1] = val;
jl_gc_wb(a, val);
return 1;
}

if (jl_egal((jl_value_t*)key, (jl_value_t*)tab[index]))
return &tab[index+1];

index = (index+2) & (sz-1);
iter++;
if (iter > maxprobe)
break;
} while (index != orig);

/* table full */
/* quadruple size, rehash, retry the insert */
/* it's important to grow the table really fast; otherwise we waste */
/* lots of time rehashing all the keys over and over. */
sz = jl_array_len(a);
if (sz >= (1<<19) || (sz <= (1<<8)))
newsz = sz<<1;
else if (sz <= HT_N_INLINE)
newsz = HT_N_INLINE;
else
newsz = sz<<2;
*pa = jl_idtable_rehash(*pa, newsz);

a = *pa;
tab = (void**)a->data;
sz = hash_size(a);
maxprobe = max_probe(sz);

goto retry_bp;

return NULL;
/* table full */
/* quadruple size, rehash, retry the insert */
/* it's important to grow the table really fast; otherwise we waste */
/* lots of time rehashing all the keys over and over. */
sz = jl_array_len(a);
if (sz >= (1 << 19) || (sz <= (1 << 8)))
newsz = sz << 1;
else if (sz <= HT_N_INLINE)
newsz = HT_N_INLINE;
else
newsz = sz << 2;
*pa = jl_idtable_rehash(*pa, newsz);

a = *pa;
tab = (void **)a->data;
sz = hash_size(a);
maxprobe = max_probe(sz);
}
}

/* returns bp if key is in hash, otherwise NULL */
/* if return is non-NULL and *bp == NULL then key was deleted */
static void **jl_table_peek_bp(jl_array_t *a, void *key)
{
size_t sz = hash_size(a);
assert(sz >= 1);
size_t maxprobe = max_probe(sz);
void **tab = (void**)a->data;
uint_t hv = keyhash((jl_value_t*)key);
void **tab = (void **)a->data;
uint_t hv = keyhash((jl_value_t *)key);
size_t index = h2index(hv, sz);
sz *= 2;
size_t orig = index;
Expand All @@ -116,26 +125,28 @@ static void **jl_table_peek_bp(jl_array_t *a, void *key)
do {
if (tab[index] == NULL)
return NULL;
if (jl_egal((jl_value_t*)key, (jl_value_t*)tab[index]))
return &tab[index+1];
if (jl_egal((jl_value_t *)key, (jl_value_t *)tab[index])) {
if (tab[index + 1] != NULL)
return &tab[index + 1];
// `nothing` is our sentinel value for deletion, so need to keep searching if it's also our search key
assert(key == jl_nothing);
}

index = (index+2) & (sz-1);
index = (index + 2) & (sz - 1);
iter++;
if (iter > maxprobe)
break;
} while (index != orig);
} while (iter <= maxprobe && index != orig);

return NULL;
}

JL_DLLEXPORT
jl_array_t *jl_eqtable_put(jl_array_t *h, void *key, void *val, int *inserted)
jl_array_t *jl_eqtable_put(jl_array_t *h, void *key, void *val, int *p_inserted)
{
JL_GC_PUSH1(&h);
// &h may be assigned to in jl_idtable_rehash so it need to be rooted
void **bp = jl_table_lookup_bp(&h, key, inserted);
*bp = val;
jl_gc_wb(h, val);
int inserted = jl_table_assign_bp(&h, key, val);
if (p_inserted)
*p_inserted = inserted;
JL_GC_POP();
return h;
}
Expand All @@ -144,36 +155,33 @@ JL_DLLEXPORT
jl_value_t *jl_eqtable_get(jl_array_t *h, void *key, jl_value_t *deflt)
{
void **bp = jl_table_peek_bp(h, key);
if (bp == NULL || *bp == NULL)
return deflt;
return (jl_value_t*)*bp;
return (bp == NULL) ? deflt : (jl_value_t *)*bp;
}

JL_DLLEXPORT
jl_value_t *jl_eqtable_pop(jl_array_t *h, void *key, jl_value_t *deflt, int *found)
{
void **bp = jl_table_peek_bp(h, key);
if (bp == NULL || *bp == NULL) {
if (found)
*found = 0;
return deflt;
}
if (found)
*found = 1;
jl_value_t *val = (jl_value_t*)*bp;
*(bp-1) = jl_nothing; // clear the key
*found = (bp != NULL);
if (bp == NULL)
return deflt;
jl_value_t *val = (jl_value_t *)*bp;
*(bp - 1) = jl_nothing; // clear the key
*bp = NULL;
return val;
}

JL_DLLEXPORT
size_t jl_eqtable_nextind(jl_array_t *t, size_t i)
{
if (i&1) i++;
if (i & 1)
i++;
size_t alen = jl_array_dim0(t);
while (i < alen && ((void**)t->data)[i+1] == NULL)
i+=2;
if (i >= alen) return (size_t)-1;
while (i < alen && ((void **)t->data)[i + 1] == NULL)
i += 2;
if (i >= alen)
return (size_t)-1;
return i;
}

Expand Down
24 changes: 24 additions & 0 deletions test/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,30 @@ end
@test_throws DomainError IdDict((sqrt(p[1]), sqrt(p[2])) for p in zip(-1:2, -1:2))
end

@testset "issue #26833, deletion from IdDict" begin
d = IdDict()
i = 1
# generate many hash collisions
while length(d) < 32 # expected to occur at i <≈ 2^16 * 2^5
if objectid(i) % UInt16 == 0x1111
push!(d, i => true)
end
i += 1
end
k = collect(keys(d))
@test haskey(d, k[1])
delete!(d, k[1])
@test length(d) == 31
@test !haskey(d, k[1])
@test haskey(d, k[end])
push!(d, k[end] => false)
@test length(d) == 31
@test haskey(d, k[end])
@test !pop!(d, k[end])
@test !haskey(d, k[end])
@test length(d) == 30
end


@testset "Issue #7944" begin
d = Dict{Int,Int}()
Expand Down

0 comments on commit c0a278c

Please sign in to comment.