Skip to content

Commit

Permalink
add PersistentDict based on a HAMT (#51164)
Browse files Browse the repository at this point in the history
The implementation is based on a [Hash Array Mapped Trie
(HAMT)](https://en.wikipedia.org/wiki/Hash_array_mapped_trie)
following [Bagwell
(2000)](https://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf).

A HAMT uses a fixed branching factor (commonly 32) together with each
node being sparse.
In order to search for an entry we take the hash of the key and chunk it
up into blocks,
with a branching factor of 32 each block is 5 bits. We use those 5 bits
to calculate the
index inside the node and use a bitmap within the node to keep track if
an element is
already set. This makes search a `log(32, n)` operation.

Persistency is implemented by path-copying. When we insert/delete a
value into the HAMT
we copy each node along the path into a new HAMT, all other nodes are
shared with
the previous HAMT.

A notable implementation choice is that I didn't add a (resizable)
root table.
Normally this root table is dense and uses the first `t` bits to
calculate an index
within. This makes large HAMTs a bit cheaper since the root table
effectively folds
multiple lookup steps into one. It does hurt persistent use-cases since
path-copying
means that we also copy the root node/table.

Importantly the HAMT itself is not immutable/persistent, the use of it
as part of the
`PersistentDict` is. Direct mutation of the underlying data breaks the
persistency
invariants. One could use the HAMT to implement a non-persistent
dictionary (or
other data structures).

As an interesting side-note we could use a related data structure,
[Ctrie](https://lamp.epfl.ch/~prokopec/ctries-snapshot.pdf),
to implement a concurrent lock-free dictionary. Ctries also support
`O(1)` snapshotting
so we could replace the HAMT used here with a Ctrie.
  • Loading branch information
vchuravy committed Sep 7, 2023
1 parent 27fa5de commit 8599e2f
Show file tree
Hide file tree
Showing 4 changed files with 527 additions and 0 deletions.
142 changes: 142 additions & 0 deletions base/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -869,3 +869,145 @@ empty(::ImmutableDict, ::Type{K}, ::Type{V}) where {K, V} = ImmutableDict{K,V}()
# A `Pair{K,V}` element type maps onto an empty dictionary of the same kind as
# `c`, keyed by `K` with values of type `V`.
function _similar_for(c::AbstractDict, ::Type{Pair{K,V}}, itr, isz, len) where {K, V}
    return empty(c, K, V)
end

# Any element type other than a `Pair` is rejected with an `ArgumentError`.
function _similar_for(c::AbstractDict, ::Type{T}, itr, isz, len) where {T}
    msg = "for AbstractDicts, similar requires an element type of Pair;\n if calling map, consider a comprehension instead"
    throw(ArgumentError(msg))
end


include("hamt.jl")
using .HashArrayMappedTries
const HAMT = HashArrayMappedTries

# Immutable dictionary type whose only field is the HAMT that stores its entries.
struct PersistentDict{K,V} <: AbstractDict{K,V}
# Trie holding the entries of this dictionary.
trie::HAMT.HAMT{K,V}
end

"""
    PersistentDict

`PersistentDict` is a dictionary implemented as a hash array mapped trie,
which is optimal for situations where you need persistence: each operation
returns a new dictionary separate from the previous one, but the underlying
implementation is space-efficient and may share storage across multiple
separate dictionaries.

    PersistentDict(KV::Pair)

# Examples

```jldoctest
julia> dict = Base.PersistentDict(:a=>1)
Base.PersistentDict{Symbol, Int64} with 1 entry:
  :a => 1

julia> dict2 = Base.delete(dict, :a)
Base.PersistentDict{Symbol, Int64}()

julia> dict3 = Base.PersistentDict(dict, :a=>2)
Base.PersistentDict{Symbol, Int64} with 1 entry:
  :a => 2
```
"""
PersistentDict

# Build a dictionary around a trie created by the zero-argument HAMT constructor.
function PersistentDict{K,V}() where {K,V}
    return PersistentDict(HAMT.HAMT{K,V}())
end

# Build a dictionary from a single `key => value` pair.
function PersistentDict(KV::Pair{K,V}) where {K,V}
    return PersistentDict(HAMT.HAMT(KV.first, KV.second))
end

# Return a dictionary derived from `dict` with `pair` added; the pair is
# unpacked and forwarded to the `(dict, key, val)` method.
function PersistentDict(dict::PersistentDict, pair::Pair)
    return PersistentDict(dict, pair.first, pair.second)
end
# Return a new `PersistentDict` containing the entries of `dict` plus
# `key => val`.
#
# The path lookup is requested with the `persistent` flag set, and the insert is
# applied to the node that lookup returns. The new dictionary wraps the `top`
# node returned by the same lookup.
# NOTE(review): presumably `HAMT.path` copies the nodes along the path when the
# flag is set, so `dict` itself is left untouched — confirm in hamt.jl.
function PersistentDict(dict::PersistentDict{K,V}, key::K, val::V) where {K,V}
    found, present, node, slot, bitidx, root, hashstate =
        HAMT.path(dict.trie, key, hash(key), #=persistent=# true)
    HAMT.insert!(found, present, node, slot, bitidx, hashstate, val)
    return PersistentDict(root)
end

# Build a dictionary from one or more pairs: start from the first pair, then
# add each remaining pair in order, producing a new dictionary at every step.
function PersistentDict(kv::Pair, rest::Pair...)
    acc = PersistentDict(kv)
    for entry in rest
        acc = PersistentDict(acc, entry.first, entry.second)
    end
    return acc
end

# Elements produced when iterating a `PersistentDict{K,V}` are `Pair{K,V}`.
function eltype(::PersistentDict{K,V}) where {K,V}
    return Pair{K,V}
end

# Test whether the pair `key_val` is contained in `dict`.
#
# Returns `true` only when the key is found and `valcmp(val, stored)` holds,
# where `val` is the queried value and `stored` the value kept in the trie.
# `valcmp` defaults to `==`.
function in(key_val::Pair{K,V}, dict::PersistentDict{K,V}, valcmp=(==)) where {K,V}
    root = dict.trie
    HAMT.islevel_empty(root) && return false

    key, val = key_val
    found, present, node, idx, _, _, _ = HAMT.path(root, key, hash(key))
    (found && present) || return false

    stored = @inbounds node.data[idx]::HAMT.Leaf{K,V}
    if valcmp(val, stored.val)
        return true
    end
    return false
end

# Report whether `key` has an entry in `dict`. Only the first two results of
# the path lookup (`found` and `present`) are needed.
function haskey(dict::PersistentDict{K}, key::K) where K
    found, present = HAMT.path(dict.trie, key, hash(key))
    return found && present
end

# Return the value stored for `key`, throwing `KeyError(key)` when the trie's
# top level is empty or the key has no entry.
function getindex(dict::PersistentDict{K,V}, key::K) where {K,V}
    root = dict.trie
    if !HAMT.islevel_empty(root)
        found, present, node, idx = HAMT.path(root, key, hash(key))
        if found && present
            entry = @inbounds node.data[idx]::HAMT.Leaf{K,V}
            return entry.val
        end
    end
    throw(KeyError(key))
end

# Return the value stored for `key` in `dict`, or `default` when the key has no
# entry.
#
# `default` is deliberately left untyped so that any fallback value can be
# passed (for example `nothing` or a sentinel object), not only values of the
# dictionary's value type `V`. The result is therefore either a `V` or
# `default` itself.
function get(dict::PersistentDict{K,V}, key::K, default) where {K,V}
    trie = dict.trie
    # Fast path: nothing to look up when the top level holds no entries.
    if HAMT.islevel_empty(trie)
        return default
    end
    h = hash(key)
    found, present, trie, i, _, _, _ = HAMT.path(trie, key, h)
    if found && present
        leaf = @inbounds trie.data[i]::HAMT.Leaf{K,V}
        return leaf.val
    end
    return default
end

# Return the value stored for `key` in `dict`, or the result of calling
# `default()` when the key has no entry. `default` is only invoked on a miss.
function get(default::Callable, dict::PersistentDict{K,V}, key::K) where {K,V}
    trie = dict.trie
    # Fast path for an empty top level. The fallback must be *called* here,
    # exactly as on the regular miss path below; returning the callable itself
    # would hand the caller a function instead of a value.
    if HAMT.islevel_empty(trie)
        return default()
    end
    h = hash(key)
    found, present, trie, i, _, _, _ = HAMT.path(trie, key, h)
    if found && present
        leaf = @inbounds trie.data[i]::HAMT.Leaf{K,V}
        return leaf.val
    end
    return default()
end

# Iteration is delegated to the underlying trie; `state` is passed through
# unchanged and defaults to `nothing` for the first step.
function iterate(dict::PersistentDict, state=nothing)
    return HAMT.iterate(dict.trie, state)
end

# Return a new `PersistentDict` without an entry for `key`.
#
# The path lookup is requested with the `persistent` flag set. When the key is
# present, its slot is removed from the returned node's data and the
# corresponding bitmap position is unset. In both cases the result wraps the
# `top` node returned by the lookup.
# NOTE(review): presumably the lookup copies the nodes along the path, so the
# mutation below never touches `dict` — confirm in hamt.jl.
function delete(dict::PersistentDict{K}, key::K) where K
    found, present, node, idx, bitidx, root, _ =
        HAMT.path(dict.trie, key, hash(key), #=persistent=# true)
    if !(found && present)
        return PersistentDict(root)
    end
    deleteat!(node.data, idx)
    HAMT.unset!(node, bitidx)
    return PersistentDict(root)
end

# Number of entries, as reported by the underlying trie.
function length(dict::PersistentDict)
    return HAMT.length(dict.trie)
end

# Whether the dictionary has no entries, as reported by the underlying trie.
function isempty(dict::PersistentDict)
    return HAMT.isempty(dict.trie)
end

# Create a new `PersistentDict` with the requested key and value types using
# the zero-argument constructor; the first argument is only used for dispatch.
function empty(::PersistentDict, ::Type{K}, ::Type{V}) where {K, V}
    return PersistentDict{K, V}()
end
Loading

0 comments on commit 8599e2f

Please sign in to comment.