Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Robindict with metadata #890

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
RobinDict storing metadata
  • Loading branch information
eulerkochy authored and Koustav Chowdhury committed Jan 5, 2024
commit 1096f00b2b0f24b7a050ea000a3fd5cb6df05cf3
157 changes: 60 additions & 97 deletions src/robin_dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ RobinDict{String, Int64} with 2 entries:
```
"""
mutable struct RobinDict{K,V} <: AbstractDict{K,V}
hashes::Vector{UInt32}
meta::Vector{UInt32}
keys::Array{K,1}
vals::Array{V,1}
count::Int
Expand All @@ -40,7 +40,7 @@ function RobinDict{K, V}() where {K, V}
end

# Copy constructor: builds an independent RobinDict from `d` by copying its metadata,
# key and value arrays and carrying over `count` and `idxfloor` unchanged.
# Only the post-change field `meta` is copied; the stale call that copied the old
# `hashes` field has been dropped.
function RobinDict{K, V}(d::RobinDict{K, V}) where {K, V}
    return RobinDict{K, V}(copy(d.meta), copy(d.keys), copy(d.vals), d.count, d.idxfloor)
end

function RobinDict{K,V}(kv) where V where K
Expand Down Expand Up @@ -83,15 +83,22 @@ function RobinDict(kv)
end
end

hash_key(key) = (hash(key)%UInt32) | 0x80000000
# Slot occupancy predicates. A metadata word of zero is the "empty slot" sentinel;
# any non-zero word marks the slot at `index` as holding an entry.
@propagate_inbounds isslotfilled(h::RobinDict, index) = h.meta[index] != zero(UInt32)
@propagate_inbounds isslotempty(h::RobinDict, index) = h.meta[index] == zero(UInt32)

# Truncate `hash(key)` to 32 bits and force bit 23 on.
#
# `make_meta` keeps only the low 24 bits of this value (it shifts left by 8), and an
# all-zero metadata word is what `isslotempty` treats as an empty slot. Without the
# forced bit, a key whose low 24 hash bits are zero and that sits at probe distance 0
# would be stored as meta == 0 and would look like an empty slot. Forcing bit 23 puts a
# 1 in bit 31 of every packed word, so a filled slot is never zero.
# NOTE(review): this costs one bit of hash entropy; it only affects the home-slot
# distribution for tables with 2^24 or more slots.
hash_key(key) = (hash(key) % UInt32) | 0x00800000
# Map `hash` to a 1-based slot index in a table of `sz` slots by masking with `sz - 1`.
# NOTE(review): the mask only selects a contiguous range of low bits when `sz` is a
# power of two — presumably guaranteed by the table-sizing code; confirm with callers.
function desired_index(hash, sz)
    mask = sz - 1
    return (hash & mask) + 1
end

function calculate_distance(h::RobinDict{K, V}, index) where {K, V}
@assert isslotfilled(h, index)
sz = length(h.keys)
@inbounds index_init = desired_index(h.hashes[index], sz)
return (index - index_init + sz) & (sz - 1)
end
# Pack a hash and a probe distance ("dibs") into one UInt32 metadata word:
# the low 24 bits of `hash` go into bits 8..31 and `dibs` goes into bits 0..7.
#
# Throws `ArgumentError` when `dibs` exceeds 255. The distance field is only 8 bits
# wide, so a larger value would spill into the hash bits and `dibs_meta` would later
# read back a wrong (wrapped) distance. A negative `dibs` still raises `InexactError`
# from the `UInt32` conversion, as before.
function make_meta(hash::UInt32, dibs::Int)
    dibs <= 0xff ||
        throw(ArgumentError("probe distance $dibs does not fit in the 8-bit metadata field"))
    return (hash << 8) | UInt32(dibs)
end

# Extract the hash part of a packed metadata word (everything above the low 8 bits).
function hash_meta(meta::UInt32)
    return meta >>> 8
end

# Extract the probe distance stored in the low 8 bits of a packed metadata word.
function dibs_meta(meta::UInt32)
    return Int(meta & 0x000000ff)
end

# Probe distance of the entry in slot `index`, read from the low byte of its metadata
# word rather than recomputed from the slot position.
@propagate_inbounds function calculate_distance(h::RobinDict, index)
    slot_meta = h.meta[index]
    return dibs_meta(slot_meta)
end

# insert algorithm
function rh_insert!(h::RobinDict{K, V}, key::K, val::V) where {K, V}
Expand All @@ -100,11 +107,11 @@ function rh_insert!(h::RobinDict{K, V}, key::K, val::V) where {K, V}

# table full
@assert h.count != length(h.keys)

ckey, cval, chash = key, val, hash_key(key)
cmeta = make_meta(chash, 0)
sz = length(h.keys)
index_init = desired_index(chash, sz)

index_init = desired_index(hash_meta(cmeta), sz)
index_curr = index_init
probe_distance = 0
probe_current = 0
Expand All @@ -117,13 +124,14 @@ function rh_insert!(h::RobinDict{K, V}, key::K, val::V) where {K, V}
if probe_current > probe_distance
h.vals[index_curr], cval = cval, h.vals[index_curr]
h.keys[index_curr], ckey = ckey, h.keys[index_curr]
h.hashes[index_curr], chash = chash, h.hashes[index_curr]
cmeta >>= 8
h.meta[index_curr], cmeta = make_meta(cmeta, probe_current), h.meta[index_curr]
probe_current = probe_distance
end
probe_current += 1
index_curr = (index_curr & (sz - 1)) + 1
end

@inbounds if isslotfilled(h, index_curr) && isequal(h.keys[index_curr], ckey)
h.vals[index_curr] = cval
return index_curr
Expand All @@ -132,13 +140,15 @@ function rh_insert!(h::RobinDict{K, V}, key::K, val::V) where {K, V}
@inbounds if isslotempty(h, index_curr)
h.count += 1
end


# println(ckey, " ", index_curr, " ", index_init, " ", probe_current)
@inbounds h.vals[index_curr] = cval
@inbounds h.keys[index_curr] = ckey
@inbounds h.hashes[index_curr] = chash

cmeta >>= 8
@inbounds h.meta[index_curr] = make_meta(cmeta, probe_current)

@assert probe_current >= 0

if h.idxfloor == 0
h.idxfloor = index_curr
else
Expand All @@ -147,79 +157,34 @@ function rh_insert!(h::RobinDict{K, V}, key::K, val::V) where {K, V}
return index_curr
end

function rh_insert_for_rehash!(h_new::RobinDict{K, V}, key::K, val::V, hash::UInt32) where {K, V}
# table full
@assert h_new.count != length(h_new.keys)

ckey, cval, chash = key, val, hash
sz = length(h_new.keys)
index_init = desired_index(chash, sz)

index_curr = index_init
probe_distance = 0
probe_current = 0
@inbounds while true
if (isslotempty(h_new, index_curr))
break
end
probe_distance = calculate_distance(h_new, index_curr)

if probe_current > probe_distance
h_new.vals[index_curr], cval = cval, h_new.vals[index_curr]
h_new.keys[index_curr], ckey = ckey, h_new.keys[index_curr]
h_new.hashes[index_curr], chash = chash, h_new.hashes[index_curr]
probe_current = probe_distance
end
probe_current += 1
index_curr = (index_curr & (sz - 1)) + 1
end

@inbounds if isslotempty(h_new, index_curr)
h_new.count += 1
end

@inbounds h_new.vals[index_curr] = cval
@inbounds h_new.keys[index_curr] = ckey
@inbounds h_new.hashes[index_curr] = chash

@assert probe_current >= 0

if h_new.idxfloor == 0
h_new.idxfloor = index_curr
else
h_new.idxfloor = min(h_new.idxfloor, index_curr)
end
return index_curr
end

#rehash! algorithm
function rehash!(h::RobinDict{K,V}, newsz = length(h.keys)) where {K, V}
oldk = h.keys
oldv = h.vals
oldh = h.hashes
oldh = h.meta
sz = length(oldk)
newsz = _tablesz(newsz)
if h.count == 0
resize!(h.keys, newsz)
resize!(h.vals, newsz)
resize!(h.hashes, newsz)
fill!(h.hashes, 0)
resize!(h.meta, newsz)
fill!(h.meta, 0)
h.count = 0
h.idxfloor = 0
return h
end

h.keys = Vector{K}(undef, newsz)
h.vals = Vector{V}(undef, newsz)
h.hashes = zeros(UInt32,newsz)
h.meta = zeros(UInt32,newsz)
h.count = 0
h.idxfloor = 0

for i = 1:sz
@inbounds if oldh[i] != 0
k = oldk[i]
v = oldv[i]
rh_insert_for_rehash!(h, k, v, oldh[i])
rh_insert!(h, k, v)
end
end
return h
Expand All @@ -235,11 +200,7 @@ function Base.sizehint!(d::RobinDict, newsz)
rehash!(d, newsz)
end

Base.@propagate_inbounds isslotfilled(h::RobinDict, index) = (h.hashes[index] != 0)
Base.@propagate_inbounds isslotempty(h::RobinDict, index) = (h.hashes[index] == 0)


function Base.setindex!(h::RobinDict{K,V}, v0, key0) where {K, V}
function setindex!(h::RobinDict{K,V}, v0, key0) where {K, V}
key = convert(K, key0)
isequal(key, key0) || throw(ArgumentError("$key0 is not a valid key for type $K"))
_setindex!(h, key, v0)
Expand All @@ -249,7 +210,7 @@ function _setindex!(h::RobinDict{K,V}, key::K, v0) where {K, V}
v = convert(V, v0)
index = rh_insert!(h, key, v)
@assert index > 0
return h
h
end

"""
Expand All @@ -273,29 +234,28 @@ RobinDict{String, Int64}()
"""
function Base.empty!(h::RobinDict{K,V}) where {K, V}
    # Remember the current slot count so the table keeps its size after clearing.
    sz = length(h.keys)

    # Drop the old contents of every backing array ...
    empty!(h.meta)
    empty!(h.keys)
    empty!(h.vals)

    # ... then restore the original length and mark every slot as empty
    # (a zero metadata word is the empty-slot sentinel).
    resize!(h.keys, sz)
    resize!(h.vals, sz)
    resize!(h.meta, sz)
    fill!(h.meta, 0)

    h.count = 0
    h.idxfloor = 0
    return h
end

function rh_search(h::RobinDict{K, V}, key) where {K, V}
function rh_search(h::RobinDict{K, V}, key::K) where {K, V}
sz = length(h.keys)
chash = hash_key(key)
index = desired_index(chash, sz)
cdibs = 0
cmeta = make_meta(chash, 0)
chash_meta = hash_meta(cmeta)
index = desired_index(chash_meta, sz)
@inbounds while true
if isslotempty(h, index)
return -1
elseif cdibs > calculate_distance(h, index)
return -1
elseif h.hashes[index] == chash && (h.keys[index] === key || isequal(h.keys[index], key))
elseif isequal(h.keys[index], key)
return index
end
index = (index & (sz - 1)) + 1
Expand Down Expand Up @@ -353,7 +313,7 @@ end

function _get!(default::Callable, h::RobinDict{K,V}, key::K) where V where K
index = rh_search(h, key)

index > 0 && return h.vals[index]

v = convert(V, default())
Expand Down Expand Up @@ -458,37 +418,40 @@ end
# backward shift deletion by not keeping any tombstones
# Remove the entry stored in slot `index` using backward-shift deletion (no tombstones).
#
# The caller must pass the index of a filled slot. Every entry in the run that follows
# `index` is moved one slot back and its stored probe distance is decremented by one.
# The run ends at the first empty slot or the first entry whose distance is already
# zero. The vacated slot at the end of the run is reset to empty. Returns `h`.
function rh_delete!(h::RobinDict{K, V}, index) where {K, V}
    @assert index > 0

    sz = length(h.keys)

    # Find `index0`, the first slot after `index` that must NOT be shifted: either an
    # empty slot or an entry that already sits at distance 0.
    index0 = index
    @inbounds while true
        index0 = (index0 & (sz - 1)) + 1
        if isslotempty(h, index0) || iszero(dibs_meta(h.meta[index0]))
            break
        end
    end

    # Backward shift: pull each entry in (index, index0) one slot towards `index`.
    curr = index
    next = (index & (sz - 1)) + 1

    @inbounds while next != index0
        h.vals[curr] = h.vals[next]
        h.keys[curr] = h.keys[next]
        mmeta = h.meta[next]
        mdibs = dibs_meta(mmeta)
        @assert mdibs > 0
        # `make_meta` expects the bare hash and shifts it left by 8 itself. Strip the
        # distance byte first; passing the packed word would shift the hash twice and
        # corrupt the stored hash bits.
        h.meta[curr] = make_meta(hash_meta(mmeta), mdibs - 1)
        curr = next
        next = (next & (sz - 1)) + 1
    end

    # `curr` is now the vacated slot at the end of the run; clear it.
    # The index argument is declared `UInt` (pointer-sized), as in the pre-change code;
    # the C function presumably takes a `size_t`, which `UInt32` does not match on
    # 64-bit platforms.
    isbitstype(K) || isbitsunion(K) ||
        ccall(:jl_arrayunset, Cvoid, (Any, UInt), h.keys, curr-1)
    isbitstype(V) || isbitsunion(V) ||
        ccall(:jl_arrayunset, Cvoid, (Any, UInt), h.vals, curr-1)
    @inbounds h.meta[curr] = zero(UInt32)

    h.count -= 1
    # this is necessary because key at idxfloor might get deleted
    h.idxfloor = get_next_filled(h, h.idxfloor)
    return h
end
Expand Down Expand Up @@ -538,7 +501,7 @@ function Base.pop!(h::RobinDict)
@inbounds key = h.keys[idx]
@inbounds val = h.vals[idx]
rh_delete!(h, idx)
return key => val
key => val
end

"""
Expand Down
20 changes: 10 additions & 10 deletions test/test_robin_dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
@test h1.idxfloor == 0
@test length(h1.keys) == 16
@test length(h1.vals) == 16
@test length(h1.hashes) == 16
@test length(h1.meta) == 16
@test eltype(h1) == Pair{Any, Any}
@test keytype(h1) == Any
@test valtype(h1) == Any
Expand Down Expand Up @@ -334,11 +334,11 @@ end
for i=1:1000
h[i] = i+1
end
length0 = length(h.hashes)
length0 = length(h.meta)
empty!(h)
@test h.count == 0
@test h.idxfloor == 0
@test length(h.hashes) == length(h.keys) == length(h.vals) == length0
@test length(h.meta) == length(h.keys) == length(h.vals) == length0
for i=-1000:1000
@test !haskey(h, i)
end
Expand Down Expand Up @@ -407,7 +407,7 @@ end

for i in 1:length(h1.keys)
if isslotfilled(h1, i)
@test hash_key(h1.keys[i]) == h1.hashes[i]
@test hash_meta((hash_key(h1.keys[i])<< 8)) == hash_meta(h1.meta[i])
end
end

Expand All @@ -418,7 +418,7 @@ end

for i in 1:length(h2.keys)
if isslotfilled(h2, i)
@test hash_key(h2.keys[i]) == h2.hashes[i]
@test hash_meta((hash_key(h2.keys[i])<<8)) == hash_meta(h2.meta[i])
end
end

Expand All @@ -429,7 +429,7 @@ end

for i in 1:length(h3.keys)
if isslotfilled(h3, i)
@test hash_key(h3.keys[i]) == h3.hashes[i]
@test hash_meta((hash_key(h3.keys[i])<<8)) == hash_meta(h3.meta[i])
end
end

Expand All @@ -441,11 +441,11 @@ end
for i=1:length(h.keys)
isslotfilled(h, i) || continue
(min_idx == 0) && (min_idx = i)
@assert hash_key(h.keys[i]) == h.hashes[i]
@assert (h.hashes[i] & 0x80000000) != 0
@assert hash_meta((hash_key(h.keys[i])<<8)) == hash_meta(h.meta[i])
@assert !iszero(h.meta[i])
cnt += 1
@assert typeof(h.hashes[i]) == UInt32
des_ind = desired_index(h.hashes[i], sz)
@assert typeof(h.meta[i]) == UInt32
des_ind = desired_index(hash_meta(h.meta[i]), sz)
pos_diff = 0
if (i >= des_ind)
pos_diff = i - des_ind
Expand Down