You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
359 lines
8.7 KiB
359 lines
8.7 KiB
// Copyright 2023 Dolthub, Inc. |
|
// |
|
// Licensed under the Apache License, Version 2.0 (the "License"); |
|
// you may not use this file except in compliance with the License. |
|
// You may obtain a copy of the License at |
|
// |
|
// http://www.apache.org/licenses/LICENSE-2.0 |
|
// |
|
// Unless required by applicable law or agreed to in writing, software |
|
// distributed under the License is distributed on an "AS IS" BASIS, |
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
// See the License for the specific language governing permissions and |
|
// limitations under the License. |
|
|
|
package swiss |
|
|
|
import ( |
|
"github.com/dolthub/maphash" |
|
) |
|
|
|
const ( |
|
maxLoadFactor = float32(maxAvgGroupLoad) / float32(groupSize) |
|
) |
|
|
|
// Map is an open-addressing hash map |
|
// based on Abseil's flat_hash_map. |
|
type Map[K comparable, V any] struct { |
|
ctrl []metadata |
|
groups []group[K, V] |
|
hash maphash.Hasher[K] |
|
resident uint32 |
|
dead uint32 |
|
limit uint32 |
|
} |
|
|
|
// metadata is the h2 metadata array for a group. |
|
// find operations first probe the controls bytes |
|
// to filter candidates before matching keys |
|
type metadata [groupSize]int8 |
|
|
|
// group is a group of 16 key-value pairs |
|
type group[K comparable, V any] struct { |
|
keys [groupSize]K |
|
values [groupSize]V |
|
} |
|
|
|
// Masks used to split a 64-bit hash: the low 7 bits form h2
// and the remaining high 57 bits form h1.
const (
	h1Mask uint64 = 0xffff_ffff_ffff_ff80
	h2Mask uint64 = 0x0000_0000_0000_007f
)

// Control-byte sentinels. Both are negative, so neither can be
// mistaken for a stored h2, which never exceeds 0x7f.
const (
	empty     int8 = -128 // 0b1000_0000: slot is unoccupied
	tombstone int8 = -2   // 0b1111_1110: slot's element was deleted
)
|
|
|
type (
	// h1 is the 57-bit hash prefix; it picks the group where probing starts.
	h1 uint64

	// h2 is the 7-bit hash suffix; it is what a slot's control byte stores.
	h2 int8
)
|
|
|
// NewMap constructs a Map. |
|
func NewMap[K comparable, V any](sz uint32) (m *Map[K, V]) { |
|
groups := numGroups(sz) |
|
m = &Map[K, V]{ |
|
ctrl: make([]metadata, groups), |
|
groups: make([]group[K, V], groups), |
|
hash: maphash.NewHasher[K](), |
|
limit: groups * maxAvgGroupLoad, |
|
} |
|
for i := range m.ctrl { |
|
m.ctrl[i] = newEmptyMetadata() |
|
} |
|
return |
|
} |
|
|
|
// Has returns true if |key| is present in |m|. |
|
func (m *Map[K, V]) Has(key K) (ok bool) { |
|
hi, lo := splitHash(m.hash.Hash(key)) |
|
g := probeStart(hi, len(m.groups)) |
|
for { // inlined find loop |
|
matches := metaMatchH2(&m.ctrl[g], lo) |
|
for matches != 0 { |
|
s := nextMatch(&matches) |
|
if key == m.groups[g].keys[s] { |
|
ok = true |
|
return |
|
} |
|
} |
|
// |key| is not in group |g|, |
|
// stop probing if we see an empty slot |
|
matches = metaMatchEmpty(&m.ctrl[g]) |
|
if matches != 0 { |
|
ok = false |
|
return |
|
} |
|
g += 1 // linear probing |
|
if g >= uint32(len(m.groups)) { |
|
g = 0 |
|
} |
|
} |
|
} |
|
|
|
// Get returns the |value| mapped by |key| if one exists. |
|
func (m *Map[K, V]) Get(key K) (value V, ok bool) { |
|
hi, lo := splitHash(m.hash.Hash(key)) |
|
g := probeStart(hi, len(m.groups)) |
|
for { // inlined find loop |
|
matches := metaMatchH2(&m.ctrl[g], lo) |
|
for matches != 0 { |
|
s := nextMatch(&matches) |
|
if key == m.groups[g].keys[s] { |
|
value, ok = m.groups[g].values[s], true |
|
return |
|
} |
|
} |
|
// |key| is not in group |g|, |
|
// stop probing if we see an empty slot |
|
matches = metaMatchEmpty(&m.ctrl[g]) |
|
if matches != 0 { |
|
ok = false |
|
return |
|
} |
|
g += 1 // linear probing |
|
if g >= uint32(len(m.groups)) { |
|
g = 0 |
|
} |
|
} |
|
} |
|
|
|
// Put attempts to insert |key| and |value| |
|
func (m *Map[K, V]) Put(key K, value V) { |
|
if m.resident >= m.limit { |
|
m.rehash(m.nextSize()) |
|
} |
|
hi, lo := splitHash(m.hash.Hash(key)) |
|
g := probeStart(hi, len(m.groups)) |
|
for { // inlined find loop |
|
matches := metaMatchH2(&m.ctrl[g], lo) |
|
for matches != 0 { |
|
s := nextMatch(&matches) |
|
if key == m.groups[g].keys[s] { // update |
|
m.groups[g].keys[s] = key |
|
m.groups[g].values[s] = value |
|
return |
|
} |
|
} |
|
// |key| is not in group |g|, |
|
// stop probing if we see an empty slot |
|
matches = metaMatchEmpty(&m.ctrl[g]) |
|
if matches != 0 { // insert |
|
s := nextMatch(&matches) |
|
m.groups[g].keys[s] = key |
|
m.groups[g].values[s] = value |
|
m.ctrl[g][s] = int8(lo) |
|
m.resident++ |
|
return |
|
} |
|
g += 1 // linear probing |
|
if g >= uint32(len(m.groups)) { |
|
g = 0 |
|
} |
|
} |
|
} |
|
|
|
// Delete attempts to remove |key|, returns true successful. |
|
func (m *Map[K, V]) Delete(key K) (ok bool) { |
|
hi, lo := splitHash(m.hash.Hash(key)) |
|
g := probeStart(hi, len(m.groups)) |
|
for { |
|
matches := metaMatchH2(&m.ctrl[g], lo) |
|
for matches != 0 { |
|
s := nextMatch(&matches) |
|
if key == m.groups[g].keys[s] { |
|
ok = true |
|
// optimization: if |m.ctrl[g]| contains any empty |
|
// metadata bytes, we can physically delete |key| |
|
// rather than placing a tombstone. |
|
// The observation is that any probes into group |g| |
|
// would already be terminated by the existing empty |
|
// slot, and therefore reclaiming slot |s| will not |
|
// cause premature termination of probes into |g|. |
|
if metaMatchEmpty(&m.ctrl[g]) != 0 { |
|
m.ctrl[g][s] = empty |
|
m.resident-- |
|
} else { |
|
m.ctrl[g][s] = tombstone |
|
m.dead++ |
|
} |
|
var k K |
|
var v V |
|
m.groups[g].keys[s] = k |
|
m.groups[g].values[s] = v |
|
return |
|
} |
|
} |
|
// |key| is not in group |g|, |
|
// stop probing if we see an empty slot |
|
matches = metaMatchEmpty(&m.ctrl[g]) |
|
if matches != 0 { // |key| absent |
|
ok = false |
|
return |
|
} |
|
g += 1 // linear probing |
|
if g >= uint32(len(m.groups)) { |
|
g = 0 |
|
} |
|
} |
|
} |
|
|
|
// Iter iterates the elements of the Map, passing them to the callback. |
|
// It guarantees that any key in the Map will be visited only once, and |
|
// for un-mutated Maps, every key will be visited once. If the Map is |
|
// Mutated during iteration, mutations will be reflected on return from |
|
// Iter, but the set of keys visited by Iter is non-deterministic. |
|
func (m *Map[K, V]) Iter(cb func(k K, v V) (stop bool)) { |
|
// take a consistent view of the table in case |
|
// we rehash during iteration |
|
ctrl, groups := m.ctrl, m.groups |
|
// pick a random starting group |
|
g := randIntN(len(groups)) |
|
for n := 0; n < len(groups); n++ { |
|
for s, c := range ctrl[g] { |
|
if c == empty || c == tombstone { |
|
continue |
|
} |
|
k, v := groups[g].keys[s], groups[g].values[s] |
|
if stop := cb(k, v); stop { |
|
return |
|
} |
|
} |
|
g++ |
|
if g >= uint32(len(groups)) { |
|
g = 0 |
|
} |
|
} |
|
} |
|
|
|
// Clear removes all elements from the Map. |
|
func (m *Map[K, V]) Clear() { |
|
for i, c := range m.ctrl { |
|
for j := range c { |
|
m.ctrl[i][j] = empty |
|
} |
|
} |
|
var k K |
|
var v V |
|
for i := range m.groups { |
|
g := &m.groups[i] |
|
for i := range g.keys { |
|
g.keys[i] = k |
|
g.values[i] = v |
|
} |
|
} |
|
m.resident, m.dead = 0, 0 |
|
} |
|
|
|
// Count returns the number of elements in the Map. |
|
func (m *Map[K, V]) Count() int { |
|
return int(m.resident - m.dead) |
|
} |
|
|
|
// Capacity returns the number of additional elements |
|
// the can be added to the Map before resizing. |
|
func (m *Map[K, V]) Capacity() int { |
|
return int(m.limit - m.resident) |
|
} |
|
|
|
// find returns the location of |key| if present, or its insertion location if absent. |
|
// for performance, find is manually inlined into public methods. |
|
func (m *Map[K, V]) find(key K, hi h1, lo h2) (g, s uint32, ok bool) { |
|
g = probeStart(hi, len(m.groups)) |
|
for { |
|
matches := metaMatchH2(&m.ctrl[g], lo) |
|
for matches != 0 { |
|
s = nextMatch(&matches) |
|
if key == m.groups[g].keys[s] { |
|
return g, s, true |
|
} |
|
} |
|
// |key| is not in group |g|, |
|
// stop probing if we see an empty slot |
|
matches = metaMatchEmpty(&m.ctrl[g]) |
|
if matches != 0 { |
|
s = nextMatch(&matches) |
|
return g, s, false |
|
} |
|
g += 1 // linear probing |
|
if g >= uint32(len(m.groups)) { |
|
g = 0 |
|
} |
|
} |
|
} |
|
|
|
func (m *Map[K, V]) nextSize() (n uint32) { |
|
n = uint32(len(m.groups)) * 2 |
|
if m.dead >= (m.resident / 2) { |
|
n = uint32(len(m.groups)) |
|
} |
|
return |
|
} |
|
|
|
func (m *Map[K, V]) rehash(n uint32) { |
|
groups, ctrl := m.groups, m.ctrl |
|
m.groups = make([]group[K, V], n) |
|
m.ctrl = make([]metadata, n) |
|
for i := range m.ctrl { |
|
m.ctrl[i] = newEmptyMetadata() |
|
} |
|
m.hash = maphash.NewSeed(m.hash) |
|
m.limit = n * maxAvgGroupLoad |
|
m.resident, m.dead = 0, 0 |
|
for g := range ctrl { |
|
for s := range ctrl[g] { |
|
c := ctrl[g][s] |
|
if c == empty || c == tombstone { |
|
continue |
|
} |
|
m.Put(groups[g].keys[s], groups[g].values[s]) |
|
} |
|
} |
|
} |
|
|
|
func (m *Map[K, V]) loadFactor() float32 { |
|
slots := float32(len(m.groups) * groupSize) |
|
return float32(m.resident-m.dead) / slots |
|
} |
|
|
|
// numGroups returns the minimum number of groups needed to store |n| elems. |
|
func numGroups(n uint32) (groups uint32) { |
|
groups = (n + maxAvgGroupLoad - 1) / maxAvgGroupLoad |
|
if groups == 0 { |
|
groups = 1 |
|
} |
|
return |
|
} |
|
|
|
func newEmptyMetadata() (meta metadata) { |
|
for i := range meta { |
|
meta[i] = empty |
|
} |
|
return |
|
} |
|
|
|
func splitHash(h uint64) (h1, h2) { |
|
return h1((h & h1Mask) >> 7), h2(h & h2Mask) |
|
} |
|
|
|
func probeStart(hi h1, groups int) uint32 { |
|
return fastModN(uint32(hi), uint32(groups)) |
|
} |
|
|
|
// fastModN maps |x| into the interval [0, n) by taking the high 32
// bits of the 64-bit product x*n. It returns 0 when |n| is 0.
// See lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
func fastModN(x, n uint32) uint32 {
	product := uint64(x) * uint64(n)
	return uint32(product >> 32)
}
|
|
|
// randIntN returns a random number in the interval [0, n). |
|
func randIntN(n int) uint32 { |
|
return fastModN(fastrand(), uint32(n)) |
|
}
|
|
|