Implement backward shift deletion for OAHashMap

This commit is contained in:
Shiqing 2019-09-21 14:32:40 +08:00
parent 2e065d8ad0
commit 78bee16e05
2 changed files with 31 additions and 29 deletions

View file

@ -37,10 +37,11 @@
#include "core/os/memory.h" #include "core/os/memory.h"
/** /**
* A HashMap implementation that uses open addressing with robinhood hashing. * A HashMap implementation that uses open addressing with Robin Hood hashing.
* Robinhood hashing swaps out entries that have a smaller probing distance * Robin Hood hashing swaps out entries that have a smaller probing distance
* than the to-be-inserted entry, that evens out the average probing distance * than the to-be-inserted entry, that evens out the average probing distance
* and enables faster lookups. * and enables faster lookups. Backward shift deletion is employed to further
* improve the performance and to avoid infinite loops in rare cases.
* *
* The entries are stored inplace, so huge keys or values might fill cache lines * The entries are stored inplace, so huge keys or values might fill cache lines
* a lot faster. * a lot faster.
@ -60,25 +61,20 @@ private:
uint32_t num_elements; uint32_t num_elements;
static const uint32_t EMPTY_HASH = 0; static const uint32_t EMPTY_HASH = 0;
static const uint32_t DELETED_HASH_BIT = 1 << 31;
// Computes the hash stored alongside p_key, using Hasher::hash.
// EMPTY_HASH is the value that marks an unused slot in the hashes array, so a
// key whose raw hash equals EMPTY_HASH is remapped to EMPTY_HASH + 1.
// NOTE(review): rows below are side-by-side diff rows (pre-commit text, then
// post-commit text). A row that appears only once exists on one side only.
_FORCE_INLINE_ uint32_t _hash(const TKey &p_key) const { _FORCE_INLINE_ uint32_t _hash(const TKey &p_key) const {
uint32_t hash = Hasher::hash(p_key); uint32_t hash = Hasher::hash(p_key);
if (hash == EMPTY_HASH) { if (hash == EMPTY_HASH) {
hash = EMPTY_HASH + 1; hash = EMPTY_HASH + 1;
// Pre-commit side only: clears the DELETED_HASH_BIT from a fresh hash so that
// bit stays reserved for marking deleted slots. The post-commit side has no
// such bit.
} else if (hash & DELETED_HASH_BIT) {
hash &= ~DELETED_HASH_BIT;
} }
return hash; return hash;
} }
// Returns how many slots p_pos lies past the home slot of an entry, where the
// home slot is p_hash % capacity and the distance wraps around the table.
// NOTE(review): rows below are side-by-side diff rows (pre-commit text, then
// post-commit text). A row that appears only once exists on one side only.
_FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash) const { _FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash) const {
// Pre-commit side only: masks off the deleted marker before locating the home slot.
p_hash = p_hash & ~DELETED_HASH_BIT; // we don't care if it was deleted or not
uint32_t original_pos = p_hash % capacity; uint32_t original_pos = p_hash % capacity;
// The post-commit form adds capacity before the modulo. Without it, when
// p_pos < original_pos the uint32_t subtraction wraps modulo 2^32, and the
// result of % capacity is only the true distance if capacity divides 2^32.
return (p_pos - original_pos) % capacity; return (p_pos - original_pos + capacity) % capacity;
} }
_FORCE_INLINE_ void _construct(uint32_t p_pos, uint32_t p_hash, const TKey &p_key, const TValue &p_value) { _FORCE_INLINE_ void _construct(uint32_t p_pos, uint32_t p_hash, const TKey &p_key, const TValue &p_value) {
@ -132,14 +128,6 @@ private:
// not an empty slot, let's check the probing length of the existing one // not an empty slot, let's check the probing length of the existing one
uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos]); uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos]);
if (existing_probe_len < distance) { if (existing_probe_len < distance) {
if (hashes[pos] & DELETED_HASH_BIT) {
// we found a place where we can fit in!
_construct(pos, hash, key, value);
return;
}
SWAP(hash, hashes[pos]); SWAP(hash, hashes[pos]);
SWAP(key, keys[pos]); SWAP(key, keys[pos]);
SWAP(value, values[pos]); SWAP(value, values[pos]);
@ -173,9 +161,6 @@ private:
if (old_hashes[i] == EMPTY_HASH) { if (old_hashes[i] == EMPTY_HASH) {
continue; continue;
} }
if (old_hashes[i] & DELETED_HASH_BIT) {
continue;
}
_insert_with_hash(old_hashes[i], old_keys[i], old_values[i]); _insert_with_hash(old_hashes[i], old_keys[i], old_values[i]);
} }
@ -205,10 +190,6 @@ public:
continue; continue;
} }
if (hashes[i] & DELETED_HASH_BIT) {
continue;
}
hashes[i] = EMPTY_HASH; hashes[i] = EMPTY_HASH;
values[i].~TValue(); values[i].~TValue();
keys[i].~TKey(); keys[i].~TKey();
@ -219,7 +200,7 @@ public:
void insert(const TKey &p_key, const TValue &p_value) { void insert(const TKey &p_key, const TValue &p_value) {
if ((float)num_elements / (float)capacity > 0.9) { if (num_elements + 1 > 0.9 * capacity) {
_resize_and_rehash(); _resize_and_rehash();
} }
@ -272,9 +253,20 @@ public:
return; return;
} }
hashes[pos] |= DELETED_HASH_BIT; uint32_t next_pos = (pos + 1) % capacity;
while (hashes[next_pos] != EMPTY_HASH &&
_get_probe_length(next_pos, hashes[next_pos]) != 0) {
SWAP(hashes[next_pos], hashes[pos]);
SWAP(keys[next_pos], keys[pos]);
SWAP(values[next_pos], values[pos]);
pos = next_pos;
next_pos = (pos + 1) % capacity;
}
hashes[pos] = EMPTY_HASH;
values[pos].~TValue(); values[pos].~TValue();
keys[pos].~TKey(); keys[pos].~TKey();
num_elements--; num_elements--;
} }
@ -326,9 +318,6 @@ public:
if (hashes[i] == EMPTY_HASH) { if (hashes[i] == EMPTY_HASH) {
continue; continue;
} }
if (hashes[i] & DELETED_HASH_BIT) {
continue;
}
it.valid = true; it.valid = true;
it.key = &keys[i]; it.key = &keys[i];

View file

@ -140,6 +140,19 @@ MainLoop *test() {
OS::get_singleton()->print("test for issue #31402 passed.\n"); OS::get_singleton()->print("test for issue #31402 passed.\n");
} }
// test collision resolution, should not crash or run indefinitely
{
OAHashMap<int, int> map(4);
map.set(1, 1);
map.set(5, 1);
map.set(9, 1);
map.set(13, 1);
map.remove(5);
map.remove(9);
map.remove(13);
map.set(5, 1);
}
return NULL; return NULL;
} }
} // namespace TestOAHashMap } // namespace TestOAHashMap