Optimize HashMap/HashSet using fastmod

This commit is contained in:
Hendrik Brucker 2022-06-23 18:08:52 +02:00
parent d1dac8427a
commit fddafed919
3 changed files with 98 additions and 30 deletions

View file

@ -91,9 +91,9 @@ private:
return hash;
}
_FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash, uint32_t p_capacity) const {
uint32_t original_pos = p_hash % p_capacity;
return (p_pos - original_pos + p_capacity) % p_capacity;
static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) {
const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity);
return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity);
}
bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
@ -101,9 +101,10 @@ private:
return false; // Failed lookups, no elements
}
uint32_t capacity = hash_table_size_primes[capacity_index];
const uint32_t capacity = hash_table_size_primes[capacity_index];
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t hash = _hash(p_key);
uint32_t pos = hash % capacity;
uint32_t pos = fastmod(hash, capacity_inv, capacity);
uint32_t distance = 0;
while (true) {
@ -111,7 +112,7 @@ private:
return false;
}
if (distance > _get_probe_length(pos, hashes[pos], capacity)) {
if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) {
return false;
}
@ -120,17 +121,18 @@ private:
return true;
}
pos = (pos + 1) % capacity;
pos = fastmod((pos + 1), capacity_inv, capacity);
distance++;
}
}
void _insert_with_hash(uint32_t p_hash, HashMapElement<TKey, TValue> *p_value) {
uint32_t capacity = hash_table_size_primes[capacity_index];
const uint32_t capacity = hash_table_size_primes[capacity_index];
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t hash = p_hash;
HashMapElement<TKey, TValue> *value = p_value;
uint32_t distance = 0;
uint32_t pos = hash % capacity;
uint32_t pos = fastmod(hash, capacity_inv, capacity);
while (true) {
if (hashes[pos] == EMPTY_HASH) {
@ -143,14 +145,14 @@ private:
}
// Not an empty slot, let's check the probing length of the existing one.
uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity);
uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv);
if (existing_probe_len < distance) {
SWAP(hash, hashes[pos]);
SWAP(value, elements[pos]);
distance = existing_probe_len;
}
pos = (pos + 1) % capacity;
pos = fastmod((pos + 1), capacity_inv, capacity);
distance++;
}
}
@ -316,13 +318,14 @@ public:
return false;
}
uint32_t capacity = hash_table_size_primes[capacity_index];
uint32_t next_pos = (pos + 1) % capacity;
while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity) != 0) {
const uint32_t capacity = hash_table_size_primes[capacity_index];
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t next_pos = fastmod((pos + 1), capacity_inv, capacity);
while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) {
SWAP(hashes[next_pos], hashes[pos]);
SWAP(elements[next_pos], elements[pos]);
pos = next_pos;
next_pos = (pos + 1) % capacity;
next_pos = fastmod((pos + 1), capacity_inv, capacity);
}
hashes[pos] = EMPTY_HASH;

View file

@ -74,9 +74,9 @@ private:
return hash;
}
_FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash, uint32_t p_capacity) const {
uint32_t original_pos = p_hash % p_capacity;
return (p_pos - original_pos + p_capacity) % p_capacity;
static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) {
const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity);
return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity);
}
bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
@ -84,9 +84,10 @@ private:
return false; // Failed lookups, no elements
}
uint32_t capacity = hash_table_size_primes[capacity_index];
const uint32_t capacity = hash_table_size_primes[capacity_index];
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t hash = _hash(p_key);
uint32_t pos = hash % capacity;
uint32_t pos = fastmod(hash, capacity_inv, capacity);
uint32_t distance = 0;
while (true) {
@ -94,7 +95,7 @@ private:
return false;
}
if (distance > _get_probe_length(pos, hashes[pos], capacity)) {
if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) {
return false;
}
@ -103,17 +104,18 @@ private:
return true;
}
pos = (pos + 1) % capacity;
pos = fastmod(pos + 1, capacity_inv, capacity);
distance++;
}
}
uint32_t _insert_with_hash(uint32_t p_hash, uint32_t p_index) {
uint32_t capacity = hash_table_size_primes[capacity_index];
const uint32_t capacity = hash_table_size_primes[capacity_index];
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t hash = p_hash;
uint32_t index = p_index;
uint32_t distance = 0;
uint32_t pos = hash % capacity;
uint32_t pos = fastmod(hash, capacity_inv, capacity);
while (true) {
if (hashes[pos] == EMPTY_HASH) {
@ -124,7 +126,7 @@ private:
}
// Not an empty slot, let's check the probing length of the existing one.
uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity);
uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv);
if (existing_probe_len < distance) {
key_to_hash[index] = pos;
SWAP(hash, hashes[pos]);
@ -132,7 +134,7 @@ private:
distance = existing_probe_len;
}
pos = (pos + 1) % capacity;
pos = fastmod(pos + 1, capacity_inv, capacity);
distance++;
}
}
@ -265,9 +267,10 @@ public:
uint32_t key_pos = pos;
pos = key_to_hash[pos]; //make hash pos
uint32_t capacity = hash_table_size_primes[capacity_index];
uint32_t next_pos = (pos + 1) % capacity;
while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity) != 0) {
const uint32_t capacity = hash_table_size_primes[capacity_index];
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
uint32_t next_pos = fastmod(pos + 1, capacity_inv, capacity);
while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) {
uint32_t kpos = hash_to_key[pos];
uint32_t kpos_next = hash_to_key[next_pos];
SWAP(key_to_hash[kpos], key_to_hash[kpos_next]);
@ -275,7 +278,7 @@ public:
SWAP(hash_to_key[next_pos], hash_to_key[pos]);
pos = next_pos;
next_pos = (pos + 1) % capacity;
next_pos = fastmod(pos + 1, capacity_inv, capacity);
}
hashes[pos] = EMPTY_HASH;

View file

@ -437,4 +437,66 @@ const uint32_t hash_table_size_primes[HASH_TABLE_SIZE_MAX] = {
1610612741,
};
// Computed with elem_i = UINT64_C (0 x FFFFFFFF FFFFFFFF ) / d_i + 1, where d_i is the i-th element of the above array.
const uint64_t hash_table_size_primes_inv[HASH_TABLE_SIZE_MAX] = {
3689348814741910324,
1418980313362273202,
802032351030850071,
392483916461905354,
190172619316593316,
95578984837873325,
47420935922132524,
23987963684927896,
11955116055547344,
5991147799191151,
2998982941588287,
1501077717772769,
750081082979285,
375261795343686,
187625172388393,
93822606204624,
46909513691883,
23456218233098,
11728086747027,
5864041509391,
2932024948977,
1466014921160,
733007198436,
366503839517,
183251896093,
91625960335,
45812983922,
22906489714,
11453246088
};
/**
* Fastmod computes ( n mod d ) given the precomputed c much faster than n % d.
* The implementation of fastmod is based on the following paper by Daniel Lemire et al.
* Faster Remainder by Direct Computation: Applications to Compilers and Software Libraries
* https://arxiv.org/abs/1902.01961
*/
static _FORCE_INLINE_ uint32_t fastmod(const uint32_t n, const uint64_t c, const uint32_t d) {
#if defined(_MSC_VER)
// Returns the upper 64 bits of the product of two 64-bit unsigned integers.
// This intrinsic function is required since MSVC does not support unsigned 128-bit integers.
#if defined(_M_X64) || defined(_M_ARM64)
return __umulh(c * n, d);
#else
// Fallback to the slower method for 32-bit platforms.
return n % d;
#endif // _M_X64 || _M_ARM64
#else
#ifdef __SIZEOF_INT128__
// Prevent compiler warning, because we know what we are doing.
uint64_t lowbits = c * n;
__extension__ typedef unsigned __int128 uint128;
return static_cast<uint64_t>(((uint128)lowbits * d) >> 64);
#else
// Fallback to the slower method if no 128-bit unsigned integer type is available.
return n % d;
#endif // __SIZEOF_INT128__
#endif // _MSC_VER
}
#endif // HASHFUNCS_H