Optimize HashMap/HashSet using fastmod
This commit is contained in:
parent
d1dac8427a
commit
fddafed919
3 changed files with 98 additions and 30 deletions
|
@ -91,9 +91,9 @@ private:
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
_FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash, uint32_t p_capacity) const {
|
static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) {
|
||||||
uint32_t original_pos = p_hash % p_capacity;
|
const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity);
|
||||||
return (p_pos - original_pos + p_capacity) % p_capacity;
|
return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
|
bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
|
||||||
|
@ -101,9 +101,10 @@ private:
|
||||||
return false; // Failed lookups, no elements
|
return false; // Failed lookups, no elements
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t capacity = hash_table_size_primes[capacity_index];
|
const uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||||
|
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
|
||||||
uint32_t hash = _hash(p_key);
|
uint32_t hash = _hash(p_key);
|
||||||
uint32_t pos = hash % capacity;
|
uint32_t pos = fastmod(hash, capacity_inv, capacity);
|
||||||
uint32_t distance = 0;
|
uint32_t distance = 0;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -111,7 +112,7 @@ private:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (distance > _get_probe_length(pos, hashes[pos], capacity)) {
|
if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -120,17 +121,18 @@ private:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
pos = (pos + 1) % capacity;
|
pos = fastmod((pos + 1), capacity_inv, capacity);
|
||||||
distance++;
|
distance++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void _insert_with_hash(uint32_t p_hash, HashMapElement<TKey, TValue> *p_value) {
|
void _insert_with_hash(uint32_t p_hash, HashMapElement<TKey, TValue> *p_value) {
|
||||||
uint32_t capacity = hash_table_size_primes[capacity_index];
|
const uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||||
|
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
|
||||||
uint32_t hash = p_hash;
|
uint32_t hash = p_hash;
|
||||||
HashMapElement<TKey, TValue> *value = p_value;
|
HashMapElement<TKey, TValue> *value = p_value;
|
||||||
uint32_t distance = 0;
|
uint32_t distance = 0;
|
||||||
uint32_t pos = hash % capacity;
|
uint32_t pos = fastmod(hash, capacity_inv, capacity);
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
if (hashes[pos] == EMPTY_HASH) {
|
if (hashes[pos] == EMPTY_HASH) {
|
||||||
|
@ -143,14 +145,14 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Not an empty slot, let's check the probing length of the existing one.
|
// Not an empty slot, let's check the probing length of the existing one.
|
||||||
uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity);
|
uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv);
|
||||||
if (existing_probe_len < distance) {
|
if (existing_probe_len < distance) {
|
||||||
SWAP(hash, hashes[pos]);
|
SWAP(hash, hashes[pos]);
|
||||||
SWAP(value, elements[pos]);
|
SWAP(value, elements[pos]);
|
||||||
distance = existing_probe_len;
|
distance = existing_probe_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
pos = (pos + 1) % capacity;
|
pos = fastmod((pos + 1), capacity_inv, capacity);
|
||||||
distance++;
|
distance++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -316,13 +318,14 @@ public:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t capacity = hash_table_size_primes[capacity_index];
|
const uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||||
uint32_t next_pos = (pos + 1) % capacity;
|
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
|
||||||
while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity) != 0) {
|
uint32_t next_pos = fastmod((pos + 1), capacity_inv, capacity);
|
||||||
|
while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) {
|
||||||
SWAP(hashes[next_pos], hashes[pos]);
|
SWAP(hashes[next_pos], hashes[pos]);
|
||||||
SWAP(elements[next_pos], elements[pos]);
|
SWAP(elements[next_pos], elements[pos]);
|
||||||
pos = next_pos;
|
pos = next_pos;
|
||||||
next_pos = (pos + 1) % capacity;
|
next_pos = fastmod((pos + 1), capacity_inv, capacity);
|
||||||
}
|
}
|
||||||
|
|
||||||
hashes[pos] = EMPTY_HASH;
|
hashes[pos] = EMPTY_HASH;
|
||||||
|
|
|
@ -74,9 +74,9 @@ private:
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
_FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash, uint32_t p_capacity) const {
|
static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) {
|
||||||
uint32_t original_pos = p_hash % p_capacity;
|
const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity);
|
||||||
return (p_pos - original_pos + p_capacity) % p_capacity;
|
return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
|
bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
|
||||||
|
@ -84,9 +84,10 @@ private:
|
||||||
return false; // Failed lookups, no elements
|
return false; // Failed lookups, no elements
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t capacity = hash_table_size_primes[capacity_index];
|
const uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||||
|
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
|
||||||
uint32_t hash = _hash(p_key);
|
uint32_t hash = _hash(p_key);
|
||||||
uint32_t pos = hash % capacity;
|
uint32_t pos = fastmod(hash, capacity_inv, capacity);
|
||||||
uint32_t distance = 0;
|
uint32_t distance = 0;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -94,7 +95,7 @@ private:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (distance > _get_probe_length(pos, hashes[pos], capacity)) {
|
if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,17 +104,18 @@ private:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
pos = (pos + 1) % capacity;
|
pos = fastmod(pos + 1, capacity_inv, capacity);
|
||||||
distance++;
|
distance++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t _insert_with_hash(uint32_t p_hash, uint32_t p_index) {
|
uint32_t _insert_with_hash(uint32_t p_hash, uint32_t p_index) {
|
||||||
uint32_t capacity = hash_table_size_primes[capacity_index];
|
const uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||||
|
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
|
||||||
uint32_t hash = p_hash;
|
uint32_t hash = p_hash;
|
||||||
uint32_t index = p_index;
|
uint32_t index = p_index;
|
||||||
uint32_t distance = 0;
|
uint32_t distance = 0;
|
||||||
uint32_t pos = hash % capacity;
|
uint32_t pos = fastmod(hash, capacity_inv, capacity);
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
if (hashes[pos] == EMPTY_HASH) {
|
if (hashes[pos] == EMPTY_HASH) {
|
||||||
|
@ -124,7 +126,7 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Not an empty slot, let's check the probing length of the existing one.
|
// Not an empty slot, let's check the probing length of the existing one.
|
||||||
uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity);
|
uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv);
|
||||||
if (existing_probe_len < distance) {
|
if (existing_probe_len < distance) {
|
||||||
key_to_hash[index] = pos;
|
key_to_hash[index] = pos;
|
||||||
SWAP(hash, hashes[pos]);
|
SWAP(hash, hashes[pos]);
|
||||||
|
@ -132,7 +134,7 @@ private:
|
||||||
distance = existing_probe_len;
|
distance = existing_probe_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
pos = (pos + 1) % capacity;
|
pos = fastmod(pos + 1, capacity_inv, capacity);
|
||||||
distance++;
|
distance++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -265,9 +267,10 @@ public:
|
||||||
uint32_t key_pos = pos;
|
uint32_t key_pos = pos;
|
||||||
pos = key_to_hash[pos]; //make hash pos
|
pos = key_to_hash[pos]; //make hash pos
|
||||||
|
|
||||||
uint32_t capacity = hash_table_size_primes[capacity_index];
|
const uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||||
uint32_t next_pos = (pos + 1) % capacity;
|
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
|
||||||
while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity) != 0) {
|
uint32_t next_pos = fastmod(pos + 1, capacity_inv, capacity);
|
||||||
|
while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) {
|
||||||
uint32_t kpos = hash_to_key[pos];
|
uint32_t kpos = hash_to_key[pos];
|
||||||
uint32_t kpos_next = hash_to_key[next_pos];
|
uint32_t kpos_next = hash_to_key[next_pos];
|
||||||
SWAP(key_to_hash[kpos], key_to_hash[kpos_next]);
|
SWAP(key_to_hash[kpos], key_to_hash[kpos_next]);
|
||||||
|
@ -275,7 +278,7 @@ public:
|
||||||
SWAP(hash_to_key[next_pos], hash_to_key[pos]);
|
SWAP(hash_to_key[next_pos], hash_to_key[pos]);
|
||||||
|
|
||||||
pos = next_pos;
|
pos = next_pos;
|
||||||
next_pos = (pos + 1) % capacity;
|
next_pos = fastmod(pos + 1, capacity_inv, capacity);
|
||||||
}
|
}
|
||||||
|
|
||||||
hashes[pos] = EMPTY_HASH;
|
hashes[pos] = EMPTY_HASH;
|
||||||
|
|
|
@ -437,4 +437,66 @@ const uint32_t hash_table_size_primes[HASH_TABLE_SIZE_MAX] = {
|
||||||
1610612741,
|
1610612741,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Computed with elem_i = UINT64_C(0xFFFFFFFFFFFFFFFF) / d_i + 1, where d_i is the i-th element of the above array.
|
||||||
|
const uint64_t hash_table_size_primes_inv[HASH_TABLE_SIZE_MAX] = {
|
||||||
|
3689348814741910324,
|
||||||
|
1418980313362273202,
|
||||||
|
802032351030850071,
|
||||||
|
392483916461905354,
|
||||||
|
190172619316593316,
|
||||||
|
95578984837873325,
|
||||||
|
47420935922132524,
|
||||||
|
23987963684927896,
|
||||||
|
11955116055547344,
|
||||||
|
5991147799191151,
|
||||||
|
2998982941588287,
|
||||||
|
1501077717772769,
|
||||||
|
750081082979285,
|
||||||
|
375261795343686,
|
||||||
|
187625172388393,
|
||||||
|
93822606204624,
|
||||||
|
46909513691883,
|
||||||
|
23456218233098,
|
||||||
|
11728086747027,
|
||||||
|
5864041509391,
|
||||||
|
2932024948977,
|
||||||
|
1466014921160,
|
||||||
|
733007198436,
|
||||||
|
366503839517,
|
||||||
|
183251896093,
|
||||||
|
91625960335,
|
||||||
|
45812983922,
|
||||||
|
22906489714,
|
||||||
|
11453246088
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fastmod computes (n mod d) much faster than n % d, given the precomputed constant c = UINT64_C(0xFFFFFFFFFFFFFFFF) / d + 1.
|
||||||
|
* The implementation of fastmod is based on the following paper by Daniel Lemire et al.
|
||||||
|
* Faster Remainder by Direct Computation: Applications to Compilers and Software Libraries
|
||||||
|
* https://arxiv.org/abs/1902.01961
|
||||||
|
*/
|
||||||
|
static _FORCE_INLINE_ uint32_t fastmod(const uint32_t n, const uint64_t c, const uint32_t d) {
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
// __umulh returns the upper 64 bits of the product of two 64-bit unsigned integers.
|
||||||
|
// This intrinsic function is required since MSVC does not support unsigned 128-bit integers.
|
||||||
|
#if defined(_M_X64) || defined(_M_ARM64)
|
||||||
|
return __umulh(c * n, d);
|
||||||
|
#else
|
||||||
|
// Fall back to the slower method on 32-bit platforms.
|
||||||
|
return n % d;
|
||||||
|
#endif // _M_X64 || _M_ARM64
|
||||||
|
#else
|
||||||
|
#ifdef __SIZEOF_INT128__
|
||||||
|
// The __extension__ marker below prevents the compiler's pedantic warning about the non-standard __int128 type; its use here is intentional.
|
||||||
|
uint64_t lowbits = c * n;
|
||||||
|
__extension__ typedef unsigned __int128 uint128;
|
||||||
|
return static_cast<uint64_t>(((uint128)lowbits * d) >> 64);
|
||||||
|
#else
|
||||||
|
// Fall back to the slower method if no 128-bit unsigned integer type is available.
|
||||||
|
return n % d;
|
||||||
|
#endif // __SIZEOF_INT128__
|
||||||
|
#endif // _MSC_VER
|
||||||
|
}
|
||||||
|
|
||||||
#endif // HASHFUNCS_H
|
#endif // HASHFUNCS_H
|
||||||
|
|
Loading…
Reference in a new issue