Merge pull request #62327 from Geometror/hash-containers-fastmod-optimization
HashMap/HashSet optimization: fast modulo
This commit is contained in:
commit
7e075fc482
3 changed files with 98 additions and 30 deletions
|
@ -91,9 +91,9 @@ private:
|
|||
return hash;
|
||||
}
|
||||
|
||||
_FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash, uint32_t p_capacity) const {
|
||||
uint32_t original_pos = p_hash % p_capacity;
|
||||
return (p_pos - original_pos + p_capacity) % p_capacity;
|
||||
static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) {
|
||||
const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity);
|
||||
return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity);
|
||||
}
|
||||
|
||||
bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
|
||||
|
@ -101,9 +101,10 @@ private:
|
|||
return false; // Failed lookups, no elements
|
||||
}
|
||||
|
||||
uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||
const uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
|
||||
uint32_t hash = _hash(p_key);
|
||||
uint32_t pos = hash % capacity;
|
||||
uint32_t pos = fastmod(hash, capacity_inv, capacity);
|
||||
uint32_t distance = 0;
|
||||
|
||||
while (true) {
|
||||
|
@ -111,7 +112,7 @@ private:
|
|||
return false;
|
||||
}
|
||||
|
||||
if (distance > _get_probe_length(pos, hashes[pos], capacity)) {
|
||||
if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -120,17 +121,18 @@ private:
|
|||
return true;
|
||||
}
|
||||
|
||||
pos = (pos + 1) % capacity;
|
||||
pos = fastmod((pos + 1), capacity_inv, capacity);
|
||||
distance++;
|
||||
}
|
||||
}
|
||||
|
||||
void _insert_with_hash(uint32_t p_hash, HashMapElement<TKey, TValue> *p_value) {
|
||||
uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||
const uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
|
||||
uint32_t hash = p_hash;
|
||||
HashMapElement<TKey, TValue> *value = p_value;
|
||||
uint32_t distance = 0;
|
||||
uint32_t pos = hash % capacity;
|
||||
uint32_t pos = fastmod(hash, capacity_inv, capacity);
|
||||
|
||||
while (true) {
|
||||
if (hashes[pos] == EMPTY_HASH) {
|
||||
|
@ -143,14 +145,14 @@ private:
|
|||
}
|
||||
|
||||
// Not an empty slot, let's check the probing length of the existing one.
|
||||
uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity);
|
||||
uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv);
|
||||
if (existing_probe_len < distance) {
|
||||
SWAP(hash, hashes[pos]);
|
||||
SWAP(value, elements[pos]);
|
||||
distance = existing_probe_len;
|
||||
}
|
||||
|
||||
pos = (pos + 1) % capacity;
|
||||
pos = fastmod((pos + 1), capacity_inv, capacity);
|
||||
distance++;
|
||||
}
|
||||
}
|
||||
|
@ -316,13 +318,14 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||
uint32_t next_pos = (pos + 1) % capacity;
|
||||
while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity) != 0) {
|
||||
const uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
|
||||
uint32_t next_pos = fastmod((pos + 1), capacity_inv, capacity);
|
||||
while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) {
|
||||
SWAP(hashes[next_pos], hashes[pos]);
|
||||
SWAP(elements[next_pos], elements[pos]);
|
||||
pos = next_pos;
|
||||
next_pos = (pos + 1) % capacity;
|
||||
next_pos = fastmod((pos + 1), capacity_inv, capacity);
|
||||
}
|
||||
|
||||
hashes[pos] = EMPTY_HASH;
|
||||
|
|
|
@ -74,9 +74,9 @@ private:
|
|||
return hash;
|
||||
}
|
||||
|
||||
_FORCE_INLINE_ uint32_t _get_probe_length(uint32_t p_pos, uint32_t p_hash, uint32_t p_capacity) const {
|
||||
uint32_t original_pos = p_hash % p_capacity;
|
||||
return (p_pos - original_pos + p_capacity) % p_capacity;
|
||||
static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) {
|
||||
const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity);
|
||||
return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity);
|
||||
}
|
||||
|
||||
bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
|
||||
|
@ -84,9 +84,10 @@ private:
|
|||
return false; // Failed lookups, no elements
|
||||
}
|
||||
|
||||
uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||
const uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
|
||||
uint32_t hash = _hash(p_key);
|
||||
uint32_t pos = hash % capacity;
|
||||
uint32_t pos = fastmod(hash, capacity_inv, capacity);
|
||||
uint32_t distance = 0;
|
||||
|
||||
while (true) {
|
||||
|
@ -94,7 +95,7 @@ private:
|
|||
return false;
|
||||
}
|
||||
|
||||
if (distance > _get_probe_length(pos, hashes[pos], capacity)) {
|
||||
if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -103,17 +104,18 @@ private:
|
|||
return true;
|
||||
}
|
||||
|
||||
pos = (pos + 1) % capacity;
|
||||
pos = fastmod(pos + 1, capacity_inv, capacity);
|
||||
distance++;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t _insert_with_hash(uint32_t p_hash, uint32_t p_index) {
|
||||
uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||
const uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
|
||||
uint32_t hash = p_hash;
|
||||
uint32_t index = p_index;
|
||||
uint32_t distance = 0;
|
||||
uint32_t pos = hash % capacity;
|
||||
uint32_t pos = fastmod(hash, capacity_inv, capacity);
|
||||
|
||||
while (true) {
|
||||
if (hashes[pos] == EMPTY_HASH) {
|
||||
|
@ -124,7 +126,7 @@ private:
|
|||
}
|
||||
|
||||
// Not an empty slot, let's check the probing length of the existing one.
|
||||
uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity);
|
||||
uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv);
|
||||
if (existing_probe_len < distance) {
|
||||
key_to_hash[index] = pos;
|
||||
SWAP(hash, hashes[pos]);
|
||||
|
@ -132,7 +134,7 @@ private:
|
|||
distance = existing_probe_len;
|
||||
}
|
||||
|
||||
pos = (pos + 1) % capacity;
|
||||
pos = fastmod(pos + 1, capacity_inv, capacity);
|
||||
distance++;
|
||||
}
|
||||
}
|
||||
|
@ -265,9 +267,10 @@ public:
|
|||
uint32_t key_pos = pos;
|
||||
pos = key_to_hash[pos]; //make hash pos
|
||||
|
||||
uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||
uint32_t next_pos = (pos + 1) % capacity;
|
||||
while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity) != 0) {
|
||||
const uint32_t capacity = hash_table_size_primes[capacity_index];
|
||||
const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index];
|
||||
uint32_t next_pos = fastmod(pos + 1, capacity_inv, capacity);
|
||||
while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) {
|
||||
uint32_t kpos = hash_to_key[pos];
|
||||
uint32_t kpos_next = hash_to_key[next_pos];
|
||||
SWAP(key_to_hash[kpos], key_to_hash[kpos_next]);
|
||||
|
@ -275,7 +278,7 @@ public:
|
|||
SWAP(hash_to_key[next_pos], hash_to_key[pos]);
|
||||
|
||||
pos = next_pos;
|
||||
next_pos = (pos + 1) % capacity;
|
||||
next_pos = fastmod(pos + 1, capacity_inv, capacity);
|
||||
}
|
||||
|
||||
hashes[pos] = EMPTY_HASH;
|
||||
|
|
|
@ -437,4 +437,66 @@ const uint32_t hash_table_size_primes[HASH_TABLE_SIZE_MAX] = {
|
|||
1610612741,
|
||||
};
|
||||
|
||||
// Computed with elem_i = UINT64_C (0 x FFFFFFFF FFFFFFFF ) / d_i + 1, where d_i is the i-th element of the above array.
|
||||
const uint64_t hash_table_size_primes_inv[HASH_TABLE_SIZE_MAX] = {
|
||||
3689348814741910324,
|
||||
1418980313362273202,
|
||||
802032351030850071,
|
||||
392483916461905354,
|
||||
190172619316593316,
|
||||
95578984837873325,
|
||||
47420935922132524,
|
||||
23987963684927896,
|
||||
11955116055547344,
|
||||
5991147799191151,
|
||||
2998982941588287,
|
||||
1501077717772769,
|
||||
750081082979285,
|
||||
375261795343686,
|
||||
187625172388393,
|
||||
93822606204624,
|
||||
46909513691883,
|
||||
23456218233098,
|
||||
11728086747027,
|
||||
5864041509391,
|
||||
2932024948977,
|
||||
1466014921160,
|
||||
733007198436,
|
||||
366503839517,
|
||||
183251896093,
|
||||
91625960335,
|
||||
45812983922,
|
||||
22906489714,
|
||||
11453246088
|
||||
};
|
||||
|
||||
/**
|
||||
* Fastmod computes ( n mod d ) given the precomputed c much faster than n % d.
|
||||
* The implementation of fastmod is based on the following paper by Daniel Lemire et al.
|
||||
* Faster Remainder by Direct Computation: Applications to Compilers and Software Libraries
|
||||
* https://arxiv.org/abs/1902.01961
|
||||
*/
|
||||
static _FORCE_INLINE_ uint32_t fastmod(const uint32_t n, const uint64_t c, const uint32_t d) {
|
||||
#if defined(_MSC_VER)
|
||||
// Returns the upper 64 bits of the product of two 64-bit unsigned integers.
|
||||
// This intrinsic function is required since MSVC does not support unsigned 128-bit integers.
|
||||
#if defined(_M_X64) || defined(_M_ARM64)
|
||||
return __umulh(c * n, d);
|
||||
#else
|
||||
// Fallback to the slower method for 32-bit platforms.
|
||||
return n % d;
|
||||
#endif // _M_X64 || _M_ARM64
|
||||
#else
|
||||
#ifdef __SIZEOF_INT128__
|
||||
// Prevent compiler warning, because we know what we are doing.
|
||||
uint64_t lowbits = c * n;
|
||||
__extension__ typedef unsigned __int128 uint128;
|
||||
return static_cast<uint64_t>(((uint128)lowbits * d) >> 64);
|
||||
#else
|
||||
// Fallback to the slower method if no 128-bit unsigned integer type is available.
|
||||
return n % d;
|
||||
#endif // __SIZEOF_INT128__
|
||||
#endif // _MSC_VER
|
||||
}
|
||||
|
||||
#endif // HASHFUNCS_H
|
||||
|
|
Loading…
Reference in a new issue