Prevent false sharing in lightbaker RNG state
The previous commit corrected the RNG behavior for the lightbaker but also made it significantly slower on high core count systems. Because the per-thread RNG states are stored in a vector and are therefore physically close together in RAM (sharing cache lines), every call that generates the next random number writes to its state and forces a cache synchronization across all cores. This commit creates a temporary local copy of the RNG state before entering the loop and saves it back to the global state when the loop is done. This preserves the per-thread RNG state (and random number quality) while significantly improving performance. On my 16-thread box it saves 3 minutes when baking the Sponza scene, bringing performance back in line with what it was before the various RNG fixes were introduced, and making it slightly faster than the first implementation.
This commit is contained in:
parent
0ead6b3cf5
commit
0db5123548
1 changed files with 7 additions and 2 deletions
|
@ -1711,11 +1711,14 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
|
|||
const Light *light = bake_light.ptr();
|
||||
const Cell *cells = bake_cells.ptr();
|
||||
|
||||
// Prevent false sharing when running on OpenMP
|
||||
uint32_t local_rng_state = *rng_state;
|
||||
|
||||
for (int i = 0; i < samples; i++) {
|
||||
|
||||
float random_angle1 = (((xorshift32(rng_state) % 65535) / 65535.0) * 2.0 - 1.0) * spread;
|
||||
float random_angle1 = (((xorshift32(&local_rng_state) % 65535) / 65535.0) * 2.0 - 1.0) * spread;
|
||||
Vector3 axis(0, sin(random_angle1), cos(random_angle1));
|
||||
float random_angle2 = ((xorshift32(rng_state) % 65535) / 65535.0) * Math_PI * 2.0;
|
||||
float random_angle2 = ((xorshift32(&local_rng_state) % 65535) / 65535.0) * Math_PI * 2.0;
|
||||
Basis rot(Vector3(0, 0, 1), random_angle2);
|
||||
axis = rot.xform(axis);
|
||||
|
||||
|
@ -1792,6 +1795,8 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
|
|||
}
|
||||
}
|
||||
|
||||
// Make sure we don't reset this thread's RNG state
|
||||
*rng_state = local_rng_state;
|
||||
return accum / samples;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue