Fix lightmapper rng
Our previous attempts to fix the lightmapper may have inadvertently reintroduced the very issue we were trying to fix. It appears that on some platforms rand() takes a mutex, which makes it slower, while on others it may keep per-thread state that would need to be initialized with srand() on each thread. This slows down light baking further. This commit sets up a separate RNG state for each OpenMP thread, calling rand() only in the single-threaded part of the code to seed those states, and keeps them in a vector. I believe this solves our problems.
This commit is contained in:
parent
b08735f209
commit
ccbb5923ac
2 changed files with 36 additions and 17 deletions
|
@ -32,6 +32,10 @@
|
|||
#include "os/os.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#define FINDMINMAX(x0, x1, x2, min, max) \
|
||||
min = max = x0; \
|
||||
if (x1 < min) min = x1; \
|
||||
|
@ -1675,19 +1679,17 @@ Vector3 VoxelLightBaker::_compute_pixel_light_at_pos(const Vector3 &p_pos, const
|
|||
return accum;
|
||||
}
|
||||
|
||||
uint32_t xorshiftstate[] = { 123 }; // anything non-zero will do here
|
||||
|
||||
_ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *seed) {
|
||||
_ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *state) {
|
||||
/* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */
|
||||
uint32_t x = *seed;
|
||||
uint32_t x = *state;
|
||||
x ^= x << 13;
|
||||
x ^= x >> 17;
|
||||
x ^= x << 5;
|
||||
*seed = x;
|
||||
*state = x;
|
||||
return x;
|
||||
}
|
||||
|
||||
Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal) {
|
||||
Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state) {
|
||||
|
||||
int samples_per_quality[3] = { 48, 128, 512 };
|
||||
|
||||
|
@ -1709,16 +1711,11 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
|
|||
const Light *light = bake_light.ptr();
|
||||
const Cell *cells = bake_cells.ptr();
|
||||
|
||||
uint32_t seed = 0;
|
||||
while (seed == 0) {
|
||||
seed = rand(); //system rand is thread safe, do not replace by Math:: random.
|
||||
}
|
||||
|
||||
for (int i = 0; i < samples; i++) {
|
||||
|
||||
float random_angle1 = (((xorshift32(&seed) % 65535) / 65535.0) * 2.0 - 1.0) * spread;
|
||||
float random_angle1 = (((xorshift32(rng_state) % 65535) / 65535.0) * 2.0 - 1.0) * spread;
|
||||
Vector3 axis(0, sin(random_angle1), cos(random_angle1));
|
||||
float random_angle2 = ((xorshift32(&seed) % 65535) / 65535.0) * Math_PI * 2.0;
|
||||
float random_angle2 = ((xorshift32(rng_state) % 65535) / 65535.0) * Math_PI * 2.0;
|
||||
Basis rot(Vector3(0, 0, 1), random_angle2);
|
||||
axis = rot.xform(axis);
|
||||
|
||||
|
@ -1852,21 +1849,43 @@ Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh
|
|||
_plot_triangle(uv, vertex, normal, lightmap.ptrw(), width, height);
|
||||
}
|
||||
}
|
||||
//step 3 perform voxel cone trace on lightmap pixels
|
||||
|
||||
//step 3 perform voxel cone trace on lightmap pixels
|
||||
{
|
||||
LightMap *lightmap_ptr = lightmap.ptrw();
|
||||
uint64_t begin_time = OS::get_singleton()->get_ticks_usec();
|
||||
volatile int lines = 0;
|
||||
|
||||
// make sure our OS-level rng is seeded
|
||||
srand(OS::get_singleton()->get_ticks_usec());
|
||||
|
||||
// setup an RNG state for each OpenMP thread
|
||||
uint32_t threadcount = 1;
|
||||
uint32_t threadnum = 0;
|
||||
#ifdef _OPENMP
|
||||
threadcount = omp_get_max_threads();
|
||||
#endif
|
||||
Vector<uint32_t> rng_states;
|
||||
rng_states.resize(threadcount);
|
||||
for (uint32_t i = 0; i < threadcount; i++) {
|
||||
do {
|
||||
rng_states[i] = rand();
|
||||
} while (rng_states[i] == 0);
|
||||
}
|
||||
uint32_t *rng_states_p = rng_states.ptrw();
|
||||
|
||||
for (int i = 0; i < height; i++) {
|
||||
|
||||
//print_line("bake line " + itos(i) + " / " + itos(height));
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
#pragma omp parallel for schedule(dynamic, 1) private(threadnum)
|
||||
#endif
|
||||
for (int j = 0; j < width; j++) {
|
||||
|
||||
#ifdef _OPENMP
|
||||
threadnum = omp_get_thread_num();
|
||||
#endif
|
||||
|
||||
//if (i == 125 && j == 280) {
|
||||
|
||||
LightMap *pixel = &lightmap_ptr[i * width + j];
|
||||
|
@ -1879,7 +1898,7 @@ Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh
|
|||
pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
|
||||
} break;
|
||||
case BAKE_MODE_RAY_TRACE: {
|
||||
pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal) * energy;
|
||||
pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal, &rng_states_p[threadnum]) * energy;
|
||||
} break;
|
||||
// pixel->light = Vector3(1, 1, 1);
|
||||
//}
|
||||
|
|
|
@ -148,7 +148,7 @@ private:
|
|||
_FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha);
|
||||
_FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture);
|
||||
_FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
|
||||
_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
|
||||
_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state);
|
||||
|
||||
public:
|
||||
void begin_bake(int p_subdiv, const AABB &p_bounds);
|
||||
|
|
Loading…
Reference in a new issue