diff --git a/SConstruct b/SConstruct
index dbce94f296f..88b29695cba 100644
--- a/SConstruct
+++ b/SConstruct
@@ -168,7 +168,6 @@ opts.Add(BoolVariable('vsproj', "Generate Visual Studio Project.", False))
 opts.Add(EnumVariable('warnings', "Set the level of warnings emitted during compilation", 'no', ('extra', 'all', 'moderate', 'no')))
 opts.Add(BoolVariable('progress', "Show a progress indicator during build", True))
 opts.Add(BoolVariable('dev', "If yes, alias for verbose=yes warnings=all", False))
-opts.Add(BoolVariable('openmp', "If yes, enable OpenMP", True))
 opts.Add(EnumVariable('macports_clang', "Build using clang from MacPorts", 'no', ('no', '5.0', 'devel')))
 
 # Thirdparty libraries
diff --git a/core/os/threaded_array_processor.cpp b/core/os/threaded_array_processor.cpp
new file mode 100644
index 00000000000..8e92508ea5f
--- /dev/null
+++ b/core/os/threaded_array_processor.cpp
@@ -0,0 +1,2 @@
+#include "threaded_array_processor.h"
+
diff --git a/core/os/threaded_array_processor.h b/core/os/threaded_array_processor.h
new file mode 100644
index 00000000000..e584fbb193a
--- /dev/null
+++ b/core/os/threaded_array_processor.h
@@ -0,0 +1,94 @@
+#ifndef THREADED_ARRAY_PROCESSOR_H
+#define THREADED_ARRAY_PROCESSOR_H
+
+#include "os/mutex.h"
+#include "os/os.h"
+#include "os/thread.h"
+#include "safe_refcount.h"
+#include "thread_safe.h"
+
+// Shared state for one thread_process_array() call: the element count, the
+// index counter the workers advance, and the member function (plus the
+// userdata argument) that is invoked once per index.
+template <class C, class U>
+struct ThreadArrayProcessData {
+	uint32_t elements;
+	uint32_t index;
+	C *instance;
+	U userdata;
+	void (C::*method)(uint32_t, U);
+
+	void process(uint32_t p_index) {
+		(instance->*method)(p_index, userdata);
+	}
+};
+
+#ifndef NO_THREADS
+
+// Thread entry point; ud points to a T (a ThreadArrayProcessData). Keeps
+// processing the value returned by atomic_increment() on the shared counter
+// until that value reaches the element count.
+template <class T>
+void process_array_thread(void *ud) {
+
+	T &data = *(T *)ud;
+	while (true) {
+		uint32_t index = atomic_increment(&data.index);
+		if (index >= data.elements)
+			break;
+		data.process(index);
+	}
+}
+
+// Calls (p_instance->*p_method)(i, p_userdata) for each index i below p_elements:
+// index 0 on the calling thread, the following ones on one thread per reported
+// processor. Returns after every spawned thread has been waited for and freed.
+template <class C, class M, class U>
+void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
+
+	if (p_elements == 0)
+		return; //nothing to process, index 0 below would not be a valid element
+
+	ThreadArrayProcessData<C, U> data;
+	data.method = p_method;
+	data.instance = p_instance;
+	data.userdata = p_userdata;
+	data.index = 0;
+	data.elements = p_elements;
+	data.process(data.index); //process first, let threads increment for next
+
+	Vector<Thread *> threads;
+
+	threads.resize(OS::get_singleton()->get_processor_count());
+
+	for (int i = 0; i < threads.size(); i++) {
+		threads[i] = Thread::create(process_array_thread<ThreadArrayProcessData<C, U> >, &data);
+	}
+
+	for (int i = 0; i < threads.size(); i++) {
+		Thread::wait_to_finish(threads[i]);
+		memdelete(threads[i]);
+	}
+}
+
+#else
+
+// NO_THREADS build: same call signature, every index is processed in order on
+// the calling thread.
+template <class C, class M, class U>
+void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
+
+	ThreadArrayProcessData<C, U> data;
+	data.method = p_method;
+	data.instance = p_instance;
+	data.userdata = p_userdata;
+	data.index = 0;
+	data.elements = p_elements;
+	for (uint32_t i = 0; i < p_elements; i++) {
+		data.process(i);
+	}
+}
+
+#endif
+
+#endif // THREADED_ARRAY_PROCESSOR_H
diff --git a/modules/opus/config.py b/modules/opus/config.py
index 60f8d838d60..0ee1b1b7b40 100644
--- a/modules/opus/config.py
+++ b/modules/opus/config.py
@@ -1,5 +1,10 @@
 def can_build(platform):
-    return True
+    # Sorry guys, do not enable this unless you can figure out a way
+    # to get Opus to not do any memory allocation or system calls
+    # in the audio thread.
+    # Currently the implementation even reads files from the audio thread,
+    # and this is not how audio programming works.
+    return False
 
 def configure(env):
     pass
diff --git a/platform/osx/detect.py b/platform/osx/detect.py
index 5b04ab8826e..2e686fbee43 100644
--- a/platform/osx/detect.py
+++ b/platform/osx/detect.py
@@ -82,9 +82,6 @@ def configure(env):
             env['RANLIB'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ranlib"
             env['AS'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-as"
             env.Append(CCFLAGS=['-D__MACPORTS__']) #hack to fix libvpx MM256_BROADCASTSI128_SI256 define
-            if env['tools'] and env['openmp']:
-                env.Append(CPPFLAGS=['-fopenmp'])
-                env.Append(LINKFLAGS=['-fopenmp'])
 
     else: # osxcross build
         root = os.environ.get("OSXCROSS_ROOT", 0)
diff --git a/platform/windows/detect.py b/platform/windows/detect.py
index e216868bd85..3b8de2caf47 100644
--- a/platform/windows/detect.py
+++ b/platform/windows/detect.py
@@ -191,8 +191,6 @@ def configure(env):
         if (env["use_lto"]):
             env.Append(CCFLAGS=['/GL'])
             env.Append(LINKFLAGS=['/LTCG'])
-        if env['tools'] and env['openmp']:
-            env.Append(CPPFLAGS=['/openmp'])
 
         env.Append(CCFLAGS=["/I" + p for p in os.getenv("INCLUDE").split(";")])
         env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")])
@@ -270,9 +268,6 @@ def configure(env):
             env.Append(CCFLAGS=['-flto'])
             env.Append(LINKFLAGS=['-flto=' + str(env.GetOption("num_jobs"))])
 
-        if env['tools'] and env['openmp']:
-            env.Append(CPPFLAGS=['-fopenmp'])
-            env.Append(LINKFLAGS=['-fopenmp'])
 
         ## Compile flags
 
diff --git a/platform/x11/detect.py b/platform/x11/detect.py
index 98ae9a86585..cb45fed1be8 100644
--- a/platform/x11/detect.py
+++ b/platform/x11/detect.py
@@ -265,9 +265,5 @@ def configure(env):
         env.Append(LINKFLAGS=['-m64', '-L/usr/lib/i686-linux-gnu'])
 
 
-    if env['tools'] and env['openmp']:
-        env.Append(CPPFLAGS=['-fopenmp'])
-        env.Append(LINKFLAGS=['-fopenmp'])
-
     if env['use_static_cpp']:
         env.Append(LINKFLAGS=['-static-libstdc++'])
diff --git a/scene/3d/baked_lightmap.cpp b/scene/3d/baked_lightmap.cpp
index 9a77626296e..8c282a31b81 100644
--- a/scene/3d/baked_lightmap.cpp
+++ b/scene/3d/baked_lightmap.cpp
@@ -772,8 +772,8 @@ void BakedLightmap::_bind_methods() {
 BakedLightmap::BakedLightmap() {
 
 	extents = Vector3(10, 10, 10);
-	bake_cell_size = 0.1;
-	capture_cell_size = 0.25;
+	bake_cell_size = 0.25;
+	capture_cell_size = 0.5;
 	bake_quality = BAKE_QUALITY_MEDIUM;
 	bake_mode = BAKE_MODE_CONE_TRACE;
 
diff --git a/scene/3d/voxel_light_baker.cpp b/scene/3d/voxel_light_baker.cpp
index bf0f801e329..17aa649dff3 100644
--- a/scene/3d/voxel_light_baker.cpp
+++ b/scene/3d/voxel_light_baker.cpp
@@ -30,11 +30,9 @@
 
 #include "voxel_light_baker.h"
 #include "os/os.h"
+#include "os/threaded_array_processor.h"
 
 #include <stdlib.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif
 
 #define FINDMINMAX(x0, x1, x2, min, max) \
 	min = max = x0;                      \
@@ -1689,7 +1687,7 @@ _ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *state) {
 	return x;
 }
 
-Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state) {
+Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal) {
 
 	int samples_per_quality[3] = { 48, 128, 512 };
 
@@ -1711,8 +1709,7 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
 	const Light *light = bake_light.ptr();
 	const Cell *cells = bake_cells.ptr();
 
-	// Prevent false sharing when running on OpenMP
-	uint32_t local_rng_state = *rng_state;
+	uint32_t local_rng_state = rand() | 1; //seed must not be 0 (the old setup retried rand() until nonzero); rand() still needs to be replaced
 
 	for (int i = 0; i < samples; i++) {
 
@@ -1796,10 +1793,27 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
 	}
 
-	// Make sure we don't reset this thread's RNG state
-	*rng_state = local_rng_state;
 	return accum / samples;
 }
 
+// Bakes one lightmap texel: p_line points at the first texel of a row and p_x
+// selects the texel inside it. Texels whose pos equals a default-constructed
+// Vector3 are left untouched. Called by make_lightmap() via thread_process_array().
+void VoxelLightBaker::_lightmap_bake_point(uint32_t p_x, LightMap *p_line) {
+
+	LightMap *pixel = &p_line[p_x];
+	if (pixel->pos == Vector3())
+		return; //unused, skip
+
+	switch (bake_mode) {
+		case BAKE_MODE_CONE_TRACE: {
+			pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
+		} break;
+		case BAKE_MODE_RAY_TRACE: {
+			pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal) * energy;
+		} break;
+	}
+}
+
 Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh, LightMapData &r_lightmap, bool (*p_bake_time_func)(void *, float, float), void *p_bake_time_ud) {
 
 	//transfer light information to a lightmap
@@ -1862,53 +1876,11 @@ Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh
 	volatile int lines = 0;
 
 	// make sure our OS-level rng is seeded
 	srand(OS::get_singleton()->get_ticks_usec());
-
-	// setup an RNG state for each OpenMP thread
-	uint32_t threadcount = 1;
-	uint32_t threadnum = 0;
-#ifdef _OPENMP
-	threadcount = omp_get_max_threads();
-#endif
-	Vector<uint32_t> rng_states;
-	rng_states.resize(threadcount);
-	for (uint32_t i = 0; i < threadcount; i++) {
-		do {
-			rng_states[i] = rand();
-		} while (rng_states[i] == 0);
-	}
-	uint32_t *rng_states_p = rng_states.ptrw();
 
 	for (int i = 0; i < height; i++) {
 
-		//print_line("bake line " + itos(i) + " / " + itos(height));
-#ifdef _OPENMP
-#pragma omp parallel for schedule(dynamic, 1) private(threadnum)
-#endif
-		for (int j = 0; j < width; j++) {
-
-#ifdef _OPENMP
-			threadnum = omp_get_thread_num();
-#endif
-
-			//if (i == 125 && j == 280) {
-
-			LightMap *pixel = &lightmap_ptr[i * width + j];
-			if (pixel->pos == Vector3())
-				continue; //unused, skipe
-
-			//print_line("pos: " + pixel->pos + " normal " + pixel->normal);
-			switch (bake_mode) {
-				case BAKE_MODE_CONE_TRACE: {
-					pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
-				} break;
-				case BAKE_MODE_RAY_TRACE: {
-					pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal, &rng_states_p[threadnum]) * energy;
-				} break;
-					// pixel->light = Vector3(1, 1, 1);
-					//}
-			}
-		}
+		thread_process_array(width, this, &VoxelLightBaker::_lightmap_bake_point, &lightmap_ptr[i * width]);
 
 		lines = MAX(lines, i); //for multithread
 		if (p_bake_time_func) {
diff --git a/scene/3d/voxel_light_baker.h b/scene/3d/voxel_light_baker.h
index 7db31f8a675..68e11c356ba 100644
--- a/scene/3d/voxel_light_baker.h
+++ b/scene/3d/voxel_light_baker.h
@@ -148,9 +148,12 @@ private:
 	_FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha);
 	_FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture);
 	_FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
-	_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state);
+	_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
+
+	// per-texel worker run by make_lightmap() through thread_process_array()
+	void _lightmap_bake_point(uint32_t p_x, LightMap *p_line);
 
 public:
 	void begin_bake(int p_subdiv, const AABB &p_bounds);
 	void plot_mesh(const Transform &p_xform, Ref<ArrayMesh> &p_mesh, const Vector<Ref<Material> > &p_materials, const Ref<Material> &p_override_material);
 	void begin_bake_light(BakeQuality p_quality = BAKE_QUALITY_MEDIUM, BakeMode p_bake_mode = BAKE_MODE_CONE_TRACE, float p_propagation = 0.85, float p_energy = 1);