-Removed OpenMP support, replaced by a custom class.

-Disabled Opus, implementation is wrong.
This commit is contained in:
Juan Linietsky 2017-12-24 09:31:17 -03:00
parent 83182ea4a1
commit 021f3c924b
10 changed files with 119 additions and 68 deletions

View file

@ -168,7 +168,6 @@ opts.Add(BoolVariable('vsproj', "Generate Visual Studio Project.", False))
opts.Add(EnumVariable('warnings', "Set the level of warnings emitted during compilation", 'no', ('extra', 'all', 'moderate', 'no'))) opts.Add(EnumVariable('warnings', "Set the level of warnings emitted during compilation", 'no', ('extra', 'all', 'moderate', 'no')))
opts.Add(BoolVariable('progress', "Show a progress indicator during build", True)) opts.Add(BoolVariable('progress', "Show a progress indicator during build", True))
opts.Add(BoolVariable('dev', "If yes, alias for verbose=yes warnings=all", False)) opts.Add(BoolVariable('dev', "If yes, alias for verbose=yes warnings=all", False))
opts.Add(BoolVariable('openmp', "If yes, enable OpenMP", True))
opts.Add(EnumVariable('macports_clang', "Build using clang from MacPorts", 'no', ('no', '5.0', 'devel'))) opts.Add(EnumVariable('macports_clang', "Build using clang from MacPorts", 'no', ('no', '5.0', 'devel')))
# Thirdparty libraries # Thirdparty libraries

View file

@ -0,0 +1,2 @@
#include "threaded_array_processor.h"

View file

@ -0,0 +1,80 @@
#ifndef THREADED_ARRAY_PROCESSOR_H
#define THREADED_ARRAY_PROCESSOR_H
#include "os/mutex.h"
#include "os/os.h"
#include "os/thread.h"
#include "safe_refcount.h"
#include "thread_safe.h"
// Work descriptor handed to thread_process_array() and its worker threads.
//
//   C - class that owns the callback.
//   U - type of the extra value forwarded to every callback invocation.
//
// The struct is a plain aggregate: callers fill in every field before use.
template <typename C, typename U>
struct ThreadArrayProcessData {
	uint32_t elements; // Total number of indices to process.
	uint32_t index; // Progress counter; workers advance it to claim the next index.
	C *instance; // Object the callback is invoked on.
	U userdata; // Passed by value to each callback invocation.
	void (C::*method)(uint32_t, U); // Callback: (element index, userdata).

	// Invokes the callback on `instance` for the single element `p_index`.
	void process(uint32_t p_index) {
		C &target = *instance;
		(target.*method)(p_index, userdata);
	}
};
#ifndef NO_THREADS
// Thread entry point used by thread_process_array().
//
// `ud` must point to a T (a ThreadArrayProcessData instantiation) shared by
// all workers. Each pass claims an index by atomically bumping the shared
// counter and stops as soon as the claimed index is out of range.
// NOTE(review): this relies on atomic_increment() returning the value *after*
// the increment, which is why the caller handles the counter's initial index
// itself before starting the workers - confirm against safe_refcount.h.
template <class T>
void process_array_thread(void *ud) {
	T *shared = static_cast<T *>(ud);

	for (;;) {
		const uint32_t claimed = atomic_increment(&shared->index);
		if (claimed >= shared->elements) {
			return;
		}
		shared->process(claimed);
	}
}
// Calls (p_instance->*p_method)(i, p_userdata) once for every i in
// [0, p_elements), spreading the calls over one worker thread per processor
// reported by the OS. Blocks until every worker has finished.
//
//   p_elements - number of indices to process; 0 is a no-op.
//   p_instance - object the callback is invoked on.
//   p_method   - member function of C taking (uint32_t index, U userdata).
//   p_userdata - value forwarded (by value) to every invocation.
//
// The callback runs concurrently from several threads, so it must only touch
// per-index data or otherwise synchronize on its own.
template <class C, class M, class U>
void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {

	// Index 0 is processed unconditionally below, so an empty range has to be
	// rejected here; otherwise the callback would run on a nonexistent element.
	if (p_elements == 0) {
		return;
	}

	ThreadArrayProcessData<C, U> data;
	data.method = p_method;
	data.instance = p_instance;
	data.userdata = p_userdata;
	data.index = 0;
	data.elements = p_elements;
	data.process(data.index); //process first, let threads increment for next

	// `data` lives on this stack frame; that is safe because every worker is
	// joined below before the function returns.
	Vector<Thread *> threads;
	threads.resize(OS::get_singleton()->get_processor_count());

	for (int i = 0; i < threads.size(); i++) {
		threads[i] = Thread::create(process_array_thread<ThreadArrayProcessData<C, U> >, &data);
	}

	for (int i = 0; i < threads.size(); i++) {
		Thread::wait_to_finish(threads[i]);
		memdelete(threads[i]);
	}
}
#else
template <class C, class M, class U>
void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
ThreadArrayProcessData<C, U> data;
data.method = p_method;
data.instance = p_instance;
data.userdata = p_userdata;
data.index = 0;
data.elements = p_elements;
for (uint32_t i = 0; i < p_elements; i++) {
data.process(i);
}
}
#endif
#endif // THREADED_ARRAY_PROCESSOR_H

View file

@ -1,5 +1,10 @@
def can_build(platform): def can_build(platform):
return True # Sorry guys, do not enable this unless you can figure out a way
# to get Opus to not do any memory allocation or system calls
# in the audio thread.
# Currently the implementation even reads files from the audio thread,
# and this is not how audio programming works.
return False
def configure(env): def configure(env):
pass pass

View file

@ -82,9 +82,6 @@ def configure(env):
env['RANLIB'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ranlib" env['RANLIB'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ranlib"
env['AS'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-as" env['AS'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-as"
env.Append(CCFLAGS=['-D__MACPORTS__']) #hack to fix libvpx MM256_BROADCASTSI128_SI256 define env.Append(CCFLAGS=['-D__MACPORTS__']) #hack to fix libvpx MM256_BROADCASTSI128_SI256 define
if env['tools'] and env['openmp']:
env.Append(CPPFLAGS=['-fopenmp'])
env.Append(LINKFLAGS=['-fopenmp'])
else: # osxcross build else: # osxcross build
root = os.environ.get("OSXCROSS_ROOT", 0) root = os.environ.get("OSXCROSS_ROOT", 0)

View file

@ -191,8 +191,6 @@ def configure(env):
if (env["use_lto"]): if (env["use_lto"]):
env.Append(CCFLAGS=['/GL']) env.Append(CCFLAGS=['/GL'])
env.Append(LINKFLAGS=['/LTCG']) env.Append(LINKFLAGS=['/LTCG'])
if env['tools'] and env['openmp']:
env.Append(CPPFLAGS=['/openmp'])
env.Append(CCFLAGS=["/I" + p for p in os.getenv("INCLUDE").split(";")]) env.Append(CCFLAGS=["/I" + p for p in os.getenv("INCLUDE").split(";")])
env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")]) env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")])
@ -270,9 +268,6 @@ def configure(env):
env.Append(CCFLAGS=['-flto']) env.Append(CCFLAGS=['-flto'])
env.Append(LINKFLAGS=['-flto=' + str(env.GetOption("num_jobs"))]) env.Append(LINKFLAGS=['-flto=' + str(env.GetOption("num_jobs"))])
if env['tools'] and env['openmp']:
env.Append(CPPFLAGS=['-fopenmp'])
env.Append(LINKFLAGS=['-fopenmp'])
## Compile flags ## Compile flags

View file

@ -265,9 +265,5 @@ def configure(env):
env.Append(LINKFLAGS=['-m64', '-L/usr/lib/i686-linux-gnu']) env.Append(LINKFLAGS=['-m64', '-L/usr/lib/i686-linux-gnu'])
if env['tools'] and env['openmp']:
env.Append(CPPFLAGS=['-fopenmp'])
env.Append(LINKFLAGS=['-fopenmp'])
if env['use_static_cpp']: if env['use_static_cpp']:
env.Append(LINKFLAGS=['-static-libstdc++']) env.Append(LINKFLAGS=['-static-libstdc++'])

View file

@ -772,8 +772,8 @@ void BakedLightmap::_bind_methods() {
BakedLightmap::BakedLightmap() { BakedLightmap::BakedLightmap() {
extents = Vector3(10, 10, 10); extents = Vector3(10, 10, 10);
bake_cell_size = 0.1; bake_cell_size = 0.25;
capture_cell_size = 0.25; capture_cell_size = 0.5;
bake_quality = BAKE_QUALITY_MEDIUM; bake_quality = BAKE_QUALITY_MEDIUM;
bake_mode = BAKE_MODE_CONE_TRACE; bake_mode = BAKE_MODE_CONE_TRACE;

View file

@ -30,11 +30,9 @@
#include "voxel_light_baker.h" #include "voxel_light_baker.h"
#include "os/os.h" #include "os/os.h"
#include "os/threaded_array_processor.h"
#include <stdlib.h> #include <stdlib.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#define FINDMINMAX(x0, x1, x2, min, max) \ #define FINDMINMAX(x0, x1, x2, min, max) \
min = max = x0; \ min = max = x0; \
@ -1689,7 +1687,7 @@ _ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *state) {
return x; return x;
} }
Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state) { Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal) {
int samples_per_quality[3] = { 48, 128, 512 }; int samples_per_quality[3] = { 48, 128, 512 };
@ -1711,8 +1709,7 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
const Light *light = bake_light.ptr(); const Light *light = bake_light.ptr();
const Cell *cells = bake_cells.ptr(); const Cell *cells = bake_cells.ptr();
// Prevent false sharing when running on OpenMP uint32_t local_rng_state = rand(); //needs to be fixed again
uint32_t local_rng_state = *rng_state;
for (int i = 0; i < samples; i++) { for (int i = 0; i < samples; i++) {
@ -1796,10 +1793,30 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
} }
// Make sure we don't reset this thread's RNG state // Make sure we don't reset this thread's RNG state
*rng_state = local_rng_state;
return accum / samples; return accum / samples;
} }
void VoxelLightBaker::_lightmap_bake_point(uint32_t p_x, LightMap *p_line) {
LightMap *pixel = &p_line[p_x];
if (pixel->pos == Vector3())
return;
//print_line("pos: " + pixel->pos + " normal " + pixel->normal);
switch (bake_mode) {
case BAKE_MODE_CONE_TRACE: {
pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
} break;
case BAKE_MODE_RAY_TRACE: {
pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal) * energy;
} break;
// pixel->light = Vector3(1, 1, 1);
//}
}
}
Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh, LightMapData &r_lightmap, bool (*p_bake_time_func)(void *, float, float), void *p_bake_time_ud) { Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh, LightMapData &r_lightmap, bool (*p_bake_time_func)(void *, float, float), void *p_bake_time_ud) {
//transfer light information to a lightmap //transfer light information to a lightmap
@ -1862,53 +1879,10 @@ Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh
volatile int lines = 0; volatile int lines = 0;
// make sure our OS-level rng is seeded // make sure our OS-level rng is seeded
srand(OS::get_singleton()->get_ticks_usec());
// setup an RNG state for each OpenMP thread
uint32_t threadcount = 1;
uint32_t threadnum = 0;
#ifdef _OPENMP
threadcount = omp_get_max_threads();
#endif
Vector<uint32_t> rng_states;
rng_states.resize(threadcount);
for (uint32_t i = 0; i < threadcount; i++) {
do {
rng_states[i] = rand();
} while (rng_states[i] == 0);
}
uint32_t *rng_states_p = rng_states.ptrw();
for (int i = 0; i < height; i++) { for (int i = 0; i < height; i++) {
//print_line("bake line " + itos(i) + " / " + itos(height)); thread_process_array(width,this,&VoxelLightBaker::_lightmap_bake_point,&lightmap_ptr[i*width]);
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 1) private(threadnum)
#endif
for (int j = 0; j < width; j++) {
#ifdef _OPENMP
threadnum = omp_get_thread_num();
#endif
//if (i == 125 && j == 280) {
LightMap *pixel = &lightmap_ptr[i * width + j];
if (pixel->pos == Vector3())
continue; //unused, skipe
//print_line("pos: " + pixel->pos + " normal " + pixel->normal);
switch (bake_mode) {
case BAKE_MODE_CONE_TRACE: {
pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
} break;
case BAKE_MODE_RAY_TRACE: {
pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal, &rng_states_p[threadnum]) * energy;
} break;
// pixel->light = Vector3(1, 1, 1);
//}
}
}
lines = MAX(lines, i); //for multithread lines = MAX(lines, i); //for multithread
if (p_bake_time_func) { if (p_bake_time_func) {

View file

@ -148,9 +148,12 @@ private:
_FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha); _FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha);
_FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture); _FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture);
_FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal); _FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state); _FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
void _lightmap_bake_point(uint32_t p_x, LightMap *p_line);
public: public:
void begin_bake(int p_subdiv, const AABB &p_bounds); void begin_bake(int p_subdiv, const AABB &p_bounds);
void plot_mesh(const Transform &p_xform, Ref<Mesh> &p_mesh, const Vector<Ref<Material> > &p_materials, const Ref<Material> &p_override_material); void plot_mesh(const Transform &p_xform, Ref<Mesh> &p_mesh, const Vector<Ref<Material> > &p_materials, const Ref<Material> &p_override_material);
void begin_bake_light(BakeQuality p_quality = BAKE_QUALITY_MEDIUM, BakeMode p_bake_mode = BAKE_MODE_CONE_TRACE, float p_propagation = 0.85, float p_energy = 1); void begin_bake_light(BakeQuality p_quality = BAKE_QUALITY_MEDIUM, BakeMode p_bake_mode = BAKE_MODE_CONE_TRACE, float p_propagation = 0.85, float p_energy = 1);