-Removed OpenMP support, replaced by a custom class.
-Disabled Opus, implementation is wrong.
This commit is contained in:
parent
83182ea4a1
commit
021f3c924b
10 changed files with 119 additions and 68 deletions
|
@ -168,7 +168,6 @@ opts.Add(BoolVariable('vsproj', "Generate Visual Studio Project.", False))
|
||||||
opts.Add(EnumVariable('warnings', "Set the level of warnings emitted during compilation", 'no', ('extra', 'all', 'moderate', 'no')))
|
opts.Add(EnumVariable('warnings', "Set the level of warnings emitted during compilation", 'no', ('extra', 'all', 'moderate', 'no')))
|
||||||
opts.Add(BoolVariable('progress', "Show a progress indicator during build", True))
|
opts.Add(BoolVariable('progress', "Show a progress indicator during build", True))
|
||||||
opts.Add(BoolVariable('dev', "If yes, alias for verbose=yes warnings=all", False))
|
opts.Add(BoolVariable('dev', "If yes, alias for verbose=yes warnings=all", False))
|
||||||
opts.Add(BoolVariable('openmp', "If yes, enable OpenMP", True))
|
|
||||||
opts.Add(EnumVariable('macports_clang', "Build using clang from MacPorts", 'no', ('no', '5.0', 'devel')))
|
opts.Add(EnumVariable('macports_clang', "Build using clang from MacPorts", 'no', ('no', '5.0', 'devel')))
|
||||||
|
|
||||||
# Thirdparty libraries
|
# Thirdparty libraries
|
||||||
|
|
2
core/os/threaded_array_processor.cpp
Normal file
2
core/os/threaded_array_processor.cpp
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
#include "threaded_array_processor.h"
|
||||||
|
|
80
core/os/threaded_array_processor.h
Normal file
80
core/os/threaded_array_processor.h
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
#ifndef THREADED_ARRAY_PROCESSOR_H
|
||||||
|
#define THREADED_ARRAY_PROCESSOR_H
|
||||||
|
|
||||||
|
#include "os/mutex.h"
|
||||||
|
#include "os/os.h"
|
||||||
|
#include "os/thread.h"
|
||||||
|
#include "safe_refcount.h"
|
||||||
|
#include "thread_safe.h"
|
||||||
|
|
||||||
|
/**
 * Shared job description for processing an index range through a member
 * function of `C`.
 *
 * `elements` is the number of indices in the range and `index` is the shared
 * cursor the processing functions advance. Every call made through process()
 * forwards the same `userdata` value to `method` on `instance`.
 */
template <class C, class U>
struct ThreadArrayProcessData {
	// Signature every per-element callback must have.
	typedef void (C::*Method)(uint32_t, U);

	uint32_t elements; // size of the index range
	uint32_t index; // shared cursor into the range
	C *instance; // object the callback is invoked on
	U userdata; // passed unchanged to every invocation
	Method method; // per-element callback

	// Invokes the callback for a single element index.
	void process(uint32_t p_index) {
		C &target = *instance;
		(target.*method)(p_index, userdata);
	}
};
|
||||||
|
|
||||||
|
#ifndef NO_THREADS
|
||||||
|
|
||||||
|
/**
 * Worker entry point: repeatedly claims the next element index from the
 * shared job and processes it, returning once the claimed index falls
 * outside the range.
 *
 * `ud` must point to a live `T` exposing `index`, `elements` and
 * `process(uint32_t)`.
 */
template <class T>
void process_array_thread(void *ud) {

	T *job = (T *)ud;

	for (;;) {
		// Claim an index by advancing the shared cursor.
		const uint32_t claimed = atomic_increment(&job->index);
		if (claimed >= job->elements) {
			return; // range exhausted
		}
		job->process(claimed);
	}
}
|
||||||
|
|
||||||
|
template <class C, class M, class U>
|
||||||
|
void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
|
||||||
|
|
||||||
|
ThreadArrayProcessData<C, U> data;
|
||||||
|
data.method = p_method;
|
||||||
|
data.instance = p_instance;
|
||||||
|
data.userdata = p_userdata;
|
||||||
|
data.index = 0;
|
||||||
|
data.elements = p_elements;
|
||||||
|
data.process(data.index); //process first, let threads increment for next
|
||||||
|
|
||||||
|
Vector<Thread *> threads;
|
||||||
|
|
||||||
|
threads.resize(OS::get_singleton()->get_processor_count());
|
||||||
|
|
||||||
|
for (int i = 0; i < threads.size(); i++) {
|
||||||
|
threads[i] = Thread::create(process_array_thread<ThreadArrayProcessData<C, U> >, &data);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < threads.size(); i++) {
|
||||||
|
Thread::wait_to_finish(threads[i]);
|
||||||
|
memdelete(threads[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
template <class C, class M, class U>
|
||||||
|
void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
|
||||||
|
|
||||||
|
ThreadArrayProcessData<C, U> data;
|
||||||
|
data.method = p_method;
|
||||||
|
data.instance = p_instance;
|
||||||
|
data.userdata = p_userdata;
|
||||||
|
data.index = 0;
|
||||||
|
data.elements = p_elements;
|
||||||
|
for (uint32_t i = 0; i < p_elements; i++) {
|
||||||
|
data.process(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // THREADED_ARRAY_PROCESSOR_H
|
|
@ -1,5 +1,10 @@
|
||||||
def can_build(platform):
    """Tell the build system whether this module may be built for `platform`.

    Always returns False: the module is disabled on every platform.
    """
    # Sorry guys, do not enable this unless you can figure out a way
    # to get Opus to not do any memory allocation or system calls
    # in the audio thread.
    # Currently the implementation even reads files from the audio thread,
    # and this is not how audio programming works.
    return False
|
||||||
|
|
||||||
def configure(env):
    """Module configuration hook; intentionally does nothing with `env`."""
    return None
|
||||||
|
|
|
@ -82,9 +82,6 @@ def configure(env):
|
||||||
env['RANLIB'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ranlib"
|
env['RANLIB'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ranlib"
|
||||||
env['AS'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-as"
|
env['AS'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-as"
|
||||||
env.Append(CCFLAGS=['-D__MACPORTS__']) #hack to fix libvpx MM256_BROADCASTSI128_SI256 define
|
env.Append(CCFLAGS=['-D__MACPORTS__']) #hack to fix libvpx MM256_BROADCASTSI128_SI256 define
|
||||||
if env['tools'] and env['openmp']:
|
|
||||||
env.Append(CPPFLAGS=['-fopenmp'])
|
|
||||||
env.Append(LINKFLAGS=['-fopenmp'])
|
|
||||||
|
|
||||||
else: # osxcross build
|
else: # osxcross build
|
||||||
root = os.environ.get("OSXCROSS_ROOT", 0)
|
root = os.environ.get("OSXCROSS_ROOT", 0)
|
||||||
|
|
|
@ -191,8 +191,6 @@ def configure(env):
|
||||||
if (env["use_lto"]):
|
if (env["use_lto"]):
|
||||||
env.Append(CCFLAGS=['/GL'])
|
env.Append(CCFLAGS=['/GL'])
|
||||||
env.Append(LINKFLAGS=['/LTCG'])
|
env.Append(LINKFLAGS=['/LTCG'])
|
||||||
if env['tools'] and env['openmp']:
|
|
||||||
env.Append(CPPFLAGS=['/openmp'])
|
|
||||||
|
|
||||||
env.Append(CCFLAGS=["/I" + p for p in os.getenv("INCLUDE").split(";")])
|
env.Append(CCFLAGS=["/I" + p for p in os.getenv("INCLUDE").split(";")])
|
||||||
env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")])
|
env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")])
|
||||||
|
@ -270,9 +268,6 @@ def configure(env):
|
||||||
env.Append(CCFLAGS=['-flto'])
|
env.Append(CCFLAGS=['-flto'])
|
||||||
env.Append(LINKFLAGS=['-flto=' + str(env.GetOption("num_jobs"))])
|
env.Append(LINKFLAGS=['-flto=' + str(env.GetOption("num_jobs"))])
|
||||||
|
|
||||||
if env['tools'] and env['openmp']:
|
|
||||||
env.Append(CPPFLAGS=['-fopenmp'])
|
|
||||||
env.Append(LINKFLAGS=['-fopenmp'])
|
|
||||||
|
|
||||||
## Compile flags
|
## Compile flags
|
||||||
|
|
||||||
|
|
|
@ -265,9 +265,5 @@ def configure(env):
|
||||||
env.Append(LINKFLAGS=['-m64', '-L/usr/lib/i686-linux-gnu'])
|
env.Append(LINKFLAGS=['-m64', '-L/usr/lib/i686-linux-gnu'])
|
||||||
|
|
||||||
|
|
||||||
if env['tools'] and env['openmp']:
|
|
||||||
env.Append(CPPFLAGS=['-fopenmp'])
|
|
||||||
env.Append(LINKFLAGS=['-fopenmp'])
|
|
||||||
|
|
||||||
if env['use_static_cpp']:
|
if env['use_static_cpp']:
|
||||||
env.Append(LINKFLAGS=['-static-libstdc++'])
|
env.Append(LINKFLAGS=['-static-libstdc++'])
|
||||||
|
|
|
@ -772,8 +772,8 @@ void BakedLightmap::_bind_methods() {
|
||||||
BakedLightmap::BakedLightmap() {
|
BakedLightmap::BakedLightmap() {
|
||||||
|
|
||||||
extents = Vector3(10, 10, 10);
|
extents = Vector3(10, 10, 10);
|
||||||
bake_cell_size = 0.1;
|
bake_cell_size = 0.25;
|
||||||
capture_cell_size = 0.25;
|
capture_cell_size = 0.5;
|
||||||
|
|
||||||
bake_quality = BAKE_QUALITY_MEDIUM;
|
bake_quality = BAKE_QUALITY_MEDIUM;
|
||||||
bake_mode = BAKE_MODE_CONE_TRACE;
|
bake_mode = BAKE_MODE_CONE_TRACE;
|
||||||
|
|
|
@ -30,11 +30,9 @@
|
||||||
|
|
||||||
#include "voxel_light_baker.h"
|
#include "voxel_light_baker.h"
|
||||||
#include "os/os.h"
|
#include "os/os.h"
|
||||||
|
#include "os/threaded_array_processor.h"
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#ifdef _OPENMP
|
|
||||||
#include <omp.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define FINDMINMAX(x0, x1, x2, min, max) \
|
#define FINDMINMAX(x0, x1, x2, min, max) \
|
||||||
min = max = x0; \
|
min = max = x0; \
|
||||||
|
@ -1689,7 +1687,7 @@ _ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *state) {
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state) {
|
Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal) {
|
||||||
|
|
||||||
int samples_per_quality[3] = { 48, 128, 512 };
|
int samples_per_quality[3] = { 48, 128, 512 };
|
||||||
|
|
||||||
|
@ -1711,8 +1709,7 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
|
||||||
const Light *light = bake_light.ptr();
|
const Light *light = bake_light.ptr();
|
||||||
const Cell *cells = bake_cells.ptr();
|
const Cell *cells = bake_cells.ptr();
|
||||||
|
|
||||||
// Prevent false sharing when running on OpenMP
|
uint32_t local_rng_state = rand(); //needs to be fixed again
|
||||||
uint32_t local_rng_state = *rng_state;
|
|
||||||
|
|
||||||
for (int i = 0; i < samples; i++) {
|
for (int i = 0; i < samples; i++) {
|
||||||
|
|
||||||
|
@ -1796,10 +1793,30 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure we don't reset this thread's RNG state
|
// Make sure we don't reset this thread's RNG state
|
||||||
*rng_state = local_rng_state;
|
|
||||||
return accum / samples;
|
return accum / samples;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VoxelLightBaker::_lightmap_bake_point(uint32_t p_x, LightMap *p_line) {
|
||||||
|
|
||||||
|
|
||||||
|
LightMap *pixel = &p_line[p_x];
|
||||||
|
if (pixel->pos == Vector3())
|
||||||
|
return;
|
||||||
|
//print_line("pos: " + pixel->pos + " normal " + pixel->normal);
|
||||||
|
switch (bake_mode) {
|
||||||
|
case BAKE_MODE_CONE_TRACE: {
|
||||||
|
pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
|
||||||
|
} break;
|
||||||
|
case BAKE_MODE_RAY_TRACE: {
|
||||||
|
pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal) * energy;
|
||||||
|
} break;
|
||||||
|
// pixel->light = Vector3(1, 1, 1);
|
||||||
|
//}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh, LightMapData &r_lightmap, bool (*p_bake_time_func)(void *, float, float), void *p_bake_time_ud) {
|
Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh, LightMapData &r_lightmap, bool (*p_bake_time_func)(void *, float, float), void *p_bake_time_ud) {
|
||||||
|
|
||||||
//transfer light information to a lightmap
|
//transfer light information to a lightmap
|
||||||
|
@ -1862,53 +1879,10 @@ Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh
|
||||||
volatile int lines = 0;
|
volatile int lines = 0;
|
||||||
|
|
||||||
// make sure our OS-level rng is seeded
|
// make sure our OS-level rng is seeded
|
||||||
srand(OS::get_singleton()->get_ticks_usec());
|
|
||||||
|
|
||||||
// setup an RNG state for each OpenMP thread
|
|
||||||
uint32_t threadcount = 1;
|
|
||||||
uint32_t threadnum = 0;
|
|
||||||
#ifdef _OPENMP
|
|
||||||
threadcount = omp_get_max_threads();
|
|
||||||
#endif
|
|
||||||
Vector<uint32_t> rng_states;
|
|
||||||
rng_states.resize(threadcount);
|
|
||||||
for (uint32_t i = 0; i < threadcount; i++) {
|
|
||||||
do {
|
|
||||||
rng_states[i] = rand();
|
|
||||||
} while (rng_states[i] == 0);
|
|
||||||
}
|
|
||||||
uint32_t *rng_states_p = rng_states.ptrw();
|
|
||||||
|
|
||||||
for (int i = 0; i < height; i++) {
|
for (int i = 0; i < height; i++) {
|
||||||
|
|
||||||
//print_line("bake line " + itos(i) + " / " + itos(height));
|
thread_process_array(width,this,&VoxelLightBaker::_lightmap_bake_point,&lightmap_ptr[i*width]);
|
||||||
#ifdef _OPENMP
|
|
||||||
#pragma omp parallel for schedule(dynamic, 1) private(threadnum)
|
|
||||||
#endif
|
|
||||||
for (int j = 0; j < width; j++) {
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
|
||||||
threadnum = omp_get_thread_num();
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//if (i == 125 && j == 280) {
|
|
||||||
|
|
||||||
LightMap *pixel = &lightmap_ptr[i * width + j];
|
|
||||||
if (pixel->pos == Vector3())
|
|
||||||
continue; //unused, skipe
|
|
||||||
|
|
||||||
//print_line("pos: " + pixel->pos + " normal " + pixel->normal);
|
|
||||||
switch (bake_mode) {
|
|
||||||
case BAKE_MODE_CONE_TRACE: {
|
|
||||||
pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
|
|
||||||
} break;
|
|
||||||
case BAKE_MODE_RAY_TRACE: {
|
|
||||||
pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal, &rng_states_p[threadnum]) * energy;
|
|
||||||
} break;
|
|
||||||
// pixel->light = Vector3(1, 1, 1);
|
|
||||||
//}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
lines = MAX(lines, i); //for multithread
|
lines = MAX(lines, i); //for multithread
|
||||||
if (p_bake_time_func) {
|
if (p_bake_time_func) {
|
||||||
|
|
|
@ -148,9 +148,12 @@ private:
|
||||||
_FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha);
|
_FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha);
|
||||||
_FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture);
|
_FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture);
|
||||||
_FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
|
_FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
|
||||||
_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state);
|
_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
|
||||||
|
|
||||||
|
void _lightmap_bake_point(uint32_t p_x, LightMap *p_line);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
void begin_bake(int p_subdiv, const AABB &p_bounds);
|
void begin_bake(int p_subdiv, const AABB &p_bounds);
|
||||||
void plot_mesh(const Transform &p_xform, Ref<Mesh> &p_mesh, const Vector<Ref<Material> > &p_materials, const Ref<Material> &p_override_material);
|
void plot_mesh(const Transform &p_xform, Ref<Mesh> &p_mesh, const Vector<Ref<Material> > &p_materials, const Ref<Material> &p_override_material);
|
||||||
void begin_bake_light(BakeQuality p_quality = BAKE_QUALITY_MEDIUM, BakeMode p_bake_mode = BAKE_MODE_CONE_TRACE, float p_propagation = 0.85, float p_energy = 1);
|
void begin_bake_light(BakeQuality p_quality = BAKE_QUALITY_MEDIUM, BakeMode p_bake_mode = BAKE_MODE_CONE_TRACE, float p_propagation = 0.85, float p_energy = 1);
|
||||||
|
|
Loading…
Reference in a new issue