From 4c710780d49f850261cf7cb3ca361002e635e592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20J=2E=20Est=C3=A9banez?= Date: Sun, 26 Sep 2021 21:31:17 +0200 Subject: [PATCH] Implement async shader compilation plus caching for GL ES 3 Async. compilation via ubershader is currently available in the scene and particles shaders only. Bonus: - Use `#if defined()` syntax for not true conditionals, so they don't unnecessarily take a bit in the version flagset. - Remove unused `ENABLE_CLIP_ALPHA` from scene shader. - Remove unused `PARTICLES_COPY` from the particles shader. - Remove unused uniform related code. - Shader language/compiler: use ordered hash maps for deterministic code generation (needed for caching). --- core/threaded_callable_queue.h | 133 +++ doc/classes/ProjectSettings.xml | 33 + doc/classes/SpatialMaterial.xml | 11 + doc/classes/VisualServer.xml | 9 + drivers/dummy/rasterizer_dummy.h | 3 + drivers/gles2/rasterizer_storage_gles2.h | 3 + drivers/gles2/shader_compiler_gles2.cpp | 42 +- drivers/gles3/rasterizer_gles3.cpp | 7 + drivers/gles3/rasterizer_scene_gles3.cpp | 25 +- drivers/gles3/rasterizer_storage_gles3.cpp | 144 ++- drivers/gles3/rasterizer_storage_gles3.h | 16 + drivers/gles3/shader_cache_gles3.cpp | 196 ++++ drivers/gles3/shader_cache_gles3.h | 58 ++ drivers/gles3/shader_compiler_gles3.cpp | 50 +- drivers/gles3/shader_gles3.cpp | 1067 +++++++++++++++----- drivers/gles3/shader_gles3.h | 174 +++- drivers/gles3/shaders/particles.glsl | 33 +- drivers/gles3/shaders/scene.glsl | 527 +++++----- editor/editor_export.cpp | 4 + editor/editor_export.h | 1 + editor/editor_node.cpp | 29 + editor/editor_node.h | 1 + editor/editor_run.cpp | 13 + editor/editor_run.h | 4 + editor/editor_run_native.cpp | 12 + editor/editor_run_native.h | 4 + gles_builders.py | 7 +- main/main.cpp | 11 + main/tests/test_shader_lang.cpp | 24 +- scene/resources/material.cpp | 31 + scene/resources/material.h | 10 + scene/resources/visual_shader.cpp | 1 + servers/visual/rasterizer.h | 
3 + servers/visual/shader_language.cpp | 8 +- servers/visual/shader_language.h | 5 +- servers/visual/shader_types.cpp | 3 + servers/visual/visual_server_raster.h | 2 + servers/visual/visual_server_scene.cpp | 4 + servers/visual/visual_server_wrap_mt.h | 2 + servers/visual_server.cpp | 32 + servers/visual_server.h | 10 + thirdparty/glad/glad.c | 40 +- thirdparty/glad/glad/glad.h | 46 +- 43 files changed, 2169 insertions(+), 669 deletions(-) create mode 100644 core/threaded_callable_queue.h create mode 100644 drivers/gles3/shader_cache_gles3.cpp create mode 100644 drivers/gles3/shader_cache_gles3.h diff --git a/core/threaded_callable_queue.h b/core/threaded_callable_queue.h new file mode 100644 index 00000000000..33484669403 --- /dev/null +++ b/core/threaded_callable_queue.h @@ -0,0 +1,133 @@ +/*************************************************************************/ +/* threaded_callable_queue.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef THREADED_CALLABLE_QUEUE_H +#define THREADED_CALLABLE_QUEUE_H + +#include "core/local_vector.h" +#include "core/ordered_hash_map.h" +#include "core/os/mutex.h" +#include "core/os/semaphore.h" +#include "core/os/thread.h" + +#include + +template +class ThreadedCallableQueue { +public: + using Job = std::function; + +private: + bool exit; + Thread thread; + BinaryMutex mutex; + Semaphore sem; + OrderedHashMap queue; + + static void _thread_func(void *p_user_data); + +public: + void enqueue(K p_key, Job p_job); + void cancel(K p_key); + + ThreadedCallableQueue(); + ~ThreadedCallableQueue(); +}; + +template +void ThreadedCallableQueue::_thread_func(void *p_user_data) { + ThreadedCallableQueue *self = static_cast(p_user_data); + + while (true) { + self->sem.wait(); + self->mutex.lock(); + if (self->exit) { + self->mutex.unlock(); + break; + } + + typename OrderedHashMap::Element E = self->queue.front(); + // Defense about implementation bugs (excessive posts) + if (!E) { + ERR_PRINT("Semaphore unlocked, the queue is empty. 
Bug?"); + self->mutex.unlock(); + // --- Defense end + } else { + LocalVector jobs; + jobs.push_back(E.value()); + self->queue.erase(E); + self->mutex.unlock(); + + for (uint32_t i = 0; i < jobs.size(); i++) { + jobs[i](); + } + } + } + + self->mutex.lock(); + for (typename OrderedHashMap::Element E = self->queue.front(); E; E = E.next()) { + Job job = E.value(); + job(); + } + self->mutex.unlock(); +} + +template +void ThreadedCallableQueue::enqueue(K p_key, Job p_job) { + MutexLock lock(mutex); + ERR_FAIL_COND(exit); + ERR_FAIL_COND(queue.has(p_key)); + queue.insert(p_key, p_job); + sem.post(); +} + +template +void ThreadedCallableQueue::cancel(K p_key) { + MutexLock lock(mutex); + ERR_FAIL_COND(exit); + if (queue.erase(p_key)) { + sem.wait(); + } +} + +template +ThreadedCallableQueue::ThreadedCallableQueue() : + exit(false) { + thread.start(&_thread_func, this); +} + +template +ThreadedCallableQueue::~ThreadedCallableQueue() { + exit = true; + sem.post(); + thread.wait_to_finish(); +} + +#endif // THREADED_CALLABLE_QUEUE_H diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index 2fbadf39737..42531083a04 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -1222,6 +1222,39 @@ If [code]true[/code] and available on the target Android device, enables high floating point precision for all shader computations in GLES2. [b]Warning:[/b] High floating point precision can be extremely slow on older devices and is often not available at all. Use with caution. + + If [code]true[/code], every time an asynchronous shader compilation or an asynchronous shader reconstruction from cache starts or finishes, a line will be logged telling how many of those are happening. + If the platform doesn't support parallel shader compile, but only the compile queue via a secondary GL context, what the message will tell is the number of shader compiles currently queued. 
+ [b]Note:[/b] This setting is only meaningful if [code]rendering/gles3/shaders/shader_compilation_mode[/code] is [b]not[/b] [code]Synchronous[/code]. + + + This is the maximum number of shaders that can be compiled (or reconstructed from cache) at the same time. + At runtime, while that count is reached, other shaders that can be asynchronously compiled will just use their fallback, without their setup being started until the count gets lower. + This is a way to balance the CPU work between running the game and compiling the shaders. The goal is to have as many asynchronous compiles in flight as possible without impacting the responsiveness of the game, which beyond some point would destroy the benefits of asynchronous compilation. In other words, you may be able to afford a slight drop in FPS, and that will already be better than the stalling that synchronous compilation could cause. + The default value is a conservative one, so you are advised to tweak it according to the hardware you are targeting. + [b]Note:[/b] This setting is only meaningful if [code]rendering/gles3/shaders/shader_compilation_mode[/code] is [b]not[/b] [code]Synchronous[/code]. + + + The default is a very conservative override for [code]rendering/gles3/shaders/max_simultaneous_compiles[/code]. + Depending on the specific devices you are targeting, you may want to raise it. + [b]Note:[/b] This setting is only meaningful if [code]rendering/gles3/shaders/shader_compilation_mode[/code] is [b]not[/b] [code]Synchronous[/code]. + + + The maximum size, in megabytes, that the ubershader cache can grow up to. On startup, the least recently used entries will be deleted until the total size is within bounds. + [b]Note:[/b] This setting is only meaningful if [code]rendering/gles3/shaders/shader_compilation_mode[/code] is set to [code]Asynchronous + Cache[/code]. 
+ + + An override for [code]rendering/gles3/shaders/ubershader_cache_size_mb[/code], so a smaller maximum size can be configured for mobile platforms, where storage space is more limited. + [b]Note:[/b] This setting is only meaningful if [code]rendering/gles3/shaders/shader_compilation_mode[/code] is set to [code]Asynchronous + Cache[/code]. + + + If set to [code]Asynchronous[/code] and available on the target device, asynchronous compilation of shaders is enabled (in contrast to [code]Synchronous[/code]). + That means that when a shader is first used under some new rendering situation, the game won't stall while such shader is being compiled. Instead, a fallback will be used and the real shader will be compiled in the background. Once the actual shader is compiled, it will be used the next time it's needed to draw a frame. + Depending on the async mode configured for a given material/shader, the fallback will be an "ubershader" (the default) or just skip rendering any item it is applied to. + An ubershader is a very complex shader, slow but suited to any rendering situation, that the engine generates internally so it can be used from the beginning while the traditional conditioned, optimized version of it is being compiled. + In order to save some loading time, you can use [code]Asynchronous + Cache[/code], which also causes the ubershaders to be cached into storage so they can be ready faster next time they are used (provided the platform provides support for it). + [b]Warning:[/b] Async. compilation is currently only supported for spatial and particle materials/shaders. + Max buffer size for blend shapes. Any blend shape bigger than this will not work. diff --git a/doc/classes/SpatialMaterial.xml b/doc/classes/SpatialMaterial.xml index 0d04c20709d..47861055fc5 100644 --- a/doc/classes/SpatialMaterial.xml +++ b/doc/classes/SpatialMaterial.xml @@ -87,6 +87,10 @@ Specifies the channel of the [member ao_texture] in which the ambient occlusion information is stored. 
This is useful when you store the information for multiple effects in a single texture. For example if you stored metallic in the red channel, roughness in the blue, and ambient occlusion in the green you could reduce the number of textures you use. + + If [member ProjectSettings.rendering/gles3/shaders/shader_compilation_mode] is [code]Asynchronous[/code] (with or without cache), this determines how this material must behave with regard to asynchronous shader compilation. + [constant ASYNC_MODE_VISIBLE] is the default and the best for most cases. + Sets the strength of the clearcoat effect. Setting to [code]0[/code] looks the same as disabling the clearcoat effect. @@ -639,5 +643,12 @@ Smoothly fades the object out based on the object's distance from the camera using a dither approach. Dithering discards pixels based on a set pattern to smoothly fade without enabling transparency. On certain hardware this can be faster than [constant DISTANCE_FADE_PIXEL_ALPHA]. + + The real conditioned shader needed on each situation will be sent for background compilation. In the meantime, a very complex shader that adapts to every situation will be used ("ubershader"). This ubershader is much slower to render, but will keep the game running without stalling to compile. Once shader compilation is done, the ubershader is replaced by the traditional optimized shader. + + + Anything with this material applied won't be rendered while this material's shader is being compiled. + This is useful for optimization, in cases where the visuals won't suffer from having certain non-essential elements missing during the short time their shaders are being compiled. + diff --git a/doc/classes/VisualServer.xml b/doc/classes/VisualServer.xml index f8cfe042819..f88059ac183 100644 --- a/doc/classes/VisualServer.xml +++ b/doc/classes/VisualServer.xml @@ -2525,6 +2525,15 @@ Sets the default clear color which is used when a specific clear color has not been selected. 
+ + + + + If asynchronous shader compilation is enabled, this controls whether [constant SpatialMaterial.ASYNC_MODE_HIDDEN] is obeyed. + For instance, you may want to enable this temporarily before taking a screenshot. This ensures everything is visible even if shaders with async mode [i]hidden[/i] are not ready yet. + Reflection probes use this internally to ensure they capture everything regardless the shaders are ready or not. + + diff --git a/drivers/dummy/rasterizer_dummy.h b/drivers/dummy/rasterizer_dummy.h index 4b0482e04fc..cc21fd01c4d 100644 --- a/drivers/dummy/rasterizer_dummy.h +++ b/drivers/dummy/rasterizer_dummy.h @@ -266,6 +266,9 @@ public: void shader_get_custom_defines(RID p_shader, Vector *p_defines) const {} void shader_remove_custom_define(RID p_shader, const String &p_define) {} + void set_shader_async_hidden_forbidden(bool p_forbidden) {} + bool is_shader_async_hidden_forbidden() { return false; } + /* COMMON MATERIAL API */ RID material_create() { return RID(); } diff --git a/drivers/gles2/rasterizer_storage_gles2.h b/drivers/gles2/rasterizer_storage_gles2.h index 3ffe9ab7099..e469b7da1c8 100644 --- a/drivers/gles2/rasterizer_storage_gles2.h +++ b/drivers/gles2/rasterizer_storage_gles2.h @@ -544,6 +544,9 @@ public: virtual void shader_get_custom_defines(RID p_shader, Vector *p_defines) const; virtual void shader_remove_custom_define(RID p_shader, const String &p_define); + void set_shader_async_hidden_forbidden(bool p_forbidden) {} + bool is_shader_async_hidden_forbidden() { return false; } + void _update_shader(Shader *p_shader) const; void update_dirty_shaders(); diff --git a/drivers/gles2/shader_compiler_gles2.cpp b/drivers/gles2/shader_compiler_gles2.cpp index 7c75e852f9a..6a9fb18ab2c 100644 --- a/drivers/gles2/shader_compiler_gles2.cpp +++ b/drivers/gles2/shader_compiler_gles2.cpp @@ -295,8 +295,8 @@ String ShaderCompilerGLES2::_dump_node_code(const SL::Node *p_node, int p_level, int max_texture_uniforms = 0; int max_uniforms = 0; - for 
(Map::Element *E = snode->uniforms.front(); E; E = E->next()) { - if (SL::is_sampler_type(E->get().type)) { + for (OrderedHashMap::Element E = snode->uniforms.front(); E; E = E.next()) { + if (SL::is_sampler_type(E.get().type)) { max_texture_uniforms++; } else { max_uniforms++; @@ -347,55 +347,55 @@ String ShaderCompilerGLES2::_dump_node_code(const SL::Node *p_node, int p_level, // uniforms - for (Map::Element *E = snode->uniforms.front(); E; E = E->next()) { + for (OrderedHashMap::Element E = snode->uniforms.front(); E; E = E.next()) { StringBuffer<> uniform_code; // use highp if no precision is specified to prevent different default values in fragment and vertex shader - SL::DataPrecision precision = E->get().precision; - if (precision == SL::PRECISION_DEFAULT && E->get().type != SL::TYPE_BOOL) { + SL::DataPrecision precision = E.get().precision; + if (precision == SL::PRECISION_DEFAULT && E.get().type != SL::TYPE_BOOL) { precision = SL::PRECISION_HIGHP; } uniform_code += "uniform "; uniform_code += _prestr(precision); - uniform_code += _typestr(E->get().type); + uniform_code += _typestr(E.get().type); uniform_code += " "; - uniform_code += _mkid(E->key()); + uniform_code += _mkid(E.key()); uniform_code += ";\n"; - if (SL::is_sampler_type(E->get().type)) { - r_gen_code.texture_uniforms.write[E->get().texture_order] = E->key(); - r_gen_code.texture_hints.write[E->get().texture_order] = E->get().hint; + if (SL::is_sampler_type(E.get().type)) { + r_gen_code.texture_uniforms.write[E.get().texture_order] = E.key(); + r_gen_code.texture_hints.write[E.get().texture_order] = E.get().hint; } else { - r_gen_code.uniforms.write[E->get().order] = E->key(); + r_gen_code.uniforms.write[E.get().order] = E.key(); } vertex_global += uniform_code.as_string(); fragment_global += uniform_code.as_string(); - p_actions.uniforms->insert(E->key(), E->get()); + p_actions.uniforms->insert(E.key(), E.get()); } // varyings List> var_frag_to_light; - for (Map::Element *E = 
snode->varyings.front(); E; E = E->next()) { - if (E->get().stage == SL::ShaderNode::Varying::STAGE_FRAGMENT_TO_LIGHT || E->get().stage == SL::ShaderNode::Varying::STAGE_FRAGMENT) { - var_frag_to_light.push_back(Pair(E->key(), E->get())); - fragment_varyings.insert(E->key()); + for (OrderedHashMap::Element E = snode->varyings.front(); E; E = E.next()) { + if (E.get().stage == SL::ShaderNode::Varying::STAGE_FRAGMENT_TO_LIGHT || E.get().stage == SL::ShaderNode::Varying::STAGE_FRAGMENT) { + var_frag_to_light.push_back(Pair(E.key(), E.get())); + fragment_varyings.insert(E.key()); continue; } StringBuffer<> varying_code; varying_code += "varying "; - varying_code += _prestr(E->get().precision); - varying_code += _typestr(E->get().type); + varying_code += _prestr(E.get().precision); + varying_code += _typestr(E.get().type); varying_code += " "; - varying_code += _mkid(E->key()); - if (E->get().array_size > 0) { + varying_code += _mkid(E.key()); + if (E.get().array_size > 0) { varying_code += "["; - varying_code += itos(E->get().array_size); + varying_code += itos(E.get().array_size); varying_code += "]"; } varying_code += ";\n"; diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp index 6789d46f74f..3d583832684 100644 --- a/drivers/gles3/rasterizer_gles3.cpp +++ b/drivers/gles3/rasterizer_gles3.cpp @@ -207,6 +207,7 @@ void RasterizerGLES3::begin_frame(double frame_step) { storage->frame.time[2] = Math::fmod(time_total, 900); storage->frame.time[3] = Math::fmod(time_total, 60); storage->frame.count++; + storage->frame.shader_compiles_started = 0; storage->frame.delta = frame_step; storage->update_dirty_resources(); @@ -214,6 +215,8 @@ void RasterizerGLES3::begin_frame(double frame_step) { storage->info.render_final = storage->info.render; storage->info.render.reset(); + ShaderGLES3::current_frame = storage->frame.count; + scene->iteration(); } @@ -410,6 +413,8 @@ void RasterizerGLES3::end_frame(bool p_swap_buffers) { } } + 
ShaderGLES3::advance_async_shaders_compilation(); + if (p_swap_buffers) { OS::get_singleton()->swap_buffers(); } else { @@ -487,6 +492,8 @@ RasterizerGLES3::RasterizerGLES3() { time_total = 0; time_scale = 1; + + ShaderGLES3::compiles_started_this_frame = &storage->frame.shader_compiles_started; } RasterizerGLES3::~RasterizerGLES3() { diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index cc747474cc9..dfa25f045ed 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -1817,7 +1817,7 @@ void RasterizerSceneGLES3::_setup_light(RenderList::Element *e, const Transform GIProbeInstance *gipi = gi_probe_instance_owner.getptr(ridp[0]); float bias_scale = e->instance->baked_light ? 1 : 0; - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 9); + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 10); glBindTexture(GL_TEXTURE_3D, gipi->tex_cache); state.scene_shader.set_uniform(SceneShaderGLES3::GI_PROBE_XFORM1, gipi->transform_to_data * p_view_transform); state.scene_shader.set_uniform(SceneShaderGLES3::GI_PROBE_BOUNDS1, gipi->bounds); @@ -1829,7 +1829,7 @@ void RasterizerSceneGLES3::_setup_light(RenderList::Element *e, const Transform if (gi_probe_count > 1) { GIProbeInstance *gipi2 = gi_probe_instance_owner.getptr(ridp[1]); - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 10); + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 11); glBindTexture(GL_TEXTURE_3D, gipi2->tex_cache); state.scene_shader.set_uniform(SceneShaderGLES3::GI_PROBE_XFORM2, gipi2->transform_to_data * p_view_transform); state.scene_shader.set_uniform(SceneShaderGLES3::GI_PROBE_BOUNDS2, gipi2->bounds); @@ -1850,7 +1850,7 @@ void RasterizerSceneGLES3::_setup_light(RenderList::Element *e, const Transform RasterizerStorageGLES3::LightmapCapture *capture = 
storage->lightmap_capture_data_owner.getornull(e->instance->lightmap_capture->base); if (lightmap && capture) { - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 9); + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 10); if (e->instance->lightmap_slice == -1) { glBindTexture(GL_TEXTURE_2D, lightmap->tex_id); } else { @@ -1897,13 +1897,14 @@ void RasterizerSceneGLES3::_render_list(RenderList::Element **p_elements, int p_ glBindBufferBase(GL_UNIFORM_BUFFER, 2, state.env_radiance_ubo); //bind environment radiance info if (p_sky != nullptr) { - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 2); if (storage->config.use_texture_array_environment) { + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 3); glBindTexture(GL_TEXTURE_2D_ARRAY, p_sky->radiance); } else { + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 2); glBindTexture(GL_TEXTURE_2D, p_sky->radiance); } - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 6); + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 7); glBindTexture(GL_TEXTURE_2D, p_sky->irradiance); state.scene_shader.set_conditional(SceneShaderGLES3::USE_RADIANCE_MAP, true); state.scene_shader.set_conditional(SceneShaderGLES3::USE_RADIANCE_MAP_ARRAY, storage->config.use_texture_array_environment); @@ -2572,7 +2573,7 @@ void RasterizerSceneGLES3::_setup_environment(Environment *env, const CameraMatr state.ubo_data.shadow_directional_pixel_size[0] = 1.0 / directional_shadow.size; state.ubo_data.shadow_directional_pixel_size[1] = 1.0 / directional_shadow.size; - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 4); + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 5); glBindTexture(GL_TEXTURE_2D, directional_shadow.depth); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); glTexParameteri(GL_TEXTURE_2D, 
GL_TEXTURE_COMPARE_FUNC, GL_LESS); @@ -3170,7 +3171,7 @@ void RasterizerSceneGLES3::_bind_depth_texture() { if (!state.bound_depth_texture) { ERR_FAIL_COND(!state.prepared_depth_texture); //bind depth for read - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 8); + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 9); glBindTexture(GL_TEXTURE_2D, storage->frame.current_rt->depth); state.bound_depth_texture = true; } @@ -4026,7 +4027,7 @@ void RasterizerSceneGLES3::render_scene(const Transform &p_cam_transform, const state.scene_shader.set_conditional(SceneShaderGLES3::USE_SHADOW, use_shadows); if (use_shadows) { - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 5); + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 6); glBindTexture(GL_TEXTURE_2D, shadow_atlas->depth); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LESS); @@ -4035,7 +4036,7 @@ void RasterizerSceneGLES3::render_scene(const Transform &p_cam_transform, const } if (reflection_atlas && reflection_atlas->size) { - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 3); + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 4); glBindTexture(GL_TEXTURE_2D, reflection_atlas->color); } @@ -4464,7 +4465,7 @@ void RasterizerSceneGLES3::render_scene(const Transform &p_cam_transform, const } if (storage->frame.current_rt && state.used_screen_texture && storage->frame.current_rt->buffers.active) { - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 7); + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 8); glBindTexture(GL_TEXTURE_2D, storage->frame.current_rt->effects.mip_maps[0].color); } @@ -5235,6 +5236,10 @@ void RasterizerSceneGLES3::initialize() { state.debug_draw = VS::VIEWPORT_DEBUG_DRAW_DISABLED; glFrontFace(GL_CW); + + if 
(storage->config.async_compilation_enabled) { + state.scene_shader.init_async_compilation(); + } } void RasterizerSceneGLES3::iteration() { diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp index 76ae9f9fbad..ad255047884 100644 --- a/drivers/gles3/rasterizer_storage_gles3.cpp +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -29,10 +29,22 @@ /*************************************************************************/ #include "rasterizer_storage_gles3.h" + #include "core/engine.h" +#include "core/os/os.h" #include "core/project_settings.h" +#include "core/threaded_callable_queue.h" #include "rasterizer_canvas_gles3.h" #include "rasterizer_scene_gles3.h" +#include "servers/visual_server.h" + +#if defined(IPHONE_ENABLED) || defined(ANDROID_ENABLED) +#include +#endif + +#ifdef TOOLS_ENABLED +#include "editor/editor_settings.h" +#endif /* TEXTURE API */ @@ -2169,6 +2181,8 @@ void RasterizerStorageGLES3::_update_shader(Shader *p_shader) const { ShaderCompilerGLES3::GeneratedCode gen_code; ShaderCompilerGLES3::IdentifierActions *actions = nullptr; + int async_mode = (int)ShaderGLES3::ASYNC_MODE_VISIBLE; + switch (p_shader->mode) { case VS::SHADER_CANVAS_ITEM: { p_shader->canvas_item.light_mode = Shader::CanvasItem::LIGHT_MODE_NORMAL; @@ -2249,6 +2263,9 @@ void RasterizerStorageGLES3::_update_shader(Shader *p_shader) const { shaders.actions_scene.render_mode_values["cull_back"] = Pair(&p_shader->spatial.cull_mode, Shader::Spatial::CULL_MODE_BACK); shaders.actions_scene.render_mode_values["cull_disabled"] = Pair(&p_shader->spatial.cull_mode, Shader::Spatial::CULL_MODE_DISABLED); + shaders.actions_scene.render_mode_values["async_visible"] = Pair(&async_mode, (int)ShaderGLES3::ASYNC_MODE_VISIBLE); + shaders.actions_scene.render_mode_values["async_hidden"] = Pair(&async_mode, (int)ShaderGLES3::ASYNC_MODE_HIDDEN); + shaders.actions_scene.render_mode_flags["unshaded"] = &p_shader->spatial.unshaded; 
shaders.actions_scene.render_mode_flags["depth_test_disable"] = &p_shader->spatial.no_depth_test; @@ -2293,8 +2310,6 @@ void RasterizerStorageGLES3::_update_shader(Shader *p_shader) const { return; } - p_shader->shader->set_custom_shader_code(p_shader->custom_code_id, gen_code.vertex, gen_code.vertex_global, gen_code.fragment, gen_code.light, gen_code.fragment_global, gen_code.uniforms, gen_code.texture_uniforms, gen_code.defines); - p_shader->ubo_size = gen_code.uniform_total_size; p_shader->ubo_offsets = gen_code.uniform_offsets; p_shader->texture_count = gen_code.texture_uniforms.size(); @@ -2317,6 +2332,8 @@ void RasterizerStorageGLES3::_update_shader(Shader *p_shader) const { } } + p_shader->shader->set_custom_shader_code(p_shader->custom_code_id, gen_code.vertex, gen_code.vertex_global, gen_code.fragment, gen_code.light, gen_code.fragment_global, gen_code.uniforms, gen_code.texture_uniforms, gen_code.defines, (ShaderGLES3::AsyncMode)async_mode); + //all materials using this shader will have to be invalidated, unfortunately for (SelfList *E = p_shader->materials.first(); E; E = E->next()) { @@ -2509,6 +2526,14 @@ void RasterizerStorageGLES3::shader_remove_custom_define(RID p_shader, const Str _shader_make_dirty(shader); } +void RasterizerStorageGLES3::set_shader_async_hidden_forbidden(bool p_forbidden) { + ShaderGLES3::async_hidden_forbidden = p_forbidden; +} + +bool RasterizerStorageGLES3::is_shader_async_hidden_forbidden() { + return ShaderGLES3::async_hidden_forbidden; +} + /* COMMON MATERIAL API */ void RasterizerStorageGLES3::_material_make_dirty(Material *p_material) const { @@ -3407,6 +3432,8 @@ void RasterizerStorageGLES3::mesh_add_surface(RID p_mesh, uint32_t p_format, VS: attribs[i].size = 2; attribs[i].type = GL_SHORT; attributes_stride += 4; + // Storing normal/tangent in the tangent attrib makes it easier to ubershaderify the scene shader + attribs[i].index = VS::ARRAY_TANGENT; } else { attribs[i].size = 3; @@ -8061,7 +8088,6 @@ void 
RasterizerStorageGLES3::initialize() { config.texture_float_linear_supported = true; config.framebuffer_float_supported = true; config.framebuffer_half_float_supported = true; - #else config.etc2_supported = true; config.s3tc_supported = config.extensions.has("GL_EXT_texture_compression_dxt1") || config.extensions.has("GL_EXT_texture_compression_s3tc") || config.extensions.has("WEBGL_compressed_texture_s3tc"); @@ -8069,7 +8095,6 @@ void RasterizerStorageGLES3::initialize() { config.texture_float_linear_supported = config.extensions.has("GL_OES_texture_float_linear"); config.framebuffer_float_supported = config.extensions.has("GL_EXT_color_buffer_float"); config.framebuffer_half_float_supported = config.extensions.has("GL_EXT_color_buffer_half_float") || config.framebuffer_float_supported; - #endif // not yet detected on GLES3 (is this mandated?) @@ -8085,8 +8110,103 @@ void RasterizerStorageGLES3::initialize() { config.anisotropic_level = MIN(int(ProjectSettings::get_singleton()->get("rendering/quality/filters/anisotropic_filter_level")), config.anisotropic_level); } +#ifdef GLES_OVER_GL + config.program_binary_supported = GLAD_GL_ARB_get_program_binary; + config.parallel_shader_compile_supported = GLAD_GL_ARB_parallel_shader_compile || GLAD_GL_KHR_parallel_shader_compile; +#else +#ifdef JAVASCRIPT_ENABLED + config.program_binary_supported = false; +#else + config.program_binary_supported = true; +#endif + config.parallel_shader_compile_supported = config.extensions.has("GL_KHR_parallel_shader_compile") || config.extensions.has("GL_ARB_parallel_shader_compile"); +#endif + if (Engine::get_singleton()->is_editor_hint()) { + config.async_compilation_enabled = false; + config.shader_cache_enabled = false; + } else { + int compilation_mode = ProjectSettings::get_singleton()->get("rendering/gles3/shaders/shader_compilation_mode"); + config.async_compilation_enabled = compilation_mode >= 1; + config.shader_cache_enabled = compilation_mode == 2; + } + if 
(config.async_compilation_enabled) { + ShaderGLES3::max_simultaneous_compiles = MAX(1, (int)ProjectSettings::get_singleton()->get("rendering/gles3/shaders/max_simultaneous_compiles")); +#ifdef GLES_OVER_GL + if (GLAD_GL_ARB_parallel_shader_compile) { + glMaxShaderCompilerThreadsARB(ShaderGLES3::max_simultaneous_compiles); + } else if (GLAD_GL_KHR_parallel_shader_compile) { + glMaxShaderCompilerThreadsKHR(ShaderGLES3::max_simultaneous_compiles); + } +#else +#if defined(IPHONE_ENABLED) || defined(ANDROID_ENABLED) // TODO: Consider more platforms? + void *gles3_lib = nullptr; + void (*MaxShaderCompilerThreads)(GLuint) = nullptr; +#if defined(IPHONE_ENABLED) + gles3_lib = dlopen(nullptr, RTLD_LAZY); +#elif defined(ANDROID_ENABLED) + gles3_lib = dlopen("libGLESv3.so", RTLD_LAZY); +#endif + if (gles3_lib) { + MaxShaderCompilerThreads = (void (*)(GLuint))dlsym(gles3_lib, "glMaxShaderCompilerThreadsARB"); + if (!MaxShaderCompilerThreads) { + MaxShaderCompilerThreads = (void (*)(GLuint))dlsym(gles3_lib, "glMaxShaderCompilerThreadsKHR"); + } + } + if (MaxShaderCompilerThreads) { + MaxShaderCompilerThreads(ShaderGLES3::max_simultaneous_compiles); + } else { +#ifdef DEBUG_ENABLED + print_line("Async. shader compilation: No MaxShaderCompilerThreads function found."); +#endif + } +#endif +#endif + } else { + ShaderGLES3::max_simultaneous_compiles = 0; + } +#ifdef DEBUG_ENABLED + ShaderGLES3::log_active_async_compiles_count = (bool)ProjectSettings::get_singleton()->get("rendering/gles3/shaders/log_active_async_compiles_count"); +#endif + frame.clear_request = false; + shaders.compile_queue = nullptr; + shaders.cache = nullptr; + shaders.cache_write_queue = nullptr; + bool effectively_on = false; + if (config.async_compilation_enabled) { + if (config.parallel_shader_compile_supported) { + print_line("Async. 
shader compilation: ON (full native support)"); + effectively_on = true; + } else if (config.program_binary_supported && OS::get_singleton()->is_offscreen_gl_available()) { + shaders.compile_queue = memnew(ThreadedCallableQueue()); + shaders.compile_queue->enqueue(0, []() { OS::get_singleton()->set_offscreen_gl_current(true); }); + print_line("Async. shader compilation: ON (via secondary context)"); + effectively_on = true; + } else { + print_line("Async. shader compilation: OFF (enabled for " + String(Engine::get_singleton()->is_editor_hint() ? "editor" : "project") + ", but not supported)"); + } + if (effectively_on) { + if (config.shader_cache_enabled) { + if (config.program_binary_supported) { + print_line("Shader cache: ON"); + shaders.cache = memnew(ShaderCacheGLES3); + shaders.cache_write_queue = memnew(ThreadedCallableQueue()); + } else { + print_line("Shader cache: OFF (enabled, but not supported)"); + } + } else { + print_line("Shader cache: OFF"); + } + } + } else { + print_line("Async. 
shader compilation: OFF"); + } + ShaderGLES3::compile_queue = shaders.compile_queue; + ShaderGLES3::parallel_compile_supported = config.parallel_shader_compile_supported; + ShaderGLES3::shader_cache = shaders.cache; + ShaderGLES3::cache_write_queue = shaders.cache_write_queue; + shaders.copy.init(); { @@ -8233,6 +8353,9 @@ void RasterizerStorageGLES3::initialize() { bool ggx_hq = GLOBAL_GET("rendering/quality/reflections/high_quality_ggx"); shaders.cubemap_filter.set_conditional(CubemapFilterShaderGLES3::LOW_QUALITY, !ggx_hq); shaders.particles.init(); + if (config.async_compilation_enabled) { + shaders.particles.init_async_compilation(); + } #ifdef GLES_OVER_GL glEnable(_EXT_TEXTURE_CUBE_MAP_SEAMLESS); @@ -8303,3 +8426,16 @@ void RasterizerStorageGLES3::update_dirty_resources() { RasterizerStorageGLES3::RasterizerStorageGLES3() { config.should_orphan = true; } + +RasterizerStorageGLES3::~RasterizerStorageGLES3() { + if (shaders.cache) { + memdelete(shaders.cache); + } + if (shaders.cache_write_queue) { + memdelete(shaders.cache_write_queue); + } + if (shaders.compile_queue) { + shaders.compile_queue->enqueue(0, []() { OS::get_singleton()->set_offscreen_gl_current(false); }); + memdelete(shaders.compile_queue); + } +} diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h index bccdc6f7665..934f2938352 100644 --- a/drivers/gles3/rasterizer_storage_gles3.h +++ b/drivers/gles3/rasterizer_storage_gles3.h @@ -35,6 +35,7 @@ #include "drivers/gles_common/rasterizer_asserts.h" #include "servers/visual/rasterizer.h" #include "servers/visual/shader_language.h" +#include "shader_cache_gles3.h" #include "shader_compiler_gles3.h" #include "shader_gles3.h" @@ -49,6 +50,8 @@ void glGetBufferSubData(GLenum target, GLintptr offset, GLsizeiptr size, GLvoid *data); #endif +template +class ThreadedCallableQueue; class RasterizerCanvasGLES3; class RasterizerSceneGLES3; @@ -113,12 +116,20 @@ public: // in some cases the legacy render didn't 
orphan. We will mark these // so the user can switch orphaning off for them. bool should_orphan; + + bool program_binary_supported; + bool parallel_shader_compile_supported; + bool async_compilation_enabled; + bool shader_cache_enabled; } config; mutable struct Shaders { CopyShaderGLES3 copy; ShaderCompilerGLES3 compiler; + ShaderCacheGLES3 *cache; + ThreadedCallableQueue *cache_write_queue; + ThreadedCallableQueue *compile_queue; CubemapFilterShaderGLES3 cubemap_filter; @@ -547,6 +558,9 @@ public: virtual void shader_get_custom_defines(RID p_shader, Vector *p_defines) const; virtual void shader_remove_custom_define(RID p_shader, const String &p_define); + virtual void set_shader_async_hidden_forbidden(bool p_forbidden); + virtual bool is_shader_async_hidden_forbidden(); + void _update_shader(Shader *p_shader) const; void update_dirty_shaders(); @@ -1476,6 +1490,7 @@ public: float time[4]; float delta; uint64_t count; + int shader_compiles_started; } frame; @@ -1500,6 +1515,7 @@ public: bool safe_buffer_sub_data(unsigned int p_total_buffer_size, GLenum p_target, unsigned int p_offset, unsigned int p_data_size, const void *p_data, unsigned int &r_offset_after) const; RasterizerStorageGLES3(); + ~RasterizerStorageGLES3(); }; inline bool RasterizerStorageGLES3::safe_buffer_sub_data(unsigned int p_total_buffer_size, GLenum p_target, unsigned int p_offset, unsigned int p_data_size, const void *p_data, unsigned int &r_offset_after) const { diff --git a/drivers/gles3/shader_cache_gles3.cpp b/drivers/gles3/shader_cache_gles3.cpp new file mode 100644 index 00000000000..73d04246382 --- /dev/null +++ b/drivers/gles3/shader_cache_gles3.cpp @@ -0,0 +1,196 @@ +/*************************************************************************/ +/* shader_cache_gles3.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ 
+/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ + +#include "shader_cache_gles3.h" + +#include "core/crypto/crypto_core.h" +#include "core/os/dir_access.h" +#include "core/os/os.h" +#include "core/project_settings.h" +#include "core/sort_array.h" +#include "core/ustring.h" + +String ShaderCacheGLES3::hash_program(const char *const *p_strings_platform, const LocalVector &p_vertex_strings, const LocalVector &p_fragment_strings) { + CryptoCore::SHA256Context ctx; + ctx.start(); + + // GL may already reject a binary program if harware/software has changed, but just in case + for (const char *const *s = p_strings_platform; *s; s++) { + uint8_t *bytes = reinterpret_cast(const_cast(*s)); + ctx.update(bytes, strlen(*s)); + } + for (uint32_t i = 0; i < p_vertex_strings.size(); i++) { + ctx.update((uint8_t *)p_vertex_strings[i], strlen(p_vertex_strings[i])); + } + for (uint32_t i = 0; i < p_fragment_strings.size(); i++) { + ctx.update((uint8_t *)p_fragment_strings[i], strlen(p_fragment_strings[i])); + } + + uint8_t hash[32]; + ctx.finish(hash); + return String::hex_encode_buffer(hash, 32); +} + +bool ShaderCacheGLES3::retrieve(const String &p_program_hash, uint32_t *r_format, PoolByteArray *r_data) { + if (!storage_da) { + return false; + } + + FileAccessRef fa = FileAccess::open(storage_path.plus_file(p_program_hash), FileAccess::READ_WRITE); + if (!fa) { + return false; + } + + *r_format = fa->get_32(); + uint32_t binary_len = fa->get_32(); + if (binary_len <= 0 || binary_len > 0x10000000) { + ERR_PRINT("Program binary cache file is corrupted. Ignoring and removing."); + fa->close(); + storage_da->remove(p_program_hash); + return false; + } + r_data->resize(binary_len); + PoolByteArray::Write w = r_data->write(); + if (fa->get_buffer(w.ptr(), binary_len) != static_cast(binary_len)) { + ERR_PRINT("Program binary cache file is truncated. 
Ignoring and removing."); + fa->close(); + storage_da->remove(p_program_hash); + return false; + } + + // Force update modification time (for LRU purge) + fa->seek(0); + fa->store_32(*r_format); + + return true; +} + +void ShaderCacheGLES3::store(const String &p_program_hash, uint32_t p_program_format, const PoolByteArray &p_program_data) { + if (!storage_da) { + return; + } + + FileAccessRef fa = FileAccess::open(storage_path.plus_file(p_program_hash), FileAccess::WRITE); + ERR_FAIL_COND(!fa); + fa->store_32(p_program_format); + fa->store_32(p_program_data.size()); + PoolByteArray::Read r = p_program_data.read(); + fa->store_buffer(r.ptr(), p_program_data.size()); +} + +void ShaderCacheGLES3::remove(const String &p_program_hash) { + if (!storage_da) { + return; + } + + storage_da->remove(p_program_hash); +} + +void ShaderCacheGLES3::_purge_excess() { + if (!storage_da) { + return; + } + + struct Entry { + String name; + uint64_t timestamp; + uint64_t size; + + bool operator<(const Entry &p_rhs) const { + return timestamp < p_rhs.timestamp; + } + }; + LocalVector entries; + uint64_t total_size = 0; + + ERR_FAIL_COND(storage_da->list_dir_begin() != OK); + while (true) { + String f = storage_da->get_next(); + if (f == "") { + break; + } + if (storage_da->current_is_dir()) { + continue; + } + String path = storage_da->get_current_dir().plus_file(f); + FileAccessRef fa = FileAccess::open(path, FileAccess::READ); + ERR_CONTINUE(!fa); + + Entry entry; + entry.name = f; + entry.timestamp = FileAccess::get_modified_time(path); + entry.size = fa->get_len(); + entries.push_back(entry); + total_size += entry.size; + } + storage_da->list_dir_end(); + + print_verbose("Shader cache size: " + itos(total_size / (1024 * 1024)) + " MiB (max. 
is " + (itos(storage_size / (1024 * 1024))) + " MiB)"); + if (total_size > storage_size) { + print_verbose("Purging LRU from shader cache."); + SortArray().sort(entries.ptr(), entries.size()); + for (uint32_t i = 0; i < entries.size(); i++) { + storage_da->remove(entries[i].name); + total_size -= entries[i].size; + if (total_size <= storage_size) { + break; + } + } + } +} + +ShaderCacheGLES3::ShaderCacheGLES3() { + storage_size = (int)GLOBAL_GET("rendering/gles3/shaders/shader_cache_size_mb") * 1024 * 1024; + + storage_da = DirAccess::create(DirAccess::ACCESS_FILESYSTEM); + storage_path = OS::get_singleton()->get_cache_path().plus_file(OS::get_singleton()->get_godot_dir_name()).plus_file("shaders"); + + print_verbose("Shader cache path: " + storage_path); + if (storage_da->make_dir_recursive(storage_path) != OK) { + ERR_PRINT("Couldn't create shader cache directory. Shader cache disabled."); + memdelete(storage_da); + storage_da = nullptr; + return; + } + if (storage_da->change_dir(storage_path) != OK) { + ERR_PRINT("Couldn't open shader cache directory. Shader cache disabled."); + memdelete(storage_da); + storage_da = nullptr; + return; + } + + _purge_excess(); +} + +ShaderCacheGLES3::~ShaderCacheGLES3() { + if (storage_da) { + memdelete(storage_da); + } +} diff --git a/drivers/gles3/shader_cache_gles3.h b/drivers/gles3/shader_cache_gles3.h new file mode 100644 index 00000000000..bd5f1287d1b --- /dev/null +++ b/drivers/gles3/shader_cache_gles3.h @@ -0,0 +1,58 @@ +/*************************************************************************/ +/* shader_cache_gles3.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ + +#ifndef SHADER_CACHE_GLES3_H +#define SHADER_CACHE_GLES3_H + +#include "core/local_vector.h" +#include "core/reference.h" + +class DirAccess; +class String; + +class ShaderCacheGLES3 { + DirAccess *storage_da; + String storage_path; + uint64_t storage_size = 0; + + void _purge_excess(); + +public: + static String hash_program(const char *const *p_platform_strings, const LocalVector &p_vertex_strings, const LocalVector &p_fragment_strings); + + bool retrieve(const String &p_program_hash, uint32_t *r_format, PoolByteArray *r_data); + void store(const String &p_program_hash, uint32_t p_program_format, const PoolByteArray &p_program_data); + void remove(const String &p_program_hash); + + ShaderCacheGLES3(); + ~ShaderCacheGLES3(); +}; + +#endif diff --git a/drivers/gles3/shader_compiler_gles3.cpp b/drivers/gles3/shader_compiler_gles3.cpp index 7966fe00856..a630395e61a 100644 --- a/drivers/gles3/shader_compiler_gles3.cpp +++ b/drivers/gles3/shader_compiler_gles3.cpp @@ -448,8 +448,8 @@ String ShaderCompilerGLES3::_dump_node_code(const SL::Node *p_node, int p_level, int max_texture_uniforms = 0; int max_uniforms = 0; - for (Map::Element *E = pnode->uniforms.front(); E; E = E->next()) { - if (SL::is_sampler_type(E->get().type)) { + for (OrderedHashMap::Element E = pnode->uniforms.front(); E; E = E.next()) { + if (SL::is_sampler_type(E.get().type)) { max_texture_uniforms++; } else { max_uniforms++; @@ -468,34 +468,34 @@ String ShaderCompilerGLES3::_dump_node_code(const SL::Node *p_node, int p_level, uniform_defines.resize(max_uniforms); bool uses_uniforms = false; - for (Map::Element *E = pnode->uniforms.front(); E; E = E->next()) { + for (OrderedHashMap::Element E = pnode->uniforms.front(); E; E = E.next()) { String ucode; - if (SL::is_sampler_type(E->get().type)) { + if (SL::is_sampler_type(E.get().type)) { ucode = "uniform "; } - ucode += _prestr(E->get().precision); - ucode += 
_typestr(E->get().type); - ucode += " " + _mkid(E->key()); + ucode += _prestr(E.get().precision); + ucode += _typestr(E.get().type); + ucode += " " + _mkid(E.key()); ucode += ";\n"; - if (SL::is_sampler_type(E->get().type)) { + if (SL::is_sampler_type(E.get().type)) { r_gen_code.vertex_global += ucode; r_gen_code.fragment_global += ucode; - r_gen_code.texture_uniforms.write[E->get().texture_order] = _mkid(E->key()); - r_gen_code.texture_hints.write[E->get().texture_order] = E->get().hint; - r_gen_code.texture_types.write[E->get().texture_order] = E->get().type; + r_gen_code.texture_uniforms.write[E.get().texture_order] = _mkid(E.key()); + r_gen_code.texture_hints.write[E.get().texture_order] = E.get().hint; + r_gen_code.texture_types.write[E.get().texture_order] = E.get().type; } else { if (!uses_uniforms) { r_gen_code.defines.push_back(String("#define USE_MATERIAL\n").ascii()); uses_uniforms = true; } - uniform_defines.write[E->get().order] = ucode; - uniform_sizes.write[E->get().order] = _get_datatype_size(E->get().type); - uniform_alignments.write[E->get().order] = _get_datatype_alignment(E->get().type); + uniform_defines.write[E.get().order] = ucode; + uniform_sizes.write[E.get().order] = _get_datatype_size(E.get().type); + uniform_alignments.write[E.get().order] = _get_datatype_alignment(E.get().type); } - p_actions.uniforms->insert(E->key(), E->get()); + p_actions.uniforms->insert(E.key(), E.get()); } for (int i = 0; i < max_uniforms; i++) { @@ -523,20 +523,20 @@ String ShaderCompilerGLES3::_dump_node_code(const SL::Node *p_node, int p_level, List> var_frag_to_light; - for (Map::Element *E = pnode->varyings.front(); E; E = E->next()) { - if (E->get().stage == SL::ShaderNode::Varying::STAGE_FRAGMENT_TO_LIGHT || E->get().stage == SL::ShaderNode::Varying::STAGE_FRAGMENT) { - var_frag_to_light.push_back(Pair(E->key(), E->get())); - fragment_varyings.insert(E->key()); + for (OrderedHashMap::Element E = pnode->varyings.front(); E; E = E.next()) { + if 
(E.get().stage == SL::ShaderNode::Varying::STAGE_FRAGMENT_TO_LIGHT || E.get().stage == SL::ShaderNode::Varying::STAGE_FRAGMENT) { + var_frag_to_light.push_back(Pair(E.key(), E.get())); + fragment_varyings.insert(E.key()); continue; } String vcode; - String interp_mode = _interpstr(E->get().interpolation); - vcode += _prestr(E->get().precision); - vcode += _typestr(E->get().type); - vcode += " " + _mkid(E->key()); - if (E->get().array_size > 0) { + String interp_mode = _interpstr(E.get().interpolation); + vcode += _prestr(E.get().precision); + vcode += _typestr(E.get().type); + vcode += " " + _mkid(E.key()); + if (E.get().array_size > 0) { vcode += "["; - vcode += itos(E->get().array_size); + vcode += itos(E.get().array_size); vcode += "]"; } vcode += ";\n"; diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp index af03b0b6a02..79cabe43cfa 100644 --- a/drivers/gles3/shader_gles3.cpp +++ b/drivers/gles3/shader_gles3.cpp @@ -30,7 +30,12 @@ #include "shader_gles3.h" +#include "core/local_vector.h" +#include "core/os/os.h" #include "core/print_string.h" +#include "core/threaded_callable_queue.h" +#include "drivers/gles3/shader_cache_gles3.h" +#include "servers/visual_server.h" //#define DEBUG_OPENGL @@ -50,56 +55,33 @@ #endif ShaderGLES3 *ShaderGLES3::active = nullptr; +SelfList::List ShaderGLES3::versions_compiling; + +ShaderCacheGLES3 *ShaderGLES3::shader_cache; +ThreadedCallableQueue *ShaderGLES3::cache_write_queue; + +ThreadedCallableQueue *ShaderGLES3::compile_queue; +bool ShaderGLES3::parallel_compile_supported; + +bool ShaderGLES3::async_hidden_forbidden; +int *ShaderGLES3::compiles_started_this_frame; +int ShaderGLES3::max_simultaneous_compiles; +#ifdef DEBUG_ENABLED +bool ShaderGLES3::log_active_async_compiles_count; +#endif + +int ShaderGLES3::active_compiles_count; +uint64_t ShaderGLES3::current_frame; //#define DEBUG_SHADER #ifdef DEBUG_SHADER - #define DEBUG_PRINT(m_text) print_line(m_text); - #else - #define DEBUG_PRINT(m_text) - 
#endif -void ShaderGLES3::bind_uniforms() { - if (!uniforms_dirty) { - return; - }; - - // upload default uniforms - const Map::Element *E = uniform_defaults.front(); - - while (E) { - int idx = E->key(); - int location = version->uniform_location[idx]; - - if (location < 0) { - E = E->next(); - continue; - } - - const Variant &v = E->value(); - _set_uniform_variant(location, v); - //print_line("uniform "+itos(location)+" value "+v+ " type "+Variant::get_type_name(v.get_type())); - E = E->next(); - }; - - const Map::Element *C = uniform_cameras.front(); - while (C) { - int location = version->uniform_location[C->key()]; - if (location < 0) { - C = C->next(); - continue; - } - - glUniformMatrix4fv(location, 1, false, &(C->get().matrix[0][0])); - C = C->next(); - }; - - uniforms_dirty = false; -} +#define _EXT_COMPLETION_STATUS 0x91B1 GLint ShaderGLES3::get_uniform_location(int p_index) const { ERR_FAIL_COND_V(!version, -1); @@ -108,44 +90,338 @@ GLint ShaderGLES3::get_uniform_location(int p_index) const { } bool ShaderGLES3::bind() { - if (active != this || !version || new_conditional_version.key != conditional_version.key) { - conditional_version = new_conditional_version; - version = get_current_version(); - } else { - return false; + return _bind(false); +} + +bool ShaderGLES3::_bind(bool p_binding_fallback) { + // Same base shader and version valid version? + if (active == this && version) { + if (new_conditional_version.code_version == conditional_version.code_version) { + if (new_conditional_version.version == conditional_version.version) { + return false; + } + // From ubershader to ubershader of the same code? 
+ if ((conditional_version.version & VersionKey::UBERSHADER_FLAG) && (new_conditional_version.version & VersionKey::UBERSHADER_FLAG)) { + conditional_version.version = new_conditional_version.version; + return false; + } + } } + bool must_be_ready_now = !is_async_compilation_supported() || p_binding_fallback; + + conditional_version = new_conditional_version; + version = get_current_version(must_be_ready_now); ERR_FAIL_COND_V(!version, false); - if (!version->ok) { //broken, unable to bind (do not throw error, you saw it before already when it failed compilation). - glUseProgram(0); - return false; + bool ready = false; + ready = _process_program_state(version, must_be_ready_now); + if (version->compile_status == Version::COMPILE_STATUS_RESTART_NEEDED) { + get_current_version(must_be_ready_now); // Trigger recompile + ready = _process_program_state(version, must_be_ready_now); } - glUseProgram(version->id); +#ifdef DEBUG_ENABLED + if (ready) { + if (VS::get_singleton()->is_force_shader_fallbacks_enabled() && !must_be_ready_now) { + ready = false; + } + } +#endif - DEBUG_TEST_ERROR("Use Program"); + if (ready) { + glUseProgram(version->ids.main); + if (!version->uniforms_ready) { + _setup_uniforms(custom_code_map.getptr(conditional_version.code_version)); + version->uniforms_ready = true; + } + DEBUG_TEST_ERROR("Use Program"); + active = this; + return true; + } else if (!must_be_ready_now && version->async_mode == ASYNC_MODE_VISIBLE && !p_binding_fallback && get_ubershader_flags_uniform() != -1) { + // We can and have to fall back to the ubershader + return _bind_ubershader(); + } else { + // We have a compile error or must fall back by skipping render + unbind(); + return false; + } +} - active = this; - uniforms_dirty = true; - return true; +bool ShaderGLES3::_bind_ubershader() { +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_V(!is_async_compilation_supported(), false); + ERR_FAIL_COND_V(get_ubershader_flags_uniform() == -1, false); +#endif + 
new_conditional_version.version |= VersionKey::UBERSHADER_FLAG; + bool bound = _bind(true); + int conditionals_uniform = _get_uniform(get_ubershader_flags_uniform()); +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_V(conditionals_uniform == -1, false); +#endif + new_conditional_version.version &= ~VersionKey::UBERSHADER_FLAG; + glUniform1i(conditionals_uniform, new_conditional_version.version); + return bound; +} + +void ShaderGLES3::advance_async_shaders_compilation() { + SelfList *curr = versions_compiling.first(); + while (curr) { + SelfList *next = curr->next(); + + ShaderGLES3::Version *v = curr->self(); + // Only if it didn't already have a chance to be processed in this frame + if (v->last_frame_processed != current_frame) { + v->shader->_process_program_state(v, false); + } + + curr = next; + } +} + +void ShaderGLES3::_log_active_compiles() { +#ifdef DEBUG_ENABLED + if (log_active_async_compiles_count) { + ERR_FAIL_COND(active_compiles_count < 0); + if (parallel_compile_supported) { + print_line("Async. shader compiles: " + itos(active_compiles_count)); + } else if (compile_queue) { + print_line("Queued shader compiles: " + itos(active_compiles_count)); + } else { + CRASH_NOW(); + } + } +#endif +} + +bool ShaderGLES3::_process_program_state(Version *p_version, bool p_async_forbidden) { + bool ready = false; + bool run_next_step = true; + while (run_next_step) { + run_next_step = false; + switch (p_version->compile_status) { + case Version::COMPILE_STATUS_OK: { + // Yeaaah! + ready = true; + } break; + case Version::COMPILE_STATUS_ERROR: { + // Sad, but we have to accept it + } break; + case Version::COMPILE_STATUS_PENDING: + case Version::COMPILE_STATUS_RESTART_NEEDED: { + // These lead to nowhere unless other piece of code starts the compile process + } break; + case Version::COMPILE_STATUS_SOURCE_PROVIDED: { + int start_compiles_count = p_async_forbidden ? 
2 : 0; + if (!start_compiles_count) { + int free_async_slots = MAX(0, MIN(max_simultaneous_compiles - active_compiles_count, max_simultaneous_compiles - *compiles_started_this_frame)); + start_compiles_count = MIN(2, free_async_slots); + } + if (start_compiles_count >= 1) { + glCompileShader(p_version->ids.vert); + if (start_compiles_count == 1) { + p_version->compile_status = Version::COMPILE_STATUS_COMPILING_VERTEX; + } else { + glCompileShader(p_version->ids.frag); + p_version->compile_status = Version::COMPILE_STATUS_COMPILING_VERTEX_AND_FRAGMENT; + } + if (!p_async_forbidden) { + versions_compiling.add_last(&p_version->compiling_list); + // Vertex and fragment shaders take independent compile slots + active_compiles_count += start_compiles_count; + _log_active_compiles(); + } + (*compiles_started_this_frame) += start_compiles_count; + run_next_step = p_async_forbidden; + } + } break; + case Version::COMPILE_STATUS_COMPILING_VERTEX: { + bool must_compile_frag_now = p_async_forbidden; + if (!must_compile_frag_now) { + if (active_compiles_count < max_simultaneous_compiles && *compiles_started_this_frame < max_simultaneous_compiles) { + must_compile_frag_now = true; + } + } + if (must_compile_frag_now) { + glCompileShader(p_version->ids.frag); + if (p_version->compiling_list.in_list()) { + active_compiles_count++; + _log_active_compiles(); + } + p_version->compile_status = Version::COMPILE_STATUS_COMPILING_VERTEX_AND_FRAGMENT; + } else if (parallel_compile_supported) { + GLint completed = 0; + glGetShaderiv(p_version->ids.vert, _EXT_COMPLETION_STATUS, &completed); + if (completed) { + // Not touching compiles count since the same slot used for vertex is now used for fragment + glCompileShader(p_version->ids.frag); + p_version->compile_status = Version::COMPILE_STATUS_COMPILING_FRAGMENT; + } + } + run_next_step = p_async_forbidden; + } break; + case Version::COMPILE_STATUS_COMPILING_FRAGMENT: + case Version::COMPILE_STATUS_COMPILING_VERTEX_AND_FRAGMENT: { + bool 
must_complete_now = p_async_forbidden; + if (!must_complete_now && parallel_compile_supported) { + GLint vertex_completed = 0; + if (p_version->compile_status == Version::COMPILE_STATUS_COMPILING_FRAGMENT) { + vertex_completed = true; + } else { + glGetShaderiv(p_version->ids.vert, _EXT_COMPLETION_STATUS, &vertex_completed); + if (p_version->compiling_list.in_list()) { + active_compiles_count--; + _log_active_compiles(); + } + p_version->compile_status = Version::COMPILE_STATUS_COMPILING_FRAGMENT; + } + if (vertex_completed) { + GLint frag_completed = 0; + glGetShaderiv(p_version->ids.frag, _EXT_COMPLETION_STATUS, &frag_completed); + if (frag_completed) { + must_complete_now = true; + } + } + } + if (must_complete_now) { + bool must_save_to_cache = p_version->version_key.is_subject_to_caching() && p_version->program_binary.source != Version::ProgramBinary::SOURCE_CACHE && shader_cache; + bool ok = p_version->shader->_complete_compile(p_version->ids, must_save_to_cache); + if (ok) { + p_version->compile_status = Version::COMPILE_STATUS_LINKING; + run_next_step = p_async_forbidden; + } else { + p_version->compile_status = Version::COMPILE_STATUS_ERROR; + if (p_version->compiling_list.in_list()) { + p_version->compiling_list.remove_from_list(); + active_compiles_count--; + _log_active_compiles(); + } + } + } + } break; + case Version::COMPILE_STATUS_PROCESSING_AT_QUEUE: { + // This is from the async. 
queue + switch (p_version->program_binary.result_from_queue.get()) { + case -1: { // Error + p_version->compile_status = Version::COMPILE_STATUS_ERROR; + p_version->compiling_list.remove_from_list(); + active_compiles_count--; + _log_active_compiles(); + } break; + case 0: { // In progress + if (p_async_forbidden) { + OS::get_singleton()->delay_usec(1000); + run_next_step = true; + } + } break; + case 1: { // Complete + p_version->compile_status = Version::COMPILE_STATUS_BINARY_READY; + run_next_step = true; + } break; + } + } break; + case Version::COMPILE_STATUS_BINARY_READY_FROM_CACHE: { + bool eat_binary_now = p_async_forbidden; + if (!eat_binary_now) { + if (active_compiles_count < max_simultaneous_compiles && *compiles_started_this_frame < max_simultaneous_compiles) { + eat_binary_now = true; + } + } + if (eat_binary_now) { + p_version->compile_status = Version::COMPILE_STATUS_BINARY_READY; + run_next_step = true; + if (!p_async_forbidden) { + versions_compiling.add_last(&p_version->compiling_list); + active_compiles_count++; + _log_active_compiles(); + (*compiles_started_this_frame)++; + } + } + } break; + case Version::COMPILE_STATUS_BINARY_READY: { + PoolByteArray::Read r = p_version->program_binary.data.read(); + glProgramBinary(p_version->ids.main, static_cast(p_version->program_binary.format), r.ptr(), p_version->program_binary.data.size()); + p_version->compile_status = Version::COMPILE_STATUS_LINKING; + run_next_step = true; + } break; + case Version::COMPILE_STATUS_LINKING: { + bool must_complete_now = p_async_forbidden || p_version->program_binary.source == Version::ProgramBinary::SOURCE_QUEUE; + if (!must_complete_now && parallel_compile_supported) { + GLint link_completed; + glGetProgramiv(p_version->ids.main, _EXT_COMPLETION_STATUS, &link_completed); + must_complete_now = link_completed; + } + if (must_complete_now) { + bool must_save_to_cache = p_version->version_key.is_subject_to_caching() && p_version->program_binary.source != 
Version::ProgramBinary::SOURCE_CACHE && shader_cache; + bool ok = false; + if (must_save_to_cache && p_version->program_binary.source == Version::ProgramBinary::SOURCE_LOCAL) { + ok = p_version->shader->_complete_link(p_version->ids, &p_version->program_binary.format, &p_version->program_binary.data); + } else { + ok = p_version->shader->_complete_link(p_version->ids); +#ifdef DEBUG_ENABLED +#if 0 + // Simulate GL rejecting program from cache + if (p_version->program_binary.source == Version::ProgramBinary::SOURCE_CACHE) { + ok = false; + } +#endif +#endif + } + if (ok) { + if (must_save_to_cache) { + String &tmp_hash = p_version->program_binary.cache_hash; + GLenum &tmp_format = p_version->program_binary.format; + PoolByteArray &tmp_data = p_version->program_binary.data; + cache_write_queue->enqueue(p_version->ids.main, [=]() { + shader_cache->store(tmp_hash, static_cast(tmp_format), tmp_data); + }); + } + p_version->compile_status = Version::COMPILE_STATUS_OK; + ready = true; + } else { + if (p_version->program_binary.source == Version::ProgramBinary::SOURCE_CACHE) { +#ifdef DEBUG_ENABLED + WARN_PRINT("Program binary from cache has been rejected by the GL. Removing from cache."); +#endif + shader_cache->remove(p_version->program_binary.cache_hash); + p_version->compile_status = Version::COMPILE_STATUS_RESTART_NEEDED; + } else { + if (p_version->program_binary.source == Version::ProgramBinary::SOURCE_QUEUE) { + ERR_PRINT("Program binary from compile queue has been rejected by the GL. 
Bug?"); + } + p_version->compile_status = Version::COMPILE_STATUS_ERROR; + } + } + p_version->program_binary.data = PoolByteArray(); + p_version->program_binary.cache_hash.clear(); + if (p_version->compiling_list.in_list()) { + p_version->compiling_list.remove_from_list(); + active_compiles_count--; + _log_active_compiles(); + } + } + } break; + } + } + return ready; } void ShaderGLES3::unbind() { version = nullptr; glUseProgram(0); - uniforms_dirty = true; active = nullptr; } -static void _display_error_with_code(const String &p_error, const Vector &p_code) { +static void _display_error_with_code(const String &p_error, GLuint p_shader_id) { int line = 1; - String total_code; - for (int i = 0; i < p_code.size(); i++) { - total_code += String(p_code[i]); - } + GLint source_len; + glGetShaderiv(p_shader_id, GL_SHADER_SOURCE_LENGTH, &source_len); + LocalVector source_buffer; + source_buffer.resize(source_len); + glGetShaderSource(p_shader_id, source_len, NULL, source_buffer.ptr()); + String total_code(source_buffer.ptr()); Vector lines = String(total_code).split("\n"); for (int j = 0; j < lines.size(); j++) { @@ -156,157 +432,425 @@ static void _display_error_with_code(const String &p_error, const Vector lines = s.split("\n"); + s.clear(); + for (int i = 0; i < lines.size(); ++i) { + if (lines[i].ends_with("//ubershader-skip")) { + continue; + } else if (lines[i].ends_with("//ubershader-runtime")) { + // Move from the preprocessor world to the true code realm + String l = lines[i].trim_suffix("//ubershader-runtime").strip_edges(); + { + // Ignore other comments + Vector pieces = l.split("//"); + l = pieces[0].strip_edges(); + } + if (l == "#else") { + s += "} else {\n"; + } else if (l == "#endif") { + s += "}\n"; + } else if (l.begins_with("#ifdef")) { + Vector pieces = l.split_spaces(); + CRASH_COND(pieces.size() != 2); + s += "if ((ubershader_flags & FLAG_" + pieces[1] + ") != 0) {\n"; + } else if (l.begins_with("#ifndef")) { + Vector pieces = l.split_spaces(); + 
CRASH_COND(pieces.size() != 2); + s += "if ((ubershader_flags & FLAG_" + pieces[1] + ") == 0) {\n"; + } else { + CRASH_NOW_MSG("The shader template is using too complex syntax in a line marked with ubershader-runtime."); + } + continue; + } + s += lines[i] + "\n"; + } + return s.ascii(); +} +// Possible source-status pairs after this: +// Local - Source provided +// Queue - Processing / Binary ready / Error +// Cache - Binary ready +ShaderGLES3::Version *ShaderGLES3::get_current_version(bool &r_async_forbidden) { + VersionKey effective_version; + effective_version.key = conditional_version.key; + // Store and look up ubershader with all other version bits set to zero + if ((conditional_version.version & VersionKey::UBERSHADER_FLAG)) { + effective_version.version = VersionKey::UBERSHADER_FLAG; + } + + Version *_v = version_map.getptr(effective_version); + CustomCode *cc = nullptr; if (_v) { - if (conditional_version.code_version != 0) { - CustomCode *cc = custom_code_map.getptr(conditional_version.code_version); - ERR_FAIL_COND_V(!cc, _v); - if (cc->version == _v->code_version) { + if (_v->compile_status == Version::COMPILE_STATUS_RESTART_NEEDED) { + _v->program_binary.source = Version::ProgramBinary::SOURCE_NONE; + } else { + if (effective_version.code_version != 0) { + cc = custom_code_map.getptr(effective_version.code_version); + ERR_FAIL_COND_V(!cc, _v); + if (cc->version == _v->code_version) { + return _v; + } + } else { return _v; } - } else { - return _v; } } if (!_v) { - version_map[conditional_version] = Version(); + _v = &version_map[effective_version]; + _v->version_key = effective_version; + _v->shader = this; + _v->uniform_location = memnew_arr(GLint, uniform_count); } - Version &v = version_map[conditional_version]; + Version &v = *_v; - if (!_v) { - v.uniform_location = memnew_arr(GLint, uniform_count); - - } else { - if (v.ok) { - //bye bye shaders - glDeleteShader(v.vert_id); - glDeleteShader(v.frag_id); - glDeleteProgram(v.id); - v.id = 0; - } - } 
- - v.ok = false; /* SETUP CONDITIONALS */ - Vector strings; + LocalVector strings_common; #ifdef GLES_OVER_GL - strings.push_back("#version 330\n"); - strings.push_back("#define GLES_OVER_GL\n"); + strings_common.push_back("#version 330\n"); + strings_common.push_back("#define GLES_OVER_GL\n"); #else - strings.push_back("#version 300 es\n"); + strings_common.push_back("#version 300 es\n"); #endif #ifdef ANDROID_ENABLED - strings.push_back("#define ANDROID_ENABLED\n"); + strings_common.push_back("#define ANDROID_ENABLED\n"); #endif for (int i = 0; i < custom_defines.size(); i++) { - strings.push_back(custom_defines[i].get_data()); - strings.push_back("\n"); + strings_common.push_back(custom_defines[i].get_data()); + strings_common.push_back("\n"); } - for (int j = 0; j < conditional_count; j++) { - bool enable = ((1 << j) & conditional_version.version); - strings.push_back(enable ? conditional_defines[j] : ""); + LocalVector flag_macros; + bool build_ubershader = get_ubershader_flags_uniform() != -1 && (effective_version.version & VersionKey::UBERSHADER_FLAG); + if (build_ubershader) { + strings_common.push_back("#define IS_UBERSHADER\n"); + for (int i = 0; i < conditional_count; i++) { + String s = vformat("#define FLAG_%s (1 << %d)\n", String(conditional_defines[i]).strip_edges().trim_prefix("#define "), i); + CharString cs = s.ascii(); + flag_macros.push_back(cs); + strings_common.push_back(cs.ptr()); + } + strings_common.push_back("\n"); + } else { + for (int i = 0; i < conditional_count; i++) { + bool enable = ((1 << i) & effective_version.version); + strings_common.push_back(enable ? 
conditional_defines[i] : ""); - if (enable) { - DEBUG_PRINT(conditional_defines[j]); + if (enable) { + DEBUG_PRINT(conditional_defines[i]); + } } } //keep them around during the function - CharString code_string; - CharString code_string2; - CharString code_globals; - CharString material_string; + struct { + CharString code_string; + CharString code_globals; + CharString material_string; + } vert; + struct { + CharString code_string; + CharString code_string2; + CharString code_globals; + CharString material_string; + } frag; - CustomCode *cc = nullptr; - - if (conditional_version.code_version > 0) { - //do custom code related stuff - - ERR_FAIL_COND_V(!custom_code_map.has(conditional_version.code_version), nullptr); - cc = &custom_code_map[conditional_version.code_version]; - v.code_version = cc->version; + if (effective_version.code_version != 0) { + ERR_FAIL_COND_V(!custom_code_map.has(effective_version.code_version), nullptr); + if (!cc) { + cc = &custom_code_map[effective_version.code_version]; + } + if (cc->version != v.code_version) { + v.code_version = cc->version; + v.async_mode = cc->async_mode; + v.uniforms_ready = false; + } } /* CREATE PROGRAM */ - v.id = glCreateProgram(); + v.ids.main = glCreateProgram(); - ERR_FAIL_COND_V(v.id == 0, nullptr); + ERR_FAIL_COND_V(v.ids.main == 0, nullptr); + + // To create the ubershader we need to modify the static strings; + // they'll go in this array + LocalVector filtered_strings; /* VERTEX SHADER */ if (cc) { for (int i = 0; i < cc->custom_defines.size(); i++) { - strings.push_back(cc->custom_defines[i].get_data()); + strings_common.push_back(cc->custom_defines[i].get_data()); DEBUG_PRINT("CD #" + itos(i) + ": " + String(cc->custom_defines[i])); } } - int strings_base_size = strings.size(); + LocalVector strings_vertex(strings_common); //vertex precision is high - strings.push_back("precision highp float;\n"); - strings.push_back("precision highp int;\n"); + strings_vertex.push_back("precision highp float;\n"); + 
strings_vertex.push_back("precision highp int;\n"); #ifndef GLES_OVER_GL - strings.push_back("precision highp sampler2D;\n"); - strings.push_back("precision highp samplerCube;\n"); - strings.push_back("precision highp sampler2DArray;\n"); + strings_vertex.push_back("precision highp sampler2D;\n"); + strings_vertex.push_back("precision highp samplerCube;\n"); + strings_vertex.push_back("precision highp sampler2DArray;\n"); #endif - strings.push_back(vertex_code0.get_data()); - - if (cc) { - material_string = cc->uniforms.ascii(); - strings.push_back(material_string.get_data()); + if (build_ubershader) { + CharString s = _prepare_ubershader_chunk(vertex_code0); + filtered_strings.push_back(s); + strings_vertex.push_back(s.get_data()); + } else { + strings_vertex.push_back(vertex_code0.get_data()); } - strings.push_back(vertex_code1.get_data()); - if (cc) { - code_globals = cc->vertex_globals.ascii(); - strings.push_back(code_globals.get_data()); + vert.material_string = cc->uniforms.ascii(); + strings_vertex.push_back(vert.material_string.get_data()); } - strings.push_back(vertex_code2.get_data()); - - if (cc) { - code_string = cc->vertex.ascii(); - strings.push_back(code_string.get_data()); + if (build_ubershader) { + CharString s = _prepare_ubershader_chunk(vertex_code1); + filtered_strings.push_back(s); + strings_vertex.push_back(s.get_data()); + } else { + strings_vertex.push_back(vertex_code1.get_data()); + } + + if (cc) { + vert.code_globals = cc->vertex_globals.ascii(); + strings_vertex.push_back(vert.code_globals.get_data()); + } + + if (build_ubershader) { + CharString s = _prepare_ubershader_chunk(vertex_code2); + filtered_strings.push_back(s); + strings_vertex.push_back(s.get_data()); + } else { + strings_vertex.push_back(vertex_code2.get_data()); + } + + if (cc) { + vert.code_string = cc->vertex.ascii(); + strings_vertex.push_back(vert.code_string.get_data()); + } + + if (build_ubershader) { + CharString s = _prepare_ubershader_chunk(vertex_code3); + 
filtered_strings.push_back(s); + strings_vertex.push_back(s.get_data()); + } else { + strings_vertex.push_back(vertex_code3.get_data()); } - strings.push_back(vertex_code3.get_data()); #ifdef DEBUG_SHADER - DEBUG_PRINT("\nVertex Code:\n\n" + String(code_string.get_data())); - for (int i = 0; i < strings.size(); i++) { - //print_line("vert strings "+itos(i)+":"+String(strings[i])); + for (int i = 0; i < strings_vertex.size(); i++) { + //print_line("vert strings "+itos(i)+":"+String(strings_vertex[i])); } #endif - v.vert_id = glCreateShader(GL_VERTEX_SHADER); - glShaderSource(v.vert_id, strings.size(), &strings[0], nullptr); - glCompileShader(v.vert_id); + /* FRAGMENT SHADER */ + LocalVector strings_fragment(strings_common); + + //fragment precision is medium + strings_fragment.push_back("precision highp float;\n"); + strings_fragment.push_back("precision highp int;\n"); +#ifndef GLES_OVER_GL + strings_fragment.push_back("precision highp sampler2D;\n"); + strings_fragment.push_back("precision highp samplerCube;\n"); + strings_fragment.push_back("precision highp sampler2DArray;\n"); +#endif + + if (build_ubershader) { + CharString s = _prepare_ubershader_chunk(fragment_code0); + filtered_strings.push_back(s); + strings_fragment.push_back(s.get_data()); + } else { + strings_fragment.push_back(fragment_code0.get_data()); + } + + if (cc) { + frag.material_string = cc->uniforms.ascii(); + strings_fragment.push_back(frag.material_string.get_data()); + } + + if (build_ubershader) { + CharString s = _prepare_ubershader_chunk(fragment_code1); + filtered_strings.push_back(s); + strings_fragment.push_back(s.get_data()); + } else { + strings_fragment.push_back(fragment_code1.get_data()); + } + + if (cc) { + frag.code_globals = cc->fragment_globals.ascii(); + strings_fragment.push_back(frag.code_globals.get_data()); + } + + if (build_ubershader) { + CharString s = _prepare_ubershader_chunk(fragment_code2); + filtered_strings.push_back(s); + 
strings_fragment.push_back(s.get_data()); + } else { + strings_fragment.push_back(fragment_code2.get_data()); + } + + if (cc) { + frag.code_string = cc->light.ascii(); + strings_fragment.push_back(frag.code_string.get_data()); + } + + if (build_ubershader) { + CharString s = _prepare_ubershader_chunk(fragment_code3); + filtered_strings.push_back(s); + strings_fragment.push_back(s.get_data()); + } else { + strings_fragment.push_back(fragment_code3.get_data()); + } + + if (cc) { + frag.code_string2 = cc->fragment.ascii(); + strings_fragment.push_back(frag.code_string2.get_data()); + } + + if (build_ubershader) { + CharString s = _prepare_ubershader_chunk(fragment_code4); + filtered_strings.push_back(s); + strings_fragment.push_back(s.get_data()); + } else { + strings_fragment.push_back(fragment_code4.get_data()); + } + +#ifdef DEBUG_SHADER + DEBUG_PRINT("\nFragment Globals:\n\n" + String(code_globals.get_data())); + DEBUG_PRINT("\nFragment Code:\n\n" + String(code_string2.get_data())); + for (int i = 0; i < strings_fragment.size(); i++) { + //print_line("frag strings "+itos(i)+":"+String(strings_fragment[i])); + } +#endif + + if (!r_async_forbidden) { + r_async_forbidden = + (v.async_mode == ASYNC_MODE_HIDDEN && async_hidden_forbidden) || + (v.async_mode == ASYNC_MODE_VISIBLE && get_ubershader_flags_uniform() == -1); + } + + bool in_cache = false; + if (shader_cache && effective_version.is_subject_to_caching()) { + const char *strings_platform[] = { + reinterpret_cast(glGetString(GL_VENDOR)), + reinterpret_cast(glGetString(GL_RENDERER)), + reinterpret_cast(glGetString(GL_VERSION)), + nullptr, + }; + v.program_binary.cache_hash = ShaderCacheGLES3::hash_program(strings_platform, strings_vertex, strings_fragment); + if (shader_cache->retrieve(v.program_binary.cache_hash, &v.program_binary.format, &v.program_binary.data)) { + in_cache = true; + v.program_binary.source = Version::ProgramBinary::SOURCE_CACHE; + v.compile_status = 
Version::COMPILE_STATUS_BINARY_READY_FROM_CACHE; + } + } + if (!in_cache) { + if (compile_queue && !r_async_forbidden) { + // Asynchronous compilation via queue (secondary context) + // Remarks: + // 1. We need to save vertex and fragment strings because they will not live beyond this function. + // 2. We'll create another program since the other GL context is not shared. + // We are doing it that way since GL drivers can implement context sharing via locking, which + // would render (no pun intended) this whole effort to be asynchronous useless. + + auto concat_shader_strings = [](const LocalVector &p_shader_strings, LocalVector *r_out) { + r_out->clear(); + for (uint32_t i = 0; i < p_shader_strings.size(); i++) { + uint32_t initial_size = r_out->size(); + uint32_t piece_len = strlen(reinterpret_cast(p_shader_strings[i])); + r_out->resize(initial_size + piece_len + 1); + memcpy(r_out->ptr() + initial_size, p_shader_strings[i], piece_len); + *(r_out->ptr() + initial_size + piece_len) = '\n'; + } + *(r_out->ptr() + r_out->size() - 1) = '\0'; + }; + + LocalVector vertex_code; + concat_shader_strings(strings_vertex, &vertex_code); + LocalVector fragment_code; + concat_shader_strings(strings_fragment, &fragment_code); + + v.program_binary.source = Version::ProgramBinary::SOURCE_QUEUE; + v.compile_status = Version::COMPILE_STATUS_PROCESSING_AT_QUEUE; + versions_compiling.add_last(&v.compiling_list); + active_compiles_count++; + _log_active_compiles(); + (*compiles_started_this_frame)++; + + compile_queue->enqueue(v.ids.main, [this, &v, vertex_code, fragment_code]() { + Version::Ids async_ids; + async_ids.main = glCreateProgram(); + async_ids.vert = glCreateShader(GL_VERTEX_SHADER); + async_ids.frag = glCreateShader(GL_FRAGMENT_SHADER); + + LocalVector async_strings_vertex; + async_strings_vertex.push_back(vertex_code.ptr()); + LocalVector async_strings_fragment; + async_strings_fragment.push_back(fragment_code.ptr()); + + _set_source(async_ids, async_strings_vertex,
async_strings_fragment); + glCompileShader(async_ids.vert); + glCompileShader(async_ids.frag); + if (_complete_compile(async_ids, true) && _complete_link(async_ids, &v.program_binary.format, &v.program_binary.data)) { + glDeleteShader(async_ids.frag); + glDeleteShader(async_ids.vert); + glDeleteProgram(async_ids.main); + v.program_binary.result_from_queue.set(1); + } else { + v.program_binary.result_from_queue.set(0); + } + }); + } else { + // Synchronous compilation, or async. via native support + v.ids.vert = glCreateShader(GL_VERTEX_SHADER); + v.ids.frag = glCreateShader(GL_FRAGMENT_SHADER); + _set_source(v.ids, strings_vertex, strings_fragment); + v.program_binary.source = Version::ProgramBinary::SOURCE_LOCAL; + v.compile_status = Version::COMPILE_STATUS_SOURCE_PROVIDED; + } + } + + if (cc) { + cc->versions.insert(effective_version.version); + } + + return &v; +} + +void ShaderGLES3::_set_source(Version::Ids p_ids, const LocalVector &p_vertex_strings, const LocalVector &p_fragment_strings) const { + glShaderSource(p_ids.vert, p_vertex_strings.size(), p_vertex_strings.ptr(), nullptr); + glShaderSource(p_ids.frag, p_fragment_strings.size(), p_fragment_strings.ptr(), nullptr); +} + +bool ShaderGLES3::_complete_compile(Version::Ids p_ids, bool p_retrievable) const { GLint status; - glGetShaderiv(v.vert_id, GL_COMPILE_STATUS, &status); + glGetShaderiv(p_ids.vert, GL_COMPILE_STATUS, &status); if (status == GL_FALSE) { // error compiling GLsizei iloglen; - glGetShaderiv(v.vert_id, GL_INFO_LOG_LENGTH, &iloglen); + glGetShaderiv(p_ids.vert, GL_INFO_LOG_LENGTH, &iloglen); if (iloglen < 0) { - glDeleteShader(v.vert_id); - glDeleteProgram(v.id); - v.id = 0; + glDeleteShader(p_ids.frag); + glDeleteShader(p_ids.vert); + glDeleteProgram(p_ids.main); ERR_PRINT("Vertex shader compilation failed with empty log"); } else { @@ -316,87 +860,32 @@ ShaderGLES3::Version *ShaderGLES3::get_current_version() { char *ilogmem = (char *)memalloc(iloglen + 1); ilogmem[iloglen] = 0; - 
glGetShaderInfoLog(v.vert_id, iloglen, &iloglen, ilogmem); + glGetShaderInfoLog(p_ids.vert, iloglen, &iloglen, ilogmem); String err_string = get_shader_name() + ": Vertex Program Compilation Failed:\n"; err_string += ilogmem; - _display_error_with_code(err_string, strings); + _display_error_with_code(err_string, p_ids.vert); + ERR_PRINT(err_string.ascii().get_data()); memfree(ilogmem); - glDeleteShader(v.vert_id); - glDeleteProgram(v.id); - v.id = 0; + glDeleteShader(p_ids.frag); + glDeleteShader(p_ids.vert); + glDeleteProgram(p_ids.main); } - ERR_FAIL_V(nullptr); + return false; } - //_display_error_with_code("pepo", strings); - - /* FRAGMENT SHADER */ - - strings.resize(strings_base_size); - //fragment precision is medium - strings.push_back("precision highp float;\n"); - strings.push_back("precision highp int;\n"); -#ifndef GLES_OVER_GL - strings.push_back("precision highp sampler2D;\n"); - strings.push_back("precision highp samplerCube;\n"); - strings.push_back("precision highp sampler2DArray;\n"); -#endif - - strings.push_back(fragment_code0.get_data()); - if (cc) { - material_string = cc->uniforms.ascii(); - strings.push_back(material_string.get_data()); - } - - strings.push_back(fragment_code1.get_data()); - - if (cc) { - code_globals = cc->fragment_globals.ascii(); - strings.push_back(code_globals.get_data()); - } - - strings.push_back(fragment_code2.get_data()); - - if (cc) { - code_string = cc->light.ascii(); - strings.push_back(code_string.get_data()); - } - - strings.push_back(fragment_code3.get_data()); - - if (cc) { - code_string2 = cc->fragment.ascii(); - strings.push_back(code_string2.get_data()); - } - - strings.push_back(fragment_code4.get_data()); - -#ifdef DEBUG_SHADER - DEBUG_PRINT("\nFragment Globals:\n\n" + String(code_globals.get_data())); - DEBUG_PRINT("\nFragment Code:\n\n" + String(code_string2.get_data())); - for (int i = 0; i < strings.size(); i++) { - //print_line("frag strings "+itos(i)+":"+String(strings[i])); - } -#endif - - 
v.frag_id = glCreateShader(GL_FRAGMENT_SHADER); - glShaderSource(v.frag_id, strings.size(), &strings[0], nullptr); - glCompileShader(v.frag_id); - - glGetShaderiv(v.frag_id, GL_COMPILE_STATUS, &status); + glGetShaderiv(p_ids.frag, GL_COMPILE_STATUS, &status); if (status == GL_FALSE) { // error compiling GLsizei iloglen; - glGetShaderiv(v.frag_id, GL_INFO_LOG_LENGTH, &iloglen); + glGetShaderiv(p_ids.frag, GL_INFO_LOG_LENGTH, &iloglen); if (iloglen < 0) { - glDeleteShader(v.frag_id); - glDeleteShader(v.vert_id); - glDeleteProgram(v.id); - v.id = 0; + glDeleteShader(p_ids.frag); + glDeleteShader(p_ids.vert); + glDeleteProgram(p_ids.main); ERR_PRINT("Fragment shader compilation failed with empty log"); } else { if (iloglen == 0) { @@ -405,29 +894,28 @@ ShaderGLES3::Version *ShaderGLES3::get_current_version() { char *ilogmem = (char *)memalloc(iloglen + 1); ilogmem[iloglen] = 0; - glGetShaderInfoLog(v.frag_id, iloglen, &iloglen, ilogmem); + glGetShaderInfoLog(p_ids.frag, iloglen, &iloglen, ilogmem); String err_string = get_shader_name() + ": Fragment Program Compilation Failed:\n"; err_string += ilogmem; - _display_error_with_code(err_string, strings); + _display_error_with_code(err_string, p_ids.frag); ERR_PRINT(err_string.ascii().get_data()); memfree(ilogmem); - glDeleteShader(v.frag_id); - glDeleteShader(v.vert_id); - glDeleteProgram(v.id); - v.id = 0; + glDeleteShader(p_ids.frag); + glDeleteShader(p_ids.vert); + glDeleteProgram(p_ids.main); } - ERR_FAIL_V(nullptr); + return false; } - glAttachShader(v.id, v.frag_id); - glAttachShader(v.id, v.vert_id); + glAttachShader(p_ids.main, p_ids.frag); + glAttachShader(p_ids.main, p_ids.vert); // bind attributes before linking for (int i = 0; i < attribute_pair_count; i++) { - glBindAttribLocation(v.id, attribute_pairs[i].index, attribute_pairs[i].name); + glBindAttribLocation(p_ids.main, attribute_pairs[i].index, attribute_pairs[i].name); } //if feedback exists, set it up @@ -442,25 +930,32 @@ ShaderGLES3::Version 
*ShaderGLES3::get_current_version() { } if (feedback.size()) { - glTransformFeedbackVaryings(v.id, feedback.size(), feedback.ptr(), GL_INTERLEAVED_ATTRIBS); + glTransformFeedbackVaryings(p_ids.main, feedback.size(), feedback.ptr(), GL_INTERLEAVED_ATTRIBS); } } - glLinkProgram(v.id); + if (p_retrievable) { + glProgramParameteri(p_ids.main, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); + } + glLinkProgram(p_ids.main); - glGetProgramiv(v.id, GL_LINK_STATUS, &status); + return true; +} + +bool ShaderGLES3::_complete_link(Version::Ids p_ids, GLenum *r_program_format, PoolByteArray *r_program_binary) const { + GLint status; + glGetProgramiv(p_ids.main, GL_LINK_STATUS, &status); if (status == GL_FALSE) { // error linking GLsizei iloglen; - glGetProgramiv(v.id, GL_INFO_LOG_LENGTH, &iloglen); + glGetProgramiv(p_ids.main, GL_INFO_LOG_LENGTH, &iloglen); if (iloglen < 0) { - glDeleteShader(v.frag_id); - glDeleteShader(v.vert_id); - glDeleteProgram(v.id); - v.id = 0; - ERR_FAIL_COND_V(iloglen < 0, nullptr); + glDeleteShader(p_ids.frag); + glDeleteShader(p_ids.vert); + glDeleteProgram(p_ids.main); + ERR_FAIL_COND_V(iloglen < 0, false); } if (iloglen == 0) { @@ -469,35 +964,41 @@ ShaderGLES3::Version *ShaderGLES3::get_current_version() { char *ilogmem = (char *)Memory::alloc_static(iloglen + 1); ilogmem[iloglen] = 0; - glGetProgramInfoLog(v.id, iloglen, &iloglen, ilogmem); + glGetProgramInfoLog(p_ids.main, iloglen, &iloglen, ilogmem); String err_string = get_shader_name() + ": Program LINK FAILED:\n"; err_string += ilogmem; - _display_error_with_code(err_string, strings); ERR_PRINT(err_string.ascii().get_data()); Memory::free_static(ilogmem); - glDeleteShader(v.frag_id); - glDeleteShader(v.vert_id); - glDeleteProgram(v.id); - v.id = 0; + glDeleteShader(p_ids.frag); + glDeleteShader(p_ids.vert); + glDeleteProgram(p_ids.main); - ERR_FAIL_V(nullptr); + return false; } - /* UNIFORMS */ + if (r_program_binary) { + GLint program_len; + glGetProgramiv(p_ids.main, 
GL_PROGRAM_BINARY_LENGTH, &program_len); + r_program_binary->resize(program_len); + PoolByteArray::Write w = r_program_binary->write(); + glGetProgramBinary(p_ids.main, program_len, NULL, r_program_format, w.ptr()); + } - glUseProgram(v.id); + return true; +} +void ShaderGLES3::_setup_uniforms(CustomCode *p_cc) const { //print_line("uniforms: "); for (int j = 0; j < uniform_count; j++) { - v.uniform_location[j] = glGetUniformLocation(v.id, uniform_names[j]); - //print_line("uniform "+String(uniform_names[j])+" location "+itos(v.uniform_location[j])); + version->uniform_location[j] = glGetUniformLocation(version->ids.main, uniform_names[j]); + //print_line("uniform "+String(uniform_names[j])+" location "+itos(version->uniform_location[j])); } // set texture uniforms for (int i = 0; i < texunit_pair_count; i++) { - GLint loc = glGetUniformLocation(v.id, texunit_pairs[i].name); + GLint loc = glGetUniformLocation(version->ids.main, texunit_pairs[i].name); if (loc >= 0) { if (texunit_pairs[i].index < 0) { glUniform1i(loc, max_image_units + texunit_pairs[i].index); //negative, goes down @@ -509,33 +1010,45 @@ ShaderGLES3::Version *ShaderGLES3::get_current_version() { // assign uniform block bind points for (int i = 0; i < ubo_count; i++) { - GLint loc = glGetUniformBlockIndex(v.id, ubo_pairs[i].name); - if (loc >= 0) { - glUniformBlockBinding(v.id, loc, ubo_pairs[i].index); + GLint loc = glGetUniformBlockIndex(version->ids.main, ubo_pairs[i].name); + if (loc >= 0) + glUniformBlockBinding(version->ids.main, loc, ubo_pairs[i].index); + } + + if (p_cc) { + version->texture_uniform_locations.resize(p_cc->texture_uniforms.size()); + for (int i = 0; i < p_cc->texture_uniforms.size(); i++) { + version->texture_uniform_locations.write[i] = glGetUniformLocation(version->ids.main, String(p_cc->texture_uniforms[i]).ascii().get_data()); + glUniform1i(version->texture_uniform_locations[i], i + base_material_tex_index); } } +} - if (cc) { - 
v.texture_uniform_locations.resize(cc->texture_uniforms.size()); - for (int i = 0; i < cc->texture_uniforms.size(); i++) { - v.texture_uniform_locations.write[i] = glGetUniformLocation(v.id, String(cc->texture_uniforms[i]).ascii().get_data()); - glUniform1i(v.texture_uniform_locations[i], i + base_material_tex_index); +void ShaderGLES3::_dispose_program(Version *p_version) { + if (compile_queue) { + if (p_version->compile_status == Version::COMPILE_STATUS_PROCESSING_AT_QUEUE) { + compile_queue->cancel(p_version->ids.main); } } + glDeleteShader(p_version->ids.vert); + glDeleteShader(p_version->ids.frag); + glDeleteProgram(p_version->ids.main); - glUseProgram(0); - - v.ok = true; - if (cc) { - cc->versions.insert(conditional_version.version); + if (p_version->compiling_list.in_list()) { + p_version->compiling_list.remove_from_list(); + active_compiles_count--; + if (p_version->compile_status == Version::COMPILE_STATUS_COMPILING_VERTEX_AND_FRAGMENT) { + active_compiles_count--; + } + _log_active_compiles(); } - - return &v; + p_version->compile_status = Version::COMPILE_STATUS_ERROR; + ERR_FAIL_COND(active_compiles_count < 0); } GLint ShaderGLES3::get_uniform_location(const String &p_name) const { ERR_FAIL_COND_V(!version, -1); - return glGetUniformLocation(version->id, p_name.ascii().get_data()); + return glGetUniformLocation(version->ids.main, p_name.ascii().get_data()); } void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start) { @@ -640,33 +1153,44 @@ void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_co 
glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &max_image_units); } +void ShaderGLES3::init_async_compilation() { + if (is_async_compilation_supported() && get_ubershader_flags_uniform() != -1) { + // Warm up the ubershader for the case of no custom code + new_conditional_version.code_version = 0; + _bind_ubershader(); + } +} + +bool ShaderGLES3::is_async_compilation_supported() { + return max_simultaneous_compiles > 0 && (compile_queue || parallel_compile_supported); +} + void ShaderGLES3::finish() { const VersionKey *V = nullptr; while ((V = version_map.next(V))) { Version &v = version_map[*V]; - glDeleteShader(v.vert_id); - glDeleteShader(v.frag_id); - glDeleteProgram(v.id); + _dispose_program(&v); memdelete_arr(v.uniform_location); } + ERR_FAIL_COND(versions_compiling.first()); + ERR_FAIL_COND(active_compiles_count != 0); } void ShaderGLES3::clear_caches() { const VersionKey *V = nullptr; while ((V = version_map.next(V))) { Version &v = version_map[*V]; - glDeleteShader(v.vert_id); - glDeleteShader(v.frag_id); - glDeleteProgram(v.id); + _dispose_program(&v); memdelete_arr(v.uniform_location); } + ERR_FAIL_COND(versions_compiling.first()); + ERR_FAIL_COND(active_compiles_count != 0); version_map.clear(); custom_code_map.clear(); version = nullptr; last_custom_code = 1; - uniforms_dirty = true; } uint32_t ShaderGLES3::create_custom_shader() { @@ -675,7 +1199,7 @@ uint32_t ShaderGLES3::create_custom_shader() { return last_custom_code++; } -void ShaderGLES3::set_custom_shader_code(uint32_t p_code_id, const String &p_vertex, const String &p_vertex_globals, const String &p_fragment, const String &p_light, const String &p_fragment_globals, const String &p_uniforms, const Vector &p_texture_uniforms, const Vector &p_custom_defines) { +void ShaderGLES3::set_custom_shader_code(uint32_t p_code_id, const String &p_vertex, const String &p_vertex_globals, const String &p_fragment, const String &p_light, const String &p_fragment_globals, const String &p_uniforms, const Vector 
&p_texture_uniforms, const Vector &p_custom_defines, AsyncMode p_async_mode) { ERR_FAIL_COND(!custom_code_map.has(p_code_id)); CustomCode *cc = &custom_code_map[p_code_id]; @@ -687,7 +1211,14 @@ void ShaderGLES3::set_custom_shader_code(uint32_t p_code_id, const String &p_ver cc->texture_uniforms = p_texture_uniforms; cc->uniforms = p_uniforms; cc->custom_defines = p_custom_defines; + cc->async_mode = p_async_mode; cc->version++; + + if (p_async_mode == ASYNC_MODE_VISIBLE && is_async_compilation_supported() && get_ubershader_flags_uniform() != -1) { + // Warm up the ubershader for this custom code + new_conditional_version.code_version = p_code_id; + _bind_ubershader(); + } } void ShaderGLES3::set_custom_shader(uint32_t p_code_id) { @@ -708,11 +1239,8 @@ void ShaderGLES3::free_custom_shader(uint32_t p_code_id) { ERR_CONTINUE(!version_map.has(key)); Version &v = version_map[key]; - glDeleteShader(v.vert_id); - glDeleteShader(v.frag_id); - glDeleteProgram(v.id); + _dispose_program(&v); memdelete_arr(v.uniform_location); - v.id = 0; version_map.erase(key); } @@ -727,7 +1255,6 @@ void ShaderGLES3::set_base_material_tex_index(int p_idx) { ShaderGLES3::ShaderGLES3() { version = nullptr; last_custom_code = 1; - uniforms_dirty = true; base_material_tex_index = 0; } diff --git a/drivers/gles3/shader_gles3.h b/drivers/gles3/shader_gles3.h index e0f34e889bd..301c058697c 100644 --- a/drivers/gles3/shader_gles3.h +++ b/drivers/gles3/shader_gles3.h @@ -32,8 +32,11 @@ #define SHADER_GLES3_H #include "core/hash_map.h" +#include "core/local_vector.h" #include "core/map.h" #include "core/math/camera_matrix.h" +#include "core/safe_refcount.h" +#include "core/self_list.h" #include "core/variant.h" #include "platform_config.h" @@ -45,6 +48,10 @@ #include +template +class ThreadedCallableQueue; +class ShaderCacheGLES3; + class ShaderGLES3 { protected: struct Enum { @@ -83,7 +90,7 @@ protected: int conditional; }; - bool uniforms_dirty; + virtual int get_ubershader_flags_uniform() const { 
return -1; } private: //@TODO Optimize to a fixed set of shader pools and use a LRU @@ -96,6 +103,13 @@ private: int fragment_code_start; int attribute_pair_count; +public: + enum AsyncMode { + ASYNC_MODE_VISIBLE, + ASYNC_MODE_HIDDEN, + }; + +private: struct CustomCode { String vertex; String vertex_globals; @@ -103,32 +117,35 @@ private: String fragment_globals; String light; String uniforms; + AsyncMode async_mode; uint32_t version; Vector texture_uniforms; Vector custom_defines; Set versions; }; - struct Version { - GLuint id; - GLuint vert_id; - GLuint frag_id; - GLint *uniform_location; - Vector texture_uniform_locations; - uint32_t code_version; - bool ok; - Version() : - id(0), - vert_id(0), - frag_id(0), - uniform_location(nullptr), - code_version(0), - ok(false) {} - }; +public: + static ShaderCacheGLES3 *shader_cache; + static ThreadedCallableQueue *cache_write_queue; - Version *version; + static ThreadedCallableQueue *compile_queue; // Non-null if using queued asynchronous compilation (via secondary context) + static bool parallel_compile_supported; // True if using natively supported asynchronous compilation + + static bool async_hidden_forbidden; + static int *compiles_started_this_frame; + static int max_simultaneous_compiles; +#ifdef DEBUG_ENABLED + static bool log_active_async_compiles_count; +#endif + static uint64_t current_frame; + + static void advance_async_shaders_compilation(); + +private: + static int active_compiles_count; union VersionKey { + static const uint32_t UBERSHADER_FLAG = ((uint32_t)1) << 31; struct { uint32_t version; uint32_t code_version; @@ -136,8 +153,79 @@ private: uint64_t key; bool operator==(const VersionKey &p_key) const { return key == p_key.key; } bool operator<(const VersionKey &p_key) const { return key < p_key.key; } + VersionKey() {} + VersionKey(uint64_t p_key) : + key(p_key) {} + _FORCE_INLINE_ bool is_subject_to_caching() const { return (version & UBERSHADER_FLAG); } }; + struct Version { + VersionKey
version_key; + + // Set by the render thread upfront; the compile thread (for queued async.) reads them + struct Ids { + GLuint main; + GLuint vert; + GLuint frag; + } ids; + + ShaderGLES3 *shader; + uint32_t code_version; + + AsyncMode async_mode; + GLint *uniform_location; + Vector texture_uniform_locations; + bool uniforms_ready; + uint64_t last_frame_processed; + + enum CompileStatus { + COMPILE_STATUS_PENDING, + COMPILE_STATUS_SOURCE_PROVIDED, + COMPILE_STATUS_COMPILING_VERTEX, + COMPILE_STATUS_COMPILING_FRAGMENT, + COMPILE_STATUS_COMPILING_VERTEX_AND_FRAGMENT, + COMPILE_STATUS_PROCESSING_AT_QUEUE, + COMPILE_STATUS_BINARY_READY, + COMPILE_STATUS_BINARY_READY_FROM_CACHE, + COMPILE_STATUS_LINKING, + COMPILE_STATUS_ERROR, + COMPILE_STATUS_RESTART_NEEDED, + COMPILE_STATUS_OK, + }; + CompileStatus compile_status; + SelfList compiling_list; + + struct ProgramBinary { + String cache_hash; + enum Source { + SOURCE_NONE, + SOURCE_LOCAL, // Binary data will only be available if cache enabled + SOURCE_QUEUE, + SOURCE_CACHE, + } source; + // Shared with the compile thread (for queued async.); otherwise render thread only + GLenum format; + PoolByteArray data; + SafeNumeric result_from_queue; + } program_binary; + + Version() : + version_key(0), + ids(), + shader(nullptr), + code_version(0), + async_mode(ASYNC_MODE_VISIBLE), + uniform_location(nullptr), + uniforms_ready(false), + last_frame_processed(UINT64_MAX), + compile_status(COMPILE_STATUS_PENDING), + compiling_list(this), + program_binary() {} + }; + static SelfList::List versions_compiling; + + Version *version; + struct VersionKeyHash { static _FORCE_INLINE_ uint32_t hash(const VersionKey &p_key) { return HashMapHasherDefault::hash(p_key.key); }; }; @@ -176,7 +264,16 @@ private: int base_material_tex_index; - Version *get_current_version(); + Version *get_current_version(bool &r_async_forbidden); + // These will run on the shader compile thread if using the compile queue approach to async.
+ void _set_source(Version::Ids p_ids, const LocalVector &p_vertex_strings, const LocalVector &p_fragment_strings) const; + bool _complete_compile(Version::Ids p_ids, bool p_retrievable) const; + bool _complete_link(Version::Ids p_ids, GLenum *r_program_format = nullptr, PoolByteArray *r_program_binary = nullptr) const; + // --- + static void _log_active_compiles(); + static bool _process_program_state(Version *p_version, bool p_async_forbidden); + void _setup_uniforms(CustomCode *p_cc) const; + void _dispose_program(Version *p_version); static ShaderGLES3 *active; @@ -271,8 +368,8 @@ private: } } - Map uniform_defaults; - Map uniform_cameras; + bool _bind(bool p_binding_fallback); + bool _bind_ubershader(); protected: _FORCE_INLINE_ int _get_uniform(int p_which) const; @@ -293,47 +390,20 @@ public: static _FORCE_INLINE_ ShaderGLES3 *get_active() { return active; }; bool bind(); void unbind(); - void bind_uniforms(); - - inline GLuint get_program() const { return version ? version->id : 0; } void clear_caches(); uint32_t create_custom_shader(); - void set_custom_shader_code(uint32_t p_code_id, const String &p_vertex, const String &p_vertex_globals, const String &p_fragment, const String &p_light, const String &p_fragment_globals, const String &p_uniforms, const Vector &p_texture_uniforms, const Vector &p_custom_defines); + void set_custom_shader_code(uint32_t p_code_id, const String &p_vertex, const String &p_vertex_globals, const String &p_fragment, const String &p_light, const String &p_fragment_globals, const String &p_uniforms, const Vector &p_texture_uniforms, const Vector &p_custom_defines, AsyncMode p_async_mode); void set_custom_shader(uint32_t p_code_id); void free_custom_shader(uint32_t p_code_id); - void set_uniform_default(int p_idx, const Variant &p_value) { - if (p_value.get_type() == Variant::NIL) { - uniform_defaults.erase(p_idx); - } else { - uniform_defaults[p_idx] = p_value; - } - uniforms_dirty = true; - } - uint32_t get_version() const { return 
new_conditional_version.version; } - _FORCE_INLINE_ bool is_version_valid() const { return version && version->ok; } - - void set_uniform_camera(int p_idx, const CameraMatrix &p_mat) { - uniform_cameras[p_idx] = p_mat; - uniforms_dirty = true; - }; - - _FORCE_INLINE_ void set_texture_uniform(int p_idx, const Variant &p_value) { - ERR_FAIL_COND(!version); - ERR_FAIL_INDEX(p_idx, version->texture_uniform_locations.size()); - _set_uniform_variant(version->texture_uniform_locations[p_idx], p_value); - } - - _FORCE_INLINE_ GLint get_texture_uniform_location(int p_idx) { - ERR_FAIL_COND_V(!version, -1); - ERR_FAIL_INDEX_V(p_idx, version->texture_uniform_locations.size(), -1); - return version->texture_uniform_locations[p_idx]; - } + _FORCE_INLINE_ bool is_version_valid() const { return version && version->compile_status == Version::COMPILE_STATUS_OK; } virtual void init() = 0; + void init_async_compilation(); + bool is_async_compilation_supported(); void finish(); void set_base_material_tex_index(int p_idx); diff --git a/drivers/gles3/shaders/particles.glsl b/drivers/gles3/shaders/particles.glsl index 9586d88cf69..570e54e56dc 100644 --- a/drivers/gles3/shaders/particles.glsl +++ b/drivers/gles3/shaders/particles.glsl @@ -1,6 +1,10 @@ /* clang-format off */ [vertex] +#if defined(IS_UBERSHADER) +uniform highp int ubershader_flags; +#endif + layout(location = 0) in highp vec4 color; /* clang-format on */ layout(location = 1) in highp vec4 velocity_active; @@ -70,17 +74,6 @@ uint hash(uint x) { } void main() { -#ifdef PARTICLES_COPY - - out_color = color; - out_velocity_active = velocity_active; - out_custom = custom; - out_xform_1 = xform_1; - out_xform_2 = xform_2; - out_xform_3 = xform_3; - -#else - bool apply_forces = true; bool apply_velocity = true; float local_delta = delta; @@ -109,22 +102,22 @@ void main() { if (restart_phase >= prev_system_phase && restart_phase < system_phase) { restart = true; -#ifdef USE_FRACTIONAL_DELTA +#ifdef USE_FRACTIONAL_DELTA 
//ubershader-runtime local_delta = (system_phase - restart_phase) * lifetime; -#endif +#endif //ubershader-runtime } } else if (delta > 0.0) { if (restart_phase >= prev_system_phase) { restart = true; -#ifdef USE_FRACTIONAL_DELTA +#ifdef USE_FRACTIONAL_DELTA //ubershader-runtime local_delta = (1.0 - restart_phase + system_phase) * lifetime; -#endif +#endif //ubershader-runtime } else if (restart_phase < system_phase) { restart = true; -#ifdef USE_FRACTIONAL_DELTA +#ifdef USE_FRACTIONAL_DELTA //ubershader-runtime local_delta = (system_phase - restart_phase) * lifetime; -#endif +#endif //ubershader-runtime } } @@ -223,13 +216,15 @@ VERTEX_SHADER_CODE out_xform_1 = xform[0]; out_xform_2 = xform[1]; out_xform_3 = xform[2]; - -#endif //PARTICLES_COPY } /* clang-format off */ [fragment] +#if defined(IS_UBERSHADER) +uniform highp int ubershader_flags; +#endif + // any code here is never executed, stuff is filled just so it works #if defined(USE_MATERIAL) diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index c8350b7575d..71f472ec876 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -1,6 +1,10 @@ /* clang-format off */ [vertex] +#if defined(IS_UBERSHADER) +uniform highp int ubershader_flags; +#endif + #define M_PI 3.14159265359 #define SHADER_IS_SRGB false @@ -25,17 +29,17 @@ ARRAY_INDEX=8, layout(location = 0) in highp vec4 vertex_attrib; /* clang-format on */ -#ifdef ENABLE_OCTAHEDRAL_COMPRESSION -layout(location = 1) in vec4 normal_tangent_attrib; -#else +#ifdef ENABLE_OCTAHEDRAL_COMPRESSION //ubershader-skip +layout(location = 2) in vec4 normal_tangent_attrib; +#else //ubershader-skip layout(location = 1) in vec3 normal_attrib; -#endif +#endif //ubershader-skip #if defined(ENABLE_TANGENT_INTERP) || defined(ENABLE_NORMALMAP) || defined(LIGHT_USE_ANISOTROPY) -#ifdef ENABLE_OCTAHEDRAL_COMPRESSION +#ifdef ENABLE_OCTAHEDRAL_COMPRESSION //ubershader-skip // packed into normal_attrib zw component -#else 
-layout(location = 2) in vec4 tangent_attrib; -#endif +#else //ubershader-skip +layout(location = 2) in vec4 normal_tangent_attrib; //ubershader-skip +#endif //ubershader-skip #endif #if defined(ENABLE_COLOR_INTERP) @@ -46,16 +50,20 @@ layout(location = 3) in vec4 color_attrib; layout(location = 4) in vec2 uv_attrib; #endif -#if defined(ENABLE_UV2_INTERP) || defined(USE_LIGHTMAP) +#if defined(ENABLE_UV2_INTERP) layout(location = 5) in vec2 uv2_attrib; +#else +#ifdef USE_LIGHTMAP //ubershader-skip +layout(location = 5) in vec2 uv2_attrib; +#endif //ubershader-skip #endif -#ifdef USE_SKELETON +#ifdef USE_SKELETON //ubershader-skip layout(location = 6) in uvec4 bone_indices; // attrib:6 layout(location = 7) in highp vec4 bone_weights; // attrib:7 -#endif +#endif //ubershader-skip -#ifdef USE_INSTANCING +#ifdef USE_INSTANCING //ubershader-skip layout(location = 8) in highp vec4 instance_xform0; layout(location = 9) in highp vec4 instance_xform1; @@ -66,7 +74,7 @@ layout(location = 11) in lowp vec4 instance_color; layout(location = 12) in highp vec4 instance_custom_data; #endif -#endif +#endif //ubershader-skip layout(std140) uniform SceneData { // ubo:0 @@ -119,11 +127,11 @@ layout(std140) uniform SceneData { // ubo:0 uniform highp mat4 world_transform; -#ifdef USE_LIGHTMAP +#ifdef USE_LIGHTMAP //ubershader-skip uniform highp vec4 lightmap_uv_rect; -#endif +#endif //ubershader-skip -#ifdef USE_LIGHT_DIRECTIONAL +#ifdef USE_LIGHT_DIRECTIONAL //ubershader-skip layout(std140) uniform DirectionalLightData { //ubo:3 @@ -140,9 +148,9 @@ layout(std140) uniform DirectionalLightData { //ubo:3 mediump vec4 shadow_split_offsets; }; -#endif +#endif //ubershader-skip -#ifdef USE_VERTEX_LIGHTING +#ifdef USE_VERTEX_LIGHTING //ubershader-skip //omni and spot struct LightData { @@ -165,7 +173,7 @@ layout(std140) uniform SpotLightData { //ubo:5 LightData spot_lights[MAX_LIGHT_DATA_STRUCTS]; }; -#ifdef USE_FORWARD_LIGHTING +#ifdef USE_FORWARD_LIGHTING //ubershader-skip uniform int 
omni_light_indices[MAX_FORWARD_LIGHTS]; uniform int omni_light_count; @@ -173,7 +181,7 @@ uniform int omni_light_count; uniform int spot_light_indices[MAX_FORWARD_LIGHTS]; uniform int spot_light_count; -#endif +#endif //ubershader-skip out vec4 diffuse_light_interp; out vec4 specular_light_interp; @@ -279,16 +287,16 @@ void light_process_spot(int idx, vec3 vertex, vec3 eye_vec, vec3 normal, float r light_compute(normal, normalize(light_rel_vec), eye_vec, spot_lights[idx].light_color_energy.rgb * light_attenuation, roughness, diffuse, specular); } -#endif +#endif //ubershader-skip -#ifdef ENABLE_OCTAHEDRAL_COMPRESSION +#ifdef ENABLE_OCTAHEDRAL_COMPRESSION //ubershader-skip vec3 oct_to_vec3(vec2 e) { vec3 v = vec3(e.xy, 1.0 - abs(e.x) - abs(e.y)); float t = max(-v.z, 0.0); v.xy += t * -sign(v.xy); return normalize(v); } -#endif +#endif //ubershader-skip /* Varyings */ @@ -303,8 +311,12 @@ out vec4 color_interp; out vec2 uv_interp; #endif -#if defined(ENABLE_UV2_INTERP) || defined(USE_LIGHTMAP) +#if defined(ENABLE_UV2_INTERP) out vec2 uv2_interp; +#else +#ifdef USE_LIGHTMAP //ubershader-skip +out vec2 uv2_interp; +#endif //ubershader-skip #endif #if defined(ENABLE_TANGENT_INTERP) || defined(ENABLE_NORMALMAP) || defined(LIGHT_USE_ANISOTROPY) @@ -330,17 +342,17 @@ VERTEX_SHADER_GLOBALS /* clang-format on */ -#ifdef RENDER_DEPTH_DUAL_PARABOLOID +#ifdef RENDER_DEPTH_DUAL_PARABOLOID //ubershader-skip out highp float dp_clip; -#endif +#endif //ubershader-skip #define SKELETON_TEXTURE_WIDTH 256 -#ifdef USE_SKELETON +#ifdef USE_SKELETON //ubershader-skip uniform highp sampler2D skeleton_texture; // texunit:-1 -#endif +#endif //ubershader-skip out highp vec4 position_interp; @@ -353,35 +365,38 @@ void main() { highp mat4 world_matrix = world_transform; -#ifdef USE_INSTANCING +#ifdef USE_INSTANCING //ubershader-runtime { highp mat4 m = mat4(instance_xform0, instance_xform1, instance_xform2, vec4(0.0, 0.0, 0.0, 1.0)); world_matrix = world_matrix * transpose(m); } -#endif +#endif 
//ubershader-runtime -#ifdef ENABLE_OCTAHEDRAL_COMPRESSION - vec3 normal = oct_to_vec3(normal_tangent_attrib.xy); -#else - vec3 normal = normal_attrib; -#endif + vec3 normal; +#ifdef ENABLE_OCTAHEDRAL_COMPRESSION //ubershader-runtime + normal = oct_to_vec3(normal_tangent_attrib.xy); +#else //ubershader-runtime + normal = normal_attrib; +#endif //ubershader-runtime #if defined(ENABLE_TANGENT_INTERP) || defined(ENABLE_NORMALMAP) || defined(LIGHT_USE_ANISOTROPY) -#ifdef ENABLE_OCTAHEDRAL_COMPRESSION - vec3 tangent = oct_to_vec3(vec2(normal_tangent_attrib.z, abs(normal_tangent_attrib.w) * 2.0 - 1.0)); - float binormalf = sign(normal_tangent_attrib.w); -#else - vec3 tangent = tangent_attrib.xyz; - float binormalf = tangent_attrib.a; -#endif + vec3 tangent; + float binormalf; +#ifdef ENABLE_OCTAHEDRAL_COMPRESSION //ubershader-runtime + tangent = oct_to_vec3(vec2(normal_tangent_attrib.z, abs(normal_tangent_attrib.w) * 2.0 - 1.0)); + binormalf = sign(normal_tangent_attrib.w); +#else //ubershader-runtime + tangent = normal_tangent_attrib.xyz; + binormalf = normal_tangent_attrib.a; +#endif //ubershader-runtime #endif #if defined(ENABLE_COLOR_INTERP) color_interp = color_attrib; -#if defined(USE_INSTANCING) +#ifdef USE_INSTANCING //ubershader-runtime color_interp *= instance_color; -#endif +#endif //ubershader-runtime #endif @@ -393,21 +408,28 @@ void main() { uv_interp = uv_attrib; #endif -#if defined(USE_LIGHTMAP) +#ifdef USE_LIGHTMAP //ubershader-runtime uv2_interp = lightmap_uv_rect.zw * uv2_attrib + lightmap_uv_rect.xy; -#elif defined(ENABLE_UV2_INTERP) +#else //ubershader-runtime +#if defined(ENABLE_UV2_INTERP) uv2_interp = uv2_attrib; #endif +#endif //ubershader-runtime -#ifdef OVERRIDE_POSITION +#if defined(OVERRIDE_POSITION) highp vec4 position; #endif -#if defined(USE_INSTANCING) && defined(ENABLE_INSTANCE_CUSTOM) - vec4 instance_custom = instance_custom_data; + vec4 instance_custom; +#ifdef USE_INSTANCING //ubershader-runtime +#if defined(ENABLE_INSTANCE_CUSTOM) + 
instance_custom = instance_custom_data; #else - vec4 instance_custom = vec4(0.0); + instance_custom = vec4(0.0); #endif +#else //ubershader-runtime + instance_custom = vec4(0.0); +#endif //ubershader-runtime highp mat4 local_projection = projection_matrix; @@ -436,7 +458,7 @@ void main() { #define projection_matrix local_projection #define world_transform world_matrix -#ifdef USE_SKELETON +#ifdef USE_SKELETON //ubershader-runtime { //skeleton transform ivec4 bone_indicesi = ivec4(bone_indices); // cast to signed int @@ -479,7 +501,7 @@ void main() { world_matrix = world_matrix * transpose(m); } -#endif +#endif //ubershader-runtime float point_size = 1.0; @@ -534,9 +556,9 @@ VERTEX_SHADER_CODE binormal_interp = binormal; #endif -#ifdef RENDER_DEPTH +#ifdef RENDER_DEPTH //ubershader-runtime -#ifdef RENDER_DEPTH_DUAL_PARABOLOID +#ifdef RENDER_DEPTH_DUAL_PARABOLOID //ubershader-runtime vertex_interp.z *= shadow_dual_paraboloid_render_side; normal_interp.z *= shadow_dual_paraboloid_render_side; @@ -554,17 +576,17 @@ VERTEX_SHADER_CODE vertex_interp = vtx; -#else +#else //ubershader-runtime float z_ofs = z_offset; z_ofs += (1.0 - abs(normal_interp.z)) * z_slope_scale; vertex_interp.z -= z_ofs; -#endif //RENDER_DEPTH_DUAL_PARABOLOID +#endif //RENDER_DEPTH_DUAL_PARABOLOID //ubershader-runtime -#endif //RENDER_DEPTH +#endif //RENDER_DEPTH //ubershader-runtime -#ifdef OVERRIDE_POSITION +#if defined(OVERRIDE_POSITION) gl_Position = position; #else gl_Position = projection_matrix * vec4(vertex_interp, 1.0); @@ -572,12 +594,12 @@ VERTEX_SHADER_CODE position_interp = gl_Position; -#ifdef USE_VERTEX_LIGHTING +#ifdef USE_VERTEX_LIGHTING //ubershader-runtime diffuse_light_interp = vec4(0.0); specular_light_interp = vec4(0.0); -#ifdef USE_FORWARD_LIGHTING +#ifdef USE_FORWARD_LIGHTING //ubershader-runtime for (int i = 0; i < omni_light_count; i++) { light_process_omni(omni_light_indices[i], vertex_interp, -normalize(vertex_interp), normal_interp, roughness, diffuse_light_interp.rgb, 
specular_light_interp.rgb); @@ -586,9 +608,9 @@ VERTEX_SHADER_CODE for (int i = 0; i < spot_light_count; i++) { light_process_spot(spot_light_indices[i], vertex_interp, -normalize(vertex_interp), normal_interp, roughness, diffuse_light_interp.rgb, specular_light_interp.rgb); } -#endif +#endif //ubershader-runtime -#ifdef USE_LIGHT_DIRECTIONAL +#ifdef USE_LIGHT_DIRECTIONAL //ubershader-runtime vec3 directional_diffuse = vec3(0.0); vec3 directional_specular = vec3(0.0); @@ -614,27 +636,34 @@ VERTEX_SHADER_CODE specular_light_interp.rgb += directional_specular; -#endif //USE_LIGHT_DIRECTIONAL +#endif //USE_LIGHT_DIRECTIONAL //ubershader-runtime -#endif // USE_VERTEX_LIGHTING +#endif // USE_VERTEX_LIGHTING //ubershader-runtime } /* clang-format off */ [fragment] +#if defined(IS_UBERSHADER) +uniform highp int ubershader_flags; +// These are more performant and make the ubershaderification simpler +#define VCT_QUALITY_HIGH +#define USE_LIGHTMAP_FILTER_BICUBIC +#endif /* texture unit usage, N is max_texture_unity-N 1-skeleton 2-radiance -3-reflection_atlas -4-directional_shadow -5-shadow_atlas -6-decal_atlas -7-screen -8-depth -9-probe1 -10-probe2 +3-radiance_array +4-reflection_atlas +5-directional_shadow +6-shadow_atlas +7-irradiance +8-screen +9-depth +10-probe1, lightmap +11-probe2, lightmap_array */ @@ -654,8 +683,12 @@ in vec4 color_interp; in vec2 uv_interp; #endif -#if defined(ENABLE_UV2_INTERP) || defined(USE_LIGHTMAP) +#if defined(ENABLE_UV2_INTERP) in vec2 uv2_interp; +#else +#ifdef USE_LIGHTMAP //ubershader-skip +in vec2 uv2_interp; +#endif //ubershader-skip #endif #if defined(ENABLE_TANGENT_INTERP) || defined(ENABLE_NORMALMAP) || defined(LIGHT_USE_ANISOTROPY) @@ -668,7 +701,7 @@ in vec3 normal_interp; /* PBR CHANNELS */ -#ifdef USE_RADIANCE_MAP +#ifdef USE_RADIANCE_MAP //ubershader-skip layout(std140) uniform Radiance { // ubo:2 @@ -678,13 +711,13 @@ layout(std140) uniform Radiance { // ubo:2 #define RADIANCE_MAX_LOD 5.0 -uniform sampler2D irradiance_map; // 
texunit:-6 +uniform sampler2D irradiance_map; // texunit:-7 -#ifdef USE_RADIANCE_MAP_ARRAY +#ifdef USE_RADIANCE_MAP_ARRAY //ubershader-skip -uniform sampler2DArray radiance_map; // texunit:-2 +uniform sampler2DArray radiance_map_array; // texunit:-3 -vec3 textureDualParaboloid(sampler2DArray p_tex, vec3 p_vec, float p_roughness) { +vec3 textureDualParaboloidArray(sampler2DArray p_tex, vec3 p_vec, float p_roughness) { vec3 norm = normalize(p_vec); norm.xy /= 1.0 + abs(norm.z); norm.xy = norm.xy * vec2(0.5, 0.25) + vec2(0.5, 0.25); @@ -707,7 +740,7 @@ vec3 textureDualParaboloid(sampler2DArray p_tex, vec3 p_vec, float p_roughness) return mix(base, next, float(indexi % 256) / 256.0); } -#else +#else //ubershader-skip uniform sampler2D radiance_map; // texunit:-2 @@ -721,9 +754,9 @@ vec3 textureDualParaboloid(sampler2D p_tex, vec3 p_vec, float p_roughness) { return textureLod(p_tex, norm.xy, p_roughness * RADIANCE_MAX_LOD).xyz; } -#endif +#endif //ubershader-skip -#endif +#endif //ubershader-skip /* Material Uniforms */ @@ -795,7 +828,7 @@ FRAGMENT_SHADER_GLOBALS //directional light data -#ifdef USE_LIGHT_DIRECTIONAL +#ifdef USE_LIGHT_DIRECTIONAL //ubershader-skip layout(std140) uniform DirectionalLightData { highp vec4 light_pos_inv_radius; @@ -811,14 +844,14 @@ layout(std140) uniform DirectionalLightData { mediump vec4 shadow_split_offsets; }; -uniform highp sampler2DShadow directional_shadow; // texunit:-4 +uniform highp sampler2DShadow directional_shadow; // texunit:-5 -#endif +#endif //ubershader-skip -#ifdef USE_VERTEX_LIGHTING +#ifdef USE_VERTEX_LIGHTING //ubershader-skip in vec4 diffuse_light_interp; in vec4 specular_light_interp; -#endif +#endif //ubershader-skip // omni and spot struct LightData { @@ -841,7 +874,7 @@ layout(std140) uniform SpotLightData { // ubo:5 LightData spot_lights[MAX_LIGHT_DATA_STRUCTS]; }; -uniform highp sampler2DShadow shadow_atlas; // texunit:-5 +uniform highp sampler2DShadow shadow_atlas; // texunit:-6 struct ReflectionData { mediump 
vec4 box_extents; @@ -857,9 +890,9 @@ layout(std140) uniform ReflectionProbeData { //ubo:6 ReflectionData reflections[MAX_REFLECTION_DATA_STRUCTS]; }; -uniform mediump sampler2D reflection_atlas; // texunit:-3 +uniform mediump sampler2D reflection_atlas; // texunit:-4 -#ifdef USE_FORWARD_LIGHTING +#ifdef USE_FORWARD_LIGHTING //ubershader-skip uniform int omni_light_indices[MAX_FORWARD_LIGHTS]; uniform int omni_light_count; @@ -870,33 +903,31 @@ uniform int spot_light_count; uniform int reflection_indices[MAX_FORWARD_LIGHTS]; uniform int reflection_count; -#endif +#endif //ubershader-skip #if defined(SCREEN_TEXTURE_USED) -uniform highp sampler2D screen_texture; // texunit:-7 +uniform highp sampler2D screen_texture; // texunit:-8 #endif -#ifdef USE_MULTIPLE_RENDER_TARGETS +layout(location = 0) out vec4 frag_color; -layout(location = 0) out vec4 diffuse_buffer; +#ifdef USE_MULTIPLE_RENDER_TARGETS //ubershader-skip + +#define diffuse_buffer frag_color layout(location = 1) out vec4 specular_buffer; layout(location = 2) out vec4 normal_mr_buffer; #if defined(ENABLE_SSS) layout(location = 3) out float sss_buffer; #endif -#else - -layout(location = 0) out vec4 frag_color; - -#endif +#endif //ubershader-skip in highp vec4 position_interp; -uniform highp sampler2D depth_buffer; // texunit:-8 +uniform highp sampler2D depth_buffer; // texunit:-9 -#ifdef USE_CONTACT_SHADOWS +#ifdef USE_CONTACT_SHADOWS //ubershader-skip float contact_shadow_compute(vec3 pos, vec3 dir, float max_distance) { if (abs(dir.z) > 0.99) @@ -962,7 +993,7 @@ float contact_shadow_compute(vec3 pos, vec3 dir, float max_distance) { return 1.0; } -#endif +#endif //ubershader-skip // This returns the G_GGX function divided by 2 cos_theta_m, where in practice cos_theta_m is either N.L or N.V. 
// We're dividing this factor off because the overall term we'll end up looks like @@ -1222,7 +1253,7 @@ LIGHT_SHADER_CODE #endif } -#ifdef USE_SHADOW_TO_OPACITY +#if defined(USE_SHADOW_TO_OPACITY) alpha = min(alpha, clamp(1.0 - length(attenuation), 0.0, 1.0)); #endif @@ -1230,7 +1261,7 @@ LIGHT_SHADER_CODE } float sample_shadow(highp sampler2DShadow shadow, vec2 shadow_pixel_size, vec2 pos, float depth, vec4 clamp_rect) { -#ifdef SHADOW_MODE_PCF_13 +#ifdef SHADOW_MODE_PCF_13 //ubershader-runtime float avg = textureProj(shadow, vec4(pos, depth, 1.0)); avg += textureProj(shadow, vec4(pos + vec2(shadow_pixel_size.x, 0.0), depth, 1.0)); @@ -1246,9 +1277,9 @@ float sample_shadow(highp sampler2DShadow shadow, vec2 shadow_pixel_size, vec2 p avg += textureProj(shadow, vec4(pos + vec2(0.0, shadow_pixel_size.y * 2.0), depth, 1.0)); avg += textureProj(shadow, vec4(pos + vec2(0.0, -shadow_pixel_size.y * 2.0), depth, 1.0)); return avg * (1.0 / 13.0); -#endif +#endif //ubershader-runtime -#ifdef SHADOW_MODE_PCF_5 +#ifdef SHADOW_MODE_PCF_5 //ubershader-runtime float avg = textureProj(shadow, vec4(pos, depth, 1.0)); avg += textureProj(shadow, vec4(pos + vec2(shadow_pixel_size.x, 0.0), depth, 1.0)); @@ -1257,20 +1288,22 @@ float sample_shadow(highp sampler2DShadow shadow, vec2 shadow_pixel_size, vec2 p avg += textureProj(shadow, vec4(pos + vec2(0.0, -shadow_pixel_size.y), depth, 1.0)); return avg * (1.0 / 5.0); -#endif +#endif //ubershader-runtime -#if !defined(SHADOW_MODE_PCF_5) || !defined(SHADOW_MODE_PCF_13) +#ifndef SHADOW_MODE_PCF_5 //ubershader-runtime +#ifndef SHADOW_MODE_PCF_13 //ubershader-runtime return textureProj(shadow, vec4(pos, depth, 1.0)); -#endif +#endif //ubershader-runtime +#endif //ubershader-runtime } -#ifdef RENDER_DEPTH_DUAL_PARABOLOID +#ifdef RENDER_DEPTH_DUAL_PARABOLOID //ubershader-skip in highp float dp_clip; -#endif +#endif //ubershader-skip #ifdef USE_PHYSICAL_LIGHT_ATTENUATION float get_omni_attenuation(float distance, float inv_range, float decay) { 
@@ -1300,7 +1333,7 @@ void light_process_omni(int idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 bi vec3 light_attenuation = vec3(omni_attenuation); #if !defined(SHADOWS_DISABLED) -#ifdef USE_SHADOW +#ifdef USE_SHADOW //ubershader-runtime if (omni_lights[idx].light_params.w > 0.5) { // there is a shadowmap @@ -1333,16 +1366,16 @@ void light_process_omni(int idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 bi splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; float shadow = sample_shadow(shadow_atlas, shadow_atlas_pixel_size, splane.xy, splane.z, clamp_rect); -#ifdef USE_CONTACT_SHADOWS +#ifdef USE_CONTACT_SHADOWS //ubershader-runtime if (shadow > 0.01 && omni_lights[idx].shadow_color_contact.a > 0.0) { float contact_shadow = contact_shadow_compute(vertex, normalize(light_rel_vec), min(light_length, omni_lights[idx].shadow_color_contact.a)); shadow = min(shadow, contact_shadow); } -#endif +#endif //ubershader-runtime light_attenuation *= mix(omni_lights[idx].shadow_color_contact.rgb, vec3(1.0), shadow); } -#endif //USE_SHADOW +#endif //USE_SHADOW //ubershader-runtime #endif //SHADOWS_DISABLED light_compute(normal, normalize(light_rel_vec), eye_vec, binormal, tangent, omni_lights[idx].light_color_energy.rgb, light_attenuation, albedo, transmission, omni_lights[idx].light_params.z * p_blob_intensity, roughness, metallic, specular, rim * omni_attenuation, rim_tint, clearcoat, clearcoat_gloss, anisotropy, diffuse_light, specular_light, alpha); } @@ -1369,7 +1402,7 @@ void light_process_spot(int idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 bi vec3 light_attenuation = vec3(spot_attenuation); #if !defined(SHADOWS_DISABLED) -#ifdef USE_SHADOW +#ifdef USE_SHADOW //ubershader-runtime if (spot_lights[idx].light_params.w > 0.5) { //there is a shadowmap highp vec4 splane = (spot_lights[idx].shadow_matrix * vec4(vertex, 1.0)); @@ -1377,15 +1410,15 @@ void light_process_spot(int idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 bi float shadow = 
sample_shadow(shadow_atlas, shadow_atlas_pixel_size, splane.xy, splane.z, spot_lights[idx].light_clamp); -#ifdef USE_CONTACT_SHADOWS +#ifdef USE_CONTACT_SHADOWS //ubershader-runtime if (shadow > 0.01 && spot_lights[idx].shadow_color_contact.a > 0.0) { float contact_shadow = contact_shadow_compute(vertex, normalize(light_rel_vec), min(light_length, spot_lights[idx].shadow_color_contact.a)); shadow = min(shadow, contact_shadow); } -#endif +#endif //ubershader-runtime light_attenuation *= mix(spot_lights[idx].shadow_color_contact.rgb, vec3(1.0), shadow); } -#endif //USE_SHADOW +#endif //USE_SHADOW //ubershader-runtime #endif //SHADOWS_DISABLED light_compute(normal, normalize(light_rel_vec), eye_vec, binormal, tangent, spot_lights[idx].light_color_energy.rgb, light_attenuation, albedo, transmission, spot_lights[idx].light_params.z * p_blob_intensity, roughness, metallic, specular, rim * spot_attenuation, rim_tint, clearcoat, clearcoat_gloss, anisotropy, diffuse_light, specular_light, alpha); @@ -1447,7 +1480,8 @@ void reflection_process(int idx, vec3 vertex, vec3 normal, vec3 binormal, vec3 t reflection_accum += reflection; } -#if !defined(USE_LIGHTMAP) && !defined(USE_LIGHTMAP_CAPTURE) +#ifndef USE_LIGHTMAP //ubershader-runtime +#ifndef USE_LIGHTMAP_CAPTURE //ubershader-runtime if (reflections[idx].ambient.a > 0.0) { //compute ambient using skybox vec3 local_amb_vec = (reflections[idx].local_matrix * vec4(normal, 0.0)).xyz; @@ -1490,16 +1524,17 @@ void reflection_process(int idx, vec3 vertex, vec3 normal, vec3 binormal, vec3 t ambient_out.rgb *= ambient_out.a; ambient_accum += ambient_out; } -#endif +#endif //ubershader-runtime +#endif //ubershader-runtime } -#ifdef USE_LIGHTMAP -#ifdef USE_LIGHTMAP_LAYERED -uniform mediump sampler2DArray lightmap; //texunit:-9 +#ifdef USE_LIGHTMAP //ubershader-skip +#ifdef USE_LIGHTMAP_LAYERED //ubershader-skip +uniform mediump sampler2DArray lightmap_array; //texunit:-11 uniform int lightmap_layer; -#else -uniform mediump sampler2D 
lightmap; //texunit:-9 -#endif +#else //ubershader-skip +uniform mediump sampler2D lightmap; //texunit:-10 +#endif //ubershader-skip uniform mediump float lightmap_energy; @@ -1597,15 +1632,15 @@ vec4 textureArray_bicubic(sampler2DArray tex, vec3 uv) { #define LIGHTMAP_TEXTURE_LAYERED_SAMPLE(m_tex, m_uv) texture(m_tex, m_uv) #endif //USE_LIGHTMAP_FILTER_BICUBIC -#endif +#endif //ubershader-skip -#ifdef USE_LIGHTMAP_CAPTURE +#ifdef USE_LIGHTMAP_CAPTURE //ubershader-skip uniform mediump vec4[12] lightmap_captures; -#endif +#endif //ubershader-skip -#ifdef USE_GI_PROBES +#ifdef USE_GI_PROBES //ubershader-skip -uniform mediump sampler3D gi_probe1; //texunit:-9 +uniform mediump sampler3D gi_probe1; //texunit:-10 uniform highp mat4 gi_probe_xform1; uniform highp vec3 gi_probe_bounds1; uniform highp vec3 gi_probe_cell_size1; @@ -1614,7 +1649,7 @@ uniform highp float gi_probe_bias1; uniform highp float gi_probe_normal_bias1; uniform bool gi_probe_blend_ambient1; -uniform mediump sampler3D gi_probe2; //texunit:-10 +uniform mediump sampler3D gi_probe2; //texunit:-11 uniform highp mat4 gi_probe_xform2; uniform highp vec3 gi_probe_bounds2; uniform highp vec3 gi_probe_cell_size2; @@ -1759,14 +1794,14 @@ void gi_probes_compute(vec3 pos, vec3 normal, float roughness, inout vec3 out_sp out_ambient += diff_accum.rgb; } -#endif +#endif //ubershader-skip void main() { -#ifdef RENDER_DEPTH_DUAL_PARABOLOID +#ifdef RENDER_DEPTH_DUAL_PARABOLOID //ubershader-runtime if (dp_clip > 0.0) discard; -#endif +#endif //ubershader-runtime //lay out everything, whathever is unused is optimized away anyway highp vec3 vertex = vertex_interp; @@ -1814,8 +1849,12 @@ void main() { vec2 uv = uv_interp; #endif -#if defined(ENABLE_UV2_INTERP) || defined(USE_LIGHTMAP) +#if defined(ENABLE_UV2_INTERP) vec2 uv2 = uv2_interp; +#else +#ifdef USE_LIGHTMAP //ubershader-skip + vec2 uv2 = uv2_interp; +#endif //ubershader-skip #endif #if defined(ENABLE_COLOR_INTERP) @@ -1853,13 +1892,13 @@ FRAGMENT_SHADER_CODE } 
#endif // ALPHA_SCISSOR_USED -#ifdef USE_OPAQUE_PREPASS +#ifdef USE_OPAQUE_PREPASS //ubershader-runtime if (alpha < opaque_prepass_threshold) { discard; } -#endif // USE_OPAQUE_PREPASS +#endif // USE_OPAQUE_PREPASS //ubershader-runtime #endif // !USE_SHADOW_TO_OPACITY @@ -1884,27 +1923,22 @@ FRAGMENT_SHADER_CODE #endif -#ifdef ENABLE_CLIP_ALPHA - if (albedo.a < 0.99) { - //used for doublepass and shadowmapping - discard; - } -#endif - /////////////////////// LIGHTING ////////////////////////////// //apply energy conservation -#ifdef USE_VERTEX_LIGHTING + vec3 specular_light; + vec3 diffuse_light; +#ifdef USE_VERTEX_LIGHTING //ubershader-runtime - vec3 specular_light = specular_light_interp.rgb; - vec3 diffuse_light = diffuse_light_interp.rgb; -#else + specular_light = specular_light_interp.rgb; + diffuse_light = diffuse_light_interp.rgb; +#else //ubershader-runtime - vec3 specular_light = vec3(0.0, 0.0, 0.0); - vec3 diffuse_light = vec3(0.0, 0.0, 0.0); + specular_light = vec3(0.0, 0.0, 0.0); + diffuse_light = vec3(0.0, 0.0, 0.0); -#endif +#endif //ubershader-runtime vec3 ambient_light; vec3 env_reflection_light = vec3(0.0, 0.0, 0.0); @@ -1916,9 +1950,9 @@ FRAGMENT_SHADER_CODE vec3 f0 = F0(metallic, specular, albedo); vec3 F = f0 + (max(vec3(1.0 - roughness), f0) - f0) * pow(1.0 - ndotv, 5.0); -#ifdef USE_RADIANCE_MAP +#ifdef USE_RADIANCE_MAP //ubershader-runtime -#ifdef AMBIENT_LIGHT_DISABLED +#if defined(AMBIENT_LIGHT_DISABLED) ambient_light = vec3(0.0, 0.0, 0.0); #else { @@ -1927,12 +1961,17 @@ FRAGMENT_SHADER_CODE vec3 ref_vec = reflect(-eye_vec, normal); float horizon = min(1.0 + dot(ref_vec, normal), 1.0); ref_vec = normalize((radiance_inverse_xform * vec4(ref_vec, 0.0)).xyz); - vec3 radiance = textureDualParaboloid(radiance_map, ref_vec, roughness) * bg_energy; + vec3 radiance; +#ifdef USE_RADIANCE_MAP_ARRAY //ubershader-runtime + radiance = textureDualParaboloidArray(radiance_map_array, ref_vec, roughness) * bg_energy; +#else //ubershader-runtime + radiance 
= textureDualParaboloid(radiance_map, ref_vec, roughness) * bg_energy; +#endif //ubershader-runtime env_reflection_light = radiance; env_reflection_light *= horizon * horizon; } } -#ifndef USE_LIGHTMAP +#ifndef USE_LIGHTMAP //ubershader-runtime { vec3 norm = normal; norm = normalize((radiance_inverse_xform * vec4(norm, 0.0)).xyz); @@ -1947,19 +1986,19 @@ FRAGMENT_SHADER_CODE ambient_light = mix(ambient_light_color.rgb, env_ambient, radiance_ambient_contribution); } -#endif +#endif //ubershader-runtime #endif //AMBIENT_LIGHT_DISABLED -#else +#else //ubershader-runtime -#ifdef AMBIENT_LIGHT_DISABLED +#if defined(AMBIENT_LIGHT_DISABLED) ambient_light = vec3(0.0, 0.0, 0.0); #else ambient_light = ambient_light_color.rgb; env_reflection_light = bg_color.rgb * bg_energy; #endif //AMBIENT_LIGHT_DISABLED -#endif +#endif //ubershader-runtime ambient_light *= ambient_energy; @@ -1969,20 +2008,20 @@ FRAGMENT_SHADER_CODE specular_blob_intensity *= specular * 2.0; #endif -#ifdef USE_GI_PROBES +#ifdef USE_GI_PROBES //ubershader-runtime gi_probes_compute(vertex, normal, roughness, env_reflection_light, ambient_light); -#endif +#endif //ubershader-runtime -#ifdef USE_LIGHTMAP -#ifdef USE_LIGHTMAP_LAYERED - ambient_light = LIGHTMAP_TEXTURE_LAYERED_SAMPLE(lightmap, vec3(uv2, float(lightmap_layer))).rgb * lightmap_energy; -#else +#ifdef USE_LIGHTMAP //ubershader-runtime +#ifdef USE_LIGHTMAP_LAYERED //ubershader-runtime + ambient_light = LIGHTMAP_TEXTURE_LAYERED_SAMPLE(lightmap_array, vec3(uv2, float(lightmap_layer))).rgb * lightmap_energy; +#else //ubershader-runtime ambient_light = LIGHTMAP_TEXTURE_SAMPLE(lightmap, uv2).rgb * lightmap_energy; -#endif -#endif +#endif //ubershader-runtime +#endif //ubershader-runtime -#ifdef USE_LIGHTMAP_CAPTURE +#ifdef USE_LIGHTMAP_CAPTURE //ubershader-runtime { vec3 cone_dirs[12] = vec3[]( vec3(0.0, 0.0, 1.0), @@ -2016,9 +2055,9 @@ FRAGMENT_SHADER_CODE ambient_light = captured.rgb; } } -#endif +#endif //ubershader-runtime -#ifdef USE_FORWARD_LIGHTING 
+#ifdef USE_FORWARD_LIGHTING //ubershader-runtime highp vec4 reflection_accum = vec4(0.0, 0.0, 0.0, 0.0); highp vec4 ambient_accum = vec4(0.0, 0.0, 0.0, 0.0); @@ -2031,12 +2070,15 @@ FRAGMENT_SHADER_CODE } else { specular_light += env_reflection_light; } -#if !defined(USE_LIGHTMAP) && !defined(USE_LIGHTMAP_CAPTURE) +#ifndef USE_LIGHTMAP //ubershader-runtime +#ifndef USE_LIGHTMAP_CAPTURE //ubershader-runtime if (ambient_accum.a > 0.0) { ambient_light = ambient_accum.rgb / ambient_accum.a; } -#endif -#endif +#endif //ubershader-runtime +#endif //ubershader-runtime + +#endif //ubershader-runtime { #if defined(DIFFUSE_TOON) @@ -2057,152 +2099,157 @@ FRAGMENT_SHADER_CODE #endif } -#if defined(USE_LIGHT_DIRECTIONAL) +#ifdef USE_LIGHT_DIRECTIONAL //ubershader-runtime vec3 light_attenuation = vec3(1.0); float depth_z = -vertex.z; -#ifdef LIGHT_DIRECTIONAL_SHADOW +#ifdef LIGHT_DIRECTIONAL_SHADOW //ubershader-runtime #if !defined(SHADOWS_DISABLED) -#ifdef LIGHT_USE_PSSM4 - if (depth_z < shadow_split_offsets.w) { -#elif defined(LIGHT_USE_PSSM2) - if (depth_z < shadow_split_offsets.y) { -#else - if (depth_z < shadow_split_offsets.x) { -#endif //LIGHT_USE_PSSM4 - + float value; +#ifdef LIGHT_USE_PSSM4 //ubershader-runtime + value = shadow_split_offsets.w; +#else //ubershader-runtime +#ifdef LIGHT_USE_PSSM2 //ubershader-runtime + value = shadow_split_offsets.y; +#else //ubershader-runtime + value = shadow_split_offsets.x; +#endif //ubershader-runtime +#endif //LIGHT_USE_PSSM4 //ubershader-runtime + if (depth_z < value) { vec3 pssm_coord; float pssm_fade = 0.0; -#ifdef LIGHT_USE_PSSM_BLEND +#ifdef LIGHT_USE_PSSM_BLEND //ubershader-skip float pssm_blend; vec3 pssm_coord2; bool use_blend = true; -#endif +#endif //ubershader-skip -#ifdef LIGHT_USE_PSSM4 +#ifdef LIGHT_USE_PSSM4 //ubershader-runtime if (depth_z < shadow_split_offsets.y) { if (depth_z < shadow_split_offsets.x) { highp vec4 splane = (shadow_matrix1 * vec4(vertex, 1.0)); pssm_coord = splane.xyz / splane.w; -#if 
defined(LIGHT_USE_PSSM_BLEND) +#ifdef LIGHT_USE_PSSM_BLEND //ubershader-runtime splane = (shadow_matrix2 * vec4(vertex, 1.0)); pssm_coord2 = splane.xyz / splane.w; pssm_blend = smoothstep(0.0, shadow_split_offsets.x, depth_z); -#endif +#endif //ubershader-runtime } else { highp vec4 splane = (shadow_matrix2 * vec4(vertex, 1.0)); pssm_coord = splane.xyz / splane.w; -#if defined(LIGHT_USE_PSSM_BLEND) +#ifdef LIGHT_USE_PSSM_BLEND //ubershader-runtime splane = (shadow_matrix3 * vec4(vertex, 1.0)); pssm_coord2 = splane.xyz / splane.w; pssm_blend = smoothstep(shadow_split_offsets.x, shadow_split_offsets.y, depth_z); -#endif +#endif //ubershader-runtime } } else { if (depth_z < shadow_split_offsets.z) { highp vec4 splane = (shadow_matrix3 * vec4(vertex, 1.0)); pssm_coord = splane.xyz / splane.w; -#if defined(LIGHT_USE_PSSM_BLEND) +#ifdef LIGHT_USE_PSSM_BLEND //ubershader-runtime splane = (shadow_matrix4 * vec4(vertex, 1.0)); pssm_coord2 = splane.xyz / splane.w; pssm_blend = smoothstep(shadow_split_offsets.y, shadow_split_offsets.z, depth_z); -#endif +#endif //ubershader-runtime } else { highp vec4 splane = (shadow_matrix4 * vec4(vertex, 1.0)); pssm_coord = splane.xyz / splane.w; pssm_fade = smoothstep(shadow_split_offsets.z, shadow_split_offsets.w, depth_z); -#if defined(LIGHT_USE_PSSM_BLEND) +#ifdef LIGHT_USE_PSSM_BLEND //ubershader-runtime use_blend = false; -#endif +#endif //ubershader-runtime } } -#endif //LIGHT_USE_PSSM4 +#endif //LIGHT_USE_PSSM4 //ubershader-runtime -#ifdef LIGHT_USE_PSSM2 +#ifdef LIGHT_USE_PSSM2 //ubershader-runtime if (depth_z < shadow_split_offsets.x) { highp vec4 splane = (shadow_matrix1 * vec4(vertex, 1.0)); pssm_coord = splane.xyz / splane.w; -#if defined(LIGHT_USE_PSSM_BLEND) +#ifdef LIGHT_USE_PSSM_BLEND //ubershader-runtime splane = (shadow_matrix2 * vec4(vertex, 1.0)); pssm_coord2 = splane.xyz / splane.w; pssm_blend = smoothstep(0.0, shadow_split_offsets.x, depth_z); -#endif +#endif //ubershader-runtime } else { highp vec4 splane = 
(shadow_matrix2 * vec4(vertex, 1.0)); pssm_coord = splane.xyz / splane.w; pssm_fade = smoothstep(shadow_split_offsets.x, shadow_split_offsets.y, depth_z); -#if defined(LIGHT_USE_PSSM_BLEND) +#ifdef LIGHT_USE_PSSM_BLEND //ubershader-runtime use_blend = false; -#endif +#endif //ubershader-runtime } -#endif //LIGHT_USE_PSSM2 +#endif //LIGHT_USE_PSSM2 //ubershader-runtime -#if !defined(LIGHT_USE_PSSM4) && !defined(LIGHT_USE_PSSM2) +#ifndef LIGHT_USE_PSSM2 //ubershader-runtime +#ifndef LIGHT_USE_PSSM4 //ubershader-runtime { //regular orthogonal highp vec4 splane = (shadow_matrix1 * vec4(vertex, 1.0)); pssm_coord = splane.xyz / splane.w; } -#endif +#endif //ubershader-runtime +#endif //ubershader-runtime //one one sample float shadow = sample_shadow(directional_shadow, directional_shadow_pixel_size, pssm_coord.xy, pssm_coord.z, light_clamp); -#if defined(LIGHT_USE_PSSM_BLEND) +#ifdef LIGHT_USE_PSSM_BLEND //ubershader-runtime if (use_blend) { shadow = mix(shadow, sample_shadow(directional_shadow, directional_shadow_pixel_size, pssm_coord2.xy, pssm_coord2.z, light_clamp), pssm_blend); } -#endif +#endif //ubershader-runtime -#ifdef USE_CONTACT_SHADOWS +#ifdef USE_CONTACT_SHADOWS //ubershader-runtime if (shadow > 0.01 && shadow_color_contact.a > 0.0) { float contact_shadow = contact_shadow_compute(vertex, -light_direction_attenuation.xyz, shadow_color_contact.a); shadow = min(shadow, contact_shadow); } -#endif +#endif //ubershader-runtime light_attenuation = mix(mix(shadow_color_contact.rgb, vec3(1.0), shadow), vec3(1.0), pssm_fade); } #endif // !defined(SHADOWS_DISABLED) -#endif //LIGHT_DIRECTIONAL_SHADOW +#endif //LIGHT_DIRECTIONAL_SHADOW //ubershader-runtime -#ifdef USE_VERTEX_LIGHTING +#ifdef USE_VERTEX_LIGHTING //ubershader-runtime diffuse_light *= mix(vec3(1.0), light_attenuation, diffuse_light_interp.a); specular_light *= mix(vec3(1.0), light_attenuation, specular_light_interp.a); -#else +#else //ubershader-runtime light_compute(normal, 
-light_direction_attenuation.xyz, eye_vec, binormal, tangent, light_color_energy.rgb, light_attenuation, albedo, transmission, light_params.z * specular_blob_intensity, roughness, metallic, specular, rim, rim_tint, clearcoat, clearcoat_gloss, anisotropy, diffuse_light, specular_light, alpha); -#endif +#endif //ubershader-runtime -#endif //#USE_LIGHT_DIRECTIONAL +#endif //#USE_LIGHT_DIRECTIONAL //ubershader-runtime -#ifdef USE_VERTEX_LIGHTING +#ifdef USE_VERTEX_LIGHTING //ubershader-runtime diffuse_light *= albedo; -#endif +#endif //ubershader-runtime -#ifdef USE_FORWARD_LIGHTING +#ifdef USE_FORWARD_LIGHTING //ubershader-runtime -#ifndef USE_VERTEX_LIGHTING +#ifndef USE_VERTEX_LIGHTING //ubershader-runtime for (int i = 0; i < omni_light_count; i++) { light_process_omni(omni_light_indices[i], vertex, eye_vec, normal, binormal, tangent, albedo, transmission, roughness, metallic, specular, rim, rim_tint, clearcoat, clearcoat_gloss, anisotropy, specular_blob_intensity, diffuse_light, specular_light, alpha); @@ -2212,11 +2259,11 @@ FRAGMENT_SHADER_CODE light_process_spot(spot_light_indices[i], vertex, eye_vec, normal, binormal, tangent, albedo, transmission, roughness, metallic, specular, rim, rim_tint, clearcoat, clearcoat_gloss, anisotropy, specular_blob_intensity, diffuse_light, specular_light, alpha); } -#endif //USE_VERTEX_LIGHTING +#endif //USE_VERTEX_LIGHTING //ubershader-runtime -#endif +#endif //ubershader-runtime -#ifdef USE_SHADOW_TO_OPACITY +#if defined(USE_SHADOW_TO_OPACITY) alpha = min(alpha, clamp(length(ambient_light), 0.0, 1.0)); #if defined(ALPHA_SCISSOR_USED) @@ -2225,19 +2272,18 @@ FRAGMENT_SHADER_CODE } #endif // ALPHA_SCISSOR_USED -#ifdef USE_OPAQUE_PREPASS - +#ifdef USE_OPAQUE_PREPASS //ubershader-runtime if (alpha < opaque_prepass_threshold) { discard; } -#endif // USE_OPAQUE_PREPASS +#endif // USE_OPAQUE_PREPASS //ubershader-runtime #endif // USE_SHADOW_TO_OPACITY -#ifdef RENDER_DEPTH +#ifdef RENDER_DEPTH //ubershader-runtime //nothing happens, 
so a tree-ssa optimizer will result in no fragment shader :) -#else +#else //ubershader-runtime specular_light *= reflection_multiplier; ambient_light *= albedo; //ambient must be multiplied by albedo at the end @@ -2256,13 +2302,14 @@ FRAGMENT_SHADER_CODE if (fog_color_enabled.a > 0.5) { float fog_amount = 0.0; -#ifdef USE_LIGHT_DIRECTIONAL + vec3 fog_color; +#ifdef USE_LIGHT_DIRECTIONAL //ubershader-runtime - vec3 fog_color = mix(fog_color_enabled.rgb, fog_sun_color_amount.rgb, fog_sun_color_amount.a * pow(max(dot(normalize(vertex), -light_direction_attenuation.xyz), 0.0), 8.0)); -#else + fog_color = mix(fog_color_enabled.rgb, fog_sun_color_amount.rgb, fog_sun_color_amount.a * pow(max(dot(normalize(vertex), -light_direction_attenuation.xyz), 0.0), 8.0)); +#else //ubershader-runtime - vec3 fog_color = fog_color_enabled.rgb; -#endif + fog_color = fog_color_enabled.rgb; +#endif //ubershader-runtime //apply fog @@ -2292,13 +2339,13 @@ FRAGMENT_SHADER_CODE diffuse_light *= rev_amount; } -#ifdef USE_MULTIPLE_RENDER_TARGETS +#ifdef USE_MULTIPLE_RENDER_TARGETS //ubershader-runtime -#ifdef SHADELESS +#ifdef SHADELESS //ubershader-runtime diffuse_buffer = vec4(albedo.rgb, 0.0); specular_buffer = vec4(0.0); -#else +#else //ubershader-runtime //approximate ambient scale for SSAO, since we will lack full ambient float max_emission = max(emission.r, max(emission.g, emission.b)); @@ -2319,7 +2366,7 @@ FRAGMENT_SHADER_CODE #ifdef USE_FORWARD_LIGHTING diffuse_buffer.rgb += emission; #endif -#endif //SHADELESS +#endif //SHADELESS //ubershader-runtime normal_mr_buffer = vec4(normalize(normal) * 0.5 + 0.5, roughness); @@ -2327,18 +2374,18 @@ FRAGMENT_SHADER_CODE sss_buffer = sss_strength; #endif -#else //USE_MULTIPLE_RENDER_TARGETS +#else //USE_MULTIPLE_RENDER_TARGETS //ubershader-runtime -#ifdef SHADELESS +#ifdef SHADELESS //ubershader-runtime frag_color = vec4(albedo, alpha); -#else +#else //ubershader-runtime frag_color = vec4(ambient_light + diffuse_light + specular_light, 
alpha); #ifdef USE_FORWARD_LIGHTING frag_color.rgb += emission; #endif -#endif //SHADELESS +#endif //SHADELESS //ubershader-runtime -#endif //USE_MULTIPLE_RENDER_TARGETS +#endif //USE_MULTIPLE_RENDER_TARGETS //ubershader-runtime -#endif //RENDER_DEPTH +#endif //RENDER_DEPTH //ubershader-runtime } diff --git a/editor/editor_export.cpp b/editor/editor_export.cpp index 4f38e953bdc..bc8a9bff251 100644 --- a/editor/editor_export.cpp +++ b/editor/editor_export.cpp @@ -268,6 +268,10 @@ void EditorExportPlatform::gen_debug_flags(Vector &r_flags, int p_flags) if (p_flags & DEBUG_FLAG_VIEW_NAVIGATION) { r_flags.push_back("--debug-navigation"); } + + if (p_flags & DEBUG_FLAG_SHADER_FALLBACKS) { + r_flags.push_back("--debug-shader-fallbacks"); + } } Error EditorExportPlatform::_save_pack_file(void *p_userdata, const String &p_path, const Vector &p_data, int p_file, int p_total) { diff --git a/editor/editor_export.h b/editor/editor_export.h index 0c0f62effe8..2662e29d085 100644 --- a/editor/editor_export.h +++ b/editor/editor_export.h @@ -246,6 +246,7 @@ public: DEBUG_FLAG_REMOTE_DEBUG_LOCALHOST = 4, DEBUG_FLAG_VIEW_COLLISONS = 8, DEBUG_FLAG_VIEW_NAVIGATION = 16, + DEBUG_FLAG_SHADER_FALLBACKS = 32, }; virtual Error run(const Ref &p_preset, int p_device, int p_debug_flags) { return OK; } diff --git a/editor/editor_node.cpp b/editor/editor_node.cpp index 7ad39767950..b9125ecd181 100644 --- a/editor/editor_node.cpp +++ b/editor/editor_node.cpp @@ -2748,6 +2748,13 @@ void EditorNode::_menu_option_confirm(int p_option, bool p_confirmed) { EditorSettings::get_singleton()->set_project_metadata("debug_options", "run_debug_navigation", !ischecked); } break; + case RUN_DEBUG_SHADER_FALLBACKS: { + bool ischecked = debug_menu->get_popup()->is_item_checked(debug_menu->get_popup()->get_item_index(RUN_DEBUG_SHADER_FALLBACKS)); + debug_menu->get_popup()->set_item_checked(debug_menu->get_popup()->get_item_index(RUN_DEBUG_SHADER_FALLBACKS), !ischecked); + 
run_native->set_debug_shader_fallbacks(!ischecked); + editor_run.set_debug_shader_fallbacks(!ischecked); + EditorSettings::get_singleton()->set_project_metadata("debug_options", "run_debug_shader_fallbacks", !ischecked); + } break; case RUN_RELOAD_SCRIPTS: { bool ischecked = debug_menu->get_popup()->is_item_checked(debug_menu->get_popup()->get_item_index(RUN_RELOAD_SCRIPTS)); debug_menu->get_popup()->set_item_checked(debug_menu->get_popup()->get_item_index(RUN_RELOAD_SCRIPTS), !ischecked); @@ -3004,6 +3011,7 @@ void EditorNode::_update_debug_options() { bool check_file_server = EditorSettings::get_singleton()->get_project_metadata("debug_options", "run_file_server", false); bool check_debug_collisons = EditorSettings::get_singleton()->get_project_metadata("debug_options", "run_debug_collisons", false); bool check_debug_navigation = EditorSettings::get_singleton()->get_project_metadata("debug_options", "run_debug_navigation", false); + bool check_debug_shader_fallbacks = EditorSettings::get_singleton()->get_project_metadata("debug_options", "run_debug_shader_fallbacks", false); bool check_live_debug = EditorSettings::get_singleton()->get_project_metadata("debug_options", "run_live_debug", true); bool check_reload_scripts = EditorSettings::get_singleton()->get_project_metadata("debug_options", "run_reload_scripts", true); @@ -3019,6 +3027,9 @@ void EditorNode::_update_debug_options() { if (check_debug_navigation) { _menu_option_confirm(RUN_DEBUG_NAVIGATION, true); } + if (check_debug_shader_fallbacks) { + _menu_option_confirm(RUN_DEBUG_SHADER_FALLBACKS, true); + } if (check_live_debug) { _menu_option_confirm(RUN_LIVE_DEBUG, true); } @@ -6342,32 +6353,50 @@ EditorNode::EditorNode() { p = debug_menu->get_popup(); p->set_hide_on_window_lose_focus(true); p->set_hide_on_checkable_item_selection(false); + p->add_check_shortcut(ED_SHORTCUT("editor/deploy_with_remote_debug", TTR("Deploy with Remote Debug")), RUN_DEPLOY_REMOTE_DEBUG); p->set_item_tooltip( p->get_item_count() 
- 1, TTR("When this option is enabled, using one-click deploy will make the executable attempt to connect to this computer's IP so the running project can be debugged.\nThis option is intended to be used for remote debugging (typically with a mobile device).\nYou don't need to enable it to use the GDScript debugger locally.")); + p->add_check_shortcut(ED_SHORTCUT("editor/small_deploy_with_network_fs", TTR("Small Deploy with Network Filesystem")), RUN_FILE_SERVER); p->set_item_tooltip( p->get_item_count() - 1, TTR("When this option is enabled, using one-click deploy for Android will only export an executable without the project data.\nThe filesystem will be provided from the project by the editor over the network.\nOn Android, deploying will use the USB cable for faster performance. This option speeds up testing for projects with large assets.")); + p->add_separator(); + p->add_check_shortcut(ED_SHORTCUT("editor/visible_collision_shapes", TTR("Visible Collision Shapes")), RUN_DEBUG_COLLISONS); p->set_item_tooltip( p->get_item_count() - 1, TTR("When this option is enabled, collision shapes and raycast nodes (for 2D and 3D) will be visible in the running project.")); + p->add_check_shortcut(ED_SHORTCUT("editor/visible_navigation", TTR("Visible Navigation")), RUN_DEBUG_NAVIGATION); p->set_item_tooltip( p->get_item_count() - 1, TTR("When this option is enabled, navigation meshes and polygons will be visible in the running project.")); + + if (GLOBAL_GET("rendering/quality/driver/driver_name") == "GLES3") { + p->add_separator(); + + p->add_check_shortcut(ED_SHORTCUT("editor/use_shader_fallbacks", TTR("Force Shader Fallbacks")), RUN_DEBUG_SHADER_FALLBACKS); + p->set_item_tooltip( + p->get_item_count() - 1, + TTR("When this option is enabled, shaders will be used in their fallback form (either visible via an ubershader or hidden) during all the run time.\nThis is useful for verifying the look and performance of fallbacks, which are normally displayed briefly.\nAsynchronous 
shader compilation must be enabled in the project settings for this option to make a difference.")); + } + p->add_separator(); + p->add_check_shortcut(ED_SHORTCUT("editor/sync_scene_changes", TTR("Synchronize Scene Changes")), RUN_LIVE_DEBUG); p->set_item_tooltip( p->get_item_count() - 1, TTR("When this option is enabled, any changes made to the scene in the editor will be replicated in the running project.\nWhen used remotely on a device, this is more efficient when the network filesystem option is enabled.")); + p->add_check_shortcut(ED_SHORTCUT("editor/sync_script_changes", TTR("Synchronize Script Changes")), RUN_RELOAD_SCRIPTS); p->set_item_tooltip( p->get_item_count() - 1, TTR("When this option is enabled, any script that is saved will be reloaded in the running project.\nWhen used remotely on a device, this is more efficient when the network filesystem option is enabled.")); + p->connect("id_pressed", this, "_menu_option"); menu_hb->add_spacer(); diff --git a/editor/editor_node.h b/editor/editor_node.h index 706016b4665..a469448df7e 100644 --- a/editor/editor_node.h +++ b/editor/editor_node.h @@ -170,6 +170,7 @@ private: RUN_LIVE_DEBUG, RUN_DEBUG_COLLISONS, RUN_DEBUG_NAVIGATION, + RUN_DEBUG_SHADER_FALLBACKS, RUN_DEPLOY_REMOTE_DEBUG, RUN_RELOAD_SCRIPTS, RUN_VCS_SETTINGS, diff --git a/editor/editor_run.cpp b/editor/editor_run.cpp index e6f04ea3cd7..e19656aab86 100644 --- a/editor/editor_run.cpp +++ b/editor/editor_run.cpp @@ -75,6 +75,10 @@ Error EditorRun::run(const String &p_scene, const String &p_custom_args, const L args.push_back("--debug-navigation"); } + if (debug_shader_fallbacks) { + args.push_back("--debug-shader-fallbacks"); + } + int screen = EditorSettings::get_singleton()->get("run/window_placement/screen"); if (screen == 0) { // Same as editor @@ -273,9 +277,18 @@ bool EditorRun::get_debug_navigation() const { return debug_navigation; } +void EditorRun::set_debug_shader_fallbacks(bool p_debug) { + debug_shader_fallbacks = p_debug; +} + +bool 
EditorRun::get_debug_shader_fallbacks() const { + return debug_shader_fallbacks; +} + EditorRun::EditorRun() { status = STATUS_STOP; running_scene = ""; debug_collisions = false; debug_navigation = false; + debug_shader_fallbacks = false; } diff --git a/editor/editor_run.h b/editor/editor_run.h index efcf3f53e30..27db6b98545 100644 --- a/editor/editor_run.h +++ b/editor/editor_run.h @@ -47,6 +47,7 @@ public: private: bool debug_collisions; bool debug_navigation; + bool debug_shader_fallbacks; Status status; String running_scene; @@ -65,6 +66,9 @@ public: void set_debug_navigation(bool p_debug); bool get_debug_navigation() const; + void set_debug_shader_fallbacks(bool p_debug); + bool get_debug_shader_fallbacks() const; + EditorRun(); }; diff --git a/editor/editor_run_native.cpp b/editor/editor_run_native.cpp index b9e15d335e2..b6a004a599c 100644 --- a/editor/editor_run_native.cpp +++ b/editor/editor_run_native.cpp @@ -142,6 +142,9 @@ void EditorRunNative::_run_native(int p_idx, int p_platform) { if (debug_navigation) { flags |= EditorExportPlatform::DEBUG_FLAG_VIEW_NAVIGATION; } + if (debug_shader_fallbacks) { + flags |= EditorExportPlatform::DEBUG_FLAG_SHADER_FALLBACKS; + } eep->run(preset, p_idx, flags); } @@ -188,6 +191,14 @@ bool EditorRunNative::get_debug_navigation() const { return debug_navigation; } +void EditorRunNative::set_debug_shader_fallbacks(bool p_debug) { + debug_shader_fallbacks = p_debug; +} + +bool EditorRunNative::get_debug_shader_fallbacks() const { + return debug_shader_fallbacks; +} + EditorRunNative::EditorRunNative() { set_process(true); first = true; @@ -195,6 +206,7 @@ EditorRunNative::EditorRunNative() { deploy_debug_remote = false; debug_collisions = false; debug_navigation = false; + debug_shader_fallbacks = false; resume_idx = 0; resume_platform = 0; } diff --git a/editor/editor_run_native.h b/editor/editor_run_native.h index a9373611296..30067a3dd9a 100644 --- a/editor/editor_run_native.h +++ b/editor/editor_run_native.h @@ -43,6 
+43,7 @@ class EditorRunNative : public HBoxContainer { bool deploy_debug_remote; bool debug_collisions; bool debug_navigation; + bool debug_shader_fallbacks; int resume_idx; int resume_platform; @@ -66,6 +67,9 @@ public: void set_debug_navigation(bool p_debug); bool get_debug_navigation() const; + void set_debug_shader_fallbacks(bool p_debug); + bool get_debug_shader_fallbacks() const; + void resume_run_native(); EditorRunNative(); diff --git a/gles_builders.py b/gles_builders.py index dda3922ed3f..ef0cb811f57 100644 --- a/gles_builders.py +++ b/gles_builders.py @@ -4,6 +4,7 @@ All such functions are invoked in a subprocess on Windows to prevent build flaki """ from platform_methods import subprocess_main +import re class LegacyGLHeaderStruct: @@ -69,7 +70,7 @@ def include_file_in_legacygl_header(filename, header_data, depth): if line.find("#ifdef ") != -1: if line.find("#ifdef ") != -1: - ifdefline = line.replace("#ifdef ", "").strip() + ifdefline = re.sub(r".*#ifdef (\S+).*\n", "\\1", line) if line.find("_EN_") != -1: enumbase = ifdefline[: ifdefline.find("_EN_")] @@ -227,6 +228,10 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs, gles2 fd.write("\t\t" + x.upper() + ",\n") fd.write("\t};\n\n") + supports_ubershader = not gles2 and "ubershader_flags" in header_data.uniforms + if supports_ubershader: + fd.write("\tint get_ubershader_flags_uniform() const { return Uniforms::UBERSHADER_FLAGS; }\n\n") + fd.write("\t_FORCE_INLINE_ int get_uniform(Uniforms p_uniform) const { return _get_uniform(p_uniform); }\n\n") if header_data.conditionals: fd.write( diff --git a/main/main.cpp b/main/main.cpp index b4d111d3507..feaaeae2490 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -145,6 +145,7 @@ static bool use_debug_profiler = false; #ifdef DEBUG_ENABLED static bool debug_collisions = false; static bool debug_navigation = false; +static bool debug_shader_fallbacks = false; #endif static int frame_delay = 0; static bool disable_render_loop = 
false; @@ -292,6 +293,7 @@ void Main::print_help(const char *p_binary) { #if defined(DEBUG_ENABLED) && !defined(SERVER_ENABLED) OS::get_singleton()->print(" --debug-collisions Show collision shapes when running the scene.\n"); OS::get_singleton()->print(" --debug-navigation Show navigation polygons when running the scene.\n"); + OS::get_singleton()->print(" --debug-shader-fallbacks Use the fallbacks of the shaders which have one when running the scene (GL ES 3 only).\n"); #endif OS::get_singleton()->print(" --frame-delay Simulate high CPU load (delay each frame by milliseconds).\n"); OS::get_singleton()->print(" --time-scale Force time scale (higher values are faster, 1.0 is normal speed).\n"); @@ -806,6 +808,8 @@ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_ph debug_collisions = true; } else if (I->get() == "--debug-navigation") { debug_navigation = true; + } else if (I->get() == "--debug-shader-fallbacks") { + debug_shader_fallbacks = true; #endif } else if (I->get() == "--remote-debug") { if (I->next()) { @@ -1218,6 +1222,13 @@ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_ph Engine::get_singleton()->set_frame_delay(frame_delay); +#ifdef DEBUG_ENABLED + if (!Engine::get_singleton()->is_editor_hint()) { + GLOBAL_DEF("rendering/gles3/shaders/debug_shader_fallbacks", debug_shader_fallbacks); + ProjectSettings::get_singleton()->set_hide_from_editor("rendering/gles3/shaders/debug_shader_fallbacks", true); + } +#endif + message_queue = memnew(MessageQueue); if (p_second_phase) { diff --git a/main/tests/test_shader_lang.cpp b/main/tests/test_shader_lang.cpp index b1fedcf5836..bc06b11d971 100644 --- a/main/tests/test_shader_lang.cpp +++ b/main/tests/test_shader_lang.cpp @@ -120,14 +120,14 @@ static String dump_node_code(SL::Node *p_node, int p_level) { case SL::Node::TYPE_SHADER: { SL::ShaderNode *pnode = (SL::ShaderNode *)p_node; - for (Map::Element *E = pnode->uniforms.front(); E; E = E->next()) { + for 
(OrderedHashMap::Element E = pnode->uniforms.front(); E; E = E.next()) { String ucode = "uniform "; - ucode += _prestr(E->get().precision); - ucode += _typestr(E->get().type); - ucode += " " + String(E->key()); + ucode += _prestr(E.get().precision); + ucode += _typestr(E.get().type); + ucode += " " + String(E.key()); - if (E->get().default_value.size()) { - ucode += " = " + get_constant_text(E->get().type, E->get().default_value); + if (E.get().default_value.size()) { + ucode += " = " + get_constant_text(E.get().type, E.get().default_value); } static const char *hint_name[SL::ShaderNode::Uniform::HINT_MAX] = { @@ -140,18 +140,18 @@ static String dump_node_code(SL::Node *p_node, int p_level) { "white" }; - if (E->get().hint) { - ucode += " : " + String(hint_name[E->get().hint]); + if (E.get().hint) { + ucode += " : " + String(hint_name[E.get().hint]); } code += ucode + "\n"; } - for (Map::Element *E = pnode->varyings.front(); E; E = E->next()) { + for (OrderedHashMap::Element E = pnode->varyings.front(); E; E = E.next()) { String vcode = "varying "; - vcode += _prestr(E->get().precision); - vcode += _typestr(E->get().type); - vcode += " " + String(E->key()); + vcode += _prestr(E.get().precision); + vcode += _typestr(E.get().type); + vcode += " " + String(E.key()); code += vcode + "\n"; } diff --git a/scene/resources/material.cpp b/scene/resources/material.cpp index 15f5e9635e7..559c9bcaab9 100644 --- a/scene/resources/material.cpp +++ b/scene/resources/material.cpp @@ -1047,6 +1047,17 @@ void SpatialMaterial::_update_shader() { code += "}\n"; + String fallback_mode_str; + switch (async_mode) { + case ASYNC_MODE_VISIBLE: { + fallback_mode_str = "async_visible"; + } break; + case ASYNC_MODE_HIDDEN: { + fallback_mode_str = "async_hidden"; + } break; + } + code = code.replace_first("render_mode ", "render_mode " + fallback_mode_str + ","); + ShaderData shader_data; shader_data.shader = VS::get_singleton()->shader_create(); shader_data.users = 1; @@ -1822,6 +1833,16 @@ 
Shader::Mode SpatialMaterial::get_shader_mode() const { return Shader::MODE_SPATIAL; } +void SpatialMaterial::set_async_mode(AsyncMode p_mode) { + async_mode = p_mode; + _queue_shader_change(); + _change_notify(); +} + +SpatialMaterial::AsyncMode SpatialMaterial::get_async_mode() const { + return async_mode; +} + void SpatialMaterial::_bind_methods() { ClassDB::bind_method(D_METHOD("set_albedo", "albedo"), &SpatialMaterial::set_albedo); ClassDB::bind_method(D_METHOD("get_albedo"), &SpatialMaterial::get_albedo); @@ -1994,6 +2015,9 @@ void SpatialMaterial::_bind_methods() { ClassDB::bind_method(D_METHOD("set_distance_fade_min_distance", "distance"), &SpatialMaterial::set_distance_fade_min_distance); ClassDB::bind_method(D_METHOD("get_distance_fade_min_distance"), &SpatialMaterial::get_distance_fade_min_distance); + ClassDB::bind_method(D_METHOD("set_async_mode", "mode"), &SpatialMaterial::set_async_mode); + ClassDB::bind_method(D_METHOD("get_async_mode"), &SpatialMaterial::get_async_mode); + ADD_GROUP("Flags", "flags_"); ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "flags_transparent"), "set_feature", "get_feature", FEATURE_TRANSPARENT); ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "flags_use_shadow_to_opacity"), "set_flag", "get_flag", FLAG_USE_SHADOW_TO_OPACITY); @@ -2136,6 +2160,8 @@ void SpatialMaterial::_bind_methods() { ADD_PROPERTY(PropertyInfo(Variant::REAL, "distance_fade_min_distance", PROPERTY_HINT_RANGE, "0,4096,0.01"), "set_distance_fade_min_distance", "get_distance_fade_min_distance"); ADD_PROPERTY(PropertyInfo(Variant::REAL, "distance_fade_max_distance", PROPERTY_HINT_RANGE, "0,4096,0.01"), "set_distance_fade_max_distance", "get_distance_fade_max_distance"); + ADD_PROPERTY(PropertyInfo(Variant::INT, "async_mode", PROPERTY_HINT_ENUM, "Visible,Hidden"), "set_async_mode", "get_async_mode"); + BIND_ENUM_CONSTANT(TEXTURE_ALBEDO); BIND_ENUM_CONSTANT(TEXTURE_METALLIC); BIND_ENUM_CONSTANT(TEXTURE_ROUGHNESS); @@ -2236,6 +2262,9 @@ void 
SpatialMaterial::_bind_methods() { BIND_ENUM_CONSTANT(DISTANCE_FADE_PIXEL_ALPHA); BIND_ENUM_CONSTANT(DISTANCE_FADE_PIXEL_DITHER); BIND_ENUM_CONSTANT(DISTANCE_FADE_OBJECT_DITHER); + + BIND_ENUM_CONSTANT(ASYNC_MODE_VISIBLE); + BIND_ENUM_CONSTANT(ASYNC_MODE_HIDDEN); } SpatialMaterial::SpatialMaterial() : @@ -2309,6 +2338,8 @@ SpatialMaterial::SpatialMaterial() : diffuse_mode = DIFFUSE_BURLEY; specular_mode = SPECULAR_SCHLICK_GGX; + async_mode = ASYNC_MODE_VISIBLE; + for (int i = 0; i < FEATURE_MAX; i++) { features[i] = false; } diff --git a/scene/resources/material.h b/scene/resources/material.h index 94ab38304e5..0a6a6cbad26 100644 --- a/scene/resources/material.h +++ b/scene/resources/material.h @@ -237,6 +237,11 @@ public: DISTANCE_FADE_OBJECT_DITHER, }; + enum AsyncMode { + ASYNC_MODE_VISIBLE, + ASYNC_MODE_HIDDEN, + }; + private: union MaterialKey { struct { @@ -425,6 +430,7 @@ private: DiffuseMode diffuse_mode; BillboardMode billboard_mode; EmissionOperator emission_op; + AsyncMode async_mode; TextureChannel metallic_texture_channel; TextureChannel roughness_texture_channel; @@ -622,6 +628,9 @@ public: void set_refraction_texture_channel(TextureChannel p_channel); TextureChannel get_refraction_texture_channel() const; + void set_async_mode(AsyncMode p_mode); + AsyncMode get_async_mode() const; + static void init_shaders(); static void finish_shaders(); static void flush_changes(); @@ -649,6 +658,7 @@ VARIANT_ENUM_CAST(SpatialMaterial::BillboardMode) VARIANT_ENUM_CAST(SpatialMaterial::TextureChannel) VARIANT_ENUM_CAST(SpatialMaterial::EmissionOperator) VARIANT_ENUM_CAST(SpatialMaterial::DistanceFadeMode) +VARIANT_ENUM_CAST(SpatialMaterial::AsyncMode) ////////////////////// diff --git a/scene/resources/visual_shader.cpp b/scene/resources/visual_shader.cpp index 6516cd93e56..cba662b7386 100644 --- a/scene/resources/visual_shader.cpp +++ b/scene/resources/visual_shader.cpp @@ -897,6 +897,7 @@ VisualShader::RenderModeEnums VisualShader::render_mode_enums[] = { { 
Shader::MODE_SPATIAL, "cull" }, { Shader::MODE_SPATIAL, "diffuse" }, { Shader::MODE_SPATIAL, "specular" }, + { Shader::MODE_SPATIAL, "async" }, { Shader::MODE_CANVAS_ITEM, "blend" }, { Shader::MODE_CANVAS_ITEM, nullptr } }; diff --git a/servers/visual/rasterizer.h b/servers/visual/rasterizer.h index 3bc02a22f2a..452662c2759 100644 --- a/servers/visual/rasterizer.h +++ b/servers/visual/rasterizer.h @@ -248,6 +248,9 @@ public: virtual void shader_get_custom_defines(RID p_shader, Vector *p_defines) const = 0; virtual void shader_remove_custom_define(RID p_shader, const String &p_define) = 0; + virtual void set_shader_async_hidden_forbidden(bool p_forbidden) = 0; + virtual bool is_shader_async_hidden_forbidden() = 0; + /* COMMON MATERIAL API */ virtual RID material_create() = 0; diff --git a/servers/visual/shader_language.cpp b/servers/visual/shader_language.cpp index bee16577195..4aca4f3b43b 100644 --- a/servers/visual/shader_language.cpp +++ b/servers/visual/shader_language.cpp @@ -6893,11 +6893,11 @@ Error ShaderLanguage::complete(const String &p_code, const Map::Element *E = shader->varyings.front(); E; E = E->next()) { - matches.insert(E->key(), ScriptCodeCompletionOption::KIND_VARIABLE); + for (OrderedHashMap::Element E = shader->varyings.front(); E; E = E.next()) { + matches.insert(E.key(), ScriptCodeCompletionOption::KIND_VARIABLE); } - for (const Map::Element *E = shader->uniforms.front(); E; E = E->next()) { - matches.insert(E->key(), ScriptCodeCompletionOption::KIND_MEMBER); + for (OrderedHashMap::Element E = shader->uniforms.front(); E; E = E.next()) { + matches.insert(E.key(), ScriptCodeCompletionOption::KIND_MEMBER); } } diff --git a/servers/visual/shader_language.h b/servers/visual/shader_language.h index b665c705b69..30384efee29 100644 --- a/servers/visual/shader_language.h +++ b/servers/visual/shader_language.h @@ -33,6 +33,7 @@ #include "core/list.h" #include "core/map.h" +#include "core/ordered_hash_map.h" #include "core/script_language.h" #include 
"core/string_name.h" #include "core/typedefs.h" @@ -652,8 +653,8 @@ public: }; Map constants; - Map varyings; - Map uniforms; + OrderedHashMap varyings; + OrderedHashMap uniforms; Map structs; Vector render_modes; diff --git a/servers/visual/shader_types.cpp b/servers/visual/shader_types.cpp index ce09b84a7da..4bcb3d16ed6 100644 --- a/servers/visual/shader_types.cpp +++ b/servers/visual/shader_types.cpp @@ -197,6 +197,9 @@ ShaderTypes::ShaderTypes() { shader_modes[VS::SHADER_SPATIAL].modes.push_back("vertex_lighting"); + shader_modes[VS::SHADER_SPATIAL].modes.push_back("async_visible"); + shader_modes[VS::SHADER_SPATIAL].modes.push_back("async_hidden"); + /************ CANVAS ITEM **************************/ shader_modes[VS::SHADER_CANVAS_ITEM].functions["global"].built_ins["TIME"] = constt(ShaderLanguage::TYPE_FLOAT); diff --git a/servers/visual/visual_server_raster.h b/servers/visual/visual_server_raster.h index f27f104b290..9e7c754206a 100644 --- a/servers/visual/visual_server_raster.h +++ b/servers/visual/visual_server_raster.h @@ -192,6 +192,8 @@ public: BIND2C(shader_get_custom_defines, RID, Vector *) BIND2(shader_remove_custom_define, RID, const String &) + BIND1(set_shader_async_hidden_forbidden, bool) + /* COMMON MATERIAL API */ BIND0R(RID, material_create) diff --git a/servers/visual/visual_server_scene.cpp b/servers/visual/visual_server_scene.cpp index 6a39d2a063c..f80814b89cf 100644 --- a/servers/visual/visual_server_scene.cpp +++ b/servers/visual/visual_server_scene.cpp @@ -2875,7 +2875,11 @@ bool VisualServerScene::_render_reflection_probe_step(Instance *p_instance, int } _prepare_scene(xform, cm, false, RID(), VSG::storage->reflection_probe_get_cull_mask(p_instance->base), p_instance->scenario->self, shadow_atlas, reflection_probe->instance, reflection_probe->previous_room_id_hint); + + bool async_forbidden_backup = VSG::storage->is_shader_async_hidden_forbidden(); + VSG::storage->set_shader_async_hidden_forbidden(true); _render_scene(xform, cm, 0, 
false, RID(), p_instance->scenario->self, shadow_atlas, reflection_probe->instance, p_step); + VSG::storage->set_shader_async_hidden_forbidden(async_forbidden_backup); } else { //do roughness postprocess step until it believes it's done diff --git a/servers/visual/visual_server_wrap_mt.h b/servers/visual/visual_server_wrap_mt.h index 2ab43c22b5a..fe252639bb6 100644 --- a/servers/visual/visual_server_wrap_mt.h +++ b/servers/visual/visual_server_wrap_mt.h @@ -130,6 +130,8 @@ public: FUNC2SC(shader_get_custom_defines, RID, Vector *) FUNC2(shader_remove_custom_define, RID, const String &) + FUNC1(set_shader_async_hidden_forbidden, bool) + /* COMMON MATERIAL API */ FUNCRID(material) diff --git a/servers/visual_server.cpp b/servers/visual_server.cpp index 2f960f5ef37..45e9bd55c61 100644 --- a/servers/visual_server.cpp +++ b/servers/visual_server.cpp @@ -30,9 +30,14 @@ #include "visual_server.h" +#include "core/engine.h" #include "core/method_bind_ext.gen.inc" #include "core/project_settings.h" +#ifdef TOOLS_ENABLED +#include "editor/editor_settings.h" +#endif + VisualServer *VisualServer::singleton = nullptr; VisualServer *(*VisualServer::create_func)() = nullptr; @@ -1868,6 +1873,7 @@ void VisualServer::_bind_methods() { ClassDB::bind_method(D_METHOD("shader_get_param_list", "shader"), &VisualServer::_shader_get_param_list_bind); ClassDB::bind_method(D_METHOD("shader_set_default_texture_param", "shader", "name", "texture"), &VisualServer::shader_set_default_texture_param); ClassDB::bind_method(D_METHOD("shader_get_default_texture_param", "shader", "name"), &VisualServer::shader_get_default_texture_param); + ClassDB::bind_method(D_METHOD("set_shader_async_hidden_forbidden", "forbidden"), &VisualServer::set_shader_async_hidden_forbidden); ClassDB::bind_method(D_METHOD("material_create"), &VisualServer::material_create); ClassDB::bind_method(D_METHOD("material_set_shader", "shader_material", "shader"), &VisualServer::material_set_shader); @@ -2582,6 +2588,16 @@ void 
VisualServer::set_render_loop_enabled(bool p_enabled) { render_loop_enabled = p_enabled; } +#ifdef DEBUG_ENABLED +bool VisualServer::is_force_shader_fallbacks_enabled() const { + return force_shader_fallbacks; +} + +void VisualServer::set_force_shader_fallbacks_enabled(bool p_enabled) { + force_shader_fallbacks = p_enabled; +} +#endif + VisualServer::VisualServer() { //ERR_FAIL_COND(singleton); singleton = this; @@ -2701,6 +2717,22 @@ VisualServer::VisualServer() { // Occlusion culling GLOBAL_DEF("rendering/misc/occlusion_culling/max_active_spheres", 8); ProjectSettings::get_singleton()->set_custom_property_info("rendering/misc/occlusion_culling/max_active_spheres", PropertyInfo(Variant::INT, "rendering/misc/occlusion_culling/max_active_spheres", PROPERTY_HINT_RANGE, "0,64")); + + // Async. compilation and caching +#ifdef DEBUG_ENABLED + if (!Engine::get_singleton()->is_editor_hint()) { + force_shader_fallbacks = GLOBAL_GET("rendering/gles3/shaders/debug_shader_fallbacks"); + } +#endif + GLOBAL_DEF("rendering/gles3/shaders/shader_compilation_mode", 0); + ProjectSettings::get_singleton()->set_custom_property_info("rendering/gles3/shaders/shader_compilation_mode", PropertyInfo(Variant::INT, "rendering/gles3/shaders/shader_compilation_mode", PROPERTY_HINT_ENUM, "Synchronous,Asynchronous,Asynchronous + Cache")); + GLOBAL_DEF("rendering/gles3/shaders/max_simultaneous_compiles", 2); + ProjectSettings::get_singleton()->set_custom_property_info("rendering/gles3/shaders/max_simultaneous_compiles", PropertyInfo(Variant::INT, "rendering/gles3/shaders/max_simultaneous_compiles", PROPERTY_HINT_RANGE, "1,8,1")); + GLOBAL_DEF("rendering/gles3/shaders/max_simultaneous_compiles.mobile", 1); + GLOBAL_DEF("rendering/gles3/shaders/log_active_async_compiles_count", false); + GLOBAL_DEF("rendering/gles3/shaders/shader_cache_size_mb", 512); + ProjectSettings::get_singleton()->set_custom_property_info("rendering/gles3/shaders/shader_cache_size_mb", PropertyInfo(Variant::INT, 
"rendering/gles3/shaders/shader_cache_size_mb", PROPERTY_HINT_RANGE, "128,4096,128")); + GLOBAL_DEF("rendering/gles3/shaders/shader_cache_size_mb.mobile", 128); } VisualServer::~VisualServer() { diff --git a/servers/visual_server.h b/servers/visual_server.h index 6540ea683d1..b6da7a91369 100644 --- a/servers/visual_server.h +++ b/servers/visual_server.h @@ -48,6 +48,9 @@ class VisualServer : public Object { int mm_policy; bool render_loop_enabled = true; +#ifdef DEBUG_ENABLED + bool force_shader_fallbacks = false; +#endif void _camera_set_orthogonal(RID p_camera, float p_size, float p_z_near, float p_z_far); void _canvas_item_add_style_box(RID p_item, const Rect2 &p_rect, const Rect2 &p_source, RID p_texture, const Vector &p_margins, const Color &p_modulate = Color(1, 1, 1)); @@ -204,6 +207,8 @@ public: virtual void shader_get_custom_defines(RID p_shader, Vector *p_defines) const = 0; virtual void shader_remove_custom_define(RID p_shader, const String &p_define) = 0; + virtual void set_shader_async_hidden_forbidden(bool p_forbidden) = 0; + /* COMMON MATERIAL API */ enum { @@ -1157,6 +1162,11 @@ public: bool is_render_loop_enabled() const; void set_render_loop_enabled(bool p_enabled); +#ifdef DEBUG_ENABLED + bool is_force_shader_fallbacks_enabled() const; + void set_force_shader_fallbacks_enabled(bool p_enabled); +#endif + VisualServer(); virtual ~VisualServer(); }; diff --git a/thirdparty/glad/glad.c b/thirdparty/glad/glad.c index dc1b8cb697e..836fbee09be 100644 --- a/thirdparty/glad/glad.c +++ b/thirdparty/glad/glad.c @@ -1,6 +1,6 @@ /* - OpenGL loader generated by glad 0.1.34 on Tue Nov 17 16:41:02 2020. + OpenGL loader generated by glad 0.1.34 on Fri Feb 19 21:01:51 2021. 
Language/Generator: C/C++ Specification: gl @@ -9,18 +9,21 @@ Extensions: GL_ARB_debug_output, GL_ARB_framebuffer_object, + GL_ARB_get_program_binary, + GL_ARB_parallel_shader_compile, GL_EXT_framebuffer_blit, GL_EXT_framebuffer_multisample, - GL_EXT_framebuffer_object + GL_EXT_framebuffer_object, + GL_KHR_parallel_shader_compile Loader: True Local files: False Omit khrplatform: False Reproducible: False Commandline: - --profile="compatibility" --api="gl=3.3" --generator="c" --spec="gl" --extensions="GL_ARB_debug_output,GL_ARB_framebuffer_object,GL_EXT_framebuffer_blit,GL_EXT_framebuffer_multisample,GL_EXT_framebuffer_object" + --profile="compatibility" --api="gl=3.3" --generator="c" --spec="gl" --extensions="GL_ARB_debug_output,GL_ARB_framebuffer_object,GL_ARB_get_program_binary,GL_ARB_parallel_shader_compile,GL_EXT_framebuffer_blit,GL_EXT_framebuffer_multisample,GL_EXT_framebuffer_object,GL_KHR_parallel_shader_compile" Online: - https://glad.dav1d.de/#profile=compatibility&language=c&specification=gl&loader=on&api=gl%3D3.3&extensions=GL_ARB_debug_output&extensions=GL_ARB_framebuffer_object&extensions=GL_EXT_framebuffer_blit&extensions=GL_EXT_framebuffer_multisample&extensions=GL_EXT_framebuffer_object + https://glad.dav1d.de/#profile=compatibility&language=c&specification=gl&loader=on&api=gl%3D3.3&extensions=GL_ARB_debug_output&extensions=GL_ARB_framebuffer_object&extensions=GL_ARB_get_program_binary&extensions=GL_ARB_parallel_shader_compile&extensions=GL_EXT_framebuffer_blit&extensions=GL_EXT_framebuffer_multisample&extensions=GL_EXT_framebuffer_object&extensions=GL_KHR_parallel_shader_compile */ #include @@ -997,13 +1000,20 @@ PFNGLWINDOWPOS3SPROC glad_glWindowPos3s = NULL; PFNGLWINDOWPOS3SVPROC glad_glWindowPos3sv = NULL; int GLAD_GL_ARB_debug_output = 0; int GLAD_GL_ARB_framebuffer_object = 0; +int GLAD_GL_ARB_get_program_binary = 0; +int GLAD_GL_ARB_parallel_shader_compile = 0; int GLAD_GL_EXT_framebuffer_blit = 0; int GLAD_GL_EXT_framebuffer_multisample = 
0; int GLAD_GL_EXT_framebuffer_object = 0; +int GLAD_GL_KHR_parallel_shader_compile = 0; PFNGLDEBUGMESSAGECONTROLARBPROC glad_glDebugMessageControlARB = NULL; PFNGLDEBUGMESSAGEINSERTARBPROC glad_glDebugMessageInsertARB = NULL; PFNGLDEBUGMESSAGECALLBACKARBPROC glad_glDebugMessageCallbackARB = NULL; PFNGLGETDEBUGMESSAGELOGARBPROC glad_glGetDebugMessageLogARB = NULL; +PFNGLGETPROGRAMBINARYPROC glad_glGetProgramBinary = NULL; +PFNGLPROGRAMBINARYPROC glad_glProgramBinary = NULL; +PFNGLPROGRAMPARAMETERIPROC glad_glProgramParameteri = NULL; +PFNGLMAXSHADERCOMPILERTHREADSARBPROC glad_glMaxShaderCompilerThreadsARB = NULL; PFNGLBLITFRAMEBUFFEREXTPROC glad_glBlitFramebufferEXT = NULL; PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC glad_glRenderbufferStorageMultisampleEXT = NULL; PFNGLISRENDERBUFFEREXTPROC glad_glIsRenderbufferEXT = NULL; @@ -1023,6 +1033,7 @@ PFNGLFRAMEBUFFERTEXTURE3DEXTPROC glad_glFramebufferTexture3DEXT = NULL; PFNGLFRAMEBUFFERRENDERBUFFEREXTPROC glad_glFramebufferRenderbufferEXT = NULL; PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC glad_glGetFramebufferAttachmentParameterivEXT = NULL; PFNGLGENERATEMIPMAPEXTPROC glad_glGenerateMipmapEXT = NULL; +PFNGLMAXSHADERCOMPILERTHREADSKHRPROC glad_glMaxShaderCompilerThreadsKHR = NULL; static void load_GL_VERSION_1_0(GLADloadproc load) { if(!GLAD_GL_VERSION_1_0) return; glad_glCullFace = (PFNGLCULLFACEPROC)load("glCullFace"); @@ -1816,6 +1827,16 @@ static void load_GL_ARB_framebuffer_object(GLADloadproc load) { glad_glRenderbufferStorageMultisample = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC)load("glRenderbufferStorageMultisample"); glad_glFramebufferTextureLayer = (PFNGLFRAMEBUFFERTEXTURELAYERPROC)load("glFramebufferTextureLayer"); } +static void load_GL_ARB_get_program_binary(GLADloadproc load) { + if(!GLAD_GL_ARB_get_program_binary) return; + glad_glGetProgramBinary = (PFNGLGETPROGRAMBINARYPROC)load("glGetProgramBinary"); + glad_glProgramBinary = (PFNGLPROGRAMBINARYPROC)load("glProgramBinary"); + 
glad_glProgramParameteri = (PFNGLPROGRAMPARAMETERIPROC)load("glProgramParameteri"); +} +static void load_GL_ARB_parallel_shader_compile(GLADloadproc load) { + if(!GLAD_GL_ARB_parallel_shader_compile) return; + glad_glMaxShaderCompilerThreadsARB = (PFNGLMAXSHADERCOMPILERTHREADSARBPROC)load("glMaxShaderCompilerThreadsARB"); +} static void load_GL_EXT_framebuffer_blit(GLADloadproc load) { if(!GLAD_GL_EXT_framebuffer_blit) return; glad_glBlitFramebufferEXT = (PFNGLBLITFRAMEBUFFEREXTPROC)load("glBlitFramebufferEXT"); @@ -1844,13 +1865,20 @@ static void load_GL_EXT_framebuffer_object(GLADloadproc load) { glad_glGetFramebufferAttachmentParameterivEXT = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC)load("glGetFramebufferAttachmentParameterivEXT"); glad_glGenerateMipmapEXT = (PFNGLGENERATEMIPMAPEXTPROC)load("glGenerateMipmapEXT"); } +static void load_GL_KHR_parallel_shader_compile(GLADloadproc load) { + if(!GLAD_GL_KHR_parallel_shader_compile) return; + glad_glMaxShaderCompilerThreadsKHR = (PFNGLMAXSHADERCOMPILERTHREADSKHRPROC)load("glMaxShaderCompilerThreadsKHR"); +} static int find_extensionsGL(void) { if (!get_exts()) return 0; GLAD_GL_ARB_debug_output = has_ext("GL_ARB_debug_output"); GLAD_GL_ARB_framebuffer_object = has_ext("GL_ARB_framebuffer_object"); + GLAD_GL_ARB_get_program_binary = has_ext("GL_ARB_get_program_binary"); + GLAD_GL_ARB_parallel_shader_compile = has_ext("GL_ARB_parallel_shader_compile"); GLAD_GL_EXT_framebuffer_blit = has_ext("GL_EXT_framebuffer_blit"); GLAD_GL_EXT_framebuffer_multisample = has_ext("GL_EXT_framebuffer_multisample"); GLAD_GL_EXT_framebuffer_object = has_ext("GL_EXT_framebuffer_object"); + GLAD_GL_KHR_parallel_shader_compile = has_ext("GL_KHR_parallel_shader_compile"); free_exts(); return 1; } @@ -1931,9 +1959,11 @@ int gladLoadGLLoader(GLADloadproc load) { if (!find_extensionsGL()) return 0; load_GL_ARB_debug_output(load); load_GL_ARB_framebuffer_object(load); + load_GL_ARB_get_program_binary(load); + 
load_GL_ARB_parallel_shader_compile(load); load_GL_EXT_framebuffer_blit(load); load_GL_EXT_framebuffer_multisample(load); load_GL_EXT_framebuffer_object(load); + load_GL_KHR_parallel_shader_compile(load); return GLVersion.major != 0 || GLVersion.minor != 0; } - diff --git a/thirdparty/glad/glad/glad.h b/thirdparty/glad/glad/glad.h index f211e6aa570..b328522e127 100644 --- a/thirdparty/glad/glad/glad.h +++ b/thirdparty/glad/glad/glad.h @@ -1,6 +1,6 @@ /* - OpenGL loader generated by glad 0.1.34 on Tue Nov 17 16:41:02 2020. + OpenGL loader generated by glad 0.1.34 on Fri Feb 19 21:01:51 2021. Language/Generator: C/C++ Specification: gl @@ -9,18 +9,21 @@ Extensions: GL_ARB_debug_output, GL_ARB_framebuffer_object, + GL_ARB_get_program_binary, + GL_ARB_parallel_shader_compile, GL_EXT_framebuffer_blit, GL_EXT_framebuffer_multisample, - GL_EXT_framebuffer_object + GL_EXT_framebuffer_object, + GL_KHR_parallel_shader_compile Loader: True Local files: False Omit khrplatform: False Reproducible: False Commandline: - --profile="compatibility" --api="gl=3.3" --generator="c" --spec="gl" --extensions="GL_ARB_debug_output,GL_ARB_framebuffer_object,GL_EXT_framebuffer_blit,GL_EXT_framebuffer_multisample,GL_EXT_framebuffer_object" + --profile="compatibility" --api="gl=3.3" --generator="c" --spec="gl" --extensions="GL_ARB_debug_output,GL_ARB_framebuffer_object,GL_ARB_get_program_binary,GL_ARB_parallel_shader_compile,GL_EXT_framebuffer_blit,GL_EXT_framebuffer_multisample,GL_EXT_framebuffer_object,GL_KHR_parallel_shader_compile" Online: - https://glad.dav1d.de/#profile=compatibility&language=c&specification=gl&loader=on&api=gl%3D3.3&extensions=GL_ARB_debug_output&extensions=GL_ARB_framebuffer_object&extensions=GL_EXT_framebuffer_blit&extensions=GL_EXT_framebuffer_multisample&extensions=GL_EXT_framebuffer_object + 
https://glad.dav1d.de/#profile=compatibility&language=c&specification=gl&loader=on&api=gl%3D3.3&extensions=GL_ARB_debug_output&extensions=GL_ARB_framebuffer_object&extensions=GL_ARB_get_program_binary&extensions=GL_ARB_parallel_shader_compile&extensions=GL_EXT_framebuffer_blit&extensions=GL_EXT_framebuffer_multisample&extensions=GL_EXT_framebuffer_object&extensions=GL_KHR_parallel_shader_compile */ @@ -3629,6 +3632,12 @@ GLAPI PFNGLSECONDARYCOLORP3UIVPROC glad_glSecondaryColorP3uiv; #define GL_DEBUG_SEVERITY_HIGH_ARB 0x9146 #define GL_DEBUG_SEVERITY_MEDIUM_ARB 0x9147 #define GL_DEBUG_SEVERITY_LOW_ARB 0x9148 +#define GL_PROGRAM_BINARY_RETRIEVABLE_HINT 0x8257 +#define GL_PROGRAM_BINARY_LENGTH 0x8741 +#define GL_NUM_PROGRAM_BINARY_FORMATS 0x87FE +#define GL_PROGRAM_BINARY_FORMATS 0x87FF +#define GL_MAX_SHADER_COMPILER_THREADS_ARB 0x91B0 +#define GL_COMPLETION_STATUS_ARB 0x91B1 #define GL_READ_FRAMEBUFFER_EXT 0x8CA8 #define GL_DRAW_FRAMEBUFFER_EXT 0x8CA9 #define GL_DRAW_FRAMEBUFFER_BINDING_EXT 0x8CA6 @@ -3687,6 +3696,8 @@ GLAPI PFNGLSECONDARYCOLORP3UIVPROC glad_glSecondaryColorP3uiv; #define GL_RENDERBUFFER_ALPHA_SIZE_EXT 0x8D53 #define GL_RENDERBUFFER_DEPTH_SIZE_EXT 0x8D54 #define GL_RENDERBUFFER_STENCIL_SIZE_EXT 0x8D55 +#define GL_MAX_SHADER_COMPILER_THREADS_KHR 0x91B0 +#define GL_COMPLETION_STATUS_KHR 0x91B1 #ifndef GL_ARB_debug_output #define GL_ARB_debug_output 1 GLAPI int GLAD_GL_ARB_debug_output; @@ -3707,6 +3718,26 @@ GLAPI PFNGLGETDEBUGMESSAGELOGARBPROC glad_glGetDebugMessageLogARB; #define GL_ARB_framebuffer_object 1 GLAPI int GLAD_GL_ARB_framebuffer_object; #endif +#ifndef GL_ARB_get_program_binary +#define GL_ARB_get_program_binary 1 +GLAPI int GLAD_GL_ARB_get_program_binary; +typedef void (APIENTRYP PFNGLGETPROGRAMBINARYPROC)(GLuint program, GLsizei bufSize, GLsizei *length, GLenum *binaryFormat, void *binary); +GLAPI PFNGLGETPROGRAMBINARYPROC glad_glGetProgramBinary; +#define glGetProgramBinary glad_glGetProgramBinary +typedef void (APIENTRYP 
PFNGLPROGRAMBINARYPROC)(GLuint program, GLenum binaryFormat, const void *binary, GLsizei length); +GLAPI PFNGLPROGRAMBINARYPROC glad_glProgramBinary; +#define glProgramBinary glad_glProgramBinary +typedef void (APIENTRYP PFNGLPROGRAMPARAMETERIPROC)(GLuint program, GLenum pname, GLint value); +GLAPI PFNGLPROGRAMPARAMETERIPROC glad_glProgramParameteri; +#define glProgramParameteri glad_glProgramParameteri +#endif +#ifndef GL_ARB_parallel_shader_compile +#define GL_ARB_parallel_shader_compile 1 +GLAPI int GLAD_GL_ARB_parallel_shader_compile; +typedef void (APIENTRYP PFNGLMAXSHADERCOMPILERTHREADSARBPROC)(GLuint count); +GLAPI PFNGLMAXSHADERCOMPILERTHREADSARBPROC glad_glMaxShaderCompilerThreadsARB; +#define glMaxShaderCompilerThreadsARB glad_glMaxShaderCompilerThreadsARB +#endif #ifndef GL_EXT_framebuffer_blit #define GL_EXT_framebuffer_blit 1 GLAPI int GLAD_GL_EXT_framebuffer_blit; @@ -3776,6 +3807,13 @@ typedef void (APIENTRYP PFNGLGENERATEMIPMAPEXTPROC)(GLenum target); GLAPI PFNGLGENERATEMIPMAPEXTPROC glad_glGenerateMipmapEXT; #define glGenerateMipmapEXT glad_glGenerateMipmapEXT #endif +#ifndef GL_KHR_parallel_shader_compile +#define GL_KHR_parallel_shader_compile 1 +GLAPI int GLAD_GL_KHR_parallel_shader_compile; +typedef void (APIENTRYP PFNGLMAXSHADERCOMPILERTHREADSKHRPROC)(GLuint count); +GLAPI PFNGLMAXSHADERCOMPILERTHREADSKHRPROC glad_glMaxShaderCompilerThreadsKHR; +#define glMaxShaderCompilerThreadsKHR glad_glMaxShaderCompilerThreadsKHR +#endif #ifdef __cplusplus }