From a2ed82d3b20d931f872e2edc7ddd672544ed7947 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20J=2E=20Est=C3=A9banez?= Date: Sun, 7 Aug 2022 22:53:33 +0200 Subject: [PATCH 1/3] Fix shadow flickering with async shader compilation This mostly reverts the approach in #62628, which now the problem is better scoped, looks overengineered and instead focuses on the few cases where there's something to take care of. --- drivers/gles3/rasterizer_scene_gles3.cpp | 16 ++++++++- drivers/gles3/rasterizer_storage_gles3.cpp | 2 +- drivers/gles3/shader_gles3.cpp | 41 ++++++---------------- drivers/gles3/shader_gles3.h | 9 ++--- gles_builders.py | 16 --------- 5 files changed, 30 insertions(+), 54 deletions(-) diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index 3ab1f447e1d..e89aef920ec 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -4114,6 +4114,13 @@ void RasterizerSceneGLES3::render_scene(const Transform &p_cam_transform, const glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LESS); state.ubo_data.shadow_atlas_pixel_size[0] = 1.0 / shadow_atlas->size; state.ubo_data.shadow_atlas_pixel_size[1] = 1.0 / shadow_atlas->size; + } else { + if (storage->config.async_compilation_enabled) { + // Avoid GL UB message id 131222 caused by shadow samplers not properly set up in the ubershader + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 6); + glBindTexture(GL_TEXTURE_2D, storage->resources.depth_tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); + } } if (reflection_atlas && reflection_atlas->size) { @@ -4869,6 +4876,13 @@ void RasterizerSceneGLES3::render_shadow(RID p_light, RID p_shadow_atlas, int p_ state.ubo_data.shadow_dual_paraboloid_render_zfar = zfar; state.ubo_data.opaque_prepass_threshold = 0.1; + if (storage->config.async_compilation_enabled) { + // Avoid GL UB message id 131222 caused by shadow samplers not properly set up in the ubershader + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 6); + glBindTexture(GL_TEXTURE_2D, storage->resources.depth_tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); + } + _setup_environment(nullptr, light_projection, light_transform); state.scene_shader.set_conditional(SceneShaderGLES3::RENDER_DEPTH, true); @@ -5300,7 +5314,7 @@ void RasterizerSceneGLES3::initialize() { glFrontFace(GL_CW); if (storage->config.async_compilation_enabled) { - state.scene_shader.init_async_compilation(storage->resources.depth_tex); + state.scene_shader.init_async_compilation(); } } diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp index b992292f3cf..3f4ef7228a5 100644 --- a/drivers/gles3/rasterizer_storage_gles3.cpp +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -8369,7 +8369,7 @@ void RasterizerStorageGLES3::initialize() { shaders.cubemap_filter.set_conditional(CubemapFilterShaderGLES3::LOW_QUALITY, !ggx_hq); shaders.particles.init(); if (config.async_compilation_enabled) { - shaders.particles.init_async_compilation(resources.depth_tex); + shaders.particles.init_async_compilation(); } #ifdef GLES_OVER_GL diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp index 388c3762ffc..4331b7c81db 100644 --- a/drivers/gles3/shader_gles3.cpp +++ b/drivers/gles3/shader_gles3.cpp @@ -177,45 +177,29 @@ bool ShaderGLES3::is_custom_code_ready_for_render(uint32_t p_code_id) { return true; } -bool ShaderGLES3::_bind_ubershader() { +bool ShaderGLES3::_bind_ubershader(bool p_for_warmup) { #ifdef DEBUG_ENABLED ERR_FAIL_COND_V(!is_async_compilation_supported(), false); ERR_FAIL_COND_V(get_ubershader_flags_uniform() == -1, false); #endif new_conditional_version.version |= VersionKey::UBERSHADER_FLAG; bool bound = _bind(true); + new_conditional_version.version &= ~VersionKey::UBERSHADER_FLAG; + if (p_for_warmup) { + // Avoid GL UB message id 131222 caused by shadow samplers not properly set up yet + unbind(); + return bound; + } int conditionals_uniform = _get_uniform(get_ubershader_flags_uniform()); #ifdef DEBUG_ENABLED ERR_FAIL_COND_V(conditionals_uniform == -1, false); #endif - new_conditional_version.version &= ~VersionKey::UBERSHADER_FLAG; #ifdef DEV_ENABLED // So far we don't need bit 31 for conditionals. That allows us to use signed integers, // which are more compatible across GL driver vendors. CRASH_COND(new_conditional_version.version >= 0x80000000); #endif glUniform1i(conditionals_uniform, new_conditional_version.version); - - // This is done to avoid running into the GL UB message id 131222. Long explanation: - // If an ubershader has shadow samplers, they are generally not used if the current material has shadowing disabled, - // but that also implies the rasterizer won't do any preparation to the relevant shadow samplers (which won't really exist, - // so that's the correct way in a conditioned shader). - // However, in the case of the ubershader those shadow samplers are unconditionally declared, although potentially unused and - // thus "uninitialized". Sampling in that situation (compare disabled, no depth texture bound) is undefined behavior for GL. - // And that's a problem for us because, even if dynamic branching will serve to avoid using the unprepared sampler when shadowing - // is not enabled, the GPU may still run the other branch. And it's not just that the results from the sampling are undefined - // (that wouldn't be a problem and we could just ignore the warning); the problem is that sampling in that state is fully UB. - for (int i = 0; i < shadow_texunit_count; i++) { - int unit = shadow_texunits[i]; - if (unit >= 0) { - glActiveTexture(GL_TEXTURE0 + unit); - } else { - glActiveTexture(GL_TEXTURE0 + max_image_units + unit); - } - glBindTexture(GL_TEXTURE_2D, depth_tex); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); - } - return bound; } @@ -1128,7 +1112,7 @@ GLint ShaderGLES3::get_uniform_location(const String &p_name) const { return glGetUniformLocation(version->ids.main, p_name.ascii().get_data()); } -void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const int *p_shadow_texunits, int p_shadow_texunit_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start) { +void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start) { ERR_FAIL_COND(version); conditional_version.key = 0; new_conditional_version.key = 0; @@ -1140,8 +1124,6 @@ void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_co fragment_code = p_fragment_code; texunit_pairs = p_texunit_pairs; texunit_pair_count = p_texunit_pair_count; - shadow_texunits = p_shadow_texunits; - shadow_texunit_count = p_shadow_texunit_count; vertex_code_start = p_vertex_code_start; fragment_code_start = p_fragment_code_start; attribute_pairs = p_attribute_pairs; @@ -1232,12 +1214,11 @@ void ShaderGLES3::setup(const char **p_conditional_defines, int p_conditional_co glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &max_image_units); } -void ShaderGLES3::init_async_compilation(GLuint p_depth_tex) { - depth_tex = p_depth_tex; +void ShaderGLES3::init_async_compilation() { if (is_async_compilation_supported() && get_ubershader_flags_uniform() != -1) { // Warm up the ubershader for the case of no custom code new_conditional_version.code_version = 0; - _bind_ubershader(); + _bind_ubershader(true); } } @@ -1297,7 +1278,7 @@ void ShaderGLES3::set_custom_shader_code(uint32_t p_code_id, const String &p_ver if (p_async_mode == ASYNC_MODE_VISIBLE && is_async_compilation_supported() && get_ubershader_flags_uniform() != -1) { // Warm up the ubershader for this custom code new_conditional_version.code_version = p_code_id; - _bind_ubershader(); + _bind_ubershader(true); } } diff --git a/drivers/gles3/shader_gles3.h b/drivers/gles3/shader_gles3.h index 38050f02231..9e44559e006 100644 --- a/drivers/gles3/shader_gles3.h +++ b/drivers/gles3/shader_gles3.h @@ -96,7 +96,6 @@ private: //@TODO Optimize to a fixed set of shader pools and use a LRU int uniform_count; int texunit_pair_count; - int shadow_texunit_count; int conditional_count; int ubo_count; int feedback_count; @@ -246,7 +245,6 @@ private: const char **uniform_names; const AttributePair *attribute_pairs; const TexUnitPair *texunit_pairs; - const int *shadow_texunits; const UBOPair *ubo_pairs; const Feedback *feedbacks; const char *vertex_code; @@ -280,7 +278,6 @@ private: static ShaderGLES3 *active; int max_image_units; - GLuint depth_tex = 0; _FORCE_INLINE_ void _set_uniform_variant(GLint p_uniform, const Variant &p_value) { if (p_uniform < 0) { @@ -372,13 +369,13 @@ private: } bool _bind(bool p_binding_fallback); - bool _bind_ubershader(); + bool _bind_ubershader(bool p_for_warmrup = false); protected: _FORCE_INLINE_ int _get_uniform(int p_which) const; _FORCE_INLINE_ void _set_conditional(int p_which, bool p_value); - void setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const int *p_shadow_texunits, int p_shadow_texunit_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start); + void setup(const char **p_conditional_defines, int p_conditional_count, const char **p_uniform_names, int p_uniform_count, const AttributePair *p_attribute_pairs, int p_attribute_count, const TexUnitPair *p_texunit_pairs, int p_texunit_pair_count, const UBOPair *p_ubo_pairs, int p_ubo_pair_count, const Feedback *p_feedback, int p_feedback_count, const char *p_vertex_code, const char *p_fragment_code, int p_vertex_code_start, int p_fragment_code_start); ShaderGLES3(); @@ -407,7 +404,7 @@ public: _FORCE_INLINE_ bool is_version_valid() const { return version && version->compile_status == Version::COMPILE_STATUS_OK; } virtual void init() = 0; - void init_async_compilation(GLuint p_depth_tex); + void init_async_compilation(); bool is_async_compilation_supported(); void finish(); diff --git a/gles_builders.py b/gles_builders.py index 8afc9a49929..ef0cb811f57 100644 --- a/gles_builders.py +++ b/gles_builders.py @@ -19,7 +19,6 @@ class LegacyGLHeaderStruct: self.enums = {} self.texunits = [] self.texunit_names = [] - self.shadow_texunits = [] self.ubos = [] self.ubo_names = [] @@ -107,8 +106,6 @@ def include_file_in_legacygl_header(filename, header_data, depth): if not x in header_data.texunit_names: header_data.texunits += [(x, texunit)] - if line.find("sampler2DShadow") != -1: - header_data.shadow_texunits += [texunit] header_data.texunit_names += [x] elif line.find("uniform") != -1 and line.lower().find("ubo:") != -1: @@ -486,15 +483,6 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs, gles2 else: fd.write("\t\tstatic TexUnitPair *_texunit_pairs=NULL;\n") - if not gles2: - if header_data.shadow_texunits: - fd.write("\t\tstatic int _shadow_texunits[]={") - for x in header_data.shadow_texunits: - fd.write(str(x) + ',') - fd.write("};\n\n") - else: - fd.write("\t\tstatic int *_shadow_texunits=NULL;\n") - if not gles2 and header_data.ubos: fd.write("\t\tstatic UBOPair _ubo_pairs[]={\n") for x in header_data.ubos: @@ -549,8 +537,6 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs, gles2 + str(len(header_data.attributes)) + ", _texunit_pairs," + str(len(header_data.texunits)) - + ", _shadow_texunits," - + str(len(header_data.shadow_texunits)) + ",_ubo_pairs," + str(len(header_data.ubos)) + ",_feedbacks," @@ -580,8 +566,6 @@ def build_legacygl_header(filename, include, class_suffix, output_attribs, gles2 + str(len(header_data.uniforms)) + ",_texunit_pairs," + str(len(header_data.texunits)) - + ",_shadow_texunits," - + str(len(header_data.shadow_texunits)) + ",_enums," + str(len(header_data.enums)) + ",_enum_values," From ea6ed9658dec58b22b594d041800e87d891411fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20J=2E=20Est=C3=A9banez?= Date: Sun, 7 Aug 2022 23:11:44 +0200 Subject: [PATCH 2/3] Fix emission not working in the ubershader --- drivers/gles3/shaders/scene.glsl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index 15491911dcb..a45a363c5ad 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -2376,9 +2376,9 @@ FRAGMENT_SHADER_CODE float max_ambient = max(ambient_light.r, max(ambient_light.g, ambient_light.b)); float max_diffuse = max(diffuse_light.r, max(diffuse_light.g, diffuse_light.b)); float total_ambient = max_ambient + max_diffuse; -#ifdef USE_FORWARD_LIGHTING +#ifdef USE_FORWARD_LIGHTING //ubershader-runtime total_ambient += max_emission; -#endif +#endif //ubershader-runtime float ambient_scale = (total_ambient > 0.0) ? (max_ambient + ambient_occlusion_affect_light * max_diffuse) / total_ambient : 0.0; #if defined(ENABLE_AO) @@ -2387,9 +2387,9 @@ FRAGMENT_SHADER_CODE diffuse_buffer = vec4(diffuse_light + ambient_light, ambient_scale); specular_buffer = vec4(specular_light, metallic); -#ifdef USE_FORWARD_LIGHTING +#ifdef USE_FORWARD_LIGHTING //ubershader-runtime diffuse_buffer.rgb += emission; -#endif +#endif //ubershader-runtime #endif //SHADELESS //ubershader-runtime normal_mr_buffer = vec4(normalize(normal) * 0.5 + 0.5, roughness); @@ -2404,9 +2404,9 @@ FRAGMENT_SHADER_CODE frag_color = vec4(albedo, alpha); #else //ubershader-runtime frag_color = vec4(ambient_light + diffuse_light + specular_light, alpha); -#ifdef USE_FORWARD_LIGHTING +#ifdef USE_FORWARD_LIGHTING //ubershader-runtime frag_color.rgb += emission; -#endif +#endif //ubershader-runtime #endif //SHADELESS //ubershader-runtime #endif //USE_MULTIPLE_RENDER_TARGETS //ubershader-runtime From edb140839ef047ead1d9b4245fb43f7f7ac49ce9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20J=2E=20Est=C3=A9banez?= Date: Mon, 8 Aug 2022 10:47:01 +0200 Subject: [PATCH 3/3] Fix GI probes not working in the ubershader --- drivers/gles3/rasterizer_scene_gles3.cpp | 7 +++++-- drivers/gles3/shader_gles3.cpp | 5 +++++ drivers/gles3/shader_gles3.h | 1 - drivers/gles3/shaders/scene.glsl | 4 ++-- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index e89aef920ec..584567c49d8 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -1855,7 +1855,10 @@ void RasterizerSceneGLES3::_setup_light(RenderList::Element *e, const Transform // Normally, lightmapping uses the same texturing units than the GI probes; however, in the case of the ubershader // that's not a good idea because some hardware/drivers (Android/Intel) may fail to render if a single texturing unit // is used through multiple kinds of samplers in the same shader. - if (state.scene_shader.is_version_ubershader()) { + // Moreover, since we don't know at this point if we are going to consume these textures from the ubershader or + // a conditioned one, the fact that async compilation is enabled is enough for us to switch to the alternative + // arrangement of texturing units. + if (storage->config.async_compilation_enabled) { glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 12); } else { glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 10); @@ -1871,7 +1874,7 @@ void RasterizerSceneGLES3::_setup_light(RenderList::Element *e, const Transform if (gi_probe_count > 1) { GIProbeInstance *gipi2 = gi_probe_instance_owner.getptr(ridp[1]); - if (state.scene_shader.is_version_ubershader()) { + if (storage->config.async_compilation_enabled) { glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 13); } else { glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 11); diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp index 4331b7c81db..d8afc831247 100644 --- a/drivers/gles3/shader_gles3.cpp +++ b/drivers/gles3/shader_gles3.cpp @@ -582,6 +582,11 @@ ShaderGLES3::Version *ShaderGLES3::get_current_version(bool &r_async_forbidden) strings_common.push_back("\n"); } + if (is_async_compilation_supported() && get_ubershader_flags_uniform() != -1) { + // Indicate that this shader may be used both as ubershader and conditioned during the session + strings_common.push_back("#define UBERSHADER_COMPAT\n"); + } + LocalVector flag_macros; bool build_ubershader = get_ubershader_flags_uniform() != -1 && (effective_version.version & VersionKey::UBERSHADER_FLAG); if (build_ubershader) { diff --git a/drivers/gles3/shader_gles3.h b/drivers/gles3/shader_gles3.h index 9e44559e006..924a18e4608 100644 --- a/drivers/gles3/shader_gles3.h +++ b/drivers/gles3/shader_gles3.h @@ -400,7 +400,6 @@ public: bool is_custom_code_ready_for_render(uint32_t p_code_id); uint32_t get_version() const { return new_conditional_version.version; } - bool is_version_ubershader() const { return (new_conditional_version.version & VersionKey::UBERSHADER_FLAG); } _FORCE_INLINE_ bool is_version_valid() const { return version && version->compile_status == Version::COMPILE_STATUS_OK; } virtual void init() = 0; diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index a45a363c5ad..038e5fab454 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -1649,7 +1649,7 @@ uniform mediump vec4[12] lightmap_captures; #ifdef USE_GI_PROBES //ubershader-skip -#if !defined(IS_UBERSHADER) +#if !defined(UBERSHADER_COMPAT) uniform mediump sampler3D gi_probe1; //texunit:-10 #else uniform mediump sampler3D gi_probe1_uber; //texunit:-12 @@ -1663,7 +1663,7 @@ uniform highp float gi_probe_bias1; uniform highp float gi_probe_normal_bias1; uniform bool gi_probe_blend_ambient1; -#if !defined(IS_UBERSHADER) +#if !defined(UBERSHADER_COMPAT) uniform mediump sampler3D gi_probe2; //texunit:-11 #else uniform mediump sampler3D gi_probe2_uber; //texunit:-13