Merge pull request #76832 from RandomShaper/cluster_render_prevail

Save cluster render shader from being optimized out entirely
This commit is contained in:
Rémi Verschelde 2023-05-09 10:46:23 +02:00
commit 668cf3c66f
No known key found for this signature in database
GPG key ID: C3336907360768E1
4 changed files with 36 additions and 7 deletions

View file

@ -9380,6 +9380,9 @@ bool RenderingDeviceVulkan::has_feature(const Features p_feature) const {
VulkanContext::VRSCapabilities vrs_capabilities = context->get_vrs_capabilities();
return vrs_capabilities.attachment_vrs_supported && context->get_physical_device_features().shaderStorageImageExtendedFormats;
} break;
case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: {
return true;
} break;
default: {
return false;
}

View file

@ -47,15 +47,29 @@ ClusterBuilderSharedDataRD::ClusterBuilderSharedDataRD() {
}
{
RD::FramebufferFormatID fb_format;
RD::PipelineColorBlendState blend_state;
String defines;
if (RD::get_singleton()->has_feature(RD::SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS)) {
fb_format = RD::get_singleton()->framebuffer_format_create_empty();
blend_state = RD::PipelineColorBlendState::create_disabled();
} else {
Vector<RD::AttachmentFormat> afs;
afs.push_back(RD::AttachmentFormat());
afs.write[0].usage_flags = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;
fb_format = RD::get_singleton()->framebuffer_format_create(afs);
defines = "\n#define USE_ATTACHMENT\n";
}
Vector<String> versions;
versions.push_back("");
cluster_render.cluster_render_shader.initialize(versions);
cluster_render.cluster_render_shader.initialize(versions, defines);
cluster_render.shader_version = cluster_render.cluster_render_shader.version_create();
cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, 0);
cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0);
cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0);
RD::PipelineMultisampleState ms;
ms.sample_count = RD::TEXTURE_SAMPLES_4;
cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), ms, RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0);
cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), ms, RD::PipelineDepthStencilState(), blend_state, 0);
}
{
Vector<String> versions;

View file

@ -100,6 +100,10 @@ layout(set = 0, binding = 3, std430) buffer restrict ClusterRender {
}
cluster_render;
#ifdef USE_ATTACHMENT
layout(location = 0) out vec4 frag_color;
#endif
void main() {
//convert from screen to cluster
uvec2 cluster = uvec2(gl_FragCoord.xy) >> state.screen_to_clusters_shift;
@ -113,6 +117,8 @@ void main() {
uint usage_write_offset = cluster_offset + (element_index >> 5);
uint usage_write_bit = 1 << (element_index & 0x1F);
uint aux = 0;
#ifdef USE_SUBGROUPS
uint cluster_thread_group_index;
@ -138,7 +144,7 @@ void main() {
cluster_thread_group_index = subgroupBallotExclusiveBitCount(mask);
if (cluster_thread_group_index == 0) {
atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
}
}
#else
@ -147,7 +153,7 @@ void main() {
if (!gl_HelperInvocation)
#endif
{
atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
}
#endif
//find the current element in the depth usage list and mark the current depth as used
@ -162,7 +168,7 @@ void main() {
if (!gl_HelperInvocation) {
z_write_bit = subgroupOr(z_write_bit); //merge all Zs
if (cluster_thread_group_index == 0) {
atomicOr(cluster_render.data[z_write_offset], z_write_bit);
aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit);
}
}
#else
@ -171,7 +177,11 @@ void main() {
if (!gl_HelperInvocation)
#endif
{
atomicOr(cluster_render.data[z_write_offset], z_write_bit);
aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit);
}
#endif
#ifdef USE_ATTACHMENT
frag_color = vec4(float(aux));
#endif
}

View file

@ -704,6 +704,8 @@ public:
SUPPORTS_MULTIVIEW,
SUPPORTS_FSR_HALF_FLOAT,
SUPPORTS_ATTACHMENT_VRS,
// If not supported, a fragment shader with only side effects (i.e., writes to buffers, but doesn't output to attachments) may be optimized down to a no-op by the GPU driver.
SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS,
};
virtual bool has_feature(const Features p_feature) const = 0;