Merge pull request #45672 from reduz/barrier-optimization

Rewrote how barriers work for faster rendering
Rémi Verschelde 2021-02-04 17:19:35 +01:00 committed by GitHub
commit 2ba66c1457
25 changed files with 2102 additions and 1164 deletions
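Note on the API change in this PR: draw_list_end(), compute_list_end(), and the buffer/texture transfer functions now take a post-barrier bitmask (BARRIER_MASK_RASTER, BARRIER_MASK_COMPUTE, BARRIER_MASK_TRANSFER, or BARRIER_MASK_NO_BARRIER) telling the device which stages must see the results; internally an empty mask falls back to VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT so Vulkan never receives a zero stage mask. A hedged sketch of how a caller narrows the barriers (mask names and calls taken from the hunks below; the surrounding context is illustrative only):

    // Results of this raster pass are only consumed by a later compute pass,
    // so do not make the next raster work wait on it.
    RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_COMPUTE);
    // The compute results are then consumed by both raster and further compute work.
    RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE);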


@@ -1627,6 +1627,9 @@ void RenderingDeviceVulkan::_memory_barrier(VkPipelineStageFlags p_src_stage_mas
mem_barrier.srcAccessMask = p_src_access;
mem_barrier.dstAccessMask = p_dst_sccess;
+if (p_src_stage_mask == 0 || p_dst_stage_mask == 0) {
+return; //no barrier, since this is invalid
+}
vkCmdPipelineBarrier(p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, p_src_stage_mask, p_dst_stage_mask, 0, 1, &mem_barrier, 0, nullptr, 0, nullptr);
}
@@ -2477,6 +2480,10 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
}
+if (barrier_flags == 0) {
+barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+}
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
@@ -2496,6 +2503,13 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
+if (texture->used_in_frame != frames_drawn) {
+texture->used_in_raster = false;
+texture->used_in_compute = false;
+texture->used_in_frame = frames_drawn;
+}
+texture->used_in_transfer = true;
return OK;
}
@@ -2844,6 +2858,10 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
}
+if (barrier_flags == 0) {
+barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+}
{ //restore src
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
@@ -3011,6 +3029,10 @@ Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID
access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
}
+if (barrier_flags == 0) {
+barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+}
{ //restore src
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
@@ -3143,6 +3165,10 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color,
access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
}
+if (barrier_flags == 0) {
+barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+}
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
@@ -3163,6 +3189,13 @@ Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color,
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
+if (src_tex->used_in_frame != frames_drawn) {
+src_tex->used_in_raster = false;
+src_tex->used_in_compute = false;
+src_tex->used_in_frame = frames_drawn;
+}
+src_tex->used_in_transfer = true;
return OK;
}
@@ -3289,6 +3322,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
dependency_from_external.srcStageMask |= reading_stages;
}
} break;
+case INITIAL_ACTION_CLEAR_REGION_CONTINUE:
case INITIAL_ACTION_CONTINUE: {
if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
@@ -3296,7 +3330,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
} else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
-description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; //don't care what is there
+description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
} else {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
@@ -3425,8 +3459,13 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
render_pass_create_info.pAttachments = attachments.ptr();
render_pass_create_info.subpassCount = 1;
render_pass_create_info.pSubpasses = &subpass;
-render_pass_create_info.dependencyCount = 2;
-render_pass_create_info.pDependencies = dependencies;
+// Commenting this because it seems it just avoids raster and compute to work at the same time.
+// Other barriers seem to be protecting the render pass fine.
+// render_pass_create_info.dependencyCount = 2;
+// render_pass_create_info.pDependencies = dependencies;
+render_pass_create_info.dependencyCount = 0;
+render_pass_create_info.pDependencies = nullptr;
VkRenderPass render_pass;
VkResult res = vkCreateRenderPass(device, &render_pass_create_info, nullptr, &render_pass);
@@ -4108,6 +4147,8 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
bool is_compute = false;
+uint32_t compute_local_size[3] = { 0, 0, 0 };
for (int i = 0; i < p_stages.size(); i++) {
if (p_stages[i].shader_stage == SHADER_STAGE_COMPUTE) {
is_compute = true;
@@ -4124,6 +4165,11 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(),
"Reflection of SPIR-V shader stage '" + String(shader_stage_names[p_stages[i].shader_stage]) + "' failed parsing shader.");
+if (is_compute) {
+compute_local_size[0] = module.entry_points->local_size.x;
+compute_local_size[1] = module.entry_points->local_size.y;
+compute_local_size[2] = module.entry_points->local_size.z;
+}
uint32_t binding_count = 0;
result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, nullptr);
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(),
@@ -4328,6 +4374,7 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
}
}
}
uint32_t pc_count = 0;
result = spvReflectEnumeratePushConstantBlocks(&module, &pc_count, nullptr);
ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, RID(),
@@ -4376,6 +4423,9 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
shader.fragment_outputs = fragment_outputs;
shader.push_constant = push_constant;
shader.is_compute = is_compute;
+shader.compute_local_size[0] = compute_local_size[0];
+shader.compute_local_size[1] = compute_local_size[1];
+shader.compute_local_size[2] = compute_local_size[2];
String error_text;
@@ -5216,7 +5266,14 @@ Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint
#ifdef FORCE_FULL_BARRIER
_full_barrier(true);
#else
-_buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true);
+if (dst_stage_mask == 0) {
+dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+}
+if (p_post_barrier != RD::BARRIER_MASK_NO_BARRIER) {
+_buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, dst_stage_mask);
+}
#endif
return err;
}
@@ -5255,7 +5312,12 @@ Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint3
#ifdef FORCE_FULL_BARRIER
_full_barrier(true);
#else
-_buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, p_post_barrier);
+if (dst_stage_mask == 0) {
+dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+}
+_buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, dst_stage_mask);
#endif
return OK;
}
@@ -5710,6 +5772,9 @@ RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader) {
pipeline.pipeline_layout = shader->pipeline_layout;
pipeline.shader = p_shader;
pipeline.push_constant_size = shader->push_constant.push_constant_size;
+pipeline.local_group_size[0] = shader->compute_local_size[0];
+pipeline.local_group_size[1] = shader->compute_local_size[1];
+pipeline.local_group_size[2] = shader->compute_local_size[2];
//create ID to associate with this pipeline
RID id = compute_pipeline_owner.make_rid(pipeline);
@@ -6019,7 +6084,7 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time.");
-ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time.");
+ERR_FAIL_COND_V_MSG(compute_list != nullptr && !compute_list->state.allow_draw_overlap, INVALID_ID, "Only one draw/compute list can be active at the same time.");
Framebuffer *framebuffer = framebuffer_owner.getornull(p_framebuffer);
ERR_FAIL_COND_V(!framebuffer, INVALID_ID);
@@ -6040,7 +6105,14 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu
viewport_offset = regioni.position;
viewport_size = regioni.size;
+if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) {
+needs_clear_color = true;
+p_initial_color_action = INITIAL_ACTION_CONTINUE;
+}
+if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) {
+needs_clear_depth = true;
+p_initial_depth_action = INITIAL_ACTION_CONTINUE;
+}
if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION) {
needs_clear_color = true;
p_initial_color_action = INITIAL_ACTION_KEEP;
@@ -6388,6 +6460,19 @@ void RenderingDeviceVulkan::draw_list_bind_uniform_set(DrawListID p_list, RID p_
dl->state.sets[p_index].uniform_set_format = uniform_set->format;
dl->state.sets[p_index].uniform_set = p_uniform_set;
+uint32_t mst_count = uniform_set->mutable_storage_textures.size();
+if (mst_count) {
+Texture **mst_textures = const_cast<UniformSet *>(uniform_set)->mutable_storage_textures.ptrw();
+for (uint32_t i = 0; i < mst_count; i++) {
+if (mst_textures[i]->used_in_frame != frames_drawn) {
+mst_textures[i]->used_in_frame = frames_drawn;
+mst_textures[i]->used_in_transfer = false;
+mst_textures[i]->used_in_compute = false;
+}
+mst_textures[i]->used_in_raster = true;
+}
+}
#ifdef DEBUG_ENABLED
{ //validate that textures bound are not attached as framebuffer bindings
uint32_t attachable_count = uniform_set->attachable_textures.size();
@@ -6673,23 +6758,43 @@ void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) {
access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_post_barrier & BARRIER_MASK_RASTER) {
-barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
-access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
+barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT /*| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT*/;
+access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT /*| VK_ACCESS_INDIRECT_COMMAND_READ_BIT*/;
}
if (p_post_barrier & BARRIER_MASK_TRANSFER) {
barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
}
+if (barrier_flags == 0) {
+barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+}
draw_list_bound_textures.clear();
-for (int i = 0; i < draw_list_storage_textures.size(); i++) {
+VkImageMemoryBarrier *image_barriers = nullptr;
+uint32_t image_barrier_count = draw_list_storage_textures.size();
+if (image_barrier_count) {
+image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * draw_list_storage_textures.size());
+}
+uint32_t src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+uint32_t src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+if (image_barrier_count) {
+src_stage |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+src_access |= VK_ACCESS_SHADER_WRITE_BIT;
+}
+for (uint32_t i = 0; i < image_barrier_count; i++) {
Texture *texture = texture_owner.getornull(draw_list_storage_textures[i]);
-VkImageMemoryBarrier image_memory_barrier;
+VkImageMemoryBarrier &image_memory_barrier = image_barriers[i];
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
-image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+image_memory_barrier.srcAccessMask = src_access;
image_memory_barrier.dstAccessMask = access_flags;
image_memory_barrier.oldLayout = texture->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
@@ -6703,8 +6808,6 @@ void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) {
image_memory_barrier.subresourceRange.baseArrayLayer = texture->base_layer;
image_memory_barrier.subresourceRange.layerCount = texture->layers;
-vkCmdPipelineBarrier(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
texture->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
@@ -6717,7 +6820,17 @@ void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) {
#ifdef FORCE_FULL_BARRIER
_full_barrier(true);
#else
-_memory_barrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, barrier_flags, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, access_flags, true);
+VkMemoryBarrier mem_barrier;
+mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+mem_barrier.pNext = nullptr;
+mem_barrier.srcAccessMask = src_access;
+mem_barrier.dstAccessMask = access_flags;
+if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) {
+vkCmdPipelineBarrier(frames[frame].draw_command_buffer, src_stage, barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers);
+}
#endif
}
@@ -6725,12 +6838,13 @@ void RenderingDeviceVulkan::draw_list_end(uint32_t p_post_barrier) {
/**** COMPUTE LISTS ****/
/***********************/
-RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin() {
-ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time.");
+RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin(bool p_allow_draw_overlap) {
+ERR_FAIL_COND_V_MSG(!p_allow_draw_overlap && draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time.");
ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time.");
compute_list = memnew(ComputeList);
compute_list->command_buffer = frames[frame].draw_command_buffer;
+compute_list->state.allow_draw_overlap = p_allow_draw_overlap;
return ID_TYPE_COMPUTE_LIST;
}
@@ -6787,6 +6901,9 @@ void RenderingDeviceVulkan::compute_list_bind_compute_pipeline(ComputeListID p_l
}
cl->state.pipeline_shader = pipeline->shader;
+cl->state.local_group_size[0] = pipeline->local_group_size[0];
+cl->state.local_group_size[1] = pipeline->local_group_size[1];
+cl->state.local_group_size[2] = pipeline->local_group_size[2];
}
#ifdef DEBUG_ENABLED
@@ -6824,11 +6941,24 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list,
cl->state.sets[p_index].uniform_set = p_uniform_set;
uint32_t textures_to_sampled_count = uniform_set->mutable_sampled_textures.size();
+uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size();
Texture **textures_to_sampled = uniform_set->mutable_sampled_textures.ptrw();
+VkImageMemoryBarrier *texture_barriers = nullptr;
+if (textures_to_sampled_count + textures_to_storage_count) {
+texture_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * (textures_to_sampled_count + textures_to_storage_count));
+}
+uint32_t texture_barrier_count = 0;
+uint32_t src_stage_flags = 0;
for (uint32_t i = 0; i < textures_to_sampled_count; i++) {
if (textures_to_sampled[i]->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
-VkImageMemoryBarrier image_memory_barrier;
+src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++];
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
@@ -6845,23 +6975,55 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list,
image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_sampled[i]->base_layer;
image_memory_barrier.subresourceRange.layerCount = textures_to_sampled[i]->layers;
-vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
textures_to_sampled[i]->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
cl->state.textures_to_sampled_layout.erase(textures_to_sampled[i]);
}
+if (textures_to_sampled[i]->used_in_frame != frames_drawn) {
+textures_to_sampled[i]->used_in_frame = frames_drawn;
+textures_to_sampled[i]->used_in_transfer = false;
+textures_to_sampled[i]->used_in_raster = false;
+}
+textures_to_sampled[i]->used_in_compute = true;
}
-uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size();
Texture **textures_to_storage = uniform_set->mutable_storage_textures.ptrw();
for (uint32_t i = 0; i < textures_to_storage_count; i++) {
if (textures_to_storage[i]->layout != VK_IMAGE_LAYOUT_GENERAL) {
-VkImageMemoryBarrier image_memory_barrier;
+uint32_t src_access_flags = 0;
+if (textures_to_storage[i]->used_in_frame == frames_drawn) {
+if (textures_to_storage[i]->used_in_compute) {
+src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+}
+if (textures_to_storage[i]->used_in_raster) {
+src_stage_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
+src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+}
+if (textures_to_storage[i]->used_in_transfer) {
+src_stage_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
+}
+textures_to_storage[i]->used_in_compute = false;
+textures_to_storage[i]->used_in_raster = false;
+textures_to_storage[i]->used_in_compute = false;
+} else {
+src_access_flags = 0;
+textures_to_storage[i]->used_in_compute = false;
+textures_to_storage[i]->used_in_raster = false;
+textures_to_storage[i]->used_in_compute = false;
+textures_to_storage[i]->used_in_frame = frames_drawn;
+}
+VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++];
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
-image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+image_memory_barrier.srcAccessMask = src_access_flags;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
image_memory_barrier.oldLayout = textures_to_storage[i]->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
@@ -6875,14 +7037,20 @@ void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list,
image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_storage[i]->base_layer;
image_memory_barrier.subresourceRange.layerCount = textures_to_storage[i]->layers;
-vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
textures_to_storage[i]->layout = VK_IMAGE_LAYOUT_GENERAL;
cl->state.textures_to_sampled_layout.insert(textures_to_storage[i]); //needs to go back to sampled layout afterwards
}
}
+if (texture_barrier_count) {
+if (src_stage_flags == 0) {
+src_stage_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+}
+vkCmdPipelineBarrier(cl->command_buffer, src_stage_flags, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, texture_barrier_count, texture_barriers);
+}
#if 0
{ //validate that textures bound are not attached as framebuffer bindings
uint32_t attachable_count = uniform_set->attachable_textures.size();
@@ -6976,6 +7144,27 @@ void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t
vkCmdDispatch(cl->command_buffer, p_x_groups, p_y_groups, p_z_groups);
}
+void RenderingDeviceVulkan::compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads) {
+ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
+ERR_FAIL_COND(!compute_list);
+ComputeList *cl = compute_list;
+#ifdef DEBUG_ENABLED
+ERR_FAIL_COND_MSG(!cl->validation.pipeline_active, "No compute pipeline was set before attempting to draw.");
+if (cl->validation.pipeline_push_constant_size > 0) {
+//using push constants, check that they were supplied
+ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_supplied,
+"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
+}
+#endif
+compute_list_dispatch(p_list, (p_x_threads - 1) / cl->state.local_group_size[0] + 1, (p_y_threads - 1) / cl->state.local_group_size[1] + 1, (p_z_threads - 1) / cl->state.local_group_size[2] + 1);
+}
void RenderingDeviceVulkan::compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) {
ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
ERR_FAIL_COND(!compute_list);
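Note: the new compute_list_dispatch_threads() above takes thread counts rather than workgroup counts and derives the group count from the local size reflected out of the compute shader (stored in ComputeList::state.local_group_size). The rounding is a plain ceiling division, which is why the call sites in cluster_builder_rd.cpp and effects_rd.cpp further down can drop their hand-written x_groups/y_groups math. A worked example (numbers are illustrative, not from this PR):

    // Equivalent of what the helper now computes internally, for an 8x8x1 local size:
    uint32_t x_groups = (1920 - 1) / 8 + 1; // 240 groups cover 1920 threads
    uint32_t y_groups = (1080 - 1) / 8 + 1; // 135 groups cover 1080 threads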
@@ -7047,7 +7236,7 @@ void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) {
uint32_t access_flags = 0;
if (p_post_barrier & BARRIER_MASK_COMPUTE) {
barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
-access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
+access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_post_barrier & BARRIER_MASK_RASTER) {
barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
@@ -7058,8 +7247,22 @@ void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) {
access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
}
+if (barrier_flags == 0) {
+barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+}
+VkImageMemoryBarrier *image_barriers = nullptr;
+uint32_t image_barrier_count = compute_list->state.textures_to_sampled_layout.size();
+if (image_barrier_count) {
+image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * image_barrier_count);
+}
+uint32_t barrier_idx = 0;
for (Set<Texture *>::Element *E = compute_list->state.textures_to_sampled_layout.front(); E; E = E->next()) {
-VkImageMemoryBarrier image_memory_barrier;
+VkImageMemoryBarrier &image_memory_barrier = image_barriers[barrier_idx++];
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
@@ -7076,19 +7279,33 @@ void RenderingDeviceVulkan::compute_list_end(uint32_t p_post_barrier) {
image_memory_barrier.subresourceRange.baseArrayLayer = E->get()->base_layer;
image_memory_barrier.subresourceRange.layerCount = E->get()->layers;
-// TODO: Look at the usages in the compute list and determine tighter dst stage and access masks based on some "final" usage equivalent
-vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
E->get()->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+if (E->get()->used_in_frame != frames_drawn) {
+E->get()->used_in_transfer = false;
+E->get()->used_in_raster = false;
+E->get()->used_in_compute = false;
+E->get()->used_in_frame = frames_drawn;
+}
}
-memdelete(compute_list);
-compute_list = nullptr;
#ifdef FORCE_FULL_BARRIER
_full_barrier(true);
#else
-_memory_barrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT, true);
+VkMemoryBarrier mem_barrier;
+mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+mem_barrier.pNext = nullptr;
+mem_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+mem_barrier.dstAccessMask = access_flags;
+if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) {
+vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, barrier_flags, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers);
+}
#endif
+memdelete(compute_list);
+compute_list = nullptr;
}
void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) {
@@ -7111,7 +7328,7 @@ void RenderingDeviceVulkan::barrier(uint32_t p_from, uint32_t p_to) {
uint32_t dst_access_flags = 0;
if (p_to & BARRIER_MASK_COMPUTE) {
dst_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
-dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
+dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (p_to & BARRIER_MASK_RASTER) {
dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
@@ -7325,6 +7542,16 @@ void RenderingDeviceVulkan::draw_command_end_label() {
context->command_end_label(frames[frame].draw_command_buffer);
}
+String RenderingDeviceVulkan::get_device_vendor_name() const {
+return context->get_device_vendor_name();
+}
+String RenderingDeviceVulkan::get_device_name() const {
+return context->get_device_name();
+}
+String RenderingDeviceVulkan::get_device_pipeline_cache_uuid() const {
+return context->get_device_pipeline_cache_uuid();
+}
void RenderingDeviceVulkan::_finalize_command_bufers() {
if (draw_list) {
ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work).");
@@ -7377,6 +7604,7 @@ void RenderingDeviceVulkan::_begin_frame() {
if (frames[frame].timestamp_count) {
vkGetQueryPoolResults(device, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count, sizeof(uint64_t) * max_timestamp_query_elements, frames[frame].timestamp_result_values, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT);
+vkCmdResetQueryPool(frames[frame].setup_command_buffer, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count);
SWAP(frames[frame].timestamp_names, frames[frame].timestamp_result_names);
SWAP(frames[frame].timestamp_cpu_values, frames[frame].timestamp_cpu_result_values);
}


@@ -141,6 +141,11 @@ class RenderingDeviceVulkan : public RenderingDevice {
VkImageLayout layout;
+uint64_t used_in_frame = 0;
+bool used_in_transfer = false;
+bool used_in_raster = false;
+bool used_in_compute = false;
uint32_t read_aspect_mask = 0;
uint32_t barrier_aspect_mask = 0;
bool bound = false; //bound to framebffer
@@ -528,6 +533,8 @@ class RenderingDeviceVulkan : public RenderingDevice {
PushConstant push_constant;
+uint32_t compute_local_size[3] = { 0, 0, 0 };
bool is_compute = false;
int max_output = 0;
Vector<Set> sets;
@@ -686,6 +693,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
VkPipeline pipeline = VK_NULL_HANDLE;
uint32_t push_constant_size = 0;
uint32_t push_constant_stages = 0;
+uint32_t local_group_size[3] = { 0, 0, 0 };
};
RID_Owner<ComputePipeline, true> compute_pipeline_owner;
@@ -808,8 +816,10 @@ class RenderingDeviceVulkan : public RenderingDevice {
uint32_t set_count = 0;
RID pipeline;
RID pipeline_shader;
+uint32_t local_group_size[3] = { 0, 0, 0 };
VkPipelineLayout pipeline_layout = VK_NULL_HANDLE;
uint32_t pipeline_push_constant_stages = 0;
+bool allow_draw_overlap;
} state;
#ifdef DEBUG_ENABLED
@@ -1028,13 +1038,14 @@ public:
/**** COMPUTE LISTS ****/
/***********************/
-virtual ComputeListID compute_list_begin();
+virtual ComputeListID compute_list_begin(bool p_allow_draw_overlap = false);
virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline);
virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index);
virtual void compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size);
virtual void compute_list_add_barrier(ComputeListID p_list);
virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
+virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads);
virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset);
virtual void compute_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL);
@@ -1085,6 +1096,10 @@ public:
virtual void draw_command_insert_label(String p_label_name, const Color p_color = Color(1, 1, 1, 1));
virtual void draw_command_end_label();
+virtual String get_device_vendor_name() const;
+virtual String get_device_name() const;
+virtual String get_device_pipeline_cache_uuid() const;
RenderingDeviceVulkan();
~RenderingDeviceVulkan();
};


@@ -380,7 +380,8 @@ Error VulkanContext::_create_physical_device() {
ERR_FAIL_V(ERR_CANT_CREATE);
}
/* for now, just grab the first physical device */
-gpu = physical_devices[0];
+uint32_t device_index = 0;
+gpu = physical_devices[device_index];
free(physical_devices);
/* Look for device extensions */
@@ -389,6 +390,40 @@
enabled_extension_count = 0;
memset(extension_names, 0, sizeof(extension_names));
+/* Get identifier properties */
+vkGetPhysicalDeviceProperties(gpu, &gpu_props);
+static const struct {
+uint32_t id;
+const char *name;
+} vendor_names[] = {
+{ 0x1002, "AMD" },
+{ 0x1010, "ImgTec" },
+{ 0x10DE, "NVIDIA" },
+{ 0x13B5, "ARM" },
+{ 0x5143, "Qualcomm" },
+{ 0x8086, "INTEL" },
+{ 0, nullptr },
+};
+device_name = gpu_props.deviceName;
+pipeline_cache_id = String::hex_encode_buffer(gpu_props.pipelineCacheUUID, VK_UUID_SIZE);
+pipeline_cache_id += "-driver-" + itos(gpu_props.driverVersion);
+{
+device_vendor = "Unknown";
+uint32_t vendor_idx = 0;
+while (vendor_names[vendor_idx].name != nullptr) {
+if (gpu_props.vendorID == vendor_names[vendor_idx].id) {
+device_vendor = vendor_names[vendor_idx].name;
+break;
+}
+vendor_idx++;
+}
+}
+#ifdef DEBUG_ENABLED
+print_line("Using Vulkan Device #" + itos(device_index) + ": " + device_vendor + " - " + device_name);
+#endif
+device_api_version = gpu_props.apiVersion;
err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, nullptr);
ERR_FAIL_COND_V(err, ERR_CANT_CREATE);
@@ -498,7 +533,6 @@ Error VulkanContext::_create_physical_device() {
break;
}
}
-vkGetPhysicalDeviceProperties(gpu, &gpu_props);
/* Call with NULL data to get count */
vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_family_count, nullptr);
@@ -565,6 +599,7 @@ Error VulkanContext::_create_device() {
}
err = vkCreateDevice(gpu, &sdevice, nullptr, &device);
ERR_FAIL_COND_V(err, ERR_CANT_CREATE);
return OK;
}
@@ -1590,11 +1625,12 @@ void VulkanContext::command_begin_label(VkCommandBuffer p_command_buffer, String
if (!enabled_debug_utils) {
return;
}
+CharString cs = p_label_name.utf8().get_data();
VkDebugUtilsLabelEXT label;
label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
label.pNext = nullptr;
-CharString label_name = p_label_name.utf8();
-label.pLabelName = label_name.get_data();
+label.pLabelName = cs.get_data();
label.color[0] = p_color[0];
label.color[1] = p_color[1];
label.color[2] = p_color[2];
@@ -1606,11 +1642,11 @@ void VulkanContext::command_insert_label(VkCommandBuffer p_command_buffer, Strin
if (!enabled_debug_utils) {
return;
}
+CharString cs = p_label_name.utf8().get_data();
VkDebugUtilsLabelEXT label;
label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
label.pNext = nullptr;
-CharString label_name = p_label_name.utf8();
-label.pLabelName = label_name.get_data();
+label.pLabelName = cs.get_data();
label.color[0] = p_color[0];
label.color[1] = p_color[1];
label.color[2] = p_color[2];
@@ -1629,16 +1665,26 @@ void VulkanContext::set_object_name(VkObjectType p_object_type, uint64_t p_objec
if (!enabled_debug_utils) {
return;
}
+CharString obj_data = p_object_name.utf8();
VkDebugUtilsObjectNameInfoEXT name_info;
name_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT;
name_info.pNext = nullptr;
name_info.objectType = p_object_type;
name_info.objectHandle = p_object_handle;
-CharString object_name = p_object_name.utf8();
-name_info.pObjectName = object_name.get_data();
+name_info.pObjectName = obj_data.get_data();
SetDebugUtilsObjectNameEXT(device, &name_info);
}
+String VulkanContext::get_device_vendor_name() const {
+return device_vendor;
+}
+String VulkanContext::get_device_name() const {
+return device_name;
+}
+String VulkanContext::get_device_pipeline_cache_uuid() const {
+return pipeline_cache_id;
+}
VulkanContext::VulkanContext() {
use_validation_layers = Engine::get_singleton()->is_validation_layers_enabled();


@@ -57,6 +57,11 @@ class VulkanContext {
bool device_initialized = false;
bool inst_initialized = false;
+String device_vendor;
+String device_name;
+String pipeline_cache_id;
+uint32_t device_api_version = 0;
bool buffers_prepared = false;
// Present queue.
@@ -215,6 +220,10 @@ public:
void command_end_label(VkCommandBuffer p_command_buffer);
void set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name);
+String get_device_vendor_name() const;
+String get_device_name() const;
+String get_device_pipeline_cache_uuid() const;
VulkanContext();
virtual ~VulkanContext();
};


@@ -597,5 +597,4 @@ PhysicalSkyMaterial::PhysicalSkyMaterial() {
PhysicalSkyMaterial::~PhysicalSkyMaterial() {
RS::get_singleton()->free(shader);
-RS::get_singleton()->material_set_shader(_get_material(), RID());
}


@@ -400,12 +400,14 @@ void ClusterBuilderRD::begin(const Transform &p_view_transform, const CameraMatr
void ClusterBuilderRD::bake_cluster() {
RENDER_TIMESTAMP(">Bake Cluster");
+RD::get_singleton()->draw_command_begin_label("Bake Light Cluster");
//clear cluster buffer
-RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size);
+RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size, 0);
if (render_element_count > 0) {
//clear render buffer
-RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size);
+RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size, 0);
{ //fill state uniform
@@ -420,15 +422,16 @@ void ClusterBuilderRD::bake_cluster() {
state.cluster_depth_offset = (render_element_max / 32);
state.cluster_data_size = state.cluster_depth_offset + render_element_max;
-RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state);
+RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state, 0);
}
//update instances
-RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements);
+RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements, 0);
RENDER_TIMESTAMP("Render Elements");
+RD::get_singleton()->barrier(RD::BARRIER_MASK_TRANSFER, RD::BARRIER_MASK_RASTER);
//render elements
{
RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD);
@@ -469,7 +472,7 @@ void ClusterBuilderRD::bake_cluster() {
RD::get_singleton()->draw_list_draw(draw_list, true, instances);
i += instances;
}
-RD::get_singleton()->draw_list_end();
+RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_COMPUTE);
}
//store elements
RENDER_TIMESTAMP("Pack Elements");
@@ -491,12 +494,15 @@ void ClusterBuilderRD::bake_cluster() {
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterStore::PushConstant));
-RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1, 8, 8, 1);
-RD::get_singleton()->compute_list_end();
+RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1);
+RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE);
}
+} else {
+RD::get_singleton()->barrier(RD::BARRIER_MASK_TRANSFER, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE);
}
RENDER_TIMESTAMP("<Bake Cluster");
+RD::get_singleton()->draw_command_end_label();
}
void ClusterBuilderRD::debug(ElementType p_element) {
@@ -519,7 +525,7 @@ void ClusterBuilderRD::debug(ElementType p_element) {
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterDebug::PushConstant));
-RD::get_singleton()->compute_list_dispatch_threads(compute_list, screen_size.x, screen_size.y, 1, 8, 8, 1);
+RD::get_singleton()->compute_list_dispatch_threads(compute_list, screen_size.x, screen_size.y, 1);
RD::get_singleton()->compute_list_end();
}
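Note: the bake_cluster() hunks above show the transfer side of the new pattern: each buffer write opts out of its automatic post-barrier by passing 0, and a single explicit barrier() call then transitions all of them at once before the consuming pass runs. Condensed from the hunks above (arguments shortened, not a verbatim quote of the final code):

    RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size, 0);                 // no post-barrier
    RD::get_singleton()->buffer_update(element_buffer, 0, /*size*/ sizeof(RenderElementData) * render_element_count, render_elements, 0);
    RD::get_singleton()->barrier(RD::BARRIER_MASK_TRANSFER, RD::BARRIER_MASK_RASTER);             // one transfer-to-raster transition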


@@ -299,15 +299,12 @@ void EffectsRD::copy_to_rect(RID p_source_rd_texture, RID p_dest_texture, const
copy.push_constant.target[0] = p_rect.position.x;
copy.push_constant.target[1] = p_rect.position.y;
-int32_t x_groups = (p_rect.size.width - 1) / 8 + 1;
-int32_t y_groups = (p_rect.size.height - 1) / 8 + 1;
RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8_bit_dst ? COPY_MODE_SIMPLY_COPY_8BIT : COPY_MODE_SIMPLY_COPY]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant));
-RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1);
+RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1);
RD::get_singleton()->compute_list_end();
}
@@ -322,15 +319,12 @@ void EffectsRD::copy_cubemap_to_panorama(RID p_source_cube, RID p_dest_panorama,
copy.push_constant.target[1] = 0;
copy.push_constant.camera_z_far = p_lod;
-int32_t x_groups = (p_panorama_size.width - 1) / 8 + 1;
-int32_t y_groups = (p_panorama_size.height - 1) / 8 + 1;
RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_is_array ? COPY_MODE_CUBE_ARRAY_TO_PANORAMA : COPY_MODE_CUBE_TO_PANORAMA]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_cube), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_panorama), 3);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant));
-RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1);
+RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_panorama_size.width, p_panorama_size.height, 1);
RD::get_singleton()->compute_list_end();
}
@@ -349,15 +343,12 @@ void EffectsRD::copy_depth_to_rect_and_linearize(RID p_source_rd_texture, RID p_
copy.push_constant.camera_z_far = p_z_far;
copy.push_constant.camera_z_near = p_z_near;
-int32_t x_groups = (p_rect.size.width - 1) / 8 + 1;
-int32_t y_groups = (p_rect.size.height - 1) / 8 + 1;
RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_LINEARIZE_DEPTH]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant));
-RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1);
+RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1);
RD::get_singleton()->compute_list_end();
}
@@ -374,15 +365,12 @@ void EffectsRD::copy_depth_to_rect(RID p_source_rd_texture, RID p_dest_texture,
copy.push_constant.target[0] = p_rect.position.x;
copy.push_constant.target[1] = p_rect.position.y;
-int32_t x_groups = (p_rect.size.width - 1) / 8 + 1;
-int32_t y_groups = (p_rect.size.height - 1) / 8 + 1;
RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_SIMPLY_COPY_DEPTH]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant));
-RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1);
+RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_rect.size.width, p_rect.size.height, 1);
RD::get_singleton()->compute_list_end();
}
@@ -400,14 +388,11 @@ void EffectsRD::set_color(RID p_dest_texture, const Color &p_color, const Rect2i
copy.push_constant.set_color[2] = p_color.b;
copy.push_constant.set_color[3] = p_color.a;
-int32_t x_groups = (p_region.size.width - 1) / 8 + 1;
-int32_t y_groups = (p_region.size.height - 1) / 8 + 1;
RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? COPY_MODE_SET_COLOR_8BIT : COPY_MODE_SET_COLOR]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant));
-RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1);
+RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1);
RD::get_singleton()->compute_list_end();
} }
@ -420,8 +405,6 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back
copy.push_constant.section[2] = p_region.size.width; copy.push_constant.section[2] = p_region.size.width;
copy.push_constant.section[3] = p_region.size.height; copy.push_constant.section[3] = p_region.size.height;
int32_t x_groups = (p_region.size.width - 1) / 8 + 1;
int32_t y_groups = (p_region.size.height - 1) / 8 + 1;
//HORIZONTAL //HORIZONTAL
RD::DrawListID compute_list = RD::get_singleton()->compute_list_begin(); RD::DrawListID compute_list = RD::get_singleton()->compute_list_begin();
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? COPY_MODE_GAUSSIAN_COPY_8BIT : COPY_MODE_GAUSSIAN_COPY]); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[p_8bit_dst ? COPY_MODE_GAUSSIAN_COPY_8BIT : COPY_MODE_GAUSSIAN_COPY]);
@ -431,7 +414,7 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back
copy.push_constant.flags = base_flags | COPY_FLAG_HORIZONTAL; copy.push_constant.flags = base_flags | COPY_FLAG_HORIZONTAL;
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1);
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
@ -442,7 +425,7 @@ void EffectsRD::gaussian_blur(RID p_source_rd_texture, RID p_texture, RID p_back
copy.push_constant.flags = base_flags; copy.push_constant.flags = base_flags;
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_region.size.width, p_region.size.height, 1);
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
} }
@ -452,9 +435,6 @@ void EffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const
CopyMode copy_mode = p_first_pass && p_auto_exposure.is_valid() ? COPY_MODE_GAUSSIAN_GLOW_AUTO_EXPOSURE : COPY_MODE_GAUSSIAN_GLOW; CopyMode copy_mode = p_first_pass && p_auto_exposure.is_valid() ? COPY_MODE_GAUSSIAN_GLOW_AUTO_EXPOSURE : COPY_MODE_GAUSSIAN_GLOW;
uint32_t base_flags = 0; uint32_t base_flags = 0;
int32_t x_groups = (p_size.width + 7) / 8;
int32_t y_groups = (p_size.height + 7) / 8;
copy.push_constant.section[2] = p_size.x; copy.push_constant.section[2] = p_size.x;
copy.push_constant.section[3] = p_size.y; copy.push_constant.section[3] = p_size.y;
@ -479,16 +459,13 @@ void EffectsRD::gaussian_glow(RID p_source_rd_texture, RID p_back_texture, const
copy.push_constant.flags = base_flags | (p_first_pass ? COPY_FLAG_GLOW_FIRST_PASS : 0) | (p_high_quality ? COPY_FLAG_HIGH_QUALITY_GLOW : 0); copy.push_constant.flags = base_flags | (p_first_pass ? COPY_FLAG_GLOW_FIRST_PASS : 0) | (p_high_quality ? COPY_FLAG_HIGH_QUALITY_GLOW : 0);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.width, p_size.height, 1);
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
} }
void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, RenderingServer::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color &p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera) { void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, RenderingServer::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color &p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera) {
RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
int32_t x_groups = (p_screen_size.width - 1) / 8 + 1;
int32_t y_groups = (p_screen_size.height - 1) / 8 + 1;
{ //scale color and depth to half { //scale color and depth to half
ssr_scale.push_constant.camera_z_far = p_camera.get_z_far(); ssr_scale.push_constant.camera_z_far = p_camera.get_z_far();
ssr_scale.push_constant.camera_z_near = p_camera.get_z_near(); ssr_scale.push_constant.camera_z_near = p_camera.get_z_near();
@ -506,7 +483,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R
RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_scale.push_constant, sizeof(ScreenSpaceReflectionScalePushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_scale.push_constant, sizeof(ScreenSpaceReflectionScalePushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1);
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
} }
@ -547,7 +524,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R
} }
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_normal), 2); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_normal), 2);
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1);
} }
if (p_roughness_quality != RS::ENV_SSR_ROUGNESS_QUALITY_DISABLED) { if (p_roughness_quality != RS::ENV_SSR_ROUGNESS_QUALITY_DISABLED) {
@ -585,7 +562,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R
RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1);
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
@ -600,7 +577,7 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R
RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1);
} }
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
@ -609,9 +586,6 @@ void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, R
void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RenderingServer::SubSurfaceScatteringQuality p_quality) { void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RenderingServer::SubSurfaceScatteringQuality p_quality) {
RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
int32_t x_groups = (p_screen_size.width - 1) / 8 + 1;
int32_t y_groups = (p_screen_size.height - 1) / 8 + 1;
Plane p = p_camera.xform4(Plane(1, 0, -1, 1)); Plane p = p_camera.xform4(Plane(1, 0, -1, 1));
p.normal /= p.d; p.normal /= p.d;
float unit_size = p.normal.x; float unit_size = p.normal.x;
@ -635,7 +609,7 @@ void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_dept
RD::get_singleton()->compute_list_set_push_constant(compute_list, &sss.push_constant, sizeof(SubSurfaceScatteringPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &sss.push_constant, sizeof(SubSurfaceScatteringPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1);
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
@ -646,7 +620,7 @@ void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_dept
sss.push_constant.vertical = true; sss.push_constant.vertical = true;
RD::get_singleton()->compute_list_set_push_constant(compute_list, &sss.push_constant, sizeof(SubSurfaceScatteringPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &sss.push_constant, sizeof(SubSurfaceScatteringPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1);
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
} }
@ -690,15 +664,12 @@ void EffectsRD::make_mipmap(RID p_source_rd_texture, RID p_dest_texture, const S
copy.push_constant.section[2] = p_size.width; copy.push_constant.section[2] = p_size.width;
copy.push_constant.section[3] = p_size.height; copy.push_constant.section[3] = p_size.height;
int32_t x_groups = (p_size.width - 1) / 8 + 1;
int32_t y_groups = (p_size.height - 1) / 8 + 1;
RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_MIPMAP]); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, copy.pipelines[COPY_MODE_MIPMAP]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_rd_texture), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_texture), 3);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy.push_constant, sizeof(CopyPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.width, p_size.height, 1);
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
} }
@ -719,7 +690,7 @@ void EffectsRD::copy_cubemap_to_dp(RID p_source_rd_texture, RID p_dst_framebuffe
RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(CopyToDPPushConstant)); RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(CopyToDPPushConstant));
RD::get_singleton()->draw_list_draw(draw_list, true); RD::get_singleton()->draw_list_draw(draw_list, true);
RD::get_singleton()->draw_list_end(); RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_TRANSFER);
} }
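Note the new argument to draw_list_end(): list-end calls now take a bitmask naming which later pipeline stages must wait on the pass, instead of always emitting a full barrier. A hedged sketch, using the mask names that appear in this diff (the no-argument form is assumed to keep the old full-barrier behaviour):

	// Only raster and transfer commands recorded after this pass wait on it:
	RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_TRANSFER);

	// Callers that do not care keep the previous behaviour:
	RD::get_singleton()->draw_list_end(); // assumed default: RD::BARRIER_MASK_ALL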
void EffectsRD::tonemapper(RID p_source_color, RID p_dst_framebuffer, const TonemapSettings &p_settings) { void EffectsRD::tonemapper(RID p_source_color, RID p_dst_framebuffer, const TonemapSettings &p_settings) {
@ -804,10 +775,7 @@ void EffectsRD::luminance_reduction(RID p_source_texture, const Size2i p_source_
RD::get_singleton()->compute_list_set_push_constant(compute_list, &luminance_reduce.push_constant, sizeof(LuminanceReducePushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &luminance_reduce.push_constant, sizeof(LuminanceReducePushConstant));
int32_t x_groups = (luminance_reduce.push_constant.source_size[0] - 1) / 8 + 1; RD::get_singleton()->compute_list_dispatch_threads(compute_list, luminance_reduce.push_constant.source_size[0], luminance_reduce.push_constant.source_size[1], 1);
int32_t y_groups = (luminance_reduce.push_constant.source_size[1] - 1) / 8 + 1;
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1);
luminance_reduce.push_constant.source_size[0] = MAX(luminance_reduce.push_constant.source_size[0] / 8, 1); luminance_reduce.push_constant.source_size[0] = MAX(luminance_reduce.push_constant.source_size[0] / 8, 1);
luminance_reduce.push_constant.source_size[1] = MAX(luminance_reduce.push_constant.source_size[1] / 8, 1); luminance_reduce.push_constant.source_size[1] = MAX(luminance_reduce.push_constant.source_size[1] / 8, 1);
@ -848,14 +816,12 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_texture), 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_texture), 1);
int32_t x_groups = (p_base_texture_size.x - 1) / 8 + 1;
int32_t y_groups = (p_base_texture_size.y - 1) / 8 + 1;
bokeh.push_constant.size[0] = p_base_texture_size.x; bokeh.push_constant.size[0] = p_base_texture_size.x;
bokeh.push_constant.size[1] = p_base_texture_size.y; bokeh.push_constant.size[1] = p_base_texture_size.y;
RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1);
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
if (p_bokeh_shape == RS::DOF_BOKEH_BOX || p_bokeh_shape == RS::DOF_BOKEH_HEXAGON) { if (p_bokeh_shape == RS::DOF_BOKEH_BOX || p_bokeh_shape == RS::DOF_BOKEH_HEXAGON) {
@ -872,8 +838,6 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_halfsize_texture1), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_halfsize_texture1), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_base_texture), 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_base_texture), 1);
x_groups = ((p_base_texture_size.x >> 1) - 1) / 8 + 1;
y_groups = ((p_base_texture_size.y >> 1) - 1) / 8 + 1;
bokeh.push_constant.size[0] = p_base_texture_size.x >> 1; bokeh.push_constant.size[0] = p_base_texture_size.x >> 1;
bokeh.push_constant.size[1] = p_base_texture_size.y >> 1; bokeh.push_constant.size[1] = p_base_texture_size.y >> 1;
bokeh.push_constant.half_size = true; bokeh.push_constant.half_size = true;
@ -887,7 +851,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i
RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], bokeh.push_constant.size[1], 1);
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
//third pass //third pass
@ -903,7 +867,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i
RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], bokeh.push_constant.size[1], 1);
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
if (p_quality == RS::DOF_BLUR_QUALITY_VERY_LOW || p_quality == RS::DOF_BLUR_QUALITY_LOW) { if (p_quality == RS::DOF_BLUR_QUALITY_VERY_LOW || p_quality == RS::DOF_BLUR_QUALITY_LOW) {
@ -914,8 +878,6 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_halfsize_texture2), 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_halfsize_texture2), 1);
x_groups = (p_base_texture_size.x - 1) / 8 + 1;
y_groups = (p_base_texture_size.y - 1) / 8 + 1;
bokeh.push_constant.size[0] = p_base_texture_size.x; bokeh.push_constant.size[0] = p_base_texture_size.x;
bokeh.push_constant.size[1] = p_base_texture_size.y; bokeh.push_constant.size[1] = p_base_texture_size.y;
bokeh.push_constant.half_size = false; bokeh.push_constant.half_size = false;
@ -923,7 +885,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i
RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1);
} }
} else { } else {
//circle //circle
@ -941,15 +903,13 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_halfsize_texture1), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_halfsize_texture1), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_base_texture), 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_base_texture), 1);
x_groups = ((p_base_texture_size.x >> 1) - 1) / 8 + 1;
y_groups = ((p_base_texture_size.y >> 1) - 1) / 8 + 1;
bokeh.push_constant.size[0] = p_base_texture_size.x >> 1; bokeh.push_constant.size[0] = p_base_texture_size.x >> 1;
bokeh.push_constant.size[1] = p_base_texture_size.y >> 1; bokeh.push_constant.size[1] = p_base_texture_size.y >> 1;
bokeh.push_constant.half_size = true; bokeh.push_constant.half_size = true;
RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, bokeh.push_constant.size[0], bokeh.push_constant.size[1], 1);
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
//circle is just one pass, then upscale //circle is just one pass, then upscale
@ -961,8 +921,6 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_base_texture), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_halfsize_texture1), 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_halfsize_texture1), 1);
x_groups = (p_base_texture_size.x - 1) / 8 + 1;
y_groups = (p_base_texture_size.y - 1) / 8 + 1;
bokeh.push_constant.size[0] = p_base_texture_size.x; bokeh.push_constant.size[0] = p_base_texture_size.x;
bokeh.push_constant.size[1] = p_base_texture_size.y; bokeh.push_constant.size[1] = p_base_texture_size.y;
bokeh.push_constant.half_size = false; bokeh.push_constant.half_size = false;
@ -970,7 +928,7 @@ void EffectsRD::bokeh_dof(RID p_base_texture, RID p_depth_texture, const Size2i
RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &bokeh.push_constant, sizeof(BokehPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_base_texture_size.x, p_base_texture_size.y, 1);
} }
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
@ -995,10 +953,9 @@ void EffectsRD::gather_ssao(RD::ComputeListID p_compute_list, const Vector<RID>
RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_uniform_set_from_image(p_ao_slices[i]), 2); RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_uniform_set_from_image(p_ao_slices[i]), 2);
RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssao.gather_push_constant, sizeof(SSAOGatherPushConstant)); RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssao.gather_push_constant, sizeof(SSAOGatherPushConstant));
int x_groups = ((p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; Size2i size = Size2i(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1));
int y_groups = ((p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1;
RD::get_singleton()->compute_list_dispatch(p_compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(p_compute_list, size.x, size.y, 1);
} }
RD::get_singleton()->compute_list_add_barrier(p_compute_list); RD::get_singleton()->compute_list_add_barrier(p_compute_list);
} }
@ -1072,10 +1029,9 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep
} }
RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.downsample_push_constant, sizeof(SSAODownsamplePushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.downsample_push_constant, sizeof(SSAODownsamplePushConstant));
int x_groups = (MAX(1, p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; Size2i size(MAX(1, p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)), MAX(1, p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)));
int y_groups = (MAX(1, p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1;
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1);
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
RD::get_singleton()->draw_command_end_label(); // Downsample SSAO RD::get_singleton()->draw_command_end_label(); // Downsample SSAO
} }
@ -1193,21 +1149,19 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_BASE]); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_BASE]);
gather_ssao(compute_list, p_ao_pong_slices, p_settings, true); gather_ssao(compute_list, p_ao_pong_slices, p_settings, true);
//generate importance map //generate importance map
int x_groups = (p_settings.quarter_screen_size.x - 1) / 8 + 1;
int y_groups = (p_settings.quarter_screen_size.y - 1) / 8 + 1;
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GENERATE_IMPORTANCE_MAP]); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GENERATE_IMPORTANCE_MAP]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1);
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
//process importance map A //process importance map A
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPA]); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPA]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_importance_map), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_importance_map), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map_pong), 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map_pong), 1);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1);
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
//process Importance Map B //process Importance Map B
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPB]); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPB]);
@ -1215,7 +1169,7 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssao.counter_uniform_set, 2); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssao.counter_uniform_set, 2);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant));
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1);
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_ADAPTIVE]); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_ADAPTIVE]);
@ -1272,10 +1226,8 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep
} }
RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant));
int x_groups = ((p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; Size2i size(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1));
int y_groups = ((p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)) - 1) / 8 + 1; RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1);
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1);
} }
if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW) { if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW) {
@ -1313,18 +1265,15 @@ void EffectsRD::generate_ssao(RID p_depth_buffer, RID p_normal_buffer, RID p_dep
RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.interleave_push_constant, sizeof(SSAOInterleavePushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.interleave_push_constant, sizeof(SSAOInterleavePushConstant));
int x_groups = (p_settings.full_screen_size.x - 1) / 8 + 1; RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.full_screen_size.x, p_settings.full_screen_size.y, 1);
int y_groups = (p_settings.full_screen_size.y - 1) / 8 + 1;
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1);
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
RD::get_singleton()->draw_command_end_label(); // Interleave RD::get_singleton()->draw_command_end_label(); // Interleave
} }
RD::get_singleton()->draw_command_end_label(); //SSAO RD::get_singleton()->draw_command_end_label(); //SSAO
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_TRANSFER); //wait for upcoming transfer
int zero[1] = { 0 }; int zero[1] = { 0 };
RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero); RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, 0); //no barrier
} }
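The tail of the SSAO pass above shows the intended pairing: the compute list ends with a barrier scoped to transfer work only, and the buffer_update that follows passes 0 so it adds no second barrier of its own (reading the trailing argument as a post-update barrier mask, per the "no barrier" comment). Condensed, the pattern is:

	// End the compute work, fencing only against the upcoming transfer.
	RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_TRANSFER);

	// Reset the importance-map counter; 0 = request no post-update barrier.
	int zero[1] = { 0 };
	RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, 0);

Batching the reset this way avoids stacking a full barrier and a transfer barrier back to back at the end of the pass.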
void EffectsRD::roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve) { void EffectsRD::roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve) {
@ -1337,12 +1286,9 @@ void EffectsRD::roughness_limit(RID p_source_normal, RID p_roughness, const Size
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_normal), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_normal), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_roughness), 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_roughness), 1);
int x_groups = (p_size.x - 1) / 8 + 1;
int y_groups = (p_size.y - 1) / 8 + 1;
RD::get_singleton()->compute_list_set_push_constant(compute_list, &roughness_limiter.push_constant, sizeof(RoughnessLimiterPushConstant)); //not used but set anyway RD::get_singleton()->compute_list_set_push_constant(compute_list, &roughness_limiter.push_constant, sizeof(RoughnessLimiterPushConstant)); //not used but set anyway
RD::get_singleton()->compute_list_dispatch(compute_list, x_groups, y_groups, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_size.x, p_size.y, 1);
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
} }
@ -1455,7 +1401,7 @@ void EffectsRD::render_sky(RD::DrawListID p_list, float p_time, RID p_fb, RID p_
RD::get_singleton()->draw_list_draw(draw_list, true); RD::get_singleton()->draw_list_draw(draw_list, true);
} }
void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples) { void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples, uint32_t p_barrier) {
ResolvePushConstant push_constant; ResolvePushConstant push_constant;
push_constant.screen_size[0] = p_screen_size.x; push_constant.screen_size[0] = p_screen_size.x;
push_constant.screen_size[1] = p_screen_size.y; push_constant.screen_size[1] = p_screen_size.y;
@ -1472,19 +1418,26 @@ void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RI
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ResolvePushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ResolvePushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1, 8, 8, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1);
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end(p_barrier);
} }
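resolve_gi() now forwards a caller-supplied barrier mask to compute_list_end(), with RD::BARRIER_MASK_ALL as the default in the header further down. A hypothetical call site (all variable names are stand-ins, and BARRIER_MASK_COMPUTE is assumed to exist alongside the RASTER/TRANSFER masks used above):

	// Resolve the MSAA GI targets, but only fence against later compute work,
	// e.g. when the resolved buffers feed a compute pass next.
	effects->resolve_gi(depth_msaa, normal_roughness_msaa, giprobe_msaa,
			depth, normal_roughness, giprobe,
			screen_size, msaa_sample_count,
			RD::BARRIER_MASK_COMPUTE);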
void EffectsRD::reduce_shadow(RID p_source_shadow, RID p_dest_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, int p_shrink_limit, RD::ComputeListID compute_list) { void EffectsRD::reduce_shadow(RID p_source_shadow, RID p_dest_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, int p_shrink_limit, RD::ComputeListID compute_list) {
uint32_t push_constant[8] = { (uint32_t)p_source_size.x, (uint32_t)p_source_size.y, (uint32_t)p_source_rect.position.x, (uint32_t)p_source_rect.position.y, (uint32_t)p_shrink_limit, 0, 0, 0 }; uint32_t push_constant[8] = { (uint32_t)p_source_size.x, (uint32_t)p_source_size.y, (uint32_t)p_source_rect.position.x, (uint32_t)p_source_rect.position.y, (uint32_t)p_shrink_limit, 0, 0, 0 };
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shadow_reduce.pipelines[SHADOW_REDUCE_REDUCE]); uint32_t height = p_source_rect.size.height;
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_source_shadow, p_dest_shadow), 0); if (true) { // subgroup support, @TODO must detect them
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shadow_reduce.pipelines[p_shrink_limit == 1 ? SHADOW_REDUCE_REDUCE_SUBGROUPS_8 : SHADOW_REDUCE_REDUCE_SUBGROUPS]);
height /= 2; //cause kernel is 8x4
} else {
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shadow_reduce.pipelines[SHADOW_REDUCE_REDUCE]);
}
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_shadow), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_shadow), 1);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8);
RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1, 8, 8, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, height, 1);
} }
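On the halved height in the subgroup path: with the 8x4 kernel noted in the comment, halving the row count keeps the workgroup grid the same as the 8x8 fallback, so each subgroup workgroup presumably still covers an 8x8 pixel tile (two rows per invocation). Worked numbers for a 512x512 source rect:

	// Fallback path, 8x8 kernel:  ceil(512 / 8) x ceil(512 / 8)       = 64 x 64 groups
	// Subgroup path, 8x4 kernel:  ceil(512 / 8) x ceil((512 / 2) / 4) = 64 x 64 groups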
void EffectsRD::filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, RenderingServer::EnvVolumetricFogShadowFilter p_filter, RD::ComputeListID compute_list, bool p_vertical, bool p_horizontal) { void EffectsRD::filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, RenderingServer::EnvVolumetricFogShadowFilter p_filter, RD::ComputeListID compute_list, bool p_vertical, bool p_horizontal) {
uint32_t push_constant[8] = { (uint32_t)p_source_size.x, (uint32_t)p_source_size.y, (uint32_t)p_source_rect.position.x, (uint32_t)p_source_rect.position.y, 0, 0, 0, 0 }; uint32_t push_constant[8] = { (uint32_t)p_source_size.x, (uint32_t)p_source_size.y, (uint32_t)p_source_rect.position.x, (uint32_t)p_source_rect.position.y, 0, 0, 0, 0 };
@ -1506,9 +1459,10 @@ void EffectsRD::filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i &
if (p_vertical) { if (p_vertical) {
push_constant[6] = 1; push_constant[6] = 1;
push_constant[7] = 0; push_constant[7] = 0;
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_shadow, p_backing_shadow), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_shadow), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_backing_shadow), 1);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8);
RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1, 8, 8, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1);
} }
if (p_vertical && p_horizontal) { if (p_vertical && p_horizontal) {
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
@ -1516,9 +1470,10 @@ void EffectsRD::filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i &
if (p_horizontal) { if (p_horizontal) {
push_constant[6] = 0; push_constant[6] = 0;
push_constant[7] = 1; push_constant[7] = 1;
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_backing_shadow, p_shadow), 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_backing_shadow), 0);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_shadow), 1);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(uint32_t) * 8);
RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1, 8, 8, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_source_rect.size.width, p_source_rect.size.height, 1);
} }
} }
@ -2020,6 +1975,8 @@ EffectsRD::EffectsRD() {
{ {
Vector<String> shadow_reduce_modes; Vector<String> shadow_reduce_modes;
shadow_reduce_modes.push_back("\n#define MODE_REDUCE\n"); shadow_reduce_modes.push_back("\n#define MODE_REDUCE\n");
shadow_reduce_modes.push_back("\n#define MODE_REDUCE_SUBGROUP\n");
shadow_reduce_modes.push_back("\n#define MODE_REDUCE_SUBGROUP\n#define MODE_REDUCE_8\n");
shadow_reduce_modes.push_back("\n#define MODE_FILTER\n"); shadow_reduce_modes.push_back("\n#define MODE_FILTER\n");
shadow_reduce.shader.initialize(shadow_reduce_modes); shadow_reduce.shader.initialize(shadow_reduce_modes);


@ -599,6 +599,8 @@ class EffectsRD {
enum ShadowReduceMode { enum ShadowReduceMode {
SHADOW_REDUCE_REDUCE, SHADOW_REDUCE_REDUCE,
SHADOW_REDUCE_REDUCE_SUBGROUPS,
SHADOW_REDUCE_REDUCE_SUBGROUPS_8,
SHADOW_REDUCE_FILTER, SHADOW_REDUCE_FILTER,
SHADOW_REDUCE_MAX SHADOW_REDUCE_MAX
}; };
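The two new entries line up, in order, with the shader variants pushed onto shadow_reduce_modes in the EffectsRD() constructor earlier in this diff; shader versions are presumably compiled and indexed in that same order:

	// SHADOW_REDUCE_REDUCE             -> "\n#define MODE_REDUCE\n"
	// SHADOW_REDUCE_REDUCE_SUBGROUPS   -> "\n#define MODE_REDUCE_SUBGROUP\n"
	// SHADOW_REDUCE_REDUCE_SUBGROUPS_8 -> "\n#define MODE_REDUCE_SUBGROUP\n#define MODE_REDUCE_8\n"
	// SHADOW_REDUCE_FILTER             -> "\n#define MODE_FILTER\n"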
@ -763,7 +765,7 @@ public:
void merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection); void merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection);
void sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RS::SubSurfaceScatteringQuality p_quality); void sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RS::SubSurfaceScatteringQuality p_quality);
void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples); void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_giprobe, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_giprobe, Vector2i p_screen_size, int p_samples, uint32_t p_barrier = RD::BARRIER_MASK_ALL);
void reduce_shadow(RID p_source_shadow, RID p_dest_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, int p_shrink_limit, RenderingDevice::ComputeListID compute_list); void reduce_shadow(RID p_source_shadow, RID p_dest_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, int p_shrink_limit, RenderingDevice::ComputeListID compute_list);
void filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, RS::EnvVolumetricFogShadowFilter p_filter, RenderingDevice::ComputeListID compute_list, bool p_vertical = true, bool p_horizontal = true); void filter_shadow(RID p_shadow, RID p_backing_shadow, const Size2i &p_source_size, const Rect2i &p_source_rect, RS::EnvVolumetricFogShadowFilter p_filter, RenderingDevice::ComputeListID compute_list, bool p_vertical = true, bool p_horizontal = true);


@ -50,6 +50,15 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
MAX_GI_PROBES = 8, MAX_GI_PROBES = 8,
MAX_LIGHTMAPS = 8, MAX_LIGHTMAPS = 8,
MAX_GI_PROBES_PER_INSTANCE = 2, MAX_GI_PROBES_PER_INSTANCE = 2,
INSTANCE_DATA_BUFFER_MIN_SIZE = 4096
};
enum RenderListType {
RENDER_LIST_OPAQUE, //used for opaque objects
RENDER_LIST_ALPHA, //used for transparent objects
RENDER_LIST_SECONDARY, //used for shadows and other objects
RENDER_LIST_MAX
}; };
/* Scene Shader */ /* Scene Shader */
@ -245,7 +254,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
RID shadow_sampler; RID shadow_sampler;
RID render_base_uniform_set; RID render_base_uniform_set;
RID render_pass_uniform_set; LocalVector<RID> render_pass_uniform_sets;
RID sdfgi_pass_uniform_set; RID sdfgi_pass_uniform_set;
uint64_t lightmap_texture_array_version = 0xFFFFFFFF; uint64_t lightmap_texture_array_version = 0xFFFFFFFF;
@ -257,7 +266,58 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
void _update_render_base_uniform_set(); void _update_render_base_uniform_set();
RID _setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture); RID _setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture);
RID _setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas = false); RID _setup_render_pass_uniform_set(RenderListType p_render_list, RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, bool p_use_directional_shadow_atlas = false, int p_index = 0);
enum PassMode {
PASS_MODE_COLOR,
PASS_MODE_COLOR_SPECULAR,
PASS_MODE_COLOR_TRANSPARENT,
PASS_MODE_SHADOW,
PASS_MODE_SHADOW_DP,
PASS_MODE_DEPTH,
PASS_MODE_DEPTH_NORMAL_ROUGHNESS,
PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE,
PASS_MODE_DEPTH_MATERIAL,
PASS_MODE_SDF,
};
struct GeometryInstanceSurfaceDataCache;
struct RenderElementInfo;
struct RenderListParameters {
GeometryInstanceSurfaceDataCache **elements = nullptr;
RenderElementInfo *element_info = nullptr;
int element_count = 0;
bool reverse_cull = false;
PassMode pass_mode = PASS_MODE_COLOR;
bool no_gi = false;
RID render_pass_uniform_set;
bool force_wireframe = false;
Vector2 uv_offset;
Plane lod_plane;
float lod_distance_multiplier = 0.0;
float screen_lod_threshold = 0.0;
RD::FramebufferFormatID framebuffer_format = 0;
uint32_t element_offset = 0;
uint32_t barrier = RD::BARRIER_MASK_ALL;
RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, RenderElementInfo *p_element_info, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, uint32_t p_element_offset = 0, uint32_t p_barrier = RD::BARRIER_MASK_ALL) {
elements = p_elements;
element_info = p_element_info;
element_count = p_element_count;
reverse_cull = p_reverse_cull;
pass_mode = p_pass_mode;
no_gi = p_no_gi;
render_pass_uniform_set = p_render_pass_uniform_set;
force_wireframe = p_force_wireframe;
uv_offset = p_uv_offset;
lod_plane = p_lod_plane;
lod_distance_multiplier = p_lod_distance_multiplier;
screen_lod_threshold = p_screen_lod_threshold;
element_offset = p_element_offset;
barrier = p_barrier;
}
};
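RenderListParameters grows three things: the packed per-element RenderElementInfo array, an element_offset so a filled list can be rendered in slices, and a post-pass barrier mask. A hypothetical call site (every lowercase identifier below is an illustrative stand-in, not engine code):

	// Draw one slice of a previously filled list for a shadow pass, and skip the
	// automatic end-of-pass barrier so several shadow passes can share a single one.
	RenderListParameters render_list_params(
			elements, element_info, element_count,
			flip_cull, PASS_MODE_SHADOW, /* p_no_gi */ true,
			rp_uniform_set,
			/* p_force_wireframe */ false, Vector2(), camera_plane,
			lod_distance_multiplier, screen_lod_threshold,
			/* p_element_offset */ element_from,
			/* p_barrier */ 0); // 0 = no post-pass barrier, mirroring the buffer_update(..., 0) call earlier in this diff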
struct LightmapData { struct LightmapData {
float normal_xform[12]; float normal_xform[12];
@ -367,9 +427,24 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
uint32_t pancake_shadows; uint32_t pancake_shadows;
}; };
struct PushConstant {
uint32_t base_index; //
uint32_t uv_offset; //packed
uint32_t pad[2];
};
struct InstanceData {
float transform[16];
uint32_t flags;
uint32_t instance_uniforms_ofs; //base offset in global buffer for instance variables
uint32_t gi_offset; //GI information when using lightmapping (VCT or lightmap index)
uint32_t layer_mask;
float lightmap_uv_scale[4];
};
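InstanceData is the per-instance payload that now lives in the instance_buffer storage buffers declared just below and is filled by _fill_instance_data, replacing the removed per-draw PushConstant (further down in this diff) that carried the same transform/flags/GI fields. The members pack to 96 bytes with no padding, so the instance array can be copied into the buffer as-is; a standalone mirror as a sanity check (sketch, not engine code):

	#include <cstdint>

	struct InstanceDataMirror { // same member layout as InstanceData above
		float transform[16]; // 64 bytes
		uint32_t flags;
		uint32_t instance_uniforms_ofs;
		uint32_t gi_offset;
		uint32_t layer_mask; // 4 x 4 = 16 bytes
		float lightmap_uv_scale[4]; // 16 bytes
	};
	static_assert(sizeof(InstanceDataMirror) == 96, "64 + 16 + 16 = 96, a multiple of 16");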
UBO ubo; UBO ubo;
RID uniform_buffer; LocalVector<RID> uniform_buffers;
LightmapData lightmaps[MAX_LIGHTMAPS]; LightmapData lightmaps[MAX_LIGHTMAPS];
RID lightmap_ids[MAX_LIGHTMAPS]; RID lightmap_ids[MAX_LIGHTMAPS];
@ -378,6 +453,10 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
uint32_t max_lightmaps; uint32_t max_lightmaps;
RID lightmap_buffer; RID lightmap_buffer;
RID instance_buffer[RENDER_LIST_MAX];
uint32_t instance_buffer_size[RENDER_LIST_MAX] = { 0, 0, 0 };
LocalVector<InstanceData> instance_data[RENDER_LIST_MAX];
LightmapCaptureData *lightmap_captures; LightmapCaptureData *lightmap_captures;
uint32_t max_lightmap_captures; uint32_t max_lightmap_captures;
RID lightmap_capture_buffer; RID lightmap_capture_buffer;
@ -390,10 +469,29 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
bool used_depth_texture = false; bool used_depth_texture = false;
bool used_sss = false; bool used_sss = false;
struct ShadowPass {
uint32_t element_from;
uint32_t element_count;
bool flip_cull;
PassMode pass_mode;
RID rp_uniform_set;
Plane camera_plane;
float lod_distance_multiplier;
float screen_lod_threshold;
RID framebuffer;
RD::InitialAction initial_depth_action;
RD::FinalAction final_depth_action;
Rect2i rect;
};
LocalVector<ShadowPass> shadow_passes;
} scene_state; } scene_state;
static RendererSceneRenderForward *singleton; static RendererSceneRenderForward *singleton;
uint64_t render_pass;
double time; double time;
RID default_shader; RID default_shader;
RID default_material; RID default_material;
@ -407,51 +505,15 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
RID default_vec4_xform_buffer; RID default_vec4_xform_buffer;
RID default_vec4_xform_uniform_set; RID default_vec4_xform_uniform_set;
enum PassMode { void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false, int p_index = 0);
PASS_MODE_COLOR,
PASS_MODE_COLOR_SPECULAR,
PASS_MODE_COLOR_TRANSPARENT,
PASS_MODE_SHADOW,
PASS_MODE_SHADOW_DP,
PASS_MODE_DEPTH,
PASS_MODE_DEPTH_NORMAL_ROUGHNESS,
PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE,
PASS_MODE_DEPTH_MATERIAL,
PASS_MODE_SDF,
};
void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false);
void _setup_giprobes(const PagedArray<RID> &p_giprobes); void _setup_giprobes(const PagedArray<RID> &p_giprobes);
void _setup_lightmaps(const PagedArray<RID> &p_lightmaps, const Transform &p_cam_transform); void _setup_lightmaps(const PagedArray<RID> &p_lightmaps, const Transform &p_cam_transform);
struct GeometryInstanceSurfaceDataCache; struct RenderElementInfo {
uint32_t repeat : 22;
struct RenderListParameters { uint32_t uses_lightmap : 1;
GeometryInstanceSurfaceDataCache **elements = nullptr; uint32_t uses_forward_gi : 1;
int element_count = 0; uint32_t lod_index : 8;
bool reverse_cull = false;
PassMode pass_mode = PASS_MODE_COLOR;
bool no_gi = false;
RID render_pass_uniform_set;
bool force_wireframe = false;
Vector2 uv_offset;
Plane lod_plane;
float lod_distance_multiplier = 0.0;
float screen_lod_threshold = 0.0;
RD::FramebufferFormatID framebuffer_format = 0;
RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0) {
elements = p_elements;
element_count = p_element_count;
reverse_cull = p_reverse_cull;
pass_mode = p_pass_mode;
no_gi = p_no_gi;
render_pass_uniform_set = p_render_pass_uniform_set;
force_wireframe = p_force_wireframe;
uv_offset = p_uv_offset;
lod_plane = p_lod_plane;
lod_distance_multiplier = p_lod_distance_multiplier;
screen_lod_threshold = p_screen_lod_threshold;
}
}; };
template <PassMode p_pass_mode> template <PassMode p_pass_mode>
@ -465,7 +527,9 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
uint32_t render_list_thread_threshold = 500; uint32_t render_list_thread_threshold = 500;
void _fill_render_list(const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi = false, bool p_using_opaque_gi = false); void _update_instance_data_buffer(RenderListType p_render_list);
void _fill_instance_data(RenderListType p_render_list, uint32_t p_offset = 0, int32_t p_max_elements = -1, bool p_update_buffer = true);
void _fill_render_list(RenderListType p_render_list, const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi = false, bool p_using_opaque_gi = false, const Plane &p_lod_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, bool p_append = false);
Map<Size2i, RID> sdfgi_framebuffer_size_cache; Map<Size2i, RID> sdfgi_framebuffer_size_cache;
@ -493,14 +557,17 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
union { union {
struct { struct {
uint32_t geometry_id; uint64_t lod_index : 8;
uint32_t material_id; uint64_t surface_index : 10;
uint32_t shader_id; uint64_t geometry_id : 32;
uint32_t surface_type : 4; uint64_t material_id_low : 14;
uint32_t uses_forward_gi : 1; //set during addition
uint32_t uses_lightmap : 1; //set during addition uint64_t material_id_hi : 18;
uint32_t depth_layer : 4; //set during addition uint64_t shader_id : 32;
uint32_t priority : 8; uint64_t uses_forward_gi : 1;
uint64_t uses_lightmap : 1;
uint64_t depth_layer : 4;
uint64_t priority : 8;
}; };
struct { struct {
uint64_t sort_key1; uint64_t sort_key1;
@ -532,20 +599,20 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
float lod_model_scale = 1.0; float lod_model_scale = 1.0;
AABB transformed_aabb; //needed for LOD AABB transformed_aabb; //needed for LOD
float depth = 0; float depth = 0;
struct PushConstant { uint32_t gi_offset_cache = 0;
float transform[16]; uint32_t flags_cache = 0;
uint32_t flags; bool store_transform_cache = true;
uint32_t instance_uniforms_ofs; //base offset in global buffer for instance variables int32_t shader_parameters_offset = -1;
uint32_t gi_offset; //GI information when using lightmapping (VCT or lightmap index) uint32_t lightmap_slice_index;
uint32_t layer_mask; Rect2 lightmap_uv_scale;
float lightmap_uv_scale[4]; uint32_t layer_mask = 1;
} push_constant;
RID transforms_uniform_set; RID transforms_uniform_set;
uint32_t instance_count = 0; uint32_t instance_count = 0;
RID mesh_instance; RID mesh_instance;
bool can_sdfgi = false; bool can_sdfgi = false;
//used during setup //used during setup
uint32_t base_flags = 0; uint32_t base_flags = 0;
Transform transform;
RID gi_probes[MAX_GI_PROBES_PER_INSTANCE]; RID gi_probes[MAX_GI_PROBES_PER_INSTANCE];
RID lightmap_instance; RID lightmap_instance;
GeometryInstanceLightmapSH *lightmap_sh = nullptr; GeometryInstanceLightmapSH *lightmap_sh = nullptr;
@ -558,21 +625,14 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
RS::InstanceType base_type; RS::InstanceType base_type;
RID skeleton; RID skeleton;
uint32_t layer_mask = 1;
Vector<RID> surface_materials; Vector<RID> surface_materials;
RID material_override; RID material_override;
Transform transform;
AABB aabb; AABB aabb;
int32_t shader_parameters_offset = -1;
bool use_dynamic_gi = false; bool use_dynamic_gi = false;
bool use_baked_light = false; bool use_baked_light = false;
bool cast_double_sided_shaodows = false; bool cast_double_sided_shaodows = false;
bool mirror = false; bool mirror = false;
Rect2 lightmap_uv_scale;
uint32_t lightmap_slice_index = 0;
bool dirty_dependencies = false; bool dirty_dependencies = false;
RendererStorage::DependencyTracker dependency_tracker; RendererStorage::DependencyTracker dependency_tracker;
@ -604,16 +664,12 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
/* Render List */ /* Render List */
struct RenderList { struct RenderList {
int max_elements; LocalVector<GeometryInstanceSurfaceDataCache *> elements;
LocalVector<RenderElementInfo> element_info;
GeometryInstanceSurfaceDataCache **elements = nullptr;
int element_count;
int alpha_element_count;
void clear() { void clear() {
element_count = 0; elements.clear();
alpha_element_count = 0; element_info.clear();
} }
//should eventually be replaced by radix //should eventually be replaced by radix
@ -624,13 +680,14 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
} }
}; };
void sort_by_key(bool p_alpha) { void sort_by_key() {
SortArray<GeometryInstanceSurfaceDataCache *, SortByKey> sorter; SortArray<GeometryInstanceSurfaceDataCache *, SortByKey> sorter;
if (p_alpha) { sorter.sort(elements.ptr(), elements.size());
sorter.sort(&elements[max_elements - alpha_element_count], alpha_element_count); }
} else {
sorter.sort(elements, element_count); void sort_by_key_range(uint32_t p_from, uint32_t p_size) {
} SortArray<GeometryInstanceSurfaceDataCache *, SortByKey> sorter;
sorter.sort(elements.ptr() + p_from, p_size);
} }
struct SortByDepth { struct SortByDepth {
@ -639,14 +696,10 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
} }
}; };
void sort_by_depth(bool p_alpha) { //used for shadows void sort_by_depth() { //used for shadows
SortArray<GeometryInstanceSurfaceDataCache *, SortByDepth> sorter; SortArray<GeometryInstanceSurfaceDataCache *, SortByDepth> sorter;
if (p_alpha) { sorter.sort(elements.ptr(), elements.size());
sorter.sort(&elements[max_elements - alpha_element_count], alpha_element_count);
} else {
sorter.sort(elements, element_count);
}
} }
struct SortByReverseDepthAndPriority { struct SortByReverseDepthAndPriority {
@ -658,50 +711,24 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
void sort_by_reverse_depth_and_priority(bool p_alpha) { //used for alpha void sort_by_reverse_depth_and_priority(bool p_alpha) { //used for alpha
SortArray<GeometryInstanceSurfaceDataCache *, SortByReverseDepthAndPriority> sorter; SortArray<GeometryInstanceSurfaceDataCache *, SortByReverseDepthAndPriority> sorter;
if (p_alpha) { sorter.sort(elements.ptr(), elements.size());
sorter.sort(&elements[max_elements - alpha_element_count], alpha_element_count);
} else {
sorter.sort(elements, element_count);
}
} }
_FORCE_INLINE_ void add_element(GeometryInstanceSurfaceDataCache *p_element) { _FORCE_INLINE_ void add_element(GeometryInstanceSurfaceDataCache *p_element) {
if (element_count + alpha_element_count >= max_elements) { elements.push_back(p_element);
return;
}
elements[element_count] = p_element;
element_count++;
}
_FORCE_INLINE_ void add_alpha_element(GeometryInstanceSurfaceDataCache *p_element) {
if (element_count + alpha_element_count >= max_elements) {
return;
}
int idx = max_elements - alpha_element_count - 1;
elements[idx] = p_element;
alpha_element_count++;
}
void init() {
element_count = 0;
alpha_element_count = 0;
elements = memnew_arr(GeometryInstanceSurfaceDataCache *, max_elements);
}
RenderList() {
max_elements = 0;
}
~RenderList() {
memdelete_arr(elements);
} }
}; };
RenderList render_list; RenderList render_list[RENDER_LIST_MAX];
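Note: the render list here drops the fixed max_elements array (with alpha surfaces filled from the back) in favour of growable per-list vectors, one per RenderListType entry of render_list[RENDER_LIST_MAX], sorted by the widened 64-bit key pair above. Below is a minimal sketch of that shape, using std::vector and std::sort as stand-ins for Godot's LocalVector and SortArray; SurfaceCacheSketch and RenderListSketch are illustrative names, not engine types.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Illustrative stand-in for GeometryInstanceSurfaceDataCache: two packed
    // 64-bit words give a cheap sort criterion for the whole list.
    struct SurfaceCacheSketch {
        uint64_t sort_key1 = 0; // priority, depth layer, shader/material/geometry ids
        uint64_t sort_key2 = 0; // lod index, surface index, remaining material bits
    };

    struct RenderListSketch {
        std::vector<SurfaceCacheSketch *> elements; // grows as needed, no fixed cap

        void clear() { elements.clear(); }
        void add_element(SurfaceCacheSketch *p_element) { elements.push_back(p_element); }

        // Sort the whole list by key (opaque passes).
        void sort_by_key() {
            std::sort(elements.begin(), elements.end(),
                    [](const SurfaceCacheSketch *a, const SurfaceCacheSketch *b) {
                        return a->sort_key1 != b->sort_key1 ? a->sort_key1 < b->sort_key1
                                                            : a->sort_key2 < b->sort_key2;
                    });
        }

        // Sort only the slice appended by one pass (e.g. a single shadow map).
        void sort_by_key_range(uint32_t p_from, uint32_t p_size) {
            std::sort(elements.begin() + p_from, elements.begin() + p_from + p_size,
                    [](const SurfaceCacheSketch *a, const SurfaceCacheSketch *b) {
                        return a->sort_key1 < b->sort_key1;
                    });
        }
    };

Keeping opaque and alpha surfaces in separate lists is what removes both the back-filled alpha region and the hard element cap, and sort_by_key_range() lets each appended pass sort only its own slice.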
protected: protected:
virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold); virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold);
virtual void _render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true);
virtual void _render_shadow_begin();
virtual void _render_shadow_append(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true);
virtual void _render_shadow_process();
virtual void _render_shadow_end(uint32_t p_barrier = RD::BARRIER_MASK_ALL);
virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region);
virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region);
virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture); virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture);
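Note: _render_shadow is replaced above by a begin/append/process/end protocol so several shadow maps can be recorded together and closed with one barrier mask. The following is a rough, self-contained sketch of the call order only; ShadowBatcherSketch and its fields are placeholders, while the real methods take framebuffers, projections, biases, LOD and atlas parameters.

    #include <cstdint>
    #include <vector>

    // Placeholder pass record: only the ordering of the four calls is the point.
    struct ShadowPassSketch {
        int light_index = 0;
    };

    class ShadowBatcherSketch {
        std::vector<ShadowPassSketch> pending;

    public:
        void render_shadow_begin() { pending.clear(); }
        void render_shadow_append(const ShadowPassSketch &p_pass) { pending.push_back(p_pass); }
        void render_shadow_process() { /* build draw lists for every pending pass */ }
        void render_shadow_end(uint32_t p_barrier_mask) {
            (void)p_barrier_mask; // submit everything, then issue one barrier
            pending.clear();
        }
    };

    int main() {
        ShadowBatcherSketch batcher;
        batcher.render_shadow_begin();
        for (int i = 0; i < 4; i++) {
            batcher.render_shadow_append({ i }); // queue each shadow map of the frame
        }
        batcher.render_shadow_process(); // record all of them back to back
        batcher.render_shadow_end(1);    // a single barrier closes the whole batch
        return 0;
    }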
(File diff suppressed because it is too large.)
@ -109,8 +109,13 @@ protected:
void _setup_reflections(const PagedArray<RID> &p_reflections, const Transform &p_camera_inverse_transform, RID p_environment); void _setup_reflections(const PagedArray<RID> &p_reflections, const Transform &p_camera_inverse_transform, RID p_environment);
void _setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used); void _setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used);
virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_cluster_max_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0; virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_cluster_max_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0;
virtual void _render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true) = 0;
virtual void _render_shadow_begin() = 0;
virtual void _render_shadow_append(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0, const Rect2i &p_rect = Rect2i(), bool p_flip_y = false, bool p_clear_region = true, bool p_begin = true, bool p_end = true) = 0;
virtual void _render_shadow_process() = 0;
virtual void _render_shadow_end(uint32_t p_barrier = RD::BARRIER_MASK_ALL) = 0;
virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0;
virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0;
virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) = 0; virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray<GeometryInstance *> &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) = 0;
@ -132,8 +137,16 @@ protected:
void _setup_sky(RID p_environment, RID p_render_buffers, const CameraMatrix &p_projection, const Transform &p_transform, const Size2i p_screen_size); void _setup_sky(RID p_environment, RID p_render_buffers, const CameraMatrix &p_projection, const Transform &p_transform, const Size2i p_screen_size);
void _update_sky(RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform); void _update_sky(RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform);
void _draw_sky(bool p_can_continue_color, bool p_can_continue_depth, RID p_fb, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform); void _draw_sky(bool p_can_continue_color, bool p_can_continue_depth, RID p_fb, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform);
void _pre_process_gi(RID p_render_buffers, const Transform &p_transform);
void _process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes); void _process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_gi_probe_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform &p_transform, const PagedArray<RID> &p_gi_probes);
bool _needs_post_prepass_render(bool p_use_gi);
void _post_prepass_render(bool p_use_gi);
void _pre_resolve_render(bool p_use_gi);
void _pre_opaque_render(bool p_use_ssao, bool p_use_gi, RID p_normal_roughness_buffer, RID p_gi_probe_buffer);
uint32_t _get_render_state_directional_light_count() const;
// needed for a single argument calls (material and uv2) // needed for a single argument calls (material and uv2)
PagedArrayPool<GeometryInstance *> cull_argument_pool; PagedArrayPool<GeometryInstance *> cull_argument_pool;
PagedArray<GeometryInstance *> cull_argument; //need this to exist PagedArray<GeometryInstance *> cull_argument; //need this to exist
@ -651,7 +664,7 @@ private:
RS::LightType light_type = RS::LIGHT_DIRECTIONAL; RS::LightType light_type = RS::LIGHT_DIRECTIONAL;
ShadowTransform shadow_transform[4]; ShadowTransform shadow_transform[6];
AABB aabb; AABB aabb;
RID self; RID self;
@ -1031,8 +1044,14 @@ private:
float y_mult = 1.0; float y_mult = 1.0;
uint32_t render_pass = 0; uint32_t render_pass = 0;
int32_t cascade_dynamic_light_count[SDFGI::MAX_CASCADES]; //used dynamically
}; };
void _sdfgi_update_light(RID p_render_buffers, RID p_environment);
void _sdfgi_update_probes(RID p_render_buffers, RID p_environment);
void _sdfgi_store_probes(RID p_render_buffers);
RS::EnvironmentSDFGIRayCount sdfgi_ray_count = RS::ENV_SDFGI_RAY_COUNT_16; RS::EnvironmentSDFGIRayCount sdfgi_ray_count = RS::ENV_SDFGI_RAY_COUNT_16;
RS::EnvironmentSDFGIFramesToConverge sdfgi_frames_to_converge = RS::ENV_SDFGI_CONVERGE_IN_10_FRAMES; RS::EnvironmentSDFGIFramesToConverge sdfgi_frames_to_converge = RS::ENV_SDFGI_CONVERGE_IN_10_FRAMES;
RS::EnvironmentSDFGIFramesToUpdateLight sdfgi_frames_to_update_light = RS::ENV_SDFGI_UPDATE_LIGHT_IN_4_FRAMES; RS::EnvironmentSDFGIFramesToUpdateLight sdfgi_frames_to_update_light = RS::ENV_SDFGI_UPDATE_LIGHT_IN_4_FRAMES;
@ -1460,6 +1479,41 @@ private:
} cluster; } cluster;
struct RenderState {
RID render_buffers;
Transform cam_transform;
CameraMatrix cam_projection;
bool cam_ortogonal = false;
const PagedArray<GeometryInstance *> *instances = nullptr;
const PagedArray<RID> *lights = nullptr;
const PagedArray<RID> *reflection_probes = nullptr;
const PagedArray<RID> *gi_probes = nullptr;
const PagedArray<RID> *decals = nullptr;
const PagedArray<RID> *lightmaps = nullptr;
RID environment;
RID camera_effects;
RID shadow_atlas;
RID reflection_atlas;
RID reflection_probe;
int reflection_probe_pass = 0;
float screen_lod_threshold = 0.0;
const RenderShadowData *render_shadows = nullptr;
int render_shadow_count = 0;
const RenderSDFGIData *render_sdfgi_regions = nullptr;
int render_sdfgi_region_count = 0;
const RenderSDFGIUpdateData *sdfgi_update_data = nullptr;
uint32_t directional_light_count = 0;
uint32_t gi_probe_count = 0;
LocalVector<int> cube_shadows;
LocalVector<int> shadows;
LocalVector<int> directional_shadows;
bool depth_prepass_used;
} render_state;
struct VolumetricFog { struct VolumetricFog {
uint32_t width = 0; uint32_t width = 0;
uint32_t height = 0; uint32_t height = 0;
@ -1547,6 +1601,10 @@ private:
uint32_t max_cluster_elements = 512; uint32_t max_cluster_elements = 512;
bool low_end = false; bool low_end = false;
void _render_shadow_pass(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_lod_threshold = 0.0, bool p_open_pass = true, bool p_close_pass = true, bool p_clear_region = true);
void _render_sdfgi_region(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances);
void _render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result);
public: public:
virtual Transform geometry_instance_get_transform(GeometryInstance *p_instance) = 0; virtual Transform geometry_instance_get_transform(GeometryInstance *p_instance) = 0;
virtual AABB geometry_instance_get_aabb(GeometryInstance *p_instance) = 0; virtual AABB geometry_instance_get_aabb(GeometryInstance *p_instance) = 0;
@ -1594,7 +1652,6 @@ public:
virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const; virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const;
virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const; virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const;
virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const; virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const;
virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count);
RID sdfgi_get_ubo() const { return gi.sdfgi_ubo; } RID sdfgi_get_ubo() const { return gi.sdfgi_ubo; }
/* SKY API */ /* SKY API */
@ -1997,15 +2054,10 @@ public:
float render_buffers_get_volumetric_fog_end(RID p_render_buffers); float render_buffers_get_volumetric_fog_end(RID p_render_buffers);
float render_buffers_get_volumetric_fog_detail_spread(RID p_render_buffers); float render_buffers_get_volumetric_fog_detail_spread(RID p_render_buffers);
void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold); void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr);
void render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_lod_threshold = 0.0);
void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region); void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region);
void render_sdfgi(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances);
void render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_light_cull_result);
void render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<GeometryInstance *> &p_instances); void render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<GeometryInstance *> &p_instances);
virtual void set_scene_pass(uint64_t p_pass) { virtual void set_scene_pass(uint64_t p_pass) {
@ -3098,7 +3098,7 @@ void RendererStorageRD::update_mesh_instances() {
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SkeletonShader::PushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SkeletonShader::PushConstant));
//dispatch without barrier, so all is done at the same time //dispatch without barrier, so all is done at the same time
RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.vertex_count, 1, 1, 64, 1, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.vertex_count, 1, 1);
} }
mi->dirty = false; mi->dirty = false;
@ -4555,7 +4555,7 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ParticlesShader::PushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ParticlesShader::PushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_particles->amount, 1, 1, 64, 1, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_particles->amount, 1, 1);
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
} }
@ -4609,7 +4609,7 @@ void RendererStorageRD::particles_set_view_axis(RID p_particles, const Vector3 &
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1);
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
@ -4621,7 +4621,7 @@ void RendererStorageRD::particles_set_view_axis(RID p_particles, const Vector3 &
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1);
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
} }
@ -4728,7 +4728,7 @@ void RendererStorageRD::update_particles() {
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1, 64, 1, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1);
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
} }
@ -6980,7 +6980,7 @@ void RendererStorageRD::render_target_sdf_process(RID p_render_target) {
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[1], 0); //fill [0] RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[1], 0); //fill [0]
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1, 8, 8, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1);
/* Process */ /* Process */
@ -6996,7 +6996,7 @@ void RendererStorageRD::render_target_sdf_process(RID p_render_target) {
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[swap ? 1 : 0], 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[swap ? 1 : 0], 0);
push_constant.stride = stride; push_constant.stride = stride;
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1, 8, 8, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1);
stride /= 2; stride /= 2;
swap = !swap; swap = !swap;
RD::get_singleton()->compute_list_add_barrier(compute_list); RD::get_singleton()->compute_list_add_barrier(compute_list);
@ -7007,7 +7007,7 @@ void RendererStorageRD::render_target_sdf_process(RID p_render_target) {
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, rt_sdf.pipelines[shrink ? RenderTargetSDF::SHADER_STORE_SHRINK : RenderTargetSDF::SHADER_STORE]); RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, rt_sdf.pipelines[shrink ? RenderTargetSDF::SHADER_STORE_SHRINK : RenderTargetSDF::SHADER_STORE]);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[swap ? 1 : 0], 0); RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rt->sdf_buffer_process_uniform_sets[swap ? 1 : 0], 0);
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant)); RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RenderTargetSDF::PushConstant));
RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1, 8, 8, 1); RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.size[0], push_constant.size[1], 1);
RD::get_singleton()->compute_list_end(); RD::get_singleton()->compute_list_end();
} }
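Note: across these call sites compute_list_dispatch_threads() loses its trailing x/y/z local-group-size arguments; the device now derives group counts from the local size the compute shader itself declares (which is where the NATIVE_LOCAL_GROUP_SIZE defines added to ShaderRD below come in). A small sketch of the arithmetic such a helper has to perform, assuming a simple ceiling division; the names are illustrative.

    #include <cstdint>
    #include <cstdio>

    // Hypothetical view of "dispatch by thread count": the caller passes the
    // number of threads to cover, the device divides by the shader's declared
    // local size and rounds up.
    struct LocalSizeSketch { uint32_t x, y, z; };

    static uint32_t group_count(uint32_t threads, uint32_t local_size) {
        return (threads + local_size - 1) / local_size; // ceiling division
    }

    int main() {
        LocalSizeSketch local = { 8, 8, 1 }; // declared in the shader, e.g. via
                                             // the NATIVE_LOCAL_SIZE_2D_X/Y defines
        uint32_t width = 1920, height = 1080;
        printf("groups: %u x %u x 1\n",
                group_count(width, local.x),   // 240
                group_count(height, local.y)); // 135
        return 0;
    }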
@ -1482,13 +1482,7 @@ public:
return s->lod_count > 0; return s->lod_count > 0;
} }
_FORCE_INLINE_ RID mesh_surface_get_index_array(void *p_surface) const { _FORCE_INLINE_ uint32_t mesh_surface_get_lod(void *p_surface, float p_model_scale, float p_distance_threshold, float p_lod_threshold) const {
Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface);
return s->index_array;
}
_FORCE_INLINE_ RID mesh_surface_get_index_array_with_lod(void *p_surface, float p_model_scale, float p_distance_threshold, float p_lod_threshold) const {
Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface);
int32_t current_lod = -1; int32_t current_lod = -1;
@ -1500,9 +1494,19 @@ public:
current_lod = i; current_lod = i;
} }
if (current_lod == -1) { if (current_lod == -1) {
return 0;
} else {
return current_lod + 1;
}
}
_FORCE_INLINE_ RID mesh_surface_get_index_array(void *p_surface, uint32_t p_lod) const {
Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface);
if (p_lod == 0) {
return s->index_array; return s->index_array;
} else { } else {
return s->lods[current_lod].index_array; return s->lods[p_lod - 1].index_array;
} }
} }
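Note: the old mesh_surface_get_index_array_with_lod() is split in two here: mesh_surface_get_lod() returns a LOD index (0 meaning the base mesh, i + 1 meaning lods[i]) and mesh_surface_get_index_array() then fetches the index array for that index. A sketch of that two-step use under those assumptions; the selection test inside surface_get_lod() is a simplified placeholder, only the index convention comes from this hunk.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Hypothetical surface record; the fields stand in for the mesh's RIDs.
    struct SurfaceSketch {
        int base_index_array = 100;          // full-detail index buffer
        std::vector<int> lod_index_arrays;   // per-LOD index buffers
        std::vector<float> lod_edge_lengths; // per-LOD screen-space metric
    };

    // Step 1: pick a LOD index (placeholder distance/threshold test).
    static uint32_t surface_get_lod(const SurfaceSketch &s, float p_model_scale,
            float p_distance, float p_lod_threshold) {
        int32_t current_lod = -1;
        for (uint32_t i = 0; i < s.lod_edge_lengths.size(); i++) {
            float screen_size = s.lod_edge_lengths[i] * p_model_scale / p_distance;
            if (screen_size > p_lod_threshold) {
                break; // this LOD's simplification would be visible, stop here
            }
            current_lod = int32_t(i);
        }
        return current_lod == -1 ? 0 : uint32_t(current_lod + 1);
    }

    // Step 2: fetch the index array for that LOD.
    static int surface_get_index_array(const SurfaceSketch &s, uint32_t p_lod) {
        return p_lod == 0 ? s.base_index_array : s.lod_index_arrays[p_lod - 1];
    }

    int main() {
        SurfaceSketch surf;
        surf.lod_index_arrays = { 101, 102 };
        surf.lod_edge_lengths = { 0.01f, 0.05f };
        uint32_t lod = surface_get_lod(surf, 1.0f, 25.0f, 0.003f);
        printf("lod %u uses index array %d\n", lod, surface_get_index_array(surf, lod));
        return 0;
    }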
@ -301,6 +301,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {
builder.append(compute_codev.get_data()); // version info (if exists) builder.append(compute_codev.get_data()); // version info (if exists)
builder.append("\n"); //make sure defines begin at newline builder.append("\n"); //make sure defines begin at newline
builder.append(base_compute_defines.get_data());
builder.append(general_defines.get_data()); builder.append(general_defines.get_data());
builder.append(variant_defines[p_variant].get_data()); builder.append(variant_defines[p_variant].get_data());
@ -401,7 +402,6 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio
builder.append(fragment_codev.get_data()); // version info (if exists) builder.append(fragment_codev.get_data()); // version info (if exists)
builder.append("\n"); //make sure defines begin at newline builder.append("\n"); //make sure defines begin at newline
builder.append(general_defines.get_data()); builder.append(general_defines.get_data());
builder.append(variant_defines[i].get_data()); builder.append(variant_defines[i].get_data());
for (int j = 0; j < version->custom_defines.size(); j++) { for (int j = 0; j < version->custom_defines.size(); j++) {
@ -440,6 +440,7 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio
builder.append(compute_codev.get_data()); // version info (if exists) builder.append(compute_codev.get_data()); // version info (if exists)
builder.append("\n"); //make sure defines begin at newline builder.append("\n"); //make sure defines begin at newline
builder.append(base_compute_defines.get_data());
builder.append(general_defines.get_data()); builder.append(general_defines.get_data());
builder.append(variant_defines[i].get_data()); builder.append(variant_defines[i].get_data());
@ -596,6 +597,22 @@ bool ShaderRD::is_variant_enabled(int p_variant) const {
return variants_enabled[p_variant]; return variants_enabled[p_variant];
} }
ShaderRD::ShaderRD() {
// Do not feel forced to use this, in most cases it makes little to no difference.
bool use_32_threads = false;
if (RD::get_singleton()->get_device_vendor_name() == "NVIDIA") {
use_32_threads = true;
}
String base_compute_define_text;
if (use_32_threads) {
base_compute_define_text = "\n#define NATIVE_LOCAL_GROUP_SIZE 32\n#define NATIVE_LOCAL_SIZE_2D_X 8\n#define NATIVE_LOCAL_SIZE_2D_Y 4\n";
} else {
base_compute_define_text = "\n#define NATIVE_LOCAL_GROUP_SIZE 64\n#define NATIVE_LOCAL_SIZE_2D_X 8\n#define NATIVE_LOCAL_SIZE_2D_Y 8\n";
}
base_compute_defines = base_compute_define_text.ascii();
}
void ShaderRD::initialize(const Vector<String> &p_variant_defines, const String &p_general_defines) { void ShaderRD::initialize(const Vector<String> &p_variant_defines, const String &p_general_defines) {
ERR_FAIL_COND(variant_defines.size()); ERR_FAIL_COND(variant_defines.size());
ERR_FAIL_COND(p_variant_defines.size() == 0); ERR_FAIL_COND(p_variant_defines.size() == 0);
@ -99,8 +99,10 @@ class ShaderRD {
const char *name; const char *name;
CharString base_compute_defines;
protected: protected:
ShaderRD() {} ShaderRD();
void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name); void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name);
public: public:
@ -89,12 +89,6 @@ MATERIAL_UNIFORMS
} material; } material;
#endif #endif
/* clang-format off */
VERTEX_SHADER_GLOBALS
/* clang-format on */
invariant gl_Position; invariant gl_Position;
#ifdef MODE_DUAL_PARABOLOID #ifdef MODE_DUAL_PARABOLOID
@ -103,28 +97,43 @@ layout(location = 8) out float dp_clip;
#endif #endif
layout(location = 9) out flat uint instance_index;
/* clang-format off */
VERTEX_SHADER_GLOBALS
/* clang-format on */
void main() { void main() {
vec4 instance_custom = vec4(0.0); vec4 instance_custom = vec4(0.0);
#if defined(COLOR_USED) #if defined(COLOR_USED)
color_interp = color_attrib; color_interp = color_attrib;
#endif #endif
mat4 world_matrix = draw_call.transform; uint instance_index = draw_call.instance_index;
bool is_multimesh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH);
if (!is_multimesh) {
instance_index += gl_InstanceIndex;
}
mat4 world_matrix = instances.data[instance_index].transform;
mat3 world_normal_matrix; mat3 world_normal_matrix;
if (bool(draw_call.flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) {
world_normal_matrix = inverse(mat3(world_matrix)); world_normal_matrix = inverse(mat3(world_matrix));
} else { } else {
world_normal_matrix = mat3(world_matrix); world_normal_matrix = mat3(world_matrix);
} }
if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH)) { if (is_multimesh) {
//multimesh, instances are for it //multimesh, instances are for it
uint offset = (draw_call.flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK; uint offset = (instances.data[instance_index].flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK;
offset *= gl_InstanceIndex; offset *= gl_InstanceIndex;
mat4 matrix; mat4 matrix;
if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) { if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) {
matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
offset += 2; offset += 2;
} else { } else {
@ -132,14 +141,14 @@ void main() {
offset += 3; offset += 3;
} }
if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) { if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) {
#ifdef COLOR_USED #ifdef COLOR_USED
color_interp *= transforms.data[offset]; color_interp *= transforms.data[offset];
#endif #endif
offset += 1; offset += 1;
} }
if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) { if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) {
instance_custom = transforms.data[offset]; instance_custom = transforms.data[offset];
} }
@ -161,7 +170,7 @@ void main() {
#endif #endif
#if 0 #if 0
if (bool(draw_call.flags & INSTANCE_FLAGS_SKELETON)) { if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_SKELETON)) {
//multimesh, instances are for it //multimesh, instances are for it
uvec2 bones_01 = uvec2(bone_attrib.x & 0xFFFF, bone_attrib.x >> 16) * 3; uvec2 bones_01 = uvec2(bone_attrib.x & 0xFFFF, bone_attrib.x >> 16) * 3;
@ -304,7 +313,8 @@ VERTEX_SHADER_CODE
#endif #endif
#ifdef MODE_RENDER_MATERIAL #ifdef MODE_RENDER_MATERIAL
if (scene_data.material_uv2_mode) { if (scene_data.material_uv2_mode) {
gl_Position.xy = (uv2_attrib.xy + draw_call.lightmap_uv_scale.xy) * 2.0 - 1.0; vec2 uv_offset = unpackHalf2x16(draw_call.uv_offset);
gl_Position.xy = (uv2_attrib.xy + uv_offset) * 2.0 - 1.0;
gl_Position.z = 0.00001; gl_Position.z = 0.00001;
gl_Position.w = 1.0; gl_Position.w = 1.0;
} }
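Note: with the push constant reduced to four 32-bit words, the UV2 offset used by MODE_RENDER_MATERIAL now travels as two half floats packed into draw_call.uv_offset and is decoded with unpackHalf2x16() above. Below is a hedged sketch of the matching CPU-side packing, using a minimal float-to-half conversion instead of Godot's math helpers; it handles the normal range only (denormals, NaN and infinity are omitted, and the mantissa is truncated rather than rounded).

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Minimal float32 -> float16 conversion, normal range only.
    static uint16_t to_half(float f) {
        uint32_t bits;
        std::memcpy(&bits, &f, sizeof(bits));
        uint32_t sign = (bits >> 16) & 0x8000u;
        int32_t exponent = int32_t((bits >> 23) & 0xFFu) - 127 + 15;
        uint32_t mantissa = (bits >> 13) & 0x3FFu;
        if (exponent <= 0) {
            return uint16_t(sign); // flush tiny values to signed zero
        }
        return uint16_t(sign | (uint32_t(exponent) << 10) | mantissa);
    }

    // Pack a 2D offset the way unpackHalf2x16() expects it:
    // x in the low 16 bits, y in the high 16 bits.
    static uint32_t pack_half2x16(float x, float y) {
        return uint32_t(to_half(x)) | (uint32_t(to_half(y)) << 16);
    }

    int main() {
        uint32_t uv_offset = pack_half2x16(0.25f, 0.5f);
        printf("packed uv_offset = 0x%08X\n", uv_offset); // 0x38003400
        return 0;
    }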
@ -350,9 +360,11 @@ layout(location = 8) in float dp_clip;
#endif #endif
layout(location = 9) in flat uint instance_index;
//defines to keep compatibility with vertex //defines to keep compatibility with vertex
#define world_matrix draw_call.transform #define world_matrix instances.data[instance_index].transform
#define projection_matrix scene_data.projection_matrix #define projection_matrix scene_data.projection_matrix
#if defined(ENABLE_SSS) && defined(ENABLE_TRANSMITTANCE) #if defined(ENABLE_SSS) && defined(ENABLE_TRANSMITTANCE)
@ -1770,7 +1782,7 @@ vec4 fog_process(vec3 vertex) {
} }
} }
float fog_amount = 1.0 - exp(vertex.z * scene_data.fog_density); float fog_amount = 1.0 - exp(min(0.0, vertex.z * scene_data.fog_density));
if (abs(scene_data.fog_height_density) > 0.001) { if (abs(scene_data.fog_height_density) > 0.001) {
float y = (scene_data.camera_matrix * vec4(vertex, 1.0)).y; float y = (scene_data.camera_matrix * vec4(vertex, 1.0)).y;
@ -2083,7 +2095,7 @@ FRAGMENT_SHADER_CODE
#endif #endif
uint decal_index = 32 * i + bit; uint decal_index = 32 * i + bit;
if (!bool(decals.data[decal_index].mask & draw_call.layer_mask)) { if (!bool(decals.data[decal_index].mask & instances.data[instance_index].layer_mask)) {
continue; //not masked continue; //not masked
} }
@ -2210,8 +2222,8 @@ FRAGMENT_SHADER_CODE
#ifdef USE_LIGHTMAP #ifdef USE_LIGHTMAP
//lightmap //lightmap
if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture
uint index = draw_call.gi_offset; uint index = instances.data[instance_index].gi_offset;
vec3 wnormal = mat3(scene_data.camera_matrix) * normal; vec3 wnormal = mat3(scene_data.camera_matrix) * normal;
const float c1 = 0.429043; const float c1 = 0.429043;
@ -2230,12 +2242,12 @@ FRAGMENT_SHADER_CODE
2.0 * c2 * lightmap_captures.data[index].sh[1].rgb * wnormal.y + 2.0 * c2 * lightmap_captures.data[index].sh[1].rgb * wnormal.y +
2.0 * c2 * lightmap_captures.data[index].sh[2].rgb * wnormal.z); 2.0 * c2 * lightmap_captures.data[index].sh[2].rgb * wnormal.z);
} else if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap } else if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap
bool uses_sh = bool(draw_call.flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); bool uses_sh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP);
uint ofs = draw_call.gi_offset & 0xFFFF; uint ofs = instances.data[instance_index].gi_offset & 0xFFFF;
vec3 uvw; vec3 uvw;
uvw.xy = uv2 * draw_call.lightmap_uv_scale.zw + draw_call.lightmap_uv_scale.xy; uvw.xy = uv2 * instances.data[instance_index].lightmap_uv_scale.zw + instances.data[instance_index].lightmap_uv_scale.xy;
uvw.z = float((draw_call.gi_offset >> 16) & 0xFFFF); uvw.z = float((instances.data[instance_index].gi_offset >> 16) & 0xFFFF);
if (uses_sh) { if (uses_sh) {
uvw.z *= 4.0; //SH textures use 4 times more data uvw.z *= 4.0; //SH textures use 4 times more data
@ -2244,7 +2256,7 @@ FRAGMENT_SHADER_CODE
vec3 lm_light_l1_0 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 2.0), 0.0).rgb; vec3 lm_light_l1_0 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 2.0), 0.0).rgb;
vec3 lm_light_l1p1 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 3.0), 0.0).rgb; vec3 lm_light_l1p1 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 3.0), 0.0).rgb;
uint idx = draw_call.gi_offset >> 20; uint idx = instances.data[instance_index].gi_offset >> 20;
vec3 n = normalize(lightmaps.data[idx].normal_xform * normal); vec3 n = normalize(lightmaps.data[idx].normal_xform * normal);
ambient_light += lm_light_l0 * 0.282095f; ambient_light += lm_light_l0 * 0.282095f;
@ -2264,7 +2276,7 @@ FRAGMENT_SHADER_CODE
} }
#elif defined(USE_FORWARD_GI) #elif defined(USE_FORWARD_GI)
if (bool(draw_call.flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture
//make vertex orientation the world one, but still align to camera //make vertex orientation the world one, but still align to camera
vec3 cam_pos = mat3(scene_data.camera_matrix) * vertex; vec3 cam_pos = mat3(scene_data.camera_matrix) * vertex;
@ -2336,9 +2348,9 @@ FRAGMENT_SHADER_CODE
} }
} }
if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes
uint index1 = draw_call.gi_offset & 0xFFFF; uint index1 = instances.data[instance_index].gi_offset & 0xFFFF;
vec3 ref_vec = normalize(reflect(normalize(vertex), normal)); vec3 ref_vec = normalize(reflect(normalize(vertex), normal));
//find arbitrary tangent and bitangent, then build a matrix //find arbitrary tangent and bitangent, then build a matrix
vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);
@ -2350,7 +2362,7 @@ FRAGMENT_SHADER_CODE
vec4 spec_accum = vec4(0.0); vec4 spec_accum = vec4(0.0);
gi_probe_compute(index1, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); gi_probe_compute(index1, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum);
uint index2 = draw_call.gi_offset >> 16; uint index2 = instances.data[instance_index].gi_offset >> 16;
if (index2 != 0xFFFF) { if (index2 != 0xFFFF) {
gi_probe_compute(index2, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); gi_probe_compute(index2, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum);
@ -2369,7 +2381,7 @@ FRAGMENT_SHADER_CODE
} }
#elif !defined(LOW_END_MODE) #elif !defined(LOW_END_MODE)
if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers
vec2 coord; vec2 coord;
@ -2448,7 +2460,7 @@ FRAGMENT_SHADER_CODE
#endif #endif
uint reflection_index = 32 * i + bit; uint reflection_index = 32 * i + bit;
if (!bool(reflections.data[reflection_index].mask & draw_call.layer_mask)) { if (!bool(reflections.data[reflection_index].mask & instances.data[instance_index].layer_mask)) {
continue; //not masked continue; //not masked
} }
@ -2519,7 +2531,7 @@ FRAGMENT_SHADER_CODE
break; break;
} }
if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) { if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) {
continue; //not masked continue; //not masked
} }
@ -2838,7 +2850,7 @@ FRAGMENT_SHADER_CODE
break; break;
} }
if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) { if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) {
continue; //not masked continue; //not masked
} }
@ -2968,7 +2980,7 @@ FRAGMENT_SHADER_CODE
#endif #endif
uint light_index = 32 * i + bit; uint light_index = 32 * i + bit;
if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { if (!bool(omni_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) {
continue; //not masked continue; //not masked
} }
@ -3041,7 +3053,7 @@ FRAGMENT_SHADER_CODE
uint light_index = 32 * i + bit; uint light_index = 32 * i + bit;
if (!bool(spot_lights.data[light_index].mask & draw_call.layer_mask)) { if (!bool(spot_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) {
continue; //not masked continue; //not masked
} }
@ -3214,9 +3226,9 @@ FRAGMENT_SHADER_CODE
normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness); normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness);
#ifdef MODE_RENDER_GIPROBE #ifdef MODE_RENDER_GIPROBE
if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes
uint index1 = draw_call.gi_offset & 0xFFFF; uint index1 = instances.data[instance_index].gi_offset & 0xFFFF;
uint index2 = draw_call.gi_offset >> 16; uint index2 = instances.data[instance_index].gi_offset >> 16;
giprobe_buffer.x = index1 & 0xFF; giprobe_buffer.x = index1 & 0xFF;
giprobe_buffer.y = index2 & 0xFF; giprobe_buffer.y = index2 & 0xFF;
} else { } else {
@ -3275,6 +3287,7 @@ FRAGMENT_SHADER_CODE
// Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky.
frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a);
;
#endif //MODE_MULTIPLE_RENDER_TARGETS #endif //MODE_MULTIPLE_RENDER_TARGETS
@ -21,12 +21,10 @@
#endif #endif
layout(push_constant, binding = 0, std430) uniform DrawCall { layout(push_constant, binding = 0, std430) uniform DrawCall {
mat4 transform; uint instance_index;
uint flags; uint uv_offset;
uint instance_uniforms_ofs; //base offset in global buffer for instance variables uint pad0;
uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) uint pad1;
uint layer_mask;
vec4 lightmap_uv_scale;
} }
draw_call; draw_call;
@ -45,13 +43,127 @@ draw_call;
#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10 #define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10
#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11 #define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11
#define SDFGI_MAX_CASCADES 8
/* Set 1: Base Pass (never changes) */
layout(set = 0, binding = 1) uniform sampler material_samplers[12]; layout(set = 0, binding = 1) uniform sampler material_samplers[12];
layout(set = 0, binding = 2) uniform sampler shadow_sampler; layout(set = 0, binding = 2) uniform sampler shadow_sampler;
#define SDFGI_MAX_CASCADES 8 #define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 6)
#define INSTANCE_FLAGS_USE_SDFGI (1 << 7)
#define INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE (1 << 8)
#define INSTANCE_FLAGS_USE_LIGHTMAP (1 << 9)
#define INSTANCE_FLAGS_USE_SH_LIGHTMAP (1 << 10)
#define INSTANCE_FLAGS_USE_GIPROBE (1 << 11)
#define INSTANCE_FLAGS_MULTIMESH (1 << 12)
#define INSTANCE_FLAGS_MULTIMESH_FORMAT_2D (1 << 13)
#define INSTANCE_FLAGS_MULTIMESH_HAS_COLOR (1 << 14)
#define INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA (1 << 15)
#define INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT 16
//3 bits of stride
#define INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK 0x7
layout(set = 0, binding = 3, std140) uniform SceneData { #define INSTANCE_FLAGS_SKELETON (1 << 19)
#define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20)
layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights {
LightData data[];
}
omni_lights;
layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights {
LightData data[];
}
spot_lights;
layout(set = 0, binding = 5) buffer restrict readonly ReflectionProbeData {
ReflectionData data[];
}
reflections;
layout(set = 0, binding = 6, std140) uniform DirectionalLights {
DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS];
}
directional_lights;
#define LIGHTMAP_FLAG_USE_DIRECTION 1
#define LIGHTMAP_FLAG_USE_SPECULAR_DIRECTION 2
struct Lightmap {
mat3 normal_xform;
};
layout(set = 0, binding = 7, std140) restrict readonly buffer Lightmaps {
Lightmap data[];
}
lightmaps;
struct LightmapCapture {
vec4 sh[9];
};
layout(set = 0, binding = 8, std140) restrict readonly buffer LightmapCaptures {
LightmapCapture data[];
}
lightmap_captures;
layout(set = 0, binding = 9) uniform texture2D decal_atlas;
layout(set = 0, binding = 10) uniform texture2D decal_atlas_srgb;
layout(set = 0, binding = 11, std430) restrict readonly buffer Decals {
DecalData data[];
}
decals;
layout(set = 0, binding = 12, std430) restrict readonly buffer GlobalVariableData {
vec4 data[];
}
global_variables;
#ifndef LOW_END_MODE
struct SDFGIProbeCascadeData {
vec3 position;
float to_probe;
ivec3 probe_world_offset;
float to_cell; // 1/bounds * grid_size
};
layout(set = 0, binding = 13, std140) uniform SDFGI {
vec3 grid_size;
uint max_cascades;
bool use_occlusion;
int probe_axis_size;
float probe_to_uvw;
float normal_bias;
vec3 lightprobe_tex_pixel_size;
float energy;
vec3 lightprobe_uv_offset;
float y_mult;
vec3 occlusion_clamp;
uint pad3;
vec3 occlusion_renormalize;
uint pad4;
vec3 cascade_probe_size;
uint pad5;
SDFGIProbeCascadeData cascades[SDFGI_MAX_CASCADES];
}
sdfgi;
#endif //LOW_END_MODE
/* Set 2: Render Pass (changes per render pass) */
layout(set = 1, binding = 0, std140) uniform SceneData {
mat4 projection_matrix; mat4 projection_matrix;
mat4 inv_projection_matrix; mat4 inv_projection_matrix;
@ -136,157 +248,53 @@ layout(set = 0, binding = 3, std140) uniform SceneData {
scene_data; scene_data;
#define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 6) struct InstanceData {
#define INSTANCE_FLAGS_USE_SDFGI (1 << 7) mat4 transform;
#define INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE (1 << 8) uint flags;
#define INSTANCE_FLAGS_USE_LIGHTMAP (1 << 9) uint instance_uniforms_ofs; //base offset in global buffer for instance variables
#define INSTANCE_FLAGS_USE_SH_LIGHTMAP (1 << 10) uint gi_offset; //GI information when using lightmapping (VCT or lightmap index)
#define INSTANCE_FLAGS_USE_GIPROBE (1 << 11) uint layer_mask;
#define INSTANCE_FLAGS_MULTIMESH (1 << 12) vec4 lightmap_uv_scale;
#define INSTANCE_FLAGS_MULTIMESH_FORMAT_2D (1 << 13)
#define INSTANCE_FLAGS_MULTIMESH_HAS_COLOR (1 << 14)
#define INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA (1 << 15)
#define INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT 16
//3 bits of stride
#define INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK 0x7
#define INSTANCE_FLAGS_SKELETON (1 << 19)
#define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20)
layout(set = 0, binding = 4, std430) restrict readonly buffer OmniLights {
LightData data[];
}
omni_lights;
layout(set = 0, binding = 5, std430) restrict readonly buffer SpotLights {
LightData data[];
}
spot_lights;
layout(set = 0, binding = 6) buffer restrict readonly ReflectionProbeData {
ReflectionData data[];
}
reflections;
layout(set = 0, binding = 7, std140) uniform DirectionalLights {
DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS];
}
directional_lights;
#define LIGHTMAP_FLAG_USE_DIRECTION 1
#define LIGHTMAP_FLAG_USE_SPECULAR_DIRECTION 2
struct Lightmap {
mat3 normal_xform;
}; };
layout(set = 0, binding = 8, std140) restrict readonly buffer Lightmaps { layout(set = 1, binding = 1, std430) buffer restrict readonly InstanceDataBuffer {
Lightmap data[]; InstanceData data[];
} }
lightmaps; instances;
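Note: per-instance data (transform, flags, gi_offset, layer_mask, lightmap UV scale) now lives in this std430 instances buffer instead of the per-draw push constant, which is what the new _update_instance_data_buffer()/_fill_instance_data() functions on the C++ side feed. A sketch of a CPU-side mirror of that layout in plain C++; the struct and helper names are placeholders and the actual buffer upload is left as a comment.

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // CPU-side mirror of the std430 InstanceData block declared above:
    // 64 bytes of matrix, four 32-bit uints, one vec4 -> 96 bytes total.
    struct InstanceDataSketch {
        float transform[16]; // mat4, column-major
        uint32_t flags;
        uint32_t instance_uniforms_ofs;
        uint32_t gi_offset;
        uint32_t layer_mask;
        float lightmap_uv_scale[4]; // vec4
    };
    static_assert(sizeof(InstanceDataSketch) == 96, "must match the std430 layout");

    // Gather every visible instance of a render list into one contiguous array;
    // a single storage-buffer update then replaces the per-draw push constant.
    std::vector<InstanceDataSketch> build_instance_data(size_t p_count) {
        std::vector<InstanceDataSketch> data(p_count);
        for (size_t i = 0; i < p_count; i++) {
            std::memset(&data[i], 0, sizeof(InstanceDataSketch));
            data[i].layer_mask = 1;
            data[i].transform[0] = data[i].transform[5] = 1.0f;
            data[i].transform[10] = data[i].transform[15] = 1.0f; // identity matrix
        }
        return data; // upload once per list/frame with a buffer update
    }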
struct LightmapCapture {
vec4 sh[9];
};
layout(set = 0, binding = 9, std140) restrict readonly buffer LightmapCaptures {
LightmapCapture data[];
}
lightmap_captures;
layout(set = 0, binding = 10) uniform texture2D decal_atlas;
layout(set = 0, binding = 11) uniform texture2D decal_atlas_srgb;
layout(set = 0, binding = 12, std430) restrict readonly buffer Decals {
DecalData data[];
}
decals;
layout(set = 0, binding = 13, std430) restrict readonly buffer GlobalVariableData {
vec4 data[];
}
global_variables;
#ifndef LOW_END_MODE
struct SDFGIProbeCascadeData {
vec3 position;
float to_probe;
ivec3 probe_world_offset;
float to_cell; // 1/bounds * grid_size
};
layout(set = 0, binding = 14, std140) uniform SDFGI {
vec3 grid_size;
uint max_cascades;
bool use_occlusion;
int probe_axis_size;
float probe_to_uvw;
float normal_bias;
vec3 lightprobe_tex_pixel_size;
float energy;
vec3 lightprobe_uv_offset;
float y_mult;
vec3 occlusion_clamp;
uint pad3;
vec3 occlusion_renormalize;
uint pad4;
vec3 cascade_probe_size;
uint pad5;
SDFGIProbeCascadeData cascades[SDFGI_MAX_CASCADES];
}
sdfgi;
#endif //LOW_END_MODE
// decal atlas
/* Set 1, Radiance */
#ifdef USE_RADIANCE_CUBEMAP_ARRAY #ifdef USE_RADIANCE_CUBEMAP_ARRAY
layout(set = 1, binding = 0) uniform textureCubeArray radiance_cubemap; layout(set = 1, binding = 2) uniform textureCubeArray radiance_cubemap;
#else #else
layout(set = 1, binding = 0) uniform textureCube radiance_cubemap; layout(set = 1, binding = 2) uniform textureCube radiance_cubemap;
#endif #endif
/* Set 2, Reflection and Shadow Atlases (view dependent) */ layout(set = 1, binding = 3) uniform textureCubeArray reflection_atlas;
layout(set = 1, binding = 1) uniform textureCubeArray reflection_atlas; layout(set = 1, binding = 4) uniform texture2D shadow_atlas;
layout(set = 1, binding = 2) uniform texture2D shadow_atlas; layout(set = 1, binding = 5) uniform texture2D directional_shadow_atlas;
layout(set = 1, binding = 3) uniform texture2D directional_shadow_atlas; layout(set = 1, binding = 6) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES];
layout(set = 1, binding = 4) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; #ifndef LOW_END_MODE
layout(set = 1, binding = 7) uniform texture3D gi_probe_textures[MAX_GI_PROBES];
#ifndef LOW_END_MODE
layout(set = 1, binding = 5) uniform texture3D gi_probe_textures[MAX_GI_PROBES];
#endif #endif
layout(set = 1, binding = 6, std430) buffer restrict readonly ClusterBuffer { layout(set = 1, binding = 8, std430) buffer restrict readonly ClusterBuffer {
uint data[]; uint data[];
} }
cluster_buffer; cluster_buffer;
/* Set 3, Render Buffers */
#ifdef MODE_RENDER_SDF #ifdef MODE_RENDER_SDF
layout(r16ui, set = 1, binding = 7) uniform restrict writeonly uimage3D albedo_volume_grid; layout(r16ui, set = 1, binding = 9) uniform restrict writeonly uimage3D albedo_volume_grid;
layout(r32ui, set = 1, binding = 8) uniform restrict writeonly uimage3D emission_grid; layout(r32ui, set = 1, binding = 10) uniform restrict writeonly uimage3D emission_grid;
layout(r32ui, set = 1, binding = 9) uniform restrict writeonly uimage3D emission_aniso_grid; layout(r32ui, set = 1, binding = 11) uniform restrict writeonly uimage3D emission_aniso_grid;
layout(r32ui, set = 1, binding = 10) uniform restrict uimage3D geom_facing_grid; layout(r32ui, set = 1, binding = 12) uniform restrict uimage3D geom_facing_grid;
//still need to be present for shaders that use it, so remap them to something //still need to be present for shaders that use it, so remap them to something
#define depth_buffer shadow_atlas #define depth_buffer shadow_atlas
@ -295,17 +303,17 @@ layout(r32ui, set = 1, binding = 10) uniform restrict uimage3D geom_facing_grid;
#else
layout(set = 1, binding = 7) uniform texture2D depth_buffer;
layout(set = 1, binding = 9) uniform texture2D depth_buffer;
layout(set = 1, binding = 8) uniform texture2D color_buffer;
layout(set = 1, binding = 10) uniform texture2D color_buffer;
#ifndef LOW_END_MODE
layout(set = 1, binding = 9) uniform texture2D normal_roughness_buffer;
layout(set = 1, binding = 11) uniform texture2D normal_roughness_buffer;
layout(set = 1, binding = 10) uniform texture2D ao_buffer;
layout(set = 1, binding = 12) uniform texture2D ao_buffer;
layout(set = 1, binding = 11) uniform texture2D ambient_buffer;
layout(set = 1, binding = 13) uniform texture2D ambient_buffer;
layout(set = 1, binding = 12) uniform texture2D reflection_buffer;
layout(set = 1, binding = 14) uniform texture2D reflection_buffer;
layout(set = 1, binding = 13) uniform texture2DArray sdfgi_lightprobe_texture;
layout(set = 1, binding = 15) uniform texture2DArray sdfgi_lightprobe_texture;
layout(set = 1, binding = 14) uniform texture3D sdfgi_occlusion_cascades;
layout(set = 1, binding = 16) uniform texture3D sdfgi_occlusion_cascades;
struct GIProbeData {
mat4 xform;
@ -323,22 +331,22 @@ struct GIProbeData {
uint mipmaps;
};
layout(set = 1, binding = 15, std140) uniform GIProbes {
layout(set = 1, binding = 17, std140) uniform GIProbes {
GIProbeData data[MAX_GI_PROBES];
}
gi_probes;
layout(set = 1, binding = 16) uniform texture3D volumetric_fog_texture;
layout(set = 1, binding = 18) uniform texture3D volumetric_fog_texture;
#endif // LOW_END_MODE
#endif
/* Set 4 Skeleton & Instancing (Multimesh) */
/* Set 2 Skeleton & Instancing (can change per item) */
layout(set = 2, binding = 0, std430) restrict readonly buffer Transforms {
vec4 data[];
}
transforms;
/* Set 5 User Material */
/* Set 3 User Material */

@ -6,8 +6,20 @@ VERSION_DEFINES
#define BLOCK_SIZE 8
#ifdef MODE_REDUCE_SUBGROUP
#extension GL_KHR_shader_subgroup_ballot : enable
#extension GL_KHR_shader_subgroup_arithmetic : enable
//nvidia friendly, max 32
layout(local_size_x = 8, local_size_y = 4, local_size_z = 1) in;
#else
layout(local_size_x = BLOCK_SIZE, local_size_y = BLOCK_SIZE, local_size_z = 1) in;
#endif
#ifdef MODE_REDUCE
shared float tmp_data[BLOCK_SIZE * BLOCK_SIZE];
@ -16,8 +28,12 @@ const uint unswizzle_table[BLOCK_SIZE] = uint[](0, 0, 0, 1, 0, 2, 1, 3);
#endif
layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D source_depth;
layout(r32f, set = 0, binding = 1) uniform restrict writeonly image2D dst_depth;
#if defined(MODE_REDUCE) || defined(MODE_REDUCE_SUBGROUP)
layout(set = 0, binding = 0) uniform sampler2D source_depth;
#else
layout(r16, set = 0, binding = 0) uniform restrict readonly image2D source_depth;
#endif
layout(r16, set = 1, binding = 0) uniform restrict writeonly image2D dst_depth;
layout(push_constant, binding = 1, std430) uniform Params {
ivec2 source_size;
@ -29,6 +45,48 @@ layout(push_constant, binding = 1, std430) uniform Params {
params;
void main() {
#ifdef MODE_REDUCE_SUBGROUP
uvec2 local_pos = gl_LocalInvocationID.xy;
ivec2 image_offset = params.source_offset;
ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy * ivec2(1, 2));
float depth = texelFetch(source_depth, min(image_pos, params.source_size - ivec2(1)), 0).r;
depth += texelFetch(source_depth, min(image_pos + ivec2(0, 1), params.source_size - ivec2(1)), 0).r;
depth *= 0.5;
#ifdef MODE_REDUCE_8
//fast version, reduce all
float depth_average = subgroupAdd(depth) / 32.0;
if (local_pos == uvec2(0)) {
imageStore(dst_depth, image_pos / 8, vec4(depth_average));
}
#else
//bit slower version, reduce by regions
uint group_size = (8 / params.min_size);
uvec2 group_id = local_pos / (8 / params.min_size);
uvec4 mask;
float depth_average = 0;
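//The loop below partitions the subgroup by group_id: on each iteration the
//first still-active invocation broadcasts its group id, every invocation in
//that group reduces its depth with subgroupAdd() and breaks out, and the
//remaining groups are handled by the following iterations.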
while (true) {
uvec2 first = subgroupBroadcastFirst(group_id);
mask = subgroupBallot(first == group_id);
if (first == group_id) {
depth_average = subgroupAdd(depth);
break;
}
}
depth_average /= float(group_size * group_size);
if (local_pos == group_id) {
imageStore(dst_depth, image_pos / int(group_size), vec4(depth_average));
}
#endif
#endif
#ifdef MODE_REDUCE
uvec2 pos = gl_LocalInvocationID.xy;
@ -36,7 +94,7 @@ void main() {
ivec2 image_offset = params.source_offset;
ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy);
uint dst_t = swizzle_table[pos.y] * BLOCK_SIZE + swizzle_table[pos.x];
tmp_data[dst_t] = imageLoad(source_depth, min(image_pos, params.source_size - ivec2(1))).r;
tmp_data[dst_t] = texelFetch(source_depth, min(image_pos, params.source_size - ivec2(1)), 0).r;
ivec2 image_size = params.source_size;
uint t = pos.y * BLOCK_SIZE + pos.x;

@ -1906,6 +1906,9 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
RS::LightOmniShadowMode shadow_mode = RSG::storage->light_omni_get_shadow_mode(p_instance->base);
if (shadow_mode == RS::LIGHT_OMNI_SHADOW_DUAL_PARABOLOID || !scene_render->light_instances_can_render_shadow_cube()) {
if (max_shadows_used + 2 > MAX_UPDATE_SHADOWS) {
return true;
}
for (int i = 0; i < 2; i++) {
//using this one ensures that raster deferred will have it
RENDER_TIMESTAMP("Culling Shadow Paraboloid" + itos(i));
@ -1922,7 +1925,6 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
planes.write[4] = light_transform.xform(Plane(Vector3(0, -1, z).normalized(), radius));
planes.write[5] = light_transform.xform(Plane(Vector3(0, 0, -z), 0));
geometry_instances_to_shadow_render.clear();
instance_shadow_cull_result.clear();
Vector<Vector3> points = Geometry3D::compute_convex_mesh_points(&planes[0], planes.size());
@ -1943,6 +1945,8 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
Plane near_plane(light_transform.origin, light_transform.basis.get_axis(2) * z);
RendererSceneRender::RenderShadowData &shadow_data = render_shadow_data[max_shadows_used++];
for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) {
Instance *instance = instance_shadow_cull_result[j];
if (!instance->visible || !((1 << instance->base_type) & RS::INSTANCE_GEOMETRY_MASK) || !static_cast<InstanceGeometryData *>(instance->base_data)->can_cast_shadows) {
@ -1957,16 +1961,21 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
}
}
geometry_instances_to_shadow_render.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance);
shadow_data.instances.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance);
}
RSG::storage->update_mesh_instances();
scene_render->light_instance_set_shadow_transform(light->instance, CameraMatrix(), light_transform, radius, 0, i, 0);
scene_render->render_shadow(light->instance, p_shadow_atlas, i, geometry_instances_to_shadow_render);
shadow_data.light = light->instance;
shadow_data.pass = i;
}
} else { //shadow cube
if (max_shadows_used + 6 > MAX_UPDATE_SHADOWS) {
return true;
}
real_t radius = RSG::storage->light_get_param(p_instance->base, RS::LIGHT_PARAM_RANGE);
CameraMatrix cm;
cm.set_perspective(90, 1, 0.01, radius);
@ -1996,7 +2005,6 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
Vector<Plane> planes = cm.get_projection_planes(xform);
geometry_instances_to_shadow_render.clear();
instance_shadow_cull_result.clear();
Vector<Vector3> points = Geometry3D::compute_convex_mesh_points(&planes[0], planes.size());
@ -2015,7 +2023,7 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
p_scenario->indexers[Scenario::INDEXER_GEOMETRY].convex_query(planes.ptr(), planes.size(), points.ptr(), points.size(), cull_convex);
Plane near_plane(xform.origin, -xform.basis.get_axis(2));
RendererSceneRender::RenderShadowData &shadow_data = render_shadow_data[max_shadows_used++];
for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) {
Instance *instance = instance_shadow_cull_result[j];
@ -2030,22 +2038,28 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
}
}
geometry_instances_to_shadow_render.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance);
shadow_data.instances.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance);
}
RSG::storage->update_mesh_instances();
scene_render->light_instance_set_shadow_transform(light->instance, cm, xform, radius, 0, i, 0);
scene_render->render_shadow(light->instance, p_shadow_atlas, i, geometry_instances_to_shadow_render);
shadow_data.light = light->instance;
shadow_data.pass = i;
}
//restore the regular DP matrix
scene_render->light_instance_set_shadow_transform(light->instance, CameraMatrix(), light_transform, radius, 0, 0, 0);
//scene_render->light_instance_set_shadow_transform(light->instance, CameraMatrix(), light_transform, radius, 0, 0, 0);
}
} break;
case RS::LIGHT_SPOT: {
RENDER_TIMESTAMP("Culling Spot Light");
if (max_shadows_used + 1 > MAX_UPDATE_SHADOWS) {
return true;
}
real_t radius = RSG::storage->light_get_param(p_instance->base, RS::LIGHT_PARAM_RANGE);
real_t angle = RSG::storage->light_get_param(p_instance->base, RS::LIGHT_PARAM_SPOT_ANGLE);
@ -2054,7 +2068,6 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
Vector<Plane> planes = cm.get_projection_planes(light_transform);
geometry_instances_to_shadow_render.clear();
instance_shadow_cull_result.clear();
Vector<Vector3> points = Geometry3D::compute_convex_mesh_points(&planes[0], planes.size());
@ -2073,7 +2086,7 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
p_scenario->indexers[Scenario::INDEXER_GEOMETRY].convex_query(planes.ptr(), planes.size(), points.ptr(), points.size(), cull_convex);
Plane near_plane(light_transform.origin, -light_transform.basis.get_axis(2));
RendererSceneRender::RenderShadowData &shadow_data = render_shadow_data[max_shadows_used++];
for (int j = 0; j < (int)instance_shadow_cull_result.size(); j++) {
Instance *instance = instance_shadow_cull_result[j];
@ -2088,13 +2101,14 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
RSG::storage->mesh_instance_check_for_update(instance->mesh_instance);
}
}
geometry_instances_to_shadow_render.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance);
shadow_data.instances.push_back(static_cast<InstanceGeometryData *>(instance->base_data)->geometry_instance);
}
RSG::storage->update_mesh_instances();
scene_render->light_instance_set_shadow_transform(light->instance, cm, light_transform, radius, 0, 0, 0);
scene_render->render_shadow(light->instance, p_shadow_atlas, 0, geometry_instances_to_shadow_render);
shadow_data.light = light->instance;
shadow_data.pass = 0;
} break;
}
@ -2147,14 +2161,13 @@ void RendererSceneCull::render_camera(RID p_render_buffers, RID p_camera, RID p_
RID environment = _render_get_environment(p_camera, p_scenario);
_prepare_scene(camera->transform, camera_matrix, ortho, camera->vaspect, p_render_buffers, environment, camera->visible_layers, p_scenario, p_shadow_atlas, RID(), p_screen_lod_threshold);
_render_scene(camera->transform, camera_matrix, ortho, camera->vaspect, p_render_buffers, environment, camera->effects, camera->visible_layers, p_scenario, p_shadow_atlas, RID(), -1, p_screen_lod_threshold);
_render_scene(p_render_buffers, camera->transform, camera_matrix, ortho, environment, camera->effects, p_scenario, p_shadow_atlas, RID(), -1, p_screen_lod_threshold);
#endif
}
void RendererSceneCull::render_camera(RID p_render_buffers, Ref<XRInterface> &p_interface, XRInterface::Eyes p_eye, RID p_camera, RID p_scenario, Size2 p_viewport_size, float p_screen_lod_threshold, RID p_shadow_atlas) {
// render for AR/VR interface
#if 0
Camera *camera = camera_owner.getornull(p_camera);
ERR_FAIL_COND(!camera);
@ -2234,6 +2247,7 @@ void RendererSceneCull::render_camera(RID p_render_buffers, Ref<XRInterface> &p_
// And render our scene...
_render_scene(p_render_buffers, cam_transform, camera_matrix, false, environment, camera->effects, p_scenario, p_shadow_atlas, RID(), -1, p_screen_lod_threshold);
#endif
};
void RendererSceneCull::_frustum_cull_threaded(uint32_t p_thread, FrustumCullData *cull_data) {
@ -2452,7 +2466,7 @@ void RendererSceneCull::_frustum_cull(FrustumCullData &cull_data, FrustumCullRes
}
}
void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, float p_screen_lod_threshold, bool p_using_shadows) {
void RendererSceneCull::_render_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, RID p_force_camera_effects, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, bool p_using_shadows) {
// Note, in stereo rendering:
// - p_cam_transform will be a transform in the middle of our two eyes
// - p_cam_projection is a wider frustum that encompasses both eyes
@ -2466,6 +2480,7 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
scene_render->set_scene_pass(render_pass);
if (p_render_buffers.is_valid()) {
//no rendering code here, this is only to set up what needs to be done, request regions, etc.
scene_render->sdfgi_update(p_render_buffers, p_environment, p_cam_transform.origin); //update conditions for SDFGI (whether its used or not)
}
@ -2596,62 +2611,28 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
//render shadows
max_shadows_used = 0;
for (uint32_t i = 0; i < cull.shadow_count; i++) {
for (uint32_t j = 0; j < cull.shadows[i].cascade_count; j++) {
const Cull::Shadow::Cascade &c = cull.shadows[i].cascades[j];
// print_line("shadow " + itos(i) + " cascade " + itos(j) + " elements: " + itos(c.cull_result.size()));
scene_render->light_instance_set_shadow_transform(cull.shadows[i].light_instance, c.projection, c.transform, c.zfar, c.split, j, c.shadow_texel_size, c.bias_scale, c.range_begin, c.uv_scale);
scene_render->render_shadow(cull.shadows[i].light_instance, p_shadow_atlas, j, frustum_cull_result.directional_shadows[i].cascade_geometry_instances[j], near_plane, p_cam_projection.get_lod_multiplier(), p_screen_lod_threshold);
}
}
//render SDFGI
{
if (cull.sdfgi.region_count > 0) {
//update regions
for (uint32_t i = 0; i < cull.sdfgi.region_count; i++) {
scene_render->render_sdfgi(p_render_buffers, i, frustum_cull_result.sdfgi_region_geometry_instances[i]);
}
//check if static lights were culled
bool static_lights_culled = false;
for (uint32_t i = 0; i < cull.sdfgi.cascade_light_count; i++) {
if (frustum_cull_result.sdfgi_cascade_lights[i].size()) {
static_lights_culled = true;
break;
}
}
if (static_lights_culled) {
scene_render->render_sdfgi_static_lights(p_render_buffers, cull.sdfgi.cascade_light_count, cull.sdfgi.cascade_light_index, frustum_cull_result.sdfgi_cascade_lights);
}
}
if (p_render_buffers.is_valid()) {
scene_render->sdfgi_update_probes(p_render_buffers, p_environment, directional_lights, scenario->dynamic_lights.ptr(), scenario->dynamic_lights.size());
}
}
//light_samplers_culled=0;
/*
print_line("OT: "+rtos( (OS::get_singleton()->get_ticks_usec()-t)/1000.0));
print_line("OTO: "+itos(p_scenario->octree.get_octant_count()));
print_line("OTE: "+itos(p_scenario->octree.get_elem_count()));
print_line("OTP: "+itos(p_scenario->octree.get_pair_count()));
*/
/* STEP 3 - PROCESS PORTALS, VALIDATE ROOMS */
//removed, will replace with culling
/* STEP 4 - REMOVE FURTHER CULLED OBJECTS, ADD LIGHTS */
/* STEP 5 - PROCESS POSITIONAL LIGHTS */
if (p_using_shadows) { //setup shadow maps
//SortArray<Instance*,_InstanceLightsort> sorter;
//sorter.sort(light_cull_result,light_cull_count);
if (p_using_shadows) {
// Directional Shadows
for (uint32_t i = 0; i < cull.shadow_count; i++) {
for (uint32_t j = 0; j < cull.shadows[i].cascade_count; j++) {
const Cull::Shadow::Cascade &c = cull.shadows[i].cascades[j];
// print_line("shadow " + itos(i) + " cascade " + itos(j) + " elements: " + itos(c.cull_result.size()));
scene_render->light_instance_set_shadow_transform(cull.shadows[i].light_instance, c.projection, c.transform, c.zfar, c.split, j, c.shadow_texel_size, c.bias_scale, c.range_begin, c.uv_scale);
if (max_shadows_used == MAX_UPDATE_SHADOWS) {
continue;
}
render_shadow_data[max_shadows_used].light = cull.shadows[i].light_instance;
render_shadow_data[max_shadows_used].pass = j;
render_shadow_data[max_shadows_used].instances.merge_unordered(frustum_cull_result.directional_shadows[i].cascade_geometry_instances[j]);
max_shadows_used++;
}
}
// Positional Shadows
for (uint32_t i = 0; i < (uint32_t)frustum_cull_result.lights.size(); i++) {
Instance *ins = frustum_cull_result.lights[i];
@ -2738,19 +2719,78 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
bool redraw = scene_render->shadow_atlas_update_light(p_shadow_atlas, light->instance, coverage, light->last_version);
if (redraw) {
if (redraw && max_shadows_used < MAX_UPDATE_SHADOWS) {
//must redraw!
RENDER_TIMESTAMP(">Rendering Light " + itos(i));
light->shadow_dirty = _light_instance_update_shadow(ins, p_cam_transform, p_cam_projection, p_cam_orthogonal, p_cam_vaspect, p_shadow_atlas, scenario, p_screen_lod_threshold);
RENDER_TIMESTAMP("<Rendering Light " + itos(i));
} else {
light->shadow_dirty = redraw;
}
}
}
//render SDFGI
{
sdfgi_update_data.update_static = false;
if (cull.sdfgi.region_count > 0) {
//update regions
for (uint32_t i = 0; i < cull.sdfgi.region_count; i++) {
render_sdfgi_data[i].instances.merge_unordered(frustum_cull_result.sdfgi_region_geometry_instances[i]);
render_sdfgi_data[i].region = i;
}
//check if static lights were culled
bool static_lights_culled = false;
for (uint32_t i = 0; i < cull.sdfgi.cascade_light_count; i++) {
if (frustum_cull_result.sdfgi_cascade_lights[i].size()) {
static_lights_culled = true;
break;
}
}
if (static_lights_culled) {
sdfgi_update_data.static_cascade_count = cull.sdfgi.cascade_light_count;
sdfgi_update_data.static_cascade_indices = cull.sdfgi.cascade_light_index;
sdfgi_update_data.static_positional_lights = frustum_cull_result.sdfgi_cascade_lights;
sdfgi_update_data.update_static = true;
}
}
if (p_render_buffers.is_valid()) {
sdfgi_update_data.directional_lights = &directional_lights;
sdfgi_update_data.positional_light_instances = scenario->dynamic_lights.ptr();
sdfgi_update_data.positional_light_count = scenario->dynamic_lights.size();
}
}
//append the directional lights to the lights culled
for (int i = 0; i < directional_lights.size(); i++) {
frustum_cull_result.light_instances.push_back(directional_lights[i]);
}
RID camera_effects;
if (p_force_camera_effects.is_valid()) {
camera_effects = p_force_camera_effects;
} else {
camera_effects = scenario->camera_effects;
}
/* PROCESS GEOMETRY AND DRAW SCENE */
RENDER_TIMESTAMP("Render Scene ");
scene_render->render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_orthogonal, frustum_cull_result.geometry_instances, frustum_cull_result.light_instances, frustum_cull_result.reflections, frustum_cull_result.gi_probes, frustum_cull_result.decals, frustum_cull_result.lightmaps, p_environment, camera_effects, p_shadow_atlas, p_reflection_probe.is_valid() ? RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass, p_screen_lod_threshold, render_shadow_data, max_shadows_used, render_sdfgi_data, cull.sdfgi.region_count, &sdfgi_update_data);
for (uint32_t i = 0; i < max_shadows_used; i++) {
render_shadow_data[i].instances.clear();
}
max_shadows_used = 0;
for (uint32_t i = 0; i < cull.sdfgi.region_count; i++) {
render_sdfgi_data[i].instances.clear();
}
// virtual void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold,const RenderShadowData *p_render_shadows,int p_render_shadow_count,const RenderSDFGIData *p_render_sdfgi_regions,int p_render_sdfgi_region_count,const RenderSDFGIStaticLightData *p_render_sdfgi_static_lights=nullptr) = 0;
}
RID RendererSceneCull::_render_get_environment(RID p_camera, RID p_scenario) {
@ -2774,21 +2814,6 @@ RID RendererSceneCull::_render_get_environment(RID p_camera, RID p_scenario) {
return RID();
}
void RendererSceneCull::_render_scene(RID p_render_buffers, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, RID p_environment, RID p_force_camera_effects, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold) {
Scenario *scenario = scenario_owner.getornull(p_scenario);
RID camera_effects;
if (p_force_camera_effects.is_valid()) {
camera_effects = p_force_camera_effects;
} else {
camera_effects = scenario->camera_effects;
}
/* PROCESS GEOMETRY AND DRAW SCENE */
RENDER_TIMESTAMP("Render Scene ");
scene_render->render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_orthogonal, frustum_cull_result.geometry_instances, frustum_cull_result.light_instances, frustum_cull_result.reflections, frustum_cull_result.gi_probes, frustum_cull_result.decals, frustum_cull_result.lightmaps, p_environment, camera_effects, p_shadow_atlas, p_reflection_probe.is_valid() ? RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass, p_screen_lod_threshold);
}
void RendererSceneCull::render_empty_scene(RID p_render_buffers, RID p_scenario, RID p_shadow_atlas) {
#ifndef _3D_DISABLED
@ -2801,7 +2826,7 @@ void RendererSceneCull::render_empty_scene(RID p_render_buffers, RID p_scenario,
environment = scenario->fallback_environment;
}
RENDER_TIMESTAMP("Render Empty Scene ");
scene_render->render_scene(p_render_buffers, Transform(), CameraMatrix(), true, PagedArray<RendererSceneRender::GeometryInstance *>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), RID(), RID(), p_shadow_atlas, scenario->reflection_atlas, RID(), 0, 0);
scene_render->render_scene(p_render_buffers, Transform(), CameraMatrix(), true, PagedArray<RendererSceneRender::GeometryInstance *>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), PagedArray<RID>(), RID(), RID(), p_shadow_atlas, scenario->reflection_atlas, RID(), 0, 0, nullptr, 0, nullptr, 0, nullptr);
#endif
}
@ -2864,8 +2889,7 @@ bool RendererSceneCull::_render_reflection_probe_step(Instance *p_instance, int
}
RENDER_TIMESTAMP("Render Reflection Probe, Step " + itos(p_step));
_prepare_scene(xform, cm, false, false, RID(), RID(), RSG::storage->reflection_probe_get_cull_mask(p_instance->base), p_instance->scenario->self, shadow_atlas, reflection_probe->instance, lod_threshold, use_shadows);
_render_scene(xform, cm, false, false, RID(), RID(), RID(), RSG::storage->reflection_probe_get_cull_mask(p_instance->base), p_instance->scenario->self, shadow_atlas, reflection_probe->instance, p_step, lod_threshold, use_shadows);
_render_scene(RID(), xform, cm, false, RID(), RID(), p_instance->scenario->self, shadow_atlas, reflection_probe->instance, p_step, lod_threshold);
} else {
//do roughness postprocess step until it believes it's done
@ -3493,7 +3517,12 @@ RendererSceneCull::RendererSceneCull() {
instance_cull_result.set_page_pool(&instance_cull_page_pool);
instance_shadow_cull_result.set_page_pool(&instance_cull_page_pool);
geometry_instances_to_shadow_render.set_page_pool(&geometry_instance_cull_page_pool);
for (uint32_t i = 0; i < MAX_UPDATE_SHADOWS; i++) {
render_shadow_data[i].instances.set_page_pool(&geometry_instance_cull_page_pool);
}
for (uint32_t i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) {
render_sdfgi_data[i].instances.set_page_pool(&geometry_instance_cull_page_pool);
}
frustum_cull_result.init(&rid_cull_page_pool, &geometry_instance_cull_page_pool, &instance_cull_page_pool);
frustum_cull_result_threads.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
@ -3510,7 +3539,12 @@ RendererSceneCull::~RendererSceneCull() {
instance_cull_result.reset();
instance_shadow_cull_result.reset();
geometry_instances_to_shadow_render.reset();
for (uint32_t i = 0; i < MAX_UPDATE_SHADOWS; i++) {
render_shadow_data[i].instances.reset();
}
for (uint32_t i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) {
render_sdfgi_data[i].instances.reset();
}
frustum_cull_result.reset();
for (uint32_t i = 0; i < frustum_cull_result_threads.size(); i++) {

@ -54,7 +54,8 @@ public:
enum {
SDFGI_MAX_CASCADES = 8,
SDFGI_MAX_REGIONS_PER_CASCADE = 3,
MAX_INSTANCE_PAIRS = 32,
MAX_UPDATE_SHADOWS = 512
};
uint64_t render_pass;
@ -696,7 +697,6 @@ public:
PagedArray<Instance *> instance_cull_result;
PagedArray<Instance *> instance_shadow_cull_result;
PagedArray<RendererSceneRender::GeometryInstance *> geometry_instances_to_shadow_render;
struct FrustumCullResult {
PagedArray<RendererSceneRender::GeometryInstance *> geometry_instances;
@ -816,6 +816,12 @@ public:
FrustumCullResult frustum_cull_result;
LocalVector<FrustumCullResult> frustum_cull_result_threads;
RendererSceneRender::RenderShadowData render_shadow_data[MAX_UPDATE_SHADOWS];
uint32_t max_shadows_used = 0;
RendererSceneRender::RenderSDFGIData render_sdfgi_data[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE];
RendererSceneRender::RenderSDFGIUpdateData sdfgi_update_data;
uint32_t thread_cull_threshold = 200;
RID_PtrOwner<Instance> instance_owner;
@ -924,8 +930,7 @@ public:
void _frustum_cull(FrustumCullData &cull_data, FrustumCullResult &cull_result, uint64_t p_from, uint64_t p_to);
bool _render_reflection_probe_step(Instance *p_instance, int p_step);
void _prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, float p_screen_lod_threshold, bool p_using_shadows = true);
void _render_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, RID p_force_camera_effects, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, bool p_using_shadows = true);
void _render_scene(RID p_render_buffers, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, RID p_environment, RID p_force_camera_effects, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold);
void render_empty_scene(RID p_render_buffers, RID p_scenario, RID p_shadow_atlas);
void render_camera(RID p_render_buffers, RID p_camera, RID p_scenario, Size2 p_viewport_size, float p_screen_lod_threshold, RID p_shadow_atlas);

@ -87,7 +87,6 @@ public:
virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const = 0;
virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const = 0;
virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const = 0;
virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count) = 0;
/* SKY API */
@ -195,12 +194,31 @@ public:
virtual void gi_probe_set_quality(RS::GIProbeQuality) = 0;
virtual void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold) = 0;
struct RenderShadowData {
RID light;
int pass = 0;
PagedArray<GeometryInstance *> instances;
};
struct RenderSDFGIData {
int region = 0;
PagedArray<GeometryInstance *> instances;
};
struct RenderSDFGIUpdateData {
bool update_static = false;
uint32_t static_cascade_count;
uint32_t *static_cascade_indices;
PagedArray<RID> *static_positional_lights;
const Vector<RID> *directional_lights;
const RID *positional_light_instances;
uint32_t positional_light_count;
};
virtual void render_scene(RID p_render_buffers, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, const PagedArray<RID> &p_lights, const PagedArray<RID> &p_reflection_probes, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_decals, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr) = 0;
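// The structs above are plain parameter blocks: the caller batches every shadow
// and SDFGI pass it wants rendered this frame and hands the arrays to
// render_scene() in a single call, so the backend can schedule barriers for the
// whole batch. A minimal sketch of the fill pattern, following the
// RendererSceneCull changes earlier in this diff (render_shadow_data,
// max_shadows_used and MAX_UPDATE_SHADOWS are members of that class; the helper
// name itself is hypothetical):
//
//   bool queue_shadow_pass(RID p_light_instance, int p_pass, PagedArray<RendererSceneRender::GeometryInstance *> &p_casters) {
//       if (max_shadows_used == MAX_UPDATE_SHADOWS) {
//           return false; // budget used up; the light stays dirty and is retried later
//       }
//       RendererSceneRender::RenderShadowData &sd = render_shadow_data[max_shadows_used++];
//       sd.light = p_light_instance;
//       sd.pass = p_pass;
//       sd.instances.merge_unordered(p_casters); // moves the culled casters into the batch
//       return true;
//   }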
virtual void render_shadow(RID p_light, RID p_shadow_atlas, int p_pass, const PagedArray<GeometryInstance *> &p_instances, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0, float p_screen_lod_threshold = 0.0) = 0;
virtual void render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0;
virtual void render_sdfgi(RID p_render_buffers, int p_region, const PagedArray<GeometryInstance *> &p_instances) = 0;
virtual void render_sdfgi_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray<RID> *p_positional_lights) = 0;
virtual void render_particle_collider_heightfield(RID p_collider, const Transform &p_transform, const PagedArray<GeometryInstance *> &p_instances) = 0;
virtual void set_scene_pass(uint64_t p_pass) = 0;

@ -98,6 +98,7 @@ public:
while (to_clean_up.size()) {
to_clean_up.front()->get().first->instances.erase(to_clean_up.front()->get().second);
dependencies.erase(to_clean_up.front()->get().first);
to_clean_up.pop_front();
}
}

@ -240,10 +240,6 @@ void RenderingDevice::_compute_list_set_push_constant(ComputeListID p_list, cons
compute_list_set_push_constant(p_list, p_data.ptr(), p_data_size);
}
void RenderingDevice::compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads, uint32_t p_x_local_group, uint32_t p_y_local_group, uint32_t p_z_local_group) {
compute_list_dispatch(p_list, (p_x_threads - 1) / p_x_local_group + 1, (p_y_threads - 1) / p_y_local_group + 1, (p_z_threads - 1) / p_z_local_group + 1);
}
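// The removed helper above only rounded the thread counts up to whole
// workgroups before dispatching; with the pure-virtual signature further down
// in this diff, the backend presumably does the same using the workgroup size
// declared by the bound pipeline. For reference, the rounding it performed was
// a ceil-division:
//
//   uint32_t group_count(uint32_t p_threads, uint32_t p_local_group) {
//       return (p_threads - 1) / p_local_group + 1;
//   }
//
// e.g. 1920x1080 threads with an 8x8 workgroup dispatch 240 x 135 x 1 groups.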
void RenderingDevice::_bind_methods() {
ClassDB::bind_method(D_METHOD("texture_create", "format", "view", "data"), &RenderingDevice::_texture_create, DEFVAL(Array()));
ClassDB::bind_method(D_METHOD("texture_create_shared", "view", "with_texture"), &RenderingDevice::_texture_create_shared);
@ -319,7 +315,7 @@ void RenderingDevice::_bind_methods() {
ClassDB::bind_method(D_METHOD("draw_list_end", "post_barrier"), &RenderingDevice::draw_list_end, DEFVAL(BARRIER_MASK_ALL)); ClassDB::bind_method(D_METHOD("draw_list_end", "post_barrier"), &RenderingDevice::draw_list_end, DEFVAL(BARRIER_MASK_ALL));
ClassDB::bind_method(D_METHOD("compute_list_begin"), &RenderingDevice::compute_list_begin); ClassDB::bind_method(D_METHOD("compute_list_begin", "allow_draw_overlap"), &RenderingDevice::compute_list_begin, DEFVAL(false));
ClassDB::bind_method(D_METHOD("compute_list_bind_compute_pipeline", "compute_list", "compute_pipeline"), &RenderingDevice::compute_list_bind_compute_pipeline); ClassDB::bind_method(D_METHOD("compute_list_bind_compute_pipeline", "compute_list", "compute_pipeline"), &RenderingDevice::compute_list_bind_compute_pipeline);
ClassDB::bind_method(D_METHOD("compute_list_set_push_constant", "compute_list", "buffer", "size_bytes"), &RenderingDevice::_compute_list_set_push_constant); ClassDB::bind_method(D_METHOD("compute_list_set_push_constant", "compute_list", "buffer", "size_bytes"), &RenderingDevice::_compute_list_set_push_constant);
ClassDB::bind_method(D_METHOD("compute_list_bind_uniform_set", "compute_list", "uniform_set", "set_index"), &RenderingDevice::compute_list_bind_uniform_set); ClassDB::bind_method(D_METHOD("compute_list_bind_uniform_set", "compute_list", "uniform_set", "set_index"), &RenderingDevice::compute_list_bind_uniform_set);
@ -352,10 +348,15 @@ void RenderingDevice::_bind_methods() {
ClassDB::bind_method(D_METHOD("draw_command_insert_label", "name", "color"), &RenderingDevice::draw_command_insert_label); ClassDB::bind_method(D_METHOD("draw_command_insert_label", "name", "color"), &RenderingDevice::draw_command_insert_label);
ClassDB::bind_method(D_METHOD("draw_command_end_label"), &RenderingDevice::draw_command_end_label); ClassDB::bind_method(D_METHOD("draw_command_end_label"), &RenderingDevice::draw_command_end_label);
ClassDB::bind_method(D_METHOD("get_device_vendor_name"), &RenderingDevice::get_device_vendor_name);
ClassDB::bind_method(D_METHOD("get_device_name"), &RenderingDevice::get_device_name);
ClassDB::bind_method(D_METHOD("get_device_pipeline_cache_uuid"), &RenderingDevice::get_device_pipeline_cache_uuid);
BIND_CONSTANT(BARRIER_MASK_RASTER);
BIND_CONSTANT(BARRIER_MASK_COMPUTE);
BIND_CONSTANT(BARRIER_MASK_TRANSFER);
BIND_CONSTANT(BARRIER_MASK_ALL);
BIND_CONSTANT(BARRIER_MASK_NO_BARRIER);
BIND_ENUM_CONSTANT(DATA_FORMAT_R4G4_UNORM_PACK8);
BIND_ENUM_CONSTANT(DATA_FORMAT_R4G4B4A4_UNORM_PACK16);
@ -760,6 +761,7 @@ void RenderingDevice::_bind_methods() {
BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR); //start rendering and clear the framebuffer (supply params)
BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR_REGION); //start rendering and clear the framebuffer (supply params)
BIND_ENUM_CONSTANT(INITIAL_ACTION_CLEAR_REGION_CONTINUE); //continue rendering and clear the framebuffer (supply params)
BIND_ENUM_CONSTANT(INITIAL_ACTION_KEEP); //start rendering, but keep attached color texture contents (depth will be cleared)
BIND_ENUM_CONSTANT(INITIAL_ACTION_DROP); //start rendering, ignore what is there, just write above it
BIND_ENUM_CONSTANT(INITIAL_ACTION_CONTINUE); //continue rendering (framebuffer must have been left in "continue" state as final action previously)

@ -343,6 +343,7 @@ public:
BARRIER_MASK_RASTER = 1,
BARRIER_MASK_COMPUTE = 2,
BARRIER_MASK_TRANSFER = 4,
BARRIER_MASK_NO_BARRIER = 8,
BARRIER_MASK_ALL = BARRIER_MASK_RASTER | BARRIER_MASK_COMPUTE | BARRIER_MASK_TRANSFER
};
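// BARRIER_MASK_NO_BARRIER lets a caller skip the automatic end-of-list barrier
// entirely, while the narrower masks restrict it to the stages that actually
// consume the results. Rough usage sketch (the rd pointer and the
// pipeline/uniform-set RIDs are placeholders; compute_list_end() is assumed to
// take a post_barrier mask the same way draw_list_end() is bound earlier in
// this diff):
//
//   RenderingDevice::ComputeListID cl = rd->compute_list_begin();
//   rd->compute_list_bind_compute_pipeline(cl, pipeline_a);
//   rd->compute_list_bind_uniform_set(cl, uniforms_a, 0);
//   rd->compute_list_dispatch_threads(cl, width, height, 1);
//   rd->compute_list_add_barrier(cl); // pass B reads what pass A wrote
//   rd->compute_list_bind_compute_pipeline(cl, pipeline_b);
//   rd->compute_list_bind_uniform_set(cl, uniforms_b, 0);
//   rd->compute_list_dispatch_threads(cl, width, height, 1);
//   rd->compute_list_end(RenderingDevice::BARRIER_MASK_TRANSFER); // only transfer will read the result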
@ -944,6 +945,7 @@ public:
enum InitialAction {
INITIAL_ACTION_CLEAR, //start rendering and clear the whole framebuffer (region or not) (supply params)
INITIAL_ACTION_CLEAR_REGION, //start rendering and clear the framebuffer in the specified region (supply params)
INITIAL_ACTION_CLEAR_REGION_CONTINUE, //continue rendering and clear the framebuffer in the specified region (supply params)
INITIAL_ACTION_KEEP, //start rendering, but keep attached color texture contents (depth will be cleared)
INITIAL_ACTION_DROP, //start rendering, ignore what is there, just write above it
INITIAL_ACTION_CONTINUE, //continue rendering (framebuffer must have been left in "continue" state as final action previously)
@ -983,12 +985,12 @@ public:
typedef int64_t ComputeListID;
virtual ComputeListID compute_list_begin() = 0;
virtual ComputeListID compute_list_begin(bool p_allow_draw_overlap = false) = 0;
virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline) = 0;
virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) = 0;
virtual void compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size) = 0;
virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) = 0;
virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads, uint32_t p_x_local_group, uint32_t p_y_local_group, uint32_t p_z_local_group);
virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads) = 0;
virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) = 0;
virtual void compute_list_add_barrier(ComputeListID p_list) = 0;
@ -1078,6 +1080,10 @@ public:
virtual void draw_command_insert_label(String p_label_name, const Color p_color = Color(1, 1, 1, 1)) = 0;
virtual void draw_command_end_label() = 0;
virtual String get_device_vendor_name() const = 0;
virtual String get_device_name() const = 0;
virtual String get_device_pipeline_cache_uuid() const = 0;
static RenderingDevice *get_singleton();
RenderingDevice();
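// Sketch: the new device-info getters can be queried through the singleton
// (the same methods are exposed to scripts via the bindings earlier in this diff):
//
//   RenderingDevice *rd = RenderingDevice::get_singleton();
//   if (rd) {
//       print_line(rd->get_device_vendor_name() + " / " + rd->get_device_name());
//       print_line("Pipeline cache UUID: " + rd->get_device_pipeline_cache_uuid());
//   }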