Untested support for compute shaders

This commit is contained in:
Juan Linietsky 2019-09-25 16:44:44 -03:00
parent f55332ffad
commit 263bebe023
13 changed files with 1057 additions and 91 deletions

View file

@ -1604,6 +1604,10 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T
image_create_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
}
@ -1723,39 +1727,41 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T
texture.samples = p_format.samples;
texture.allowed_shared_formats = p_format.shareable_formats;
//set bound and unbound layouts
//set base layout based on usage priority
if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) {
//first priority, readable
texture.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
} else if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) {
//second priority, storage
texture.layout = VK_IMAGE_LAYOUT_GENERAL;
} else if (p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
//third priority, color or depth
texture.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
} else if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
texture.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
} else {
texture.layout = VK_IMAGE_LAYOUT_GENERAL;
}
if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
texture.read_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
texture.barrier_aspect_mask = texture.read_aspect_mask;
texture.barrier_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
if (format_has_stencil(p_format.format)) {
texture.barrier_aspect_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) {
texture.unbound_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
} else {
texture.unbound_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
}
texture.bound_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
} else if (p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
texture.read_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
texture.barrier_aspect_mask = texture.read_aspect_mask;
if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) {
texture.unbound_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
} else {
texture.unbound_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}
texture.bound_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
} else {
texture.read_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
texture.barrier_aspect_mask = texture.read_aspect_mask;
texture.unbound_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
texture.bound_layout = VK_IMAGE_LAYOUT_UNDEFINED; //will never be bound
texture.barrier_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
}
texture.bound = false;
@ -1825,7 +1831,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
image_memory_barrier.newLayout = texture.unbound_layout;
image_memory_barrier.newLayout = texture.layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = texture.image;
@ -1835,7 +1841,7 @@ RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const T
image_memory_barrier.subresourceRange.baseArrayLayer = 0;
image_memory_barrier.subresourceRange.layerCount = image_create_info.arrayLayers;
vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
}
RID id = texture_owner.make_rid(texture);
@ -2081,7 +2087,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
image_memory_barrier.pNext = NULL;
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_memory_barrier.oldLayout = texture->unbound_layout;
image_memory_barrier.oldLayout = texture->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@ -2221,7 +2227,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
image_memory_barrier.newLayout = texture->unbound_layout;
image_memory_barrier.newLayout = texture->layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = texture->image;
@ -2231,7 +2237,7 @@ Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, con
image_memory_barrier.subresourceRange.baseArrayLayer = p_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
}
return OK;
@ -2368,7 +2374,7 @@ PoolVector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint3
image_memory_barrier.pNext = NULL;
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
image_memory_barrier.oldLayout = tex->unbound_layout;
image_memory_barrier.oldLayout = tex->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@ -2446,7 +2452,7 @@ PoolVector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint3
image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
image_memory_barrier.newLayout = tex->unbound_layout;
image_memory_barrier.newLayout = tex->layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = tex->image;
@ -2456,7 +2462,7 @@ PoolVector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint3
image_memory_barrier.subresourceRange.baseArrayLayer = p_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
}
{ //make dst readable
@ -2559,7 +2565,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
image_memory_barrier.pNext = NULL;
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
image_memory_barrier.oldLayout = src_tex->unbound_layout;
image_memory_barrier.oldLayout = src_tex->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@ -2579,7 +2585,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
image_memory_barrier.pNext = NULL;
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_memory_barrier.oldLayout = dst_tex->unbound_layout;
image_memory_barrier.oldLayout = dst_tex->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@ -2631,7 +2637,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
image_memory_barrier.newLayout = src_tex->unbound_layout;
image_memory_barrier.newLayout = src_tex->layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = src_tex->image;
@ -2641,7 +2647,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
vkCmdPipelineBarrier(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
}
{ //make dst readable
@ -2652,7 +2658,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
image_memory_barrier.newLayout = dst_tex->unbound_layout;
image_memory_barrier.newLayout = dst_tex->layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
@ -2663,7 +2669,7 @@ Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture,
image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer;
image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
}
}
@ -2733,7 +2739,8 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
ERR_FAIL_COND_V_MSG(!(p_format[i].usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_RESOLVE_ATTACHMENT_BIT)), VK_NULL_HANDLE,
"Texture format for index (" + itos(i) + ") requires an attachment (depth, stencil or resolve) bit set.");
bool can_be_sampled = p_format[i].usage_flags & TEXTURE_USAGE_SAMPLING_BIT;
bool is_sampled = p_format[i].usage_flags & TEXTURE_USAGE_SAMPLING_BIT;
bool is_storage = p_format[i].usage_flags & TEXTURE_USAGE_STORAGE_BIT;
switch (p_initial_action) {
@ -2745,7 +2752,7 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
case INITIAL_ACTION_KEEP_COLOR: {
if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
description.initialLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
} else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
@ -2761,10 +2768,10 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
description.initialLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
} else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
description.initialLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; //don't care what is there
description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
} else {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
@ -2799,12 +2806,12 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
} else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
} else {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
@ -2815,12 +2822,12 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
} else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
} else {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
@ -2831,12 +2838,12 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
if (p_format[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
} else if (p_format[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
description.finalLayout = can_be_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
} else {
description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
@ -3531,8 +3538,15 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
uint32_t stages_processed = 0;
bool is_compute = false;
for (int i = 0; i < p_stages.size(); i++) {
if (p_stages[i].shader_stage == SHADER_STAGE_COMPUTE) {
is_compute = true;
ERR_FAIL_COND_V_MSG(p_stages.size() != 1, RID(),
"Compute shaders can only receive one stage, dedicated to compute.");
}
ERR_FAIL_COND_V_MSG(stages_processed & (1 << p_stages[i].shader_stage), RID(),
"Stage " + String(shader_stage_names[p_stages[i].shader_stage]) + " submitted more than once.");
@ -3793,6 +3807,7 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
shader.vertex_input_mask = vertex_input_mask;
shader.fragment_outputs = fragment_outputs;
shader.push_constant = push_constant;
shader.is_compute = is_compute;
String error_text;
@ -4166,6 +4181,8 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms,
List<Vector<VkDescriptorImageInfo> > image_infos;
//used for verification to make sure a uniform set does not use a framebuffer bound texture
Vector<RID> attachable_textures;
Vector<Texture *> mutable_sampled_textures;
Vector<Texture *> mutable_storage_textures;
for (uint32_t i = 0; i < set_uniform_count; i++) {
const UniformInfo &set_uniform = set_uniforms[i];
@ -4259,9 +4276,14 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms,
ERR_FAIL_COND_V(!texture, RID()); //bug, should never happen
}
img_info.imageLayout = texture->unbound_layout;
img_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_info.push_back(img_info);
if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
//can also be used as storage, add to mutable sampled
mutable_sampled_textures.push_back(texture);
}
}
write.dstArrayElement = 0;
@ -4306,9 +4328,14 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms,
ERR_FAIL_COND_V(!texture, RID()); //bug, should never happen
}
img_info.imageLayout = texture->unbound_layout;
img_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_info.push_back(img_info);
if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
//can also be used as storage, add to mutable sampled
mutable_sampled_textures.push_back(texture);
}
}
write.dstArrayElement = 0;
@ -4321,7 +4348,54 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms,
type_size = uniform.ids.size();
} break;
case UNIFORM_TYPE_IMAGE: {
//todo
if (uniform.ids.size() != set_uniform.length) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "Image (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") textures, so it should be provided equal number of texture IDs to satisfy it (IDs provided: " + itos(uniform.ids.size()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "Image (binding: " + itos(uniform.binding) + ") should provide one ID referencing a texture (IDs provided: " + itos(uniform.ids.size()) + ").");
}
}
Vector<VkDescriptorImageInfo> image_info;
for (int j = 0; j < uniform.ids.size(); j++) {
Texture *texture = texture_owner.getornull(uniform.ids[j]);
ERR_FAIL_COND_V_MSG(!texture, RID(),
"Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture.");
ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT), RID(),
"Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_STORAGE_BIT usage flag set in order to be used as uniform.");
VkDescriptorImageInfo img_info;
img_info.sampler = NULL;
img_info.imageView = texture->view;
if (texture->owner.is_valid()) {
texture = texture_owner.getornull(texture->owner);
ERR_FAIL_COND_V(!texture, RID()); //bug, should never happen
}
img_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
image_info.push_back(img_info);
if (texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT) {
//can also be used as sampled, add to mutable storage
mutable_storage_textures.push_back(texture);
}
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.ids.size();
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
write.pBufferInfo = NULL;
write.pTexelBufferView = NULL;
type_size = uniform.ids.size();
} break;
case UNIFORM_TYPE_TEXTURE_BUFFER: {
if (uniform.ids.size() != set_uniform.length) {
@ -4476,6 +4550,8 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms,
uniform_set.descriptor_set = descriptor_set;
uniform_set.format = shader->set_formats[p_shader_set];
uniform_set.attachable_textures = attachable_textures;
uniform_set.mutable_sampled_textures = mutable_sampled_textures;
uniform_set.mutable_storage_textures = mutable_storage_textures;
uniform_set.shader_set = p_shader_set;
uniform_set.shader_id = p_shader;
@ -4651,6 +4727,9 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma
Shader *shader = shader_owner.getornull(p_shader);
ERR_FAIL_COND_V(!shader, RID());
ERR_FAIL_COND_V_MSG(shader->is_compute, RID(),
"Compute shaders can't be used in render pipelines");
if (p_framebuffer_format == INVALID_ID) {
//if nothing provided, use an empty one (no attachments)
p_framebuffer_format = framebuffer_format_create(Vector<AttachmentFormat>());
@ -4996,7 +5075,7 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma
pipeline.validation.primitive_minimum = primitive_minimum[p_render_primitive];
#endif
//create ID to associate with this pipeline
RID id = pipeline_owner.make_rid(pipeline);
RID id = render_pipeline_owner.make_rid(pipeline);
//now add all the dependencies
_add_dependency(id, p_shader);
return id;
@ -5004,7 +5083,55 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma
//reports whether p_pipeline is an ID known to the pipeline owner; runs under the thread-safety macro
bool RenderingDeviceVulkan::render_pipeline_is_valid(RID p_pipeline) {
_THREAD_SAFE_METHOD_
//NOTE(review): two return statements follow. This looks like the removed and the added side of the
//pipeline_owner -> render_pipeline_owner rename shown together by the diff view; as literally
//written only the first return can execute. Confirm against the real file before relying on it.
return pipeline_owner.owns(p_pipeline);
return render_pipeline_owner.owns(p_pipeline);
}
/**************************/
/**** COMPUTE PIPELINE ****/
/**************************/
//creates a compute pipeline from p_shader and returns its ID.
//p_shader must reference a shader flagged as compute; an empty RID is returned if the shader
//is missing, is not a compute shader, or if Vulkan fails to create the pipeline.
//the returned pipeline is registered as dependent on p_shader.
RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader) {
	_THREAD_SAFE_METHOD_

	//needs a shader
	Shader *shader = shader_owner.getornull(p_shader);
	ERR_FAIL_COND_V(!shader, RID());

	ERR_FAIL_COND_V_MSG(!shader->is_compute, RID(),
			"Non-compute shaders can't be used in compute pipelines");

	//finally, pipeline create info
	VkComputePipelineCreateInfo compute_pipeline_create_info;
	compute_pipeline_create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
	compute_pipeline_create_info.pNext = NULL;
	compute_pipeline_create_info.flags = 0;
	//the first stage of the shader is used as the pipeline's single stage
	compute_pipeline_create_info.stage = shader->pipeline_stages[0];
	compute_pipeline_create_info.layout = shader->pipeline_layout;
	//VkPipeline and VkPipelineCache are non-dispatchable handles, so the "none" value is
	//VK_NULL_HANDLE rather than the pointer constant NULL
	compute_pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
	compute_pipeline_create_info.basePipelineIndex = 0;

	ComputePipeline pipeline;
	VkResult err = vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &compute_pipeline_create_info, NULL, &pipeline.pipeline);
	ERR_FAIL_COND_V(err, RID());

	//cache what the compute list needs at bind time, so the shader is not looked up again
	pipeline.set_formats = shader->set_formats;
	pipeline.push_constant_stages = shader->push_constant.push_constants_vk_stage;
	pipeline.pipeline_layout = shader->pipeline_layout;
	pipeline.shader = p_shader;
	pipeline.push_constant_size = shader->push_constant.push_constant_size;

	//create ID to associate with this pipeline
	RID id = compute_pipeline_owner.make_rid(pipeline);
	//now add all the dependencies
	_add_dependency(id, p_shader);
	return id;
}
//reports whether p_pipeline is an ID owned by the compute pipeline owner.
//NOTE(review): render_pipeline_is_valid runs under _THREAD_SAFE_METHOD_ while this one does not;
//confirm whether that difference is intentional.
bool RenderingDeviceVulkan::compute_pipeline_is_valid(RID p_pipeline) {
	const bool is_owned = compute_pipeline_owner.owns(p_pipeline);
	return is_owned;
}
/****************/
@ -5055,6 +5182,8 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin_for_screen(in
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG(draw_list != NULL, INVALID_ID, "Only one draw list can be active at the same time.");
ERR_FAIL_COND_V_MSG(compute_list != NULL, INVALID_ID, "Only one draw/compute list can be active at the same time.");
VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;
draw_list = memnew(DrawList);
draw_list->command_buffer = command_buffer;
@ -5256,6 +5385,9 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG(draw_list != NULL, INVALID_ID, "Only one draw list can be active at the same time.");
ERR_FAIL_COND_V_MSG(compute_list != NULL, INVALID_ID, "Only one draw/compute list can be active at the same time.");
Framebuffer *framebuffer = framebuffer_owner.getornull(p_framebuffer);
ERR_FAIL_COND_V(!framebuffer, INVALID_ID);
@ -5530,7 +5662,7 @@ void RenderingDeviceVulkan::draw_list_bind_render_pipeline(DrawListID p_list, RI
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
const RenderPipeline *pipeline = pipeline_owner.getornull(p_render_pipeline);
const RenderPipeline *pipeline = render_pipeline_owner.getornull(p_render_pipeline);
ERR_FAIL_COND(!pipeline);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND(pipeline->validation.framebuffer_format != dl->validation.framebuffer_format);
@ -5887,6 +6019,284 @@ void RenderingDeviceVulkan::draw_list_end() {
// * Another render pass happens (since we may be done
_memory_barrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT, true);
}
/***********************/
/**** COMPUTE LISTS ****/
/***********************/
//opens the (single) compute list, recording into the current frame's draw command buffer.
//fails with INVALID_ID if a draw list or another compute list is already open;
//otherwise returns the fixed compute list ID.
RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin() {
	ERR_FAIL_COND_V_MSG(draw_list != NULL, INVALID_ID, "Only one draw list can be active at the same time.");
	ERR_FAIL_COND_V_MSG(compute_list != NULL, INVALID_ID, "Only one draw/compute list can be active at the same time.");

	//compute commands share the command buffer used for drawing in this frame
	VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;

	compute_list = memnew(ComputeList);
	compute_list->command_buffer = command_buffer;

	return ID_TYPE_COMPUTE_LIST;
}
//binds p_compute_pipeline on the active compute list.
//p_list must be the compute list ID and a compute list must be open.
//rebinding the pipeline that is already bound is a no-op. when the pipeline's shader differs
//from the one previously bound, uniform sets whose expected format changed (and every set
//above them) are flagged as unbound so they get bound again.
void RenderingDeviceVulkan::compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline) {
	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
	ERR_FAIL_COND(!compute_list);

	ComputeList *cl = compute_list;

#ifdef DEBUG_ENABLED
	//same guard the other compute list entry points use
	ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
#endif

	const ComputePipeline *pipeline = compute_pipeline_owner.getornull(p_compute_pipeline);
	ERR_FAIL_COND(!pipeline);

	if (p_compute_pipeline == cl->state.pipeline) {
		return; //redundant state, return.
	}

	cl->state.pipeline = p_compute_pipeline;
	cl->state.pipeline_layout = pipeline->pipeline_layout;

	vkCmdBindPipeline(cl->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline);

	if (cl->state.pipeline_shader != pipeline->shader) {
		// shader changed, so descriptor sets may become incompatible.

		//go through ALL sets, and unbind them (and all those above) if the format is different

		uint32_t pcount = pipeline->set_formats.size(); //formats count in this pipeline
		cl->state.set_count = MAX(cl->state.set_count, pcount);
		const uint32_t *pformats = pipeline->set_formats.ptr(); //pipeline set formats

		bool sets_valid = true; //once invalid, all above become invalid
		for (uint32_t i = 0; i < pcount; i++) {
			//if a part of the format is different, invalidate it (and the rest)
			if (!sets_valid || cl->state.sets[i].pipeline_expected_format != pformats[i]) {
				cl->state.sets[i].bound = false;
				cl->state.sets[i].pipeline_expected_format = pformats[i];
				sets_valid = false;
			}
		}

		for (uint32_t i = pcount; i < cl->state.set_count; i++) {
			//unbind the ones above (not used) if exist
			cl->state.sets[i].bound = false;
		}

		cl->state.set_count = pcount; //update set count

		if (pipeline->push_constant_size) {
			cl->state.pipeline_push_constant_stages = pipeline->push_constant_stages;
#ifdef DEBUG_ENABLED
			cl->validation.pipeline_push_constant_suppplied = false;
#endif
		}

		//remember the shader, otherwise the comparison above never sees a match and
		//this whole invalidation runs again on every pipeline bind
		cl->state.pipeline_shader = pipeline->shader;
	}

#ifdef DEBUG_ENABLED
	//update compute pass pipeline info
	cl->validation.pipeline_active = true;
	cl->validation.pipeline_push_constant_size = pipeline->push_constant_size;
#endif
}
//stores p_uniform_set in slot p_index of the active compute list and flags the slot for rebind.
//p_list must be the compute list ID and a compute list must be open.
//textures of the set that can be both sampled and used as storage get an image layout
//transition recorded here when their tracked layout does not match how this set uses them:
// * mutable_sampled_textures are moved to SHADER_READ_ONLY_OPTIMAL
// * mutable_storage_textures are moved to GENERAL, and remembered in
//   textures_to_sampled_layout so they can go back to the sampled layout afterwards
void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) {
	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
	ERR_FAIL_COND(!compute_list);

	ComputeList *cl = compute_list;

#ifdef DEBUG_ENABLED
	//NOTE(review): the second comparison accepts p_index == MAX_UNIFORM_SETS; if cl->state.sets
	//holds MAX_UNIFORM_SETS entries this should probably be >= (declaration not visible here, confirm)
	ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index > MAX_UNIFORM_SETS,
			"Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(limits.maxBoundDescriptorSets) + ").");
#endif

#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
#endif

	UniformSet *uniform_set = uniform_set_owner.getornull(p_uniform_set);
	ERR_FAIL_COND(!uniform_set);

	if (p_index > cl->state.set_count) {
		cl->state.set_count = p_index;
	}

	cl->state.sets[p_index].descriptor_set = uniform_set->descriptor_set; //update set pointer
	cl->state.sets[p_index].bound = false; //needs rebind
	cl->state.sets[p_index].uniform_set_format = uniform_set->format;
	cl->state.sets[p_index].uniform_set = p_uniform_set;

	//textures this set samples, but which may currently be in the storage layout
	uint32_t textures_to_sampled_count = uniform_set->mutable_sampled_textures.size();
	Texture **textures_to_sampled = uniform_set->mutable_sampled_textures.ptrw();

	for (uint32_t i = 0; i < textures_to_sampled_count; i++) {
		Texture *tex = textures_to_sampled[i];
		if (tex->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
			VkImageMemoryBarrier image_memory_barrier;
			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
			image_memory_barrier.pNext = NULL;
			image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
			image_memory_barrier.oldLayout = tex->layout;
			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;

			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.image = tex->image;
			image_memory_barrier.subresourceRange.aspectMask = tex->read_aspect_mask;
			image_memory_barrier.subresourceRange.baseMipLevel = 0;
			image_memory_barrier.subresourceRange.levelCount = tex->mipmaps;
			image_memory_barrier.subresourceRange.baseArrayLayer = 0;
			image_memory_barrier.subresourceRange.layerCount = tex->layers;

			vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);

			tex->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;

			//already back in the sampled layout, nothing left to restore for this texture
			cl->state.textures_to_sampled_layout.erase(tex);
		}
	}

	//textures this set uses as storage images, but which may currently be in the sampled layout
	uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size();
	Texture **textures_to_storage = uniform_set->mutable_storage_textures.ptrw();

	for (uint32_t i = 0; i < textures_to_storage_count; i++) {
		Texture *tex = textures_to_storage[i];
		if (tex->layout != VK_IMAGE_LAYOUT_GENERAL) {
			VkImageMemoryBarrier image_memory_barrier;
			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
			image_memory_barrier.pNext = NULL;
			image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
			image_memory_barrier.oldLayout = tex->layout;
			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;

			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.image = tex->image;
			//the aspect mask must come from the storage texture being transitioned; indexing the
			//sampled list here used the wrong texture and could read past the end of that list
			image_memory_barrier.subresourceRange.aspectMask = tex->read_aspect_mask;
			image_memory_barrier.subresourceRange.baseMipLevel = 0;
			image_memory_barrier.subresourceRange.levelCount = tex->mipmaps;
			image_memory_barrier.subresourceRange.baseArrayLayer = 0;
			image_memory_barrier.subresourceRange.layerCount = tex->layers;

			vkCmdPipelineBarrier(cl->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);

			tex->layout = VK_IMAGE_LAYOUT_GENERAL;

			cl->state.textures_to_sampled_layout.insert(tex); //needs to go back to sampled layout afterwards
		}
	}

#if 0
	{ //validate that textures bound are not attached as framebuffer bindings
		uint32_t attachable_count = uniform_set->attachable_textures.size();
		const RID *attachable_ptr = uniform_set->attachable_textures.ptr();
		uint32_t bound_count = draw_list_bound_textures.size();
		const RID *bound_ptr = draw_list_bound_textures.ptr();
		for (uint32_t i = 0; i < attachable_count; i++) {
			for (uint32_t j = 0; j < bound_count; j++) {
				ERR_FAIL_COND_MSG(attachable_ptr[i] == bound_ptr[j],
						"Attempted to use the same texture in framebuffer attachment and a uniform set, this is not allowed.");
			}
		}
	}
#endif
}
//Records push constant data into the command buffer of the currently open compute list.
//p_list must be the compute list ID, p_data points to the bytes to upload and p_data_size is their size in bytes.
//In debug builds the call is rejected when the list is no longer active or when p_data_size
//differs from the push constant size recorded in the list validation state.
void RenderingDeviceVulkan::compute_list_set_push_constant(ComputeListID p_list, void *p_data, uint32_t p_data_size) {
	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
	ERR_FAIL_COND(!compute_list);

	ComputeList *cl = compute_list;

#ifdef DEBUG_ENABLED
	//both validation checks live in a single debug-only section
	ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
	ERR_FAIL_COND_MSG(p_data_size != cl->validation.pipeline_push_constant_size,
			"This compute pipeline requires (" + itos(cl->validation.pipeline_push_constant_size) + ") bytes of push constant data, supplied: (" + itos(p_data_size) + ")");
#endif

	//data is always written at offset 0, for the stages recorded in the list state
	vkCmdPushConstants(cl->command_buffer, cl->state.pipeline_layout, cl->state.pipeline_push_constant_stages, 0, p_data_size, p_data);

#ifdef DEBUG_ENABLED
	//remembered so compute_list_dispatch() can verify the push constant was provided
	cl->validation.pipeline_push_constant_suppplied = true;
#endif
}
//Records a compute dispatch of p_x_groups * p_y_groups * p_z_groups workgroups into the open compute list.
//Before dispatching, every uniform set expected by the pipeline that is not yet bound gets bound.
//In debug builds the call fails (without dispatching) when the list is inactive, no pipeline is set,
//a required push constant was not supplied, or a supplied uniform set format does not match the pipeline.
void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
	ERR_FAIL_COND(!compute_list);

	ComputeList *cl = compute_list;

#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
	ERR_FAIL_COND_MSG(!cl->validation.pipeline_active, "No compute pipeline was set before attempting to dispatch.");
	if (cl->validation.pipeline_push_constant_size > 0) {
		//using push constants, check that they were supplied
		ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_suppplied,
				"The shader in this pipeline requires a push constant to be set before dispatching, but it's not present.");
	}
#endif

	//Bind descriptor sets
	for (uint32_t i = 0; i < cl->state.set_count; i++) {

		if (cl->state.sets[i].pipeline_expected_format == 0) {
			continue; //nothing expected by this pipeline
		}

#ifdef DEBUG_ENABLED
		if (cl->state.sets[i].pipeline_expected_format != cl->state.sets[i].uniform_set_format) {

			if (cl->state.sets[i].uniform_set_format == 0) {
				ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of dispatching, which are required by the pipeline");
			} else if (uniform_set_owner.owns(cl->state.sets[i].uniform_set)) {
				UniformSet *us = uniform_set_owner.getornull(cl->state.sets[i].uniform_set);
				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(cl->state.pipeline_shader));
			} else {
				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(cl->state.pipeline_shader));
			}
		}
#endif

		if (!cl->state.sets[i].bound) {
			//format is fine and the set is not bound yet, so bind it now
			vkCmdBindDescriptorSets(cl->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, cl->state.pipeline_layout, i, 1, &cl->state.sets[i].descriptor_set, 0, NULL);
			cl->state.sets[i].bound = true;
		}
	}

	vkCmdDispatch(cl->command_buffer, p_x_groups, p_y_groups, p_z_groups);
}
void RenderingDeviceVulkan::compute_list_end() {
ERR_FAIL_COND(!compute_list);
for (Set<Texture *>::Element *E = compute_list->state.textures_to_sampled_layout.front(); E; E = E->next()) {
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = NULL;
image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
image_memory_barrier.oldLayout = E->get()->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = E->get()->image;
image_memory_barrier.subresourceRange.aspectMask = E->get()->read_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = 0;
image_memory_barrier.subresourceRange.levelCount = E->get()->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = 0;
image_memory_barrier.subresourceRange.layerCount = E->get()->layers;
vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &image_memory_barrier);
E->get()->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
memdelete(compute_list);
}
#if 0
void RenderingDeviceVulkan::draw_list_render_secondary_to_framebuffer(ID p_framebuffer, ID *p_draw_lists, uint32_t p_draw_list_count, InitialAction p_initial_action, FinalAction p_final_action, const Vector<Variant> &p_clear_colors) {
@ -5998,10 +6408,14 @@ void RenderingDeviceVulkan::_free_internal(RID p_id) {
UniformSet *uniform_set = uniform_set_owner.getornull(p_id);
frames[frame].uniform_sets_to_dispose_of.push_back(*uniform_set);
uniform_set_owner.free(p_id);
} else if (pipeline_owner.owns(p_id)) {
RenderPipeline *pipeline = pipeline_owner.getornull(p_id);
frames[frame].pipelines_to_dispose_of.push_back(*pipeline);
pipeline_owner.free(p_id);
} else if (render_pipeline_owner.owns(p_id)) {
RenderPipeline *pipeline = render_pipeline_owner.getornull(p_id);
frames[frame].render_pipelines_to_dispose_of.push_back(*pipeline);
render_pipeline_owner.free(p_id);
} else if (compute_pipeline_owner.owns(p_id)) {
ComputePipeline *pipeline = compute_pipeline_owner.getornull(p_id);
frames[frame].compute_pipelines_to_dispose_of.push_back(*pipeline);
compute_pipeline_owner.free(p_id);
} else {
ERR_PRINT("Attempted to free invalid ID: " + itos(p_id.get_id()));
}
@ -6022,6 +6436,10 @@ void RenderingDeviceVulkan::finalize_frame() {
ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work).");
}
if (compute_list) {
ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work).");
}
{ //complete the setup buffer (that needs to be processed before anything else)
vkEndCommandBuffer(frames[frame].setup_command_buffer);
vkEndCommandBuffer(frames[frame].draw_command_buffer);
@ -6032,12 +6450,20 @@ void RenderingDeviceVulkan::finalize_frame() {
void RenderingDeviceVulkan::_free_pending_resources(int p_frame) {
//free in dependency usage order, so nothing weird happens
//pipelines
while (frames[p_frame].pipelines_to_dispose_of.front()) {
RenderPipeline *pipeline = &frames[p_frame].pipelines_to_dispose_of.front()->get();
while (frames[p_frame].render_pipelines_to_dispose_of.front()) {
RenderPipeline *pipeline = &frames[p_frame].render_pipelines_to_dispose_of.front()->get();
vkDestroyPipeline(device, pipeline->pipeline, NULL);
frames[p_frame].pipelines_to_dispose_of.pop_front();
frames[p_frame].render_pipelines_to_dispose_of.pop_front();
}
while (frames[p_frame].compute_pipelines_to_dispose_of.front()) {
ComputePipeline *pipeline = &frames[p_frame].compute_pipelines_to_dispose_of.front()->get();
vkDestroyPipeline(device, pipeline->pipeline, NULL);
frames[p_frame].compute_pipelines_to_dispose_of.pop_front();
}
//uniform sets
@ -6344,6 +6770,8 @@ void RenderingDeviceVulkan::initialize(VulkanContext *p_context) {
draw_list = NULL;
draw_list_count = 0;
draw_list_split = false;
compute_list = NULL;
}
template <class T>
@ -6430,7 +6858,8 @@ void RenderingDeviceVulkan::finalize() {
_flush(false);
_free_rids(pipeline_owner, "Pipeline");
_free_rids(render_pipeline_owner, "Pipeline");
_free_rids(compute_pipeline_owner, "Compute");
_free_rids(uniform_set_owner, "UniformSet");
_free_rids(texture_buffer_owner, "TextureBuffer");
_free_rids(storage_buffer_owner, "StorageBuffer");

View file

@ -91,6 +91,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
ID_TYPE_VERTEX_FORMAT,
ID_TYPE_DRAW_LIST,
ID_TYPE_SPLIT_DRAW_LIST,
ID_TYPE_COMPUTE_LIST,
ID_TYPE_MAX,
ID_BASE_SHIFT = 58 //5 bits for ID types
};
@ -138,8 +139,8 @@ class RenderingDeviceVulkan : public RenderingDevice {
Vector<DataFormat> allowed_shared_formats;
VkImageLayout bound_layout; //layout used when bound to framebuffer being drawn
VkImageLayout unbound_layout; //layout used otherwise
VkImageLayout layout;
uint32_t read_aspect_mask;
uint32_t barrier_aspect_mask;
bool bound; //bound to framebffer
@ -286,6 +287,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
}
};
uint32_t storage_mask;
Vector<RID> texture_ids;
struct Version {
@ -519,6 +521,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
PushConstant push_constant;
bool is_compute = false;
int max_output;
Vector<Set> sets;
Vector<uint32_t> set_formats;
@ -620,6 +623,8 @@ class RenderingDeviceVulkan : public RenderingDevice {
VkDescriptorSet descriptor_set;
//VkPipelineLayout pipeline_layout; //not owned, inherited from shader
Vector<RID> attachable_textures; //used for validation
Vector<Texture *> mutable_sampled_textures; //used for layout change
Vector<Texture *> mutable_storage_textures; //used for layout change
};
RID_Owner<UniformSet, true> uniform_set_owner;
@ -660,7 +665,19 @@ class RenderingDeviceVulkan : public RenderingDevice {
uint32_t push_constant_stages;
};
RID_Owner<RenderPipeline, true> pipeline_owner;
RID_Owner<RenderPipeline, true> render_pipeline_owner;
//Record kept for each compute pipeline; instances are stored in compute_pipeline_owner.
//When a pipeline RID is freed, a copy of this struct is queued in the frame's
//compute_pipelines_to_dispose_of list and its VkPipeline is destroyed later with vkDestroyPipeline.
struct ComputePipeline {
RID shader; //shader RID (this is the type compute_pipeline_create() receives)
Vector<uint32_t> set_formats; //presumably one format ID per uniform set expected by the shader - TODO confirm
VkPipelineLayout pipeline_layout; // not owned, needed for push constants
VkPipeline pipeline; //Vulkan pipeline handle
uint32_t push_constant_size; //presumably the size in bytes of the shader push constant block - TODO confirm
uint32_t push_constant_stages; //presumably the Vulkan stage flags passed to vkCmdPushConstants - TODO confirm
};
RID_Owner<ComputePipeline, true> compute_pipeline_owner;
/*******************/
/**** DRAW LIST ****/
@ -796,6 +813,74 @@ class RenderingDeviceVulkan : public RenderingDevice {
Error _draw_list_render_pass_begin(Framebuffer *framebuffer, InitialAction p_initial_action, FinalAction p_final_action, const Vector<Color> &p_clear_colors, Point2i viewport_offset, Point2i viewport_size, VkFramebuffer vkframebuffer, VkRenderPass render_pass, VkCommandBuffer command_buffer, VkSubpassContents subpass_contents);
_FORCE_INLINE_ DrawList *_get_draw_list_ptr(DrawListID p_id);
/**********************/
/**** COMPUTE LIST ****/
/**********************/
struct ComputeList {
VkCommandBuffer command_buffer; //if persistent, this is owned, otherwise it's shared with the ringbuffer
struct SetState {
uint32_t pipeline_expected_format;
uint32_t uniform_set_format;
VkDescriptorSet descriptor_set;
RID uniform_set;
bool bound;
SetState() {
bound = false;
pipeline_expected_format = 0;
uniform_set_format = 0;
descriptor_set = VK_NULL_HANDLE;
}
};
struct State {
Set<Texture *> textures_to_sampled_layout;
SetState sets[MAX_UNIFORM_SETS];
uint32_t set_count;
RID pipeline;
RID pipeline_shader;
VkPipelineLayout pipeline_layout;
uint32_t pipeline_push_constant_stages;
State() {
set_count = 0;
pipeline_layout = VK_NULL_HANDLE;
pipeline_push_constant_stages = 0;
}
} state;
#ifdef DEBUG_ENABLED
struct Validation {
bool active; //means command buffer was not closes, so you can keep adding things
Vector<uint32_t> set_formats;
Vector<bool> set_bound;
Vector<RID> set_rids;
//last pipeline set values
bool pipeline_active;
RID pipeline_shader;
uint32_t invalid_set_from;
Vector<uint32_t> pipeline_set_formats;
uint32_t pipeline_push_constant_size;
bool pipeline_push_constant_suppplied;
Validation() {
active = true;
invalid_set_from = 0;
//pipeline state initalize
pipeline_active = false;
pipeline_push_constant_size = 0;
pipeline_push_constant_suppplied = false;
}
} validation;
#endif
};
ComputeList *compute_list;
/**************************/
/**** FRAME MANAGEMENT ****/
/**************************/
@ -823,7 +908,8 @@ class RenderingDeviceVulkan : public RenderingDevice {
List<Shader> shaders_to_dispose_of;
List<VkBufferView> buffer_views_to_dispose_of;
List<UniformSet> uniform_sets_to_dispose_of;
List<RenderPipeline> pipelines_to_dispose_of;
List<RenderPipeline> render_pipelines_to_dispose_of;
List<ComputePipeline> compute_pipelines_to_dispose_of;
VkCommandPool command_pool;
VkCommandBuffer setup_command_buffer; //used at the begining of every frame for set-up
@ -940,6 +1026,13 @@ public:
virtual RID render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags = 0);
virtual bool render_pipeline_is_valid(RID p_pipeline);
/**************************/
/**** COMPUTE PIPELINE ****/
/**************************/
virtual RID compute_pipeline_create(RID p_shader);
virtual bool compute_pipeline_is_valid(RID p_pipeline);
/****************/
/**** SCREEN ****/
/****************/
@ -970,6 +1063,17 @@ public:
virtual void draw_list_end();
/***********************/
/**** COMPUTE LISTS ****/
/***********************/
//Compute list API. Every call taking p_list fails unless p_list is the compute list ID and a list is open.
virtual ComputeListID compute_list_begin();
virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline);
virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index);
//Records p_data_size bytes read from p_data as push constants; debug builds require the size to match the one expected by the pipeline.
virtual void compute_list_set_push_constant(ComputeListID p_list, void *p_data, uint32_t p_data_size);
//Binds the uniform sets that are still unbound and records a dispatch with the given workgroup counts.
virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
//Transitions the textures tracked by the list back to the shader read-only layout and deletes the list.
virtual void compute_list_end();
/**************/
/**** FREE ****/
/**************/

View file

@ -170,8 +170,9 @@ void EditorSpatialGizmo::Instance::create_instance(Spatial *p_base, bool p_hidde
instance = VS::get_singleton()->instance_create2(mesh->get_rid(), p_base->get_world()->get_scenario());
VS::get_singleton()->instance_attach_object_instance_id(instance, p_base->get_instance_id());
if (skin_reference.is_valid())
if (skin_reference.is_valid()) {
VS::get_singleton()->instance_attach_skeleton(instance, skin_reference->get_skeleton());
}
if (extra_margin)
VS::get_singleton()->instance_set_extra_visibility_margin(instance, 1);
VS::get_singleton()->instance_geometry_set_cast_shadows_setting(instance, VS::SHADOW_CASTING_SETTING_OFF);

View file

@ -509,6 +509,7 @@ class RDHeaderStruct:
def __init__(self):
self.vertex_lines = []
self.fragment_lines = []
self.compute_lines = []
self.vertex_included_files = []
self.fragment_included_files = []
@ -517,6 +518,7 @@ class RDHeaderStruct:
self.line_offset = 0
self.vertex_offset = 0
self.fragment_offset = 0
self.compute_offset = 0
def include_file_in_rd_header(filename, header_data, depth):
@ -539,6 +541,13 @@ def include_file_in_rd_header(filename, header_data, depth):
header_data.fragment_offset = header_data.line_offset
continue
if line.find("[compute]") != -1:
header_data.reading = "compute"
line = fs.readline()
header_data.line_offset += 1
header_data.compute_offset = header_data.line_offset
continue
while line.find("#include ") != -1:
includeline = line.replace("#include ", "").strip()[1:-1]
@ -553,6 +562,10 @@ def include_file_in_rd_header(filename, header_data, depth):
header_data.fragment_included_files += [included_file]
if include_file_in_rd_header(included_file, header_data, depth + 1) is None:
print("Error in file '" + filename + "': #include " + includeline + "could not be found!")
elif not included_file in header_data.compute_included_files and header_data.reading == "compute":
header_data.compute_included_files += [included_file]
if include_file_in_rd_header(included_file, header_data, depth + 1) is None:
print("Error in file '" + filename + "': #include " + includeline + "could not be found!")
line = fs.readline()
@ -563,6 +576,8 @@ def include_file_in_rd_header(filename, header_data, depth):
header_data.vertex_lines += [line]
if header_data.reading == "fragment":
header_data.fragment_lines += [line]
if header_data.reading == "compute":
header_data.compute_lines += [line]
line = fs.readline()
header_data.line_offset += 1
@ -572,7 +587,7 @@ def include_file_in_rd_header(filename, header_data, depth):
return header_data
def build_rd_header(filename):
header_data = LegacyGLHeaderStruct()
header_data = RDHeaderStruct()
include_file_in_rd_header(filename, header_data, 0)
out_file = filename + ".gen.h"
@ -598,24 +613,39 @@ def build_rd_header(filename):
fd.write("\t"+out_file_class+"() {\n\n")
if (len(header_data.compute_lines)):
fd.write("\t\tstatic const char _vertex_code[]={\n")
for x in header_data.vertex_lines:
for c in x:
fd.write(str(ord(c)) + ",")
fd.write("\t\tstatic const char _compute_code[]={\n")
for x in header_data.compute_lines:
for c in x:
fd.write(str(ord(c)) + ",")
fd.write(str(ord('\n')) + ",")
fd.write("\t\t0};\n\n")
fd.write(str(ord('\n')) + ",")
fd.write("\t\tstatic const char _fragment_code[]={\n")
for x in header_data.fragment_lines:
for c in x:
fd.write(str(ord(c)) + ",")
fd.write("\t\t0};\n\n")
fd.write("\t\tsetup(nullptr,nullptr,_compute_code,\""+out_file_class+"\");\n")
fd.write("\t}\n")
fd.write(str(ord('\n')) + ",")
fd.write("\t\t0};\n\n")
fd.write("\t\tsetup(_vertex_code,_fragment_code,\""+out_file_class+"\");\n")
fd.write("\t}\n")
else:
fd.write("\t\tstatic const char _vertex_code[]={\n")
for x in header_data.vertex_lines:
for c in x:
fd.write(str(ord(c)) + ",")
fd.write(str(ord('\n')) + ",")
fd.write("\t\t0};\n\n")
fd.write("\t\tstatic const char _fragment_code[]={\n")
for x in header_data.fragment_lines:
for c in x:
fd.write(str(ord(c)) + ",")
fd.write(str(ord('\n')) + ",")
fd.write("\t\t0};\n\n")
fd.write("\t\tsetup(_vertex_code,_fragment_code,nullptr,\""+out_file_class+"\");\n")
fd.write("\t}\n")
fd.write("};\n\n")

View file

@ -830,7 +830,9 @@ Ref<SkinReference> Skeleton::register_skin(const Ref<Skin> &p_skin) {
skin_bindings.insert(skin_ref.operator->());
skin->connect("changed", skin_ref.operator->(), "_skin_changed");
_make_dirty();
_make_dirty(); //skin needs to be updated, so update skeleton
return skin_ref;
}

View file

@ -1469,6 +1469,15 @@ RasterizerSceneRD::RasterizerSceneRD(RasterizerStorageRD *p_storage) {
sky_ggx_samples_realtime = GLOBAL_GET("rendering/quality/reflections/ggx_samples_realtime");
sky_use_cubemap_array = GLOBAL_GET("rendering/quality/reflections/texture_array_reflections");
// sky_use_cubemap_array = false;
{
String defines = "";
Vector<String> versions;
versions.push_back("");
giprobe_lighting_shader.initialize(versions, defines);
giprobe_lighting_shader_version = giprobe_lighting_shader.version_create();
giprobe_lighting_shader_version_shader = giprobe_lighting_shader.version_get_shader(giprobe_lighting_shader_version, 0);
}
}
RasterizerSceneRD::~RasterizerSceneRD() {

View file

@ -4,6 +4,7 @@
#include "core/rid_owner.h"
#include "servers/visual/rasterizer.h"
#include "servers/visual/rasterizer_rd/rasterizer_storage_rd.h"
#include "servers/visual/rasterizer_rd/shaders/giprobe_lighting.glsl.gen.h"
#include "servers/visual/rendering_device.h"
class RasterizerSceneRD : public RasterizerScene {
@ -108,6 +109,12 @@ private:
mutable RID_Owner<ReflectionProbeInstance> reflection_probe_instance_owner;
/* GIPROBE INSTANCE */
GiprobeLightingShaderRD giprobe_lighting_shader;
RID giprobe_lighting_shader_version;
RID giprobe_lighting_shader_version_shader;
/* SHADOW ATLAS */
struct ShadowAtlas {

View file

@ -778,6 +778,8 @@ public:
_FORCE_INLINE_ RID skeleton_get_3d_uniform_set(RID p_skeleton, RID p_shader, uint32_t p_set) const {
Skeleton *skeleton = skeleton_owner.getornull(p_skeleton);
ERR_FAIL_COND_V(!skeleton, RID());
ERR_FAIL_COND_V(skeleton->size == 0, RID());
if (skeleton->use_2d) {
return RID();
}

View file

@ -33,11 +33,11 @@
#include "rasterizer_rd.h"
#include "servers/visual/rendering_device.h"
void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name) {
void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name) {
name = p_name;
//split vertex and shader code (thank you, shader compiler programmers from you know what company).
{
if (p_vertex_code) {
String defines_tag = "\nVERSION_DEFINES";
String globals_tag = "\nVERTEX_SHADER_GLOBALS";
String material_tag = "\nMATERIAL_UNIFORMS";
@ -79,7 +79,7 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con
}
}
{
if (p_fragment_code) {
String defines_tag = "\nVERSION_DEFINES";
String globals_tag = "\nFRAGMENT_SHADER_GLOBALS";
String material_tag = "\nMATERIAL_UNIFORMS";
@ -135,6 +135,50 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con
}
}
}
if (p_compute_code) {
is_compute = true;
String defines_tag = "\nVERSION_DEFINES";
String globals_tag = "\nCOMPUTE_SHADER_GLOBALS";
String material_tag = "\nMATERIAL_UNIFORMS";
String code_tag = "\nCOMPUTE_SHADER_CODE";
String code = p_compute_code;
int cpos = code.find(defines_tag);
if (cpos != -1) {
compute_codev = code.substr(0, cpos).ascii();
code = code.substr(cpos + defines_tag.length(), code.length());
}
cpos = code.find(material_tag);
if (cpos == -1) {
compute_code0 = code.ascii();
} else {
compute_code0 = code.substr(0, cpos).ascii();
code = code.substr(cpos + material_tag.length(), code.length());
cpos = code.find(globals_tag);
if (cpos == -1) {
compute_code1 = code.ascii();
} else {
compute_code1 = code.substr(0, cpos).ascii();
String code2 = code.substr(cpos + globals_tag.length(), code.length());
cpos = code2.find(code_tag);
if (cpos == -1) {
compute_code2 = code2.ascii();
} else {
compute_code2 = code2.substr(0, cpos).ascii();
compute_code3 = code2.substr(cpos + code_tag.length(), code2.length()).ascii();
}
}
}
}
}
RID ShaderRD::version_create() {
@ -171,7 +215,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {
RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX;
bool build_ok = true;
{
if (!is_compute) {
//vertex stage
StringBuilder builder;
@ -211,7 +255,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {
}
}
if (build_ok) {
if (!is_compute && build_ok) {
//fragment stage
current_stage = RD::SHADER_STAGE_FRAGMENT;
@ -256,9 +300,50 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {
}
}
if (is_compute) {
//compute stage
current_stage = RD::SHADER_STAGE_COMPUTE;
StringBuilder builder;
builder.append(compute_codev.get_data()); // version info (if exists)
builder.append("\n"); //make sure defines begin at newline
builder.append(general_defines.get_data());
builder.append(variant_defines[p_variant].get_data());
for (int j = 0; j < p_version->custom_defines.size(); j++) {
builder.append(p_version->custom_defines[j].get_data());
}
builder.append(compute_code0.get_data()); //first part of compute
builder.append(p_version->uniforms.get_data()); //uniforms (same for compute and fragment)
builder.append(compute_code1.get_data()); //second part of compute
builder.append(p_version->compute_globals.get_data()); // compute globals
builder.append(compute_code2.get_data()); //third part of compute
builder.append(p_version->compute_code.get_data()); // code
builder.append(compute_code3.get_data()); //fourth of compute
current_source = builder.as_string();
RD::ShaderStageData stage;
stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_COMPUTE, current_source, RD::SHADER_LANGUAGE_GLSL, &error);
if (stage.spir_v.size() == 0) {
build_ok = false;
} else {
stage.shader_stage = RD::SHADER_STAGE_COMPUTE;
stages.push_back(stage);
}
}
if (!build_ok) {
variant_set_mutex.lock(); //properly print the errors
ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment") + " shader, variant #" + itos(p_variant) + " (" + variant_defines[p_variant].get_data() + ").");
ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_COMPUTE ? "Compute " : (current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment")) + " shader, variant #" + itos(p_variant) + " (" + variant_defines[p_variant].get_data() + ").");
ERR_PRINT(error);
#ifdef DEBUG_ENABLED
@ -319,6 +404,8 @@ void ShaderRD::_compile_version(Version *p_version) {
void ShaderRD::version_set_code(RID p_version, const String &p_uniforms, const String &p_vertex_globals, const String &p_vertex_code, const String &p_fragment_globals, const String &p_fragment_light, const String &p_fragment_code, const Vector<String> &p_custom_defines) {
ERR_FAIL_COND(is_compute);
Version *version = version_owner.getornull(p_version);
ERR_FAIL_COND(!version);
version->vertex_globals = p_vertex_globals.utf8();
@ -340,6 +427,28 @@ void ShaderRD::version_set_code(RID p_version, const String &p_uniforms, const S
}
}
//Replaces the user supplied source pieces (uniforms, compute globals, compute code and custom defines)
//of a compute shader version and flags the version as dirty.
//Fails if this ShaderRD was not set up as a compute shader or if p_version is not a valid version.
//When the version still needs its first initialization, it is compiled right away.
void ShaderRD::version_set_compute_code(RID p_version, const String &p_uniforms, const String &p_compute_globals, const String &p_compute_code, const Vector<String> &p_custom_defines) {

	ERR_FAIL_COND(!is_compute);

	Version *version = version_owner.getornull(p_version);
	ERR_FAIL_COND(!version);

	//source fragments are stored as UTF-8
	version->uniforms = p_uniforms.utf8();
	version->compute_globals = p_compute_globals.utf8();
	version->compute_code = p_compute_code.utf8();

	//custom defines are replaced, not appended
	version->custom_defines.clear();
	const int define_count = p_custom_defines.size();
	for (int idx = 0; idx < define_count; idx++) {
		version->custom_defines.push_back(p_custom_defines[idx].utf8());
	}

	version->dirty = true;

	if (!version->initialize_needed) {
		return;
	}

	_compile_version(version);
	version->initialize_needed = false;
}
bool ShaderRD::version_is_valid(RID p_version) {
Version *version = version_owner.getornull(p_version);
ERR_FAIL_COND_V(!version, false);

View file

@ -55,6 +55,8 @@ class ShaderRD {
CharString uniforms;
CharString vertex_globals;
CharString vertex_code;
CharString compute_globals;
CharString compute_code;
CharString fragment_light;
CharString fragment_globals;
CharString fragment_code;
@ -89,16 +91,25 @@ class ShaderRD {
CharString vertex_code2;
CharString vertex_code3;
bool is_compute = false;
CharString compute_codev; //for version and extensions
CharString compute_code0;
CharString compute_code1;
CharString compute_code2;
CharString compute_code3;
const char *name;
protected:
ShaderRD() {}
void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name);
void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name);
public:
RID version_create();
void version_set_code(RID p_version, const String &p_uniforms, const String &p_vertex_globals, const String &p_vertex_code, const String &p_fragment_globals, const String &p_fragment_light, const String &p_fragment_code, const Vector<String> &p_custom_defines);
void version_set_compute_code(RID p_version, const String &p_uniforms, const String &p_compute_globals, const String &p_compute_code, const Vector<String> &p_custom_defines);
_FORCE_INLINE_ RID version_get_shader(RID p_version, int p_variant) {
ERR_FAIL_INDEX_V(p_variant, variant_defines.size(), RID());

View file

@ -11,4 +11,5 @@ if 'RD_GLSL' in env['BUILDERS']:
env.RD_GLSL('sky.glsl');
env.RD_GLSL('tonemap.glsl');
env.RD_GLSL('copy.glsl');
env.RD_GLSL('giprobe_lighting.glsl');

View file

@ -0,0 +1,241 @@
[compute]
#version 450
VERSION_DEFINES
//Each workgroup runs 64 invocations along X; main() uses gl_GlobalInvocationID.x as the index of the cell to light.
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
//Sentinel cell index: marks a missing child in the octree and is what raymarch() returns when no cell is hit.
#define NO_CHILDREN 0xFFFFFFFF
//Weights used with dot() to turn an RGB value into a single energy value.
#define GREY_VEC vec3(0.33333,0.33333,0.33333)
//Octree node: indices of the 8 children. raymarch() builds the child index from three bits (x=1, y=2, z=4).
//NOTE(review): in a std140 block the stride of a uint array is rounded up to 16 bytes - confirm the CPU side layout matches.
struct CellPosition {
uint children[8];
};
layout(set=0,binding=1,std140) buffer CellPositions {
CellPosition data[];
} cell_positions;
//Per cell material data, all fields are packed into uints and decoded in main().
struct CellMaterial {
uint position; // xyz 10 bits (x in bits 0-9, y in bits 10-19, z in the remaining upper bits)
uint albedo; //rgb albedo, decoded with unpackUnorm4x8 (alpha is written to the output as is)
uint emission; //rgb normalized with e as multiplier (main() decodes 10/11/11 bits and scales by params.emission_scale)
uint normal; //RGB normal encoded, decoded with unpackSnorm4x8; w >= 0.5 means the cell is lit from all directions
};
layout(set=0,binding=2,std140) buffer CellMaterials {
CellMaterial data[];
} cell_materials;
//Values of Light.type
#define LIGHT_TYPE_DIRECTIONAL 0
#define LIGHT_TYPE_OMNI 1
#define LIGHT_TYPE_SPOT 2
//Description of one light. energy, max_length, visible and clip_planes are not read anywhere in this shader.
struct Light {
uint type; //one of the LIGHT_TYPE_* values
float energy;
float radius; //omni/spot: cells at this distance or further are not lit
float attenuation; //omni/spot: exponent of the distance falloff
vec3 color;
float spot_angle_radians; //spot: cells outside of this angle are not lit
float advance; //step length used when raymarching towards the cell
float max_length;
uint pad0;
uint pad2;
vec3 position; //omni/spot: position of the light
float spot_attenuation; //spot: exponent of the angular falloff
vec3 direction; //directional/spot: direction of the light
bool visible;
vec4 clip_planes[3];
};
layout(set=0,binding=3,std140) buffer Lights {
Light data[];
} lights;
//Global parameters. limits is not read anywhere in this shader.
layout(set=0,binding=4,std140) uniform Params {
vec3 limits;
float max_length; //distance at which the virtual position of a directional light is placed
uint size; //size of the volume per axis, positions outside [0,size) are out of range
uint stack_size; //raymarch() descends stack_size - 1 levels of the octree
uint light_count; //amount of valid entries in lights.data
float emission_scale; //multiplier applied to the decoded emission
} params;
//Output 3D texture, written with imageStore() at the cell position when MODE_ANISOTROPIC is not defined.
layout (rgba8,set=0,binding=5) uniform restrict writeonly image3D color_tex;
//Marches from `from` along `direction` in steps of `distance_adv` until `distance` is used up.
//At every step the octree is descended towards the sample position; the index of the first cell found is returned.
//Returns NO_CHILDREN when the sample position is outside the volume or when no cell was found along the way.
uint raymarch(float distance,float distance_adv,vec3 from,vec3 direction) {

	ivec3 volume_size = ivec3(params.size);

	while (distance > -distance_adv) { //use this to avoid precision errors

		ivec3 pos = ivec3(from);

		if (any(lessThan(pos, ivec3(0))) || any(greaterThanEqual(pos, volume_size))) {
			return NO_CHILDREN; //outside range
		}

		uint cell = 0;
		ivec3 ofs = ivec3(0);
		ivec3 half_size = volume_size / 2;

		for (int i = 0; i < params.stack_size - 1; i++) {

			//for each axis, check which half of the current node contains the position
			bvec3 greater = greaterThanEqual(pos, ofs + half_size);
			ofs += mix(ivec3(0), half_size, greater);

			//child index: bit 0 is x, bit 1 is y, bit 2 is z
			uint child = uint(greater.x) | (uint(greater.y) << 1) | (uint(greater.z) << 2);

			cell = cell_positions.data[cell].children[child];
			if (cell == NO_CHILDREN) {
				break;
			}

			half_size >>= ivec3(1);
		}

		if (cell != NO_CHILDREN) {
			return cell; //found cell!
		}

		//nothing here, advance to the next sample
		from += direction * distance_adv;
		distance -= distance_adv;
	}

	return NO_CHILDREN;
}
//Computes, for the light with index `light`, the position the light comes from (light_pos) and how much
//it is attenuated (attenuation) when reaching `pos`.
//Returns false when the light can not reach `pos` (beyond the radius or outside of the spot cone); the outputs
//must not be used in that case. The `cell` parameter is currently not used.
bool compute_light_vector(uint light,uint cell, vec3 pos,out float attenuation, out vec3 light_pos) {

	if (lights.data[light].type==LIGHT_TYPE_DIRECTIONAL) {

		//directional lights have no position, place a virtual one far away against the light direction
		light_pos = pos - lights.data[light].direction * params.max_length;
		attenuation = 1.0;

	} else {

		light_pos = lights.data[light].position;
		float distance = length(pos - light_pos);
		if (distance >= lights.data[light].radius) {
			return false;
		}

		//falloff goes from 1 at the light position down to (almost) 0 at the radius, same convention as the
		//spot falloff below; the lower bound keeps the base of pow() positive
		attenuation = pow( clamp(1.0 - distance / lights.data[light].radius, 0.0001, 1.0), lights.data[light].attenuation );

		if (lights.data[light].type==LIGHT_TYPE_SPOT) {

			vec3 rel = normalize(pos - light_pos);
			//acos() is undefined outside of [-1,1], rounding errors can push the dot product slightly beyond
			float angle = acos(clamp(dot(rel,lights.data[light].direction), -1.0, 1.0));
			if (angle > lights.data[light].spot_angle_radians) {
				return false;
			}

			float d = clamp(angle / lights.data[light].spot_angle_radians, 0.0, 1.0);
			attenuation *= pow(1.0 - d, lights.data[light].spot_attenuation);
		}
	}

	return true;
}
//Lights one cell per invocation: decodes the cell material, accumulates the contribution of every light
//that reaches the cell without being occluded and, when MODE_ANISOTROPIC is not defined, stores the
//result in color_tex at the cell position.
void main() {

	uint cell_index = gl_GlobalInvocationID.x;

	//position is packed as x (bits 0-9), y (bits 10-19) and z (remaining upper bits)
	uvec3 posu = uvec3(cell_materials.data[cell_index].position&0x3FF,(cell_materials.data[cell_index].position>>10)&0x3FF,cell_materials.data[cell_index].position>>20);
	vec3 pos = vec3(posu);

	vec3 emission = vec3(ivec3(cell_materials.data[cell_index].emission&0x3FF,(cell_materials.data[cell_index].emission>>10)&0x7FF,cell_materials.data[cell_index].emission>>21)) * params.emission_scale;
	vec4 albedo = unpackUnorm4x8(cell_materials.data[cell_index].albedo);
	vec4 normal = unpackSnorm4x8(cell_materials.data[cell_index].normal); //w >0.5 means, all directions

#ifdef MODE_ANISOTROPIC
	//one accumulator per axis direction (+x,-x,+y,-y,+z,-z)
	vec3 accum[6]=vec3[](vec3(0.0),vec3(0.0),vec3(0.0),vec3(0.0),vec3(0.0),vec3(0.0));
	const vec3 accum_dirs[6]=vec3[](vec3(1.0,0.0,0.0),vec3(-1.0,0.0,0.0),vec3(0.0,1.0,0.0),vec3(0.0,-1.0,0.0),vec3(0.0,0.0,1.0),vec3(0.0,0.0,-1.0));
#else
	vec3 accum = vec3(0);
#endif

	for(uint i=0;i<params.light_count;i++) {

		float attenuation;
		vec3 light_pos;

		if (!compute_light_vector(i,cell_index,pos,attenuation,light_pos)) {
			continue;
		}

		float distance_adv = lights.data[i].advance;

		vec3 light_dir = pos - light_pos;
		float distance = length(light_dir);
		light_dir=normalize(light_dir);

		distance += distance_adv - mod(distance, distance_adv); //make it reach the center of the box always

		vec3 from = pos - light_dir * distance; //approximate

		if (normal.w < 0.5 && dot(normal.xyz,light_dir)>=0) {
			continue; //not facing the light
		}

		//march along the same direction that was used to compute `from`, so the ray actually goes towards
		//the cell (lights.data[i].direction only matches it for directional lights)
		uint result = raymarch(distance,distance_adv,from,light_dir);

		if (result != cell_index) {
			continue; //was occluded
		}

		vec3 light = lights.data[i].color * albedo.rgb * attenuation;

		//NOTE(review): emission is added once per light that reaches the cell, and never when no light
		//reaches it - confirm this is intended
#ifdef MODE_ANISOTROPIC
		for(uint j=0;j<6;j++) {
			accum[j]+=max(0.0,dot(accum_dirs[j],-light_dir))*light+emission;
		}
#else
		if (normal.w < 0.5) {
			accum+=max(0.0,dot(normal.xyz,-light_dir))*light+emission;
		} else {
			//all directions
			accum+=light+emission;
		}
#endif
	}

#ifdef MODE_ANISOTROPIC
	vec3 accum_total = accum[0]+accum[1]+accum[2]+accum[3]+accum[4]+accum[5];
	float accum_total_energy = max(dot(accum_total,GREY_VEC),0.00001);
	vec3 iso_positive = vec3(dot(accum[0],GREY_VEC),dot(accum[2],GREY_VEC),dot(accum[4],GREY_VEC))/vec3(accum_total_energy);
	vec3 iso_negative = vec3(dot(accum[1],GREY_VEC),dot(accum[3],GREY_VEC),dot(accum[5],GREY_VEC))/vec3(accum_total_energy);

	//store in 3D textures, total color, and isotropic magnitudes
#else
	//store in 3D texture pos, accum
	imageStore(color_tex,ivec3(posu),vec4(accum,albedo.a));
#endif
}

View file

@ -882,6 +882,13 @@ public:
// Creates a render pipeline and returns its RID. Inputs are the shader, the
// framebuffer and vertex format IDs, the primitive type, and the rasterization,
// multisample, depth-stencil and color-blend state blocks.
// p_dynamic_state_flags defaults to 0.
virtual RID render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, int p_dynamic_state_flags = 0) = 0;
// Reports whether p_pipeline is a valid render pipeline.
// NOTE(review): the exact validity criteria are defined by the implementation — confirm there.
virtual bool render_pipeline_is_valid(RID p_pipeline) = 0;
/**************************/
/**** COMPUTE PIPELINE ****/
/**************************/
// Creates a compute pipeline from p_shader and returns its RID. Unlike
// render_pipeline_create, the shader is the only input.
// NOTE(review): presumably p_shader must contain a compute stage — confirm in the implementation.
virtual RID compute_pipeline_create(RID p_shader) = 0;
// Reports whether p_pipeline is a valid compute pipeline.
// NOTE(review): the exact validity criteria are defined by the implementation — confirm there.
virtual bool compute_pipeline_is_valid(RID p_pipeline) = 0;
/****************/
/**** SCREEN ****/
/****************/
@ -930,6 +937,19 @@ public:
// Ends draw list recording. Takes no list ID.
// NOTE(review): presumably closes whatever draw list(s) are currently open — confirm in the implementation.
virtual void draw_list_end() = 0;
/***********************/
/**** COMPUTE LISTS ****/
/***********************/
// Handle identifying a compute list; returned by compute_list_begin and passed
// to every other compute_list_* call except compute_list_end.
typedef int64_t ComputeListID;
// Starts a compute list and returns its handle.
virtual ComputeListID compute_list_begin() = 0;
// Binds the compute pipeline (see compute_pipeline_create) used by subsequent dispatches on p_list.
virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline) = 0;
// Binds p_uniform_set at set index p_index on p_list.
virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) = 0;
// Supplies p_data_size bytes of push constant data from p_data.
// NOTE(review): p_data is a non-const pointer although it looks like an input only;
// changing it to const void * would require updating every override — confirm and consider.
virtual void compute_list_set_push_constant(ComputeListID p_list, void *p_data, uint32_t p_data_size) = 0;
// Dispatches the bound pipeline with the given number of groups along x, y and z.
// NOTE(review): presumably these are workgroup counts, not invocation counts — confirm against the backend.
virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) = 0;
// Ends compute list recording. Takes no list ID.
// NOTE(review): this suggests only one compute list is active at a time — confirm in the implementation.
virtual void compute_list_end() = 0;
/***************/
/**** FREE! ****/
/***************/