virtualx-engine/drivers/vulkan/rendering_device_vulkan.cpp
Matias N. Goldberg 472226422e Fix uninitialized variable ending up sent to Vulkan
The first time a shader is compiled Godot performs the following:

```cpp
for (uint32_t i = 0; i < SHADER_STAGE_MAX; i++) {
	if (spirv_data.push_constant_stages_mask.has_flag((ShaderStage)(1 << i))) {
		binary_data.push_constant_vk_stages_mask |= shader_stage_masks[i];
	}
}
```

However binary_data.push_constant_vk_stages_mask is never initialized to
0 and thus contains garbage data or'ed with the good data.

This value is used by push constants (and many other things) thus it can
be a big deal.

Fortunately because the relevant flags are always guaranteed to be set
(but not guaranteed to be unset), the damage is restricted to:

1. Performance (unnecessary flushing & excessive barriers)
2. Overwriting push descriptors already set (this would be serious,
doesn't seem to be an issue)
3. Driver implementations going crazy when they see bits set they don't
expect (unknown if this is an issue)

This uninitialized value is later saved into the binary cache.

Valgrind is able to detect this bug on the first run, but not on the
subsequent ones because the data then comes from a file.

cache_file_version has been bumped to force a rebuild of all cached
shaders, because the ones generated so far are compromised.
2023-07-29 18:28:33 -03:00

9631 lines
391 KiB
C++

/**************************************************************************/
/* rendering_device_vulkan.cpp */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#include "rendering_device_vulkan.h"
#include "core/config/project_settings.h"
#include "core/io/compression.h"
#include "core/io/dir_access.h"
#include "core/io/file_access.h"
#include "core/io/marshalls.h"
#include "core/os/os.h"
#include "core/templates/hashfuncs.h"
#include "drivers/vulkan/vulkan_context.h"
#include "thirdparty/misc/smolv.h"
//#define FORCE_FULL_BARRIER
// NOTE(review): presumably the size cutoff up to which an allocation is treated as "small";
// the code that reads this constant is outside this part of the file — confirm units and use there.
static const uint32_t SMALL_ALLOCATION_MAX_SIZE = 4096;
// Resolves p_buffer against each buffer owner in turn (vertex, index, uniform, texture, storage)
// and returns the matching Buffer, or nullptr when none of them owns the RID.
// The pipeline stages and access types that apply to the buffer are bitwise OR'd into
// r_stage_mask and r_access_mask; bits already present in either mask are kept.
// p_post_barrier selects which shader stages (vertex, fragment, compute) are considered.
RenderingDeviceVulkan::Buffer *RenderingDeviceVulkan::_get_buffer_from_owner(RID p_buffer, VkPipelineStageFlags &r_stage_mask, VkAccessFlags &r_access_mask, BitField<BarrierMask> p_post_barrier) {
	// Adds every shader stage requested through p_post_barrier to r_stage_mask.
	// p_shader_access is added to r_access_mask only if at least one stage was requested.
	auto add_requested_shader_stages = [&](VkAccessFlags p_shader_access) {
		VkPipelineStageFlags requested_stages = 0;
		if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) {
			requested_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
		}
		if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) {
			requested_stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
		}
		if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) {
			requested_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
		}
		if (requested_stages != 0) {
			r_stage_mask |= requested_stages;
			r_access_mask |= p_shader_access;
		}
	};

	if (vertex_buffer_owner.owns(p_buffer)) {
		Buffer *vertex_buffer = vertex_buffer_owner.get_or_null(p_buffer);
		r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
		r_access_mask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
		// A vertex buffer that is also usable as a storage buffer can be read and written by shaders.
		if (vertex_buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) {
			add_requested_shader_stages(VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
		}
		return vertex_buffer;
	}

	if (index_buffer_owner.owns(p_buffer)) {
		r_stage_mask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
		r_access_mask |= VK_ACCESS_INDEX_READ_BIT;
		return index_buffer_owner.get_or_null(p_buffer);
	}

	if (uniform_buffer_owner.owns(p_buffer)) {
		// The uniform read access is added regardless of the requested stages,
		// so the per-stage helper contributes no access bits here.
		add_requested_shader_stages(0);
		r_access_mask |= VK_ACCESS_UNIFORM_READ_BIT;
		return uniform_buffer_owner.get_or_null(p_buffer);
	}

	if (texture_buffer_owner.owns(p_buffer)) {
		add_requested_shader_stages(VK_ACCESS_SHADER_READ_BIT);
		// The texture buffer object embeds its Buffer as the `buffer` member.
		return &texture_buffer_owner.get_or_null(p_buffer)->buffer;
	}

	if (storage_buffer_owner.owns(p_buffer)) {
		Buffer *storage_buffer = storage_buffer_owner.get_or_null(p_buffer);
		add_requested_shader_stages(VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
		// Storage buffers flagged for indirect use are also read by the draw-indirect stage.
		if (storage_buffer->usage & VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT) {
			r_stage_mask |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
			r_access_mask |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
		}
		return storage_buffer;
	}

	return nullptr;
}
// Widens the destination side of `dependency` for an attachment that is being stored, so that
// later users of the image are covered. Bits are only ever added to dstStageMask/dstAccessMask.
//  - Transfer reads (copies / layout transitions) are always included.
//  - is_sampled: all three shader stages with shader-read access.
//  - otherwise is_storage: all three shader stages with shader read and write access.
//  - otherwise: color attachment output with color attachment read and write access.
//  - is_depth additionally includes the early/late fragment test stages with depth-stencil access.
static void update_external_dependency_for_store(VkSubpassDependency2KHR &dependency, bool is_sampled, bool is_storage, bool is_depth) {
	const VkPipelineStageFlags all_shader_stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;

	// Start from the part that applies to every resource: copies and image layout transitions.
	VkPipelineStageFlags extra_stages = VK_PIPELINE_STAGE_TRANSFER_BIT;
	VkAccessFlags extra_access = VK_ACCESS_TRANSFER_READ_BIT;

	// Transitioning from write to read: cover whoever may consume the resource next.
	if (is_sampled) {
		extra_stages |= all_shader_stages;
		extra_access |= VK_ACCESS_SHADER_READ_BIT;
	} else if (is_storage) {
		extra_stages |= all_shader_stages;
		extra_access |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
	} else {
		extra_stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
		extra_access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
	}

	if (is_depth) {
		// Depth resources are also touched by the fragment test stages.
		extra_stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
		extra_access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
	}

	dependency.dstStageMask |= extra_stages;
	dependency.dstAccessMask |= extra_access;
}
// Records that p_id depends on p_depends_on, in both directions:
// dependency_map[p_depends_on] gains p_id, and reverse_dependency_map[p_id] gains p_depends_on.
void RenderingDeviceVulkan::_add_dependency(RID p_id, RID p_depends_on) {
	// Makes sure p_map has a set stored under p_key, then adds p_member to that set.
	auto link = [](HashMap<RID, HashSet<RID>> &p_map, const RID &p_key, const RID &p_member) {
		if (!p_map.has(p_key)) {
			p_map[p_key] = HashSet<RID>();
		}
		p_map[p_key].insert(p_member);
	};

	link(dependency_map, p_depends_on, p_id);
	link(reverse_dependency_map, p_id, p_depends_on);
}
// Drops everything recorded for p_id in the two dependency maps:
//  - every RID stored in dependency_map[p_id] is passed to free(), then that entry is removed;
//  - p_id is erased from the dependency_map set of every RID listed in
//    reverse_dependency_map[p_id], then that reverse entry is removed.
void RenderingDeviceVulkan::_free_dependencies(RID p_id) {
// Direct dependencies must be freed.
HashMap<RID, HashSet<RID>>::Iterator E = dependency_map.find(p_id);
if (E) {
// Always frees the first remaining element and then re-checks the set size.
// NOTE(review): this only terminates if free() ends up erasing that RID from this set
// (presumably via the reverse-dependency pass below, run for the freed RID) — confirm.
while (E->value.size()) {
free(*E->value.begin());
}
dependency_map.remove(E);
}
// Reverse dependencies must be unreferenced.
E = reverse_dependency_map.find(p_id);
if (E) {
for (const RID &F : E->value) {
// F is a resource p_id was registered against; its forward set is expected to contain p_id.
HashMap<RID, HashSet<RID>>::Iterator G = dependency_map.find(F);
// NOTE(review): ERR_CONTINUE presumably reports the inconsistency and moves on to the
// next entry when the two maps disagree — confirm against the macro definition.
ERR_CONTINUE(!G);
ERR_CONTINUE(!G->value.has(p_id));
G->value.erase(p_id);
}
reverse_dependency_map.remove(E);
}
}
// Table of Vulkan VkFormat values, sized to RenderingDevice::DATA_FORMAT_MAX entries.
// NOTE(review): presumably indexed by RenderingDevice::DataFormat, so the entry order has to
// mirror that enum (and stay parallel to named_formats) — confirm against the enum declaration
// before inserting, removing or reordering entries.
const VkFormat RenderingDeviceVulkan::vulkan_formats[RenderingDevice::DATA_FORMAT_MAX] = {
VK_FORMAT_R4G4_UNORM_PACK8,
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
VK_FORMAT_B4G4R4A4_UNORM_PACK16,
VK_FORMAT_R5G6B5_UNORM_PACK16,
VK_FORMAT_B5G6R5_UNORM_PACK16,
VK_FORMAT_R5G5B5A1_UNORM_PACK16,
VK_FORMAT_B5G5R5A1_UNORM_PACK16,
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
VK_FORMAT_R8_UNORM,
VK_FORMAT_R8_SNORM,
VK_FORMAT_R8_USCALED,
VK_FORMAT_R8_SSCALED,
VK_FORMAT_R8_UINT,
VK_FORMAT_R8_SINT,
VK_FORMAT_R8_SRGB,
VK_FORMAT_R8G8_UNORM,
VK_FORMAT_R8G8_SNORM,
VK_FORMAT_R8G8_USCALED,
VK_FORMAT_R8G8_SSCALED,
VK_FORMAT_R8G8_UINT,
VK_FORMAT_R8G8_SINT,
VK_FORMAT_R8G8_SRGB,
VK_FORMAT_R8G8B8_UNORM,
VK_FORMAT_R8G8B8_SNORM,
VK_FORMAT_R8G8B8_USCALED,
VK_FORMAT_R8G8B8_SSCALED,
VK_FORMAT_R8G8B8_UINT,
VK_FORMAT_R8G8B8_SINT,
VK_FORMAT_R8G8B8_SRGB,
VK_FORMAT_B8G8R8_UNORM,
VK_FORMAT_B8G8R8_SNORM,
VK_FORMAT_B8G8R8_USCALED,
VK_FORMAT_B8G8R8_SSCALED,
VK_FORMAT_B8G8R8_UINT,
VK_FORMAT_B8G8R8_SINT,
VK_FORMAT_B8G8R8_SRGB,
VK_FORMAT_R8G8B8A8_UNORM,
VK_FORMAT_R8G8B8A8_SNORM,
VK_FORMAT_R8G8B8A8_USCALED,
VK_FORMAT_R8G8B8A8_SSCALED,
VK_FORMAT_R8G8B8A8_UINT,
VK_FORMAT_R8G8B8A8_SINT,
VK_FORMAT_R8G8B8A8_SRGB,
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_B8G8R8A8_SNORM,
VK_FORMAT_B8G8R8A8_USCALED,
VK_FORMAT_B8G8R8A8_SSCALED,
VK_FORMAT_B8G8R8A8_UINT,
VK_FORMAT_B8G8R8A8_SINT,
VK_FORMAT_B8G8R8A8_SRGB,
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
VK_FORMAT_A8B8G8R8_SNORM_PACK32,
VK_FORMAT_A8B8G8R8_USCALED_PACK32,
VK_FORMAT_A8B8G8R8_SSCALED_PACK32,
VK_FORMAT_A8B8G8R8_UINT_PACK32,
VK_FORMAT_A8B8G8R8_SINT_PACK32,
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
VK_FORMAT_A2R10G10B10_UNORM_PACK32,
VK_FORMAT_A2R10G10B10_SNORM_PACK32,
VK_FORMAT_A2R10G10B10_USCALED_PACK32,
VK_FORMAT_A2R10G10B10_SSCALED_PACK32,
VK_FORMAT_A2R10G10B10_UINT_PACK32,
VK_FORMAT_A2R10G10B10_SINT_PACK32,
VK_FORMAT_A2B10G10R10_UNORM_PACK32,
VK_FORMAT_A2B10G10R10_SNORM_PACK32,
VK_FORMAT_A2B10G10R10_USCALED_PACK32,
VK_FORMAT_A2B10G10R10_SSCALED_PACK32,
VK_FORMAT_A2B10G10R10_UINT_PACK32,
VK_FORMAT_A2B10G10R10_SINT_PACK32,
VK_FORMAT_R16_UNORM,
VK_FORMAT_R16_SNORM,
VK_FORMAT_R16_USCALED,
VK_FORMAT_R16_SSCALED,
VK_FORMAT_R16_UINT,
VK_FORMAT_R16_SINT,
VK_FORMAT_R16_SFLOAT,
VK_FORMAT_R16G16_UNORM,
VK_FORMAT_R16G16_SNORM,
VK_FORMAT_R16G16_USCALED,
VK_FORMAT_R16G16_SSCALED,
VK_FORMAT_R16G16_UINT,
VK_FORMAT_R16G16_SINT,
VK_FORMAT_R16G16_SFLOAT,
VK_FORMAT_R16G16B16_UNORM,
VK_FORMAT_R16G16B16_SNORM,
VK_FORMAT_R16G16B16_USCALED,
VK_FORMAT_R16G16B16_SSCALED,
VK_FORMAT_R16G16B16_UINT,
VK_FORMAT_R16G16B16_SINT,
VK_FORMAT_R16G16B16_SFLOAT,
VK_FORMAT_R16G16B16A16_UNORM,
VK_FORMAT_R16G16B16A16_SNORM,
VK_FORMAT_R16G16B16A16_USCALED,
VK_FORMAT_R16G16B16A16_SSCALED,
VK_FORMAT_R16G16B16A16_UINT,
VK_FORMAT_R16G16B16A16_SINT,
VK_FORMAT_R16G16B16A16_SFLOAT,
VK_FORMAT_R32_UINT,
VK_FORMAT_R32_SINT,
VK_FORMAT_R32_SFLOAT,
VK_FORMAT_R32G32_UINT,
VK_FORMAT_R32G32_SINT,
VK_FORMAT_R32G32_SFLOAT,
VK_FORMAT_R32G32B32_UINT,
VK_FORMAT_R32G32B32_SINT,
VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_UINT,
VK_FORMAT_R32G32B32A32_SINT,
VK_FORMAT_R32G32B32A32_SFLOAT,
VK_FORMAT_R64_UINT,
VK_FORMAT_R64_SINT,
VK_FORMAT_R64_SFLOAT,
VK_FORMAT_R64G64_UINT,
VK_FORMAT_R64G64_SINT,
VK_FORMAT_R64G64_SFLOAT,
VK_FORMAT_R64G64B64_UINT,
VK_FORMAT_R64G64B64_SINT,
VK_FORMAT_R64G64B64_SFLOAT,
VK_FORMAT_R64G64B64A64_UINT,
VK_FORMAT_R64G64B64A64_SINT,
VK_FORMAT_R64G64B64A64_SFLOAT,
VK_FORMAT_B10G11R11_UFLOAT_PACK32,
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
VK_FORMAT_D16_UNORM,
VK_FORMAT_X8_D24_UNORM_PACK32,
VK_FORMAT_D32_SFLOAT,
VK_FORMAT_S8_UINT,
VK_FORMAT_D16_UNORM_S8_UINT,
VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_D32_SFLOAT_S8_UINT,
VK_FORMAT_BC1_RGB_UNORM_BLOCK,
VK_FORMAT_BC1_RGB_SRGB_BLOCK,
VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
VK_FORMAT_BC2_UNORM_BLOCK,
VK_FORMAT_BC2_SRGB_BLOCK,
VK_FORMAT_BC3_UNORM_BLOCK,
VK_FORMAT_BC3_SRGB_BLOCK,
VK_FORMAT_BC4_UNORM_BLOCK,
VK_FORMAT_BC4_SNORM_BLOCK,
VK_FORMAT_BC5_UNORM_BLOCK,
VK_FORMAT_BC5_SNORM_BLOCK,
VK_FORMAT_BC6H_UFLOAT_BLOCK,
VK_FORMAT_BC6H_SFLOAT_BLOCK,
VK_FORMAT_BC7_UNORM_BLOCK,
VK_FORMAT_BC7_SRGB_BLOCK,
VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK,
VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK,
VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK,
VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK,
VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK,
VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK,
VK_FORMAT_EAC_R11_UNORM_BLOCK,
VK_FORMAT_EAC_R11_SNORM_BLOCK,
VK_FORMAT_EAC_R11G11_UNORM_BLOCK,
VK_FORMAT_EAC_R11G11_SNORM_BLOCK,
VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
VK_FORMAT_G8B8G8R8_422_UNORM,
VK_FORMAT_B8G8R8G8_422_UNORM,
VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,
VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,
VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,
VK_FORMAT_G8_B8R8_2PLANE_422_UNORM,
VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,
VK_FORMAT_R10X6_UNORM_PACK16,
VK_FORMAT_R10X6G10X6_UNORM_2PACK16,
VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16,
VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16,
VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16,
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16,
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16,
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16,
VK_FORMAT_R12X4_UNORM_PACK16,
VK_FORMAT_R12X4G12X4_UNORM_2PACK16,
VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16,
VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16,
VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16,
VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16,
VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16,
VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16,
VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16,
VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16,
VK_FORMAT_G16B16G16R16_422_UNORM,
VK_FORMAT_B16G16R16G16_422_UNORM,
VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM,
VK_FORMAT_G16_B16R16_2PLANE_420_UNORM,
VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM,
VK_FORMAT_G16_B16R16_2PLANE_422_UNORM,
VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM,
};
// Table of human-readable format names (C strings), sized to RenderingDevice::DATA_FORMAT_MAX entries.
// NOTE(review): presumably indexed by RenderingDevice::DataFormat and kept parallel to
// vulkan_formats, so the entry order must match — confirm against the enum declaration.
// The strings are runtime text; changing their spelling changes whatever output uses them.
const char *RenderingDeviceVulkan::named_formats[RenderingDevice::DATA_FORMAT_MAX] = {
"R4G4_Unorm_Pack8",
"R4G4B4A4_Unorm_Pack16",
"B4G4R4A4_Unorm_Pack16",
"R5G6B5_Unorm_Pack16",
"B5G6R5_Unorm_Pack16",
"R5G5B5A1_Unorm_Pack16",
"B5G5R5A1_Unorm_Pack16",
"A1R5G5B5_Unorm_Pack16",
"R8_Unorm",
"R8_Snorm",
"R8_Uscaled",
"R8_Sscaled",
"R8_Uint",
"R8_Sint",
"R8_Srgb",
"R8G8_Unorm",
"R8G8_Snorm",
"R8G8_Uscaled",
"R8G8_Sscaled",
"R8G8_Uint",
"R8G8_Sint",
"R8G8_Srgb",
"R8G8B8_Unorm",
"R8G8B8_Snorm",
"R8G8B8_Uscaled",
"R8G8B8_Sscaled",
"R8G8B8_Uint",
"R8G8B8_Sint",
"R8G8B8_Srgb",
"B8G8R8_Unorm",
"B8G8R8_Snorm",
"B8G8R8_Uscaled",
"B8G8R8_Sscaled",
"B8G8R8_Uint",
"B8G8R8_Sint",
"B8G8R8_Srgb",
"R8G8B8A8_Unorm",
"R8G8B8A8_Snorm",
"R8G8B8A8_Uscaled",
"R8G8B8A8_Sscaled",
"R8G8B8A8_Uint",
"R8G8B8A8_Sint",
"R8G8B8A8_Srgb",
"B8G8R8A8_Unorm",
"B8G8R8A8_Snorm",
"B8G8R8A8_Uscaled",
"B8G8R8A8_Sscaled",
"B8G8R8A8_Uint",
"B8G8R8A8_Sint",
"B8G8R8A8_Srgb",
"A8B8G8R8_Unorm_Pack32",
"A8B8G8R8_Snorm_Pack32",
"A8B8G8R8_Uscaled_Pack32",
"A8B8G8R8_Sscaled_Pack32",
"A8B8G8R8_Uint_Pack32",
"A8B8G8R8_Sint_Pack32",
"A8B8G8R8_Srgb_Pack32",
"A2R10G10B10_Unorm_Pack32",
"A2R10G10B10_Snorm_Pack32",
"A2R10G10B10_Uscaled_Pack32",
"A2R10G10B10_Sscaled_Pack32",
"A2R10G10B10_Uint_Pack32",
"A2R10G10B10_Sint_Pack32",
"A2B10G10R10_Unorm_Pack32",
"A2B10G10R10_Snorm_Pack32",
"A2B10G10R10_Uscaled_Pack32",
"A2B10G10R10_Sscaled_Pack32",
"A2B10G10R10_Uint_Pack32",
"A2B10G10R10_Sint_Pack32",
"R16_Unorm",
"R16_Snorm",
"R16_Uscaled",
"R16_Sscaled",
"R16_Uint",
"R16_Sint",
"R16_Sfloat",
"R16G16_Unorm",
"R16G16_Snorm",
"R16G16_Uscaled",
"R16G16_Sscaled",
"R16G16_Uint",
"R16G16_Sint",
"R16G16_Sfloat",
"R16G16B16_Unorm",
"R16G16B16_Snorm",
"R16G16B16_Uscaled",
"R16G16B16_Sscaled",
"R16G16B16_Uint",
"R16G16B16_Sint",
"R16G16B16_Sfloat",
"R16G16B16A16_Unorm",
"R16G16B16A16_Snorm",
"R16G16B16A16_Uscaled",
"R16G16B16A16_Sscaled",
"R16G16B16A16_Uint",
"R16G16B16A16_Sint",
"R16G16B16A16_Sfloat",
"R32_Uint",
"R32_Sint",
"R32_Sfloat",
"R32G32_Uint",
"R32G32_Sint",
"R32G32_Sfloat",
"R32G32B32_Uint",
"R32G32B32_Sint",
"R32G32B32_Sfloat",
"R32G32B32A32_Uint",
"R32G32B32A32_Sint",
"R32G32B32A32_Sfloat",
"R64_Uint",
"R64_Sint",
"R64_Sfloat",
"R64G64_Uint",
"R64G64_Sint",
"R64G64_Sfloat",
"R64G64B64_Uint",
"R64G64B64_Sint",
"R64G64B64_Sfloat",
"R64G64B64A64_Uint",
"R64G64B64A64_Sint",
"R64G64B64A64_Sfloat",
"B10G11R11_Ufloat_Pack32",
"E5B9G9R9_Ufloat_Pack32",
"D16_Unorm",
"X8_D24_Unorm_Pack32",
"D32_Sfloat",
"S8_Uint",
"D16_Unorm_S8_Uint",
"D24_Unorm_S8_Uint",
"D32_Sfloat_S8_Uint",
"Bc1_Rgb_Unorm_Block",
"Bc1_Rgb_Srgb_Block",
"Bc1_Rgba_Unorm_Block",
"Bc1_Rgba_Srgb_Block",
"Bc2_Unorm_Block",
"Bc2_Srgb_Block",
"Bc3_Unorm_Block",
"Bc3_Srgb_Block",
"Bc4_Unorm_Block",
"Bc4_Snorm_Block",
"Bc5_Unorm_Block",
"Bc5_Snorm_Block",
"Bc6H_Ufloat_Block",
"Bc6H_Sfloat_Block",
"Bc7_Unorm_Block",
"Bc7_Srgb_Block",
"Etc2_R8G8B8_Unorm_Block",
"Etc2_R8G8B8_Srgb_Block",
"Etc2_R8G8B8A1_Unorm_Block",
"Etc2_R8G8B8A1_Srgb_Block",
"Etc2_R8G8B8A8_Unorm_Block",
"Etc2_R8G8B8A8_Srgb_Block",
"Eac_R11_Unorm_Block",
"Eac_R11_Snorm_Block",
"Eac_R11G11_Unorm_Block",
"Eac_R11G11_Snorm_Block",
"Astc_4X4_Unorm_Block",
"Astc_4X4_Srgb_Block",
"Astc_5X4_Unorm_Block",
"Astc_5X4_Srgb_Block",
"Astc_5X5_Unorm_Block",
"Astc_5X5_Srgb_Block",
"Astc_6X5_Unorm_Block",
"Astc_6X5_Srgb_Block",
"Astc_6X6_Unorm_Block",
"Astc_6X6_Srgb_Block",
"Astc_8X5_Unorm_Block",
"Astc_8X5_Srgb_Block",
"Astc_8X6_Unorm_Block",
"Astc_8X6_Srgb_Block",
"Astc_8X8_Unorm_Block",
"Astc_8X8_Srgb_Block",
"Astc_10X5_Unorm_Block",
"Astc_10X5_Srgb_Block",
"Astc_10X6_Unorm_Block",
"Astc_10X6_Srgb_Block",
"Astc_10X8_Unorm_Block",
"Astc_10X8_Srgb_Block",
"Astc_10X10_Unorm_Block",
"Astc_10X10_Srgb_Block",
"Astc_12X10_Unorm_Block",
"Astc_12X10_Srgb_Block",
"Astc_12X12_Unorm_Block",
"Astc_12X12_Srgb_Block",
"G8B8G8R8_422_Unorm",
"B8G8R8G8_422_Unorm",
"G8_B8_R8_3Plane_420_Unorm",
"G8_B8R8_2Plane_420_Unorm",
"G8_B8_R8_3Plane_422_Unorm",
"G8_B8R8_2Plane_422_Unorm",
"G8_B8_R8_3Plane_444_Unorm",
"R10X6_Unorm_Pack16",
"R10X6G10X6_Unorm_2Pack16",
"R10X6G10X6B10X6A10X6_Unorm_4Pack16",
"G10X6B10X6G10X6R10X6_422_Unorm_4Pack16",
"B10X6G10X6R10X6G10X6_422_Unorm_4Pack16",
"G10X6_B10X6_R10X6_3Plane_420_Unorm_3Pack16",
"G10X6_B10X6R10X6_2Plane_420_Unorm_3Pack16",
"G10X6_B10X6_R10X6_3Plane_422_Unorm_3Pack16",
"G10X6_B10X6R10X6_2Plane_422_Unorm_3Pack16",
"G10X6_B10X6_R10X6_3Plane_444_Unorm_3Pack16",
"R12X4_Unorm_Pack16",
"R12X4G12X4_Unorm_2Pack16",
"R12X4G12X4B12X4A12X4_Unorm_4Pack16",
"G12X4B12X4G12X4R12X4_422_Unorm_4Pack16",
"B12X4G12X4R12X4G12X4_422_Unorm_4Pack16",
"G12X4_B12X4_R12X4_3Plane_420_Unorm_3Pack16",
"G12X4_B12X4R12X4_2Plane_420_Unorm_3Pack16",
"G12X4_B12X4_R12X4_3Plane_422_Unorm_3Pack16",
"G12X4_B12X4R12X4_2Plane_422_Unorm_3Pack16",
"G12X4_B12X4_R12X4_3Plane_444_Unorm_3Pack16",
"G16B16G16R16_422_Unorm",
"B16G16R16G16_422_Unorm",
"G16_B16_R16_3Plane_420_Unorm",
"G16_B16R16_2Plane_420_Unorm",
"G16_B16_R16_3Plane_422_Unorm",
"G16_B16R16_2Plane_422_Unorm",
"G16_B16_R16_3Plane_444_Unorm",
};
// Returns the size this function assigns to a vertex attribute of the given format, or 0 for
// formats that are not listed. The case labels are grouped by the value they return.
// Note that the 8-bit and 16-bit formats do not report their tightly packed size:
// every 8-bit format and the one/two-channel 16-bit formats report 4, and both the three- and
// four-channel 16-bit formats report 8.
int RenderingDeviceVulkan::get_format_vertex_size(DataFormat p_format) {
	switch (p_format) {
		// Formats reported as 4.
		case DATA_FORMAT_R8_UNORM:
		case DATA_FORMAT_R8_SNORM:
		case DATA_FORMAT_R8_UINT:
		case DATA_FORMAT_R8_SINT:
		case DATA_FORMAT_R8G8_UNORM:
		case DATA_FORMAT_R8G8_SNORM:
		case DATA_FORMAT_R8G8_UINT:
		case DATA_FORMAT_R8G8_SINT:
		case DATA_FORMAT_R8G8B8_UNORM:
		case DATA_FORMAT_R8G8B8_SNORM:
		case DATA_FORMAT_R8G8B8_UINT:
		case DATA_FORMAT_R8G8B8_SINT:
		case DATA_FORMAT_B8G8R8_UNORM:
		case DATA_FORMAT_B8G8R8_SNORM:
		case DATA_FORMAT_B8G8R8_UINT:
		case DATA_FORMAT_B8G8R8_SINT:
		case DATA_FORMAT_R8G8B8A8_UNORM:
		case DATA_FORMAT_R8G8B8A8_SNORM:
		case DATA_FORMAT_R8G8B8A8_UINT:
		case DATA_FORMAT_R8G8B8A8_SINT:
		case DATA_FORMAT_B8G8R8A8_UNORM:
		case DATA_FORMAT_B8G8R8A8_SNORM:
		case DATA_FORMAT_B8G8R8A8_UINT:
		case DATA_FORMAT_B8G8R8A8_SINT:
		case DATA_FORMAT_A2B10G10R10_UNORM_PACK32:
		case DATA_FORMAT_R16_UNORM:
		case DATA_FORMAT_R16_SNORM:
		case DATA_FORMAT_R16_UINT:
		case DATA_FORMAT_R16_SINT:
		case DATA_FORMAT_R16_SFLOAT:
		case DATA_FORMAT_R16G16_UNORM:
		case DATA_FORMAT_R16G16_SNORM:
		case DATA_FORMAT_R16G16_UINT:
		case DATA_FORMAT_R16G16_SINT:
		case DATA_FORMAT_R16G16_SFLOAT:
		case DATA_FORMAT_R32_UINT:
		case DATA_FORMAT_R32_SINT:
		case DATA_FORMAT_R32_SFLOAT:
			return 4;

		// Formats reported as 8.
		case DATA_FORMAT_R16G16B16_UNORM:
		case DATA_FORMAT_R16G16B16_SNORM:
		case DATA_FORMAT_R16G16B16_UINT:
		case DATA_FORMAT_R16G16B16_SINT:
		case DATA_FORMAT_R16G16B16_SFLOAT:
		case DATA_FORMAT_R16G16B16A16_UNORM:
		case DATA_FORMAT_R16G16B16A16_SNORM:
		case DATA_FORMAT_R16G16B16A16_UINT:
		case DATA_FORMAT_R16G16B16A16_SINT:
		case DATA_FORMAT_R16G16B16A16_SFLOAT:
		case DATA_FORMAT_R32G32_UINT:
		case DATA_FORMAT_R32G32_SINT:
		case DATA_FORMAT_R32G32_SFLOAT:
		case DATA_FORMAT_R64_UINT:
		case DATA_FORMAT_R64_SINT:
		case DATA_FORMAT_R64_SFLOAT:
			return 8;

		// Formats reported as 12.
		case DATA_FORMAT_R32G32B32_UINT:
		case DATA_FORMAT_R32G32B32_SINT:
		case DATA_FORMAT_R32G32B32_SFLOAT:
			return 12;

		// Formats reported as 16.
		case DATA_FORMAT_R32G32B32A32_UINT:
		case DATA_FORMAT_R32G32B32A32_SINT:
		case DATA_FORMAT_R32G32B32A32_SFLOAT:
		case DATA_FORMAT_R64G64_UINT:
		case DATA_FORMAT_R64G64_SINT:
		case DATA_FORMAT_R64G64_SFLOAT:
			return 16;

		// Formats reported as 24.
		case DATA_FORMAT_R64G64B64_UINT:
		case DATA_FORMAT_R64G64B64_SINT:
		case DATA_FORMAT_R64G64B64_SFLOAT:
			return 24;

		// Formats reported as 32.
		case DATA_FORMAT_R64G64B64A64_UINT:
		case DATA_FORMAT_R64G64B64A64_SINT:
		case DATA_FORMAT_R64G64B64A64_SFLOAT:
			return 32;

		// Anything else has no vertex size here.
		default:
			return 0;
	}
}
// Returns the per-pixel size this function assigns to the given image format.
// The case labels are grouped by the value they return. All block-compressed formats
// (BC, ETC2, EAC, ASTC) report 1. A format that is not listed prints an error and also reports 1.
uint32_t RenderingDeviceVulkan::get_image_format_pixel_size(DataFormat p_format) {
	switch (p_format) {
		// Formats reported as 1.
		case DATA_FORMAT_R4G4_UNORM_PACK8:
		case DATA_FORMAT_R8_UNORM:
		case DATA_FORMAT_R8_SNORM:
		case DATA_FORMAT_R8_USCALED:
		case DATA_FORMAT_R8_SSCALED:
		case DATA_FORMAT_R8_UINT:
		case DATA_FORMAT_R8_SINT:
		case DATA_FORMAT_R8_SRGB:
		case DATA_FORMAT_S8_UINT:
		case DATA_FORMAT_BC1_RGB_UNORM_BLOCK:
		case DATA_FORMAT_BC1_RGB_SRGB_BLOCK:
		case DATA_FORMAT_BC1_RGBA_UNORM_BLOCK:
		case DATA_FORMAT_BC1_RGBA_SRGB_BLOCK:
		case DATA_FORMAT_BC2_UNORM_BLOCK:
		case DATA_FORMAT_BC2_SRGB_BLOCK:
		case DATA_FORMAT_BC3_UNORM_BLOCK:
		case DATA_FORMAT_BC3_SRGB_BLOCK:
		case DATA_FORMAT_BC4_UNORM_BLOCK:
		case DATA_FORMAT_BC4_SNORM_BLOCK:
		case DATA_FORMAT_BC5_UNORM_BLOCK:
		case DATA_FORMAT_BC5_SNORM_BLOCK:
		case DATA_FORMAT_BC6H_UFLOAT_BLOCK:
		case DATA_FORMAT_BC6H_SFLOAT_BLOCK:
		case DATA_FORMAT_BC7_UNORM_BLOCK:
		case DATA_FORMAT_BC7_SRGB_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
		case DATA_FORMAT_EAC_R11_UNORM_BLOCK:
		case DATA_FORMAT_EAC_R11_SNORM_BLOCK:
		case DATA_FORMAT_EAC_R11G11_UNORM_BLOCK:
		case DATA_FORMAT_EAC_R11G11_SNORM_BLOCK:
		case DATA_FORMAT_ASTC_4x4_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_4x4_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_5x4_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_5x4_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_5x5_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_5x5_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_6x5_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_6x5_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_6x6_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_6x6_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_8x5_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_8x5_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_8x6_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_8x6_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_8x8_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_8x8_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_10x5_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_10x5_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_10x6_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_10x6_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_10x8_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_10x8_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_10x10_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_10x10_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_12x10_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_12x10_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_12x12_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_12x12_SRGB_BLOCK:
			return 1;

		// Formats reported as 2.
		case DATA_FORMAT_R4G4B4A4_UNORM_PACK16:
		case DATA_FORMAT_B4G4R4A4_UNORM_PACK16:
		case DATA_FORMAT_R5G6B5_UNORM_PACK16:
		case DATA_FORMAT_B5G6R5_UNORM_PACK16:
		case DATA_FORMAT_R5G5B5A1_UNORM_PACK16:
		case DATA_FORMAT_B5G5R5A1_UNORM_PACK16:
		case DATA_FORMAT_A1R5G5B5_UNORM_PACK16:
		case DATA_FORMAT_R8G8_UNORM:
		case DATA_FORMAT_R8G8_SNORM:
		case DATA_FORMAT_R8G8_USCALED:
		case DATA_FORMAT_R8G8_SSCALED:
		case DATA_FORMAT_R8G8_UINT:
		case DATA_FORMAT_R8G8_SINT:
		case DATA_FORMAT_R8G8_SRGB:
		case DATA_FORMAT_R16_UNORM:
		case DATA_FORMAT_R16_SNORM:
		case DATA_FORMAT_R16_USCALED:
		case DATA_FORMAT_R16_SSCALED:
		case DATA_FORMAT_R16_UINT:
		case DATA_FORMAT_R16_SINT:
		case DATA_FORMAT_R16_SFLOAT:
		case DATA_FORMAT_D16_UNORM:
		case DATA_FORMAT_R10X6_UNORM_PACK16:
		case DATA_FORMAT_R10X6G10X6_UNORM_2PACK16:
		case DATA_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
		case DATA_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
		case DATA_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
		case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
		case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
		case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
		case DATA_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
		case DATA_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
		case DATA_FORMAT_R12X4_UNORM_PACK16:
		case DATA_FORMAT_R12X4G12X4_UNORM_2PACK16:
		case DATA_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
		case DATA_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
		case DATA_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
		case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
		case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
		case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
		case DATA_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
		case DATA_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
			return 2;

		// Formats reported as 3.
		case DATA_FORMAT_R8G8B8_UNORM:
		case DATA_FORMAT_R8G8B8_SNORM:
		case DATA_FORMAT_R8G8B8_USCALED:
		case DATA_FORMAT_R8G8B8_SSCALED:
		case DATA_FORMAT_R8G8B8_UINT:
		case DATA_FORMAT_R8G8B8_SINT:
		case DATA_FORMAT_R8G8B8_SRGB:
		case DATA_FORMAT_B8G8R8_UNORM:
		case DATA_FORMAT_B8G8R8_SNORM:
		case DATA_FORMAT_B8G8R8_USCALED:
		case DATA_FORMAT_B8G8R8_SSCALED:
		case DATA_FORMAT_B8G8R8_UINT:
		case DATA_FORMAT_B8G8R8_SINT:
		case DATA_FORMAT_B8G8R8_SRGB:
			return 3;

		// Formats reported as 4.
		case DATA_FORMAT_R8G8B8A8_UNORM:
		case DATA_FORMAT_R8G8B8A8_SNORM:
		case DATA_FORMAT_R8G8B8A8_USCALED:
		case DATA_FORMAT_R8G8B8A8_SSCALED:
		case DATA_FORMAT_R8G8B8A8_UINT:
		case DATA_FORMAT_R8G8B8A8_SINT:
		case DATA_FORMAT_R8G8B8A8_SRGB:
		case DATA_FORMAT_B8G8R8A8_UNORM:
		case DATA_FORMAT_B8G8R8A8_SNORM:
		case DATA_FORMAT_B8G8R8A8_USCALED:
		case DATA_FORMAT_B8G8R8A8_SSCALED:
		case DATA_FORMAT_B8G8R8A8_UINT:
		case DATA_FORMAT_B8G8R8A8_SINT:
		case DATA_FORMAT_B8G8R8A8_SRGB:
		case DATA_FORMAT_A8B8G8R8_UNORM_PACK32:
		case DATA_FORMAT_A8B8G8R8_SNORM_PACK32:
		case DATA_FORMAT_A8B8G8R8_USCALED_PACK32:
		case DATA_FORMAT_A8B8G8R8_SSCALED_PACK32:
		case DATA_FORMAT_A8B8G8R8_UINT_PACK32:
		case DATA_FORMAT_A8B8G8R8_SINT_PACK32:
		case DATA_FORMAT_A8B8G8R8_SRGB_PACK32:
		case DATA_FORMAT_A2R10G10B10_UNORM_PACK32:
		case DATA_FORMAT_A2R10G10B10_SNORM_PACK32:
		case DATA_FORMAT_A2R10G10B10_USCALED_PACK32:
		case DATA_FORMAT_A2R10G10B10_SSCALED_PACK32:
		case DATA_FORMAT_A2R10G10B10_UINT_PACK32:
		case DATA_FORMAT_A2R10G10B10_SINT_PACK32:
		case DATA_FORMAT_A2B10G10R10_UNORM_PACK32:
		case DATA_FORMAT_A2B10G10R10_SNORM_PACK32:
		case DATA_FORMAT_A2B10G10R10_USCALED_PACK32:
		case DATA_FORMAT_A2B10G10R10_SSCALED_PACK32:
		case DATA_FORMAT_A2B10G10R10_UINT_PACK32:
		case DATA_FORMAT_A2B10G10R10_SINT_PACK32:
		case DATA_FORMAT_R16G16_UNORM:
		case DATA_FORMAT_R16G16_SNORM:
		case DATA_FORMAT_R16G16_USCALED:
		case DATA_FORMAT_R16G16_SSCALED:
		case DATA_FORMAT_R16G16_UINT:
		case DATA_FORMAT_R16G16_SINT:
		case DATA_FORMAT_R16G16_SFLOAT:
		case DATA_FORMAT_R32_UINT:
		case DATA_FORMAT_R32_SINT:
		case DATA_FORMAT_R32_SFLOAT:
		case DATA_FORMAT_B10G11R11_UFLOAT_PACK32:
		case DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32:
		case DATA_FORMAT_X8_D24_UNORM_PACK32:
		case DATA_FORMAT_D32_SFLOAT:
		case DATA_FORMAT_D16_UNORM_S8_UINT:
		case DATA_FORMAT_D24_UNORM_S8_UINT:
		case DATA_FORMAT_G8B8G8R8_422_UNORM:
		case DATA_FORMAT_B8G8R8G8_422_UNORM:
		case DATA_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
		case DATA_FORMAT_G8_B8R8_2PLANE_420_UNORM:
		case DATA_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
		case DATA_FORMAT_G8_B8R8_2PLANE_422_UNORM:
		case DATA_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
			return 4;

		// NOTE(review): this value was already marked as uncertain ("?") by the original author.
		case DATA_FORMAT_D32_SFLOAT_S8_UINT:
			return 5;

		// Formats reported as 6.
		case DATA_FORMAT_R16G16B16_UNORM:
		case DATA_FORMAT_R16G16B16_SNORM:
		case DATA_FORMAT_R16G16B16_USCALED:
		case DATA_FORMAT_R16G16B16_SSCALED:
		case DATA_FORMAT_R16G16B16_UINT:
		case DATA_FORMAT_R16G16B16_SINT:
		case DATA_FORMAT_R16G16B16_SFLOAT:
			return 6;

		// Formats reported as 8.
		case DATA_FORMAT_R16G16B16A16_UNORM:
		case DATA_FORMAT_R16G16B16A16_SNORM:
		case DATA_FORMAT_R16G16B16A16_USCALED:
		case DATA_FORMAT_R16G16B16A16_SSCALED:
		case DATA_FORMAT_R16G16B16A16_UINT:
		case DATA_FORMAT_R16G16B16A16_SINT:
		case DATA_FORMAT_R16G16B16A16_SFLOAT:
		case DATA_FORMAT_R32G32_UINT:
		case DATA_FORMAT_R32G32_SINT:
		case DATA_FORMAT_R32G32_SFLOAT:
		case DATA_FORMAT_R64_UINT:
		case DATA_FORMAT_R64_SINT:
		case DATA_FORMAT_R64_SFLOAT:
		case DATA_FORMAT_G16B16G16R16_422_UNORM:
		case DATA_FORMAT_B16G16R16G16_422_UNORM:
		case DATA_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
		case DATA_FORMAT_G16_B16R16_2PLANE_420_UNORM:
		case DATA_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
		case DATA_FORMAT_G16_B16R16_2PLANE_422_UNORM:
		case DATA_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
			return 8;

		// Formats reported as 12.
		case DATA_FORMAT_R32G32B32_UINT:
		case DATA_FORMAT_R32G32B32_SINT:
		case DATA_FORMAT_R32G32B32_SFLOAT:
			return 12;

		// Formats reported as 16.
		case DATA_FORMAT_R32G32B32A32_UINT:
		case DATA_FORMAT_R32G32B32A32_SINT:
		case DATA_FORMAT_R32G32B32A32_SFLOAT:
		case DATA_FORMAT_R64G64_UINT:
		case DATA_FORMAT_R64G64_SINT:
		case DATA_FORMAT_R64G64_SFLOAT:
			return 16;

		// Formats reported as 24.
		case DATA_FORMAT_R64G64B64_UINT:
		case DATA_FORMAT_R64G64B64_SINT:
		case DATA_FORMAT_R64G64B64_SFLOAT:
			return 24;

		// Formats reported as 32.
		case DATA_FORMAT_R64G64B64A64_UINT:
		case DATA_FORMAT_R64G64B64A64_SINT:
		case DATA_FORMAT_R64G64B64A64_SFLOAT:
			return 32;

		default:
			break;
	}

	// Only reached for a format that has no case above.
	ERR_PRINT("Format not handled, bug");
	return 1;
}
// https://www.khronos.org/registry/DataFormat/specs/1.1/dataformat.1.1.pdf
void RenderingDeviceVulkan::get_compressed_image_format_block_dimensions(DataFormat p_format, uint32_t &r_w, uint32_t &r_h) {
	// Reports the block footprint, in pixels, that size computations use for p_format.
	// Every footprint reported here is square, so a single edge length is tracked
	// and written to both outputs at the end.
	uint32_t block_edge = 1; // Formats not listed below are reported as 1x1.

	switch (p_format) {
		case DATA_FORMAT_BC1_RGB_UNORM_BLOCK:
		case DATA_FORMAT_BC1_RGB_SRGB_BLOCK:
		case DATA_FORMAT_BC1_RGBA_UNORM_BLOCK:
		case DATA_FORMAT_BC1_RGBA_SRGB_BLOCK:
		case DATA_FORMAT_BC2_UNORM_BLOCK:
		case DATA_FORMAT_BC2_SRGB_BLOCK:
		case DATA_FORMAT_BC3_UNORM_BLOCK:
		case DATA_FORMAT_BC3_SRGB_BLOCK:
		case DATA_FORMAT_BC4_UNORM_BLOCK:
		case DATA_FORMAT_BC4_SNORM_BLOCK:
		case DATA_FORMAT_BC5_UNORM_BLOCK:
		case DATA_FORMAT_BC5_SNORM_BLOCK:
		case DATA_FORMAT_BC6H_UFLOAT_BLOCK:
		case DATA_FORMAT_BC6H_SFLOAT_BLOCK:
		case DATA_FORMAT_BC7_UNORM_BLOCK:
		case DATA_FORMAT_BC7_SRGB_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
		case DATA_FORMAT_EAC_R11_UNORM_BLOCK:
		case DATA_FORMAT_EAC_R11_SNORM_BLOCK:
		case DATA_FORMAT_EAC_R11G11_UNORM_BLOCK:
		case DATA_FORMAT_EAC_R11G11_SNORM_BLOCK:
		// The ASTC values are flagged as uncertain by the original author.
		case DATA_FORMAT_ASTC_4x4_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_4x4_SRGB_BLOCK:
		// Unsupported ASTC footprints: reported as 4x4, not as their nominal size.
		case DATA_FORMAT_ASTC_5x4_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_5x4_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_5x5_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_5x5_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_6x5_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_6x5_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_6x6_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_6x6_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_8x5_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_8x5_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_8x6_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_8x6_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_10x5_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_10x5_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_10x6_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_10x6_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_10x8_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_10x8_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_10x10_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_10x10_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_12x10_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_12x10_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_12x12_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_12x12_SRGB_BLOCK:
			block_edge = 4;
			break;
		case DATA_FORMAT_ASTC_8x8_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_8x8_SRGB_BLOCK:
			block_edge = 8;
			break;
		default:
			break;
	}

	r_w = block_edge;
	r_h = block_edge;
}
uint32_t RenderingDeviceVulkan::get_compressed_image_format_block_byte_size(DataFormat p_format) {
	// Returns the number of bytes occupied by one compressed block of p_format.
	// Formats that are not listed (i.e. not handled as block-compressed here) yield 1.
	uint32_t block_bytes = 1;

	switch (p_format) {
		// 64-bit blocks.
		case DATA_FORMAT_BC1_RGB_UNORM_BLOCK:
		case DATA_FORMAT_BC1_RGB_SRGB_BLOCK:
		case DATA_FORMAT_BC1_RGBA_UNORM_BLOCK:
		case DATA_FORMAT_BC1_RGBA_SRGB_BLOCK:
		case DATA_FORMAT_BC4_UNORM_BLOCK:
		case DATA_FORMAT_BC4_SNORM_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
		case DATA_FORMAT_EAC_R11_UNORM_BLOCK:
		case DATA_FORMAT_EAC_R11_SNORM_BLOCK:
			block_bytes = 8;
			break;
		// 128-bit blocks.
		case DATA_FORMAT_BC2_UNORM_BLOCK:
		case DATA_FORMAT_BC2_SRGB_BLOCK:
		case DATA_FORMAT_BC3_UNORM_BLOCK:
		case DATA_FORMAT_BC3_SRGB_BLOCK:
		case DATA_FORMAT_BC5_UNORM_BLOCK:
		case DATA_FORMAT_BC5_SNORM_BLOCK:
		case DATA_FORMAT_BC6H_UFLOAT_BLOCK:
		case DATA_FORMAT_BC6H_SFLOAT_BLOCK:
		case DATA_FORMAT_BC7_UNORM_BLOCK:
		case DATA_FORMAT_BC7_SRGB_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
		case DATA_FORMAT_EAC_R11G11_UNORM_BLOCK:
		case DATA_FORMAT_EAC_R11G11_SNORM_BLOCK:
		// The ASTC values are flagged as uncertain by the original author.
		case DATA_FORMAT_ASTC_4x4_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_4x4_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_5x4_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_5x4_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_5x5_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_5x5_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_6x5_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_6x5_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_6x6_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_6x6_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_8x5_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_8x5_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_8x6_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_8x6_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_8x8_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_8x8_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_10x5_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_10x5_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_10x6_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_10x6_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_10x8_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_10x8_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_10x10_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_10x10_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_12x10_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_12x10_SRGB_BLOCK:
		case DATA_FORMAT_ASTC_12x12_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_12x12_SRGB_BLOCK:
			block_bytes = 16;
			break;
		default:
			break;
	}

	return block_bytes;
}
uint32_t RenderingDeviceVulkan::get_compressed_image_format_pixel_rshift(DataFormat p_format) {
	// Returns the right shift that get_image_format_required_size() applies to
	// (pixel count * pixel size) so formats using less than one byte per pixel
	// are sized correctly. Formats not listed need no shift.
	uint32_t shift = 0;

	switch (p_format) {
		// Half a byte per pixel.
		case DATA_FORMAT_BC1_RGB_UNORM_BLOCK:
		case DATA_FORMAT_BC1_RGB_SRGB_BLOCK:
		case DATA_FORMAT_BC1_RGBA_UNORM_BLOCK:
		case DATA_FORMAT_BC1_RGBA_SRGB_BLOCK:
		case DATA_FORMAT_BC4_UNORM_BLOCK:
		case DATA_FORMAT_BC4_SNORM_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
		case DATA_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
		case DATA_FORMAT_EAC_R11_UNORM_BLOCK:
		case DATA_FORMAT_EAC_R11_SNORM_BLOCK:
			shift = 1;
			break;
		// A quarter of a byte per pixel.
		case DATA_FORMAT_ASTC_8x8_UNORM_BLOCK:
		case DATA_FORMAT_ASTC_8x8_SRGB_BLOCK:
			shift = 2;
			break;
		default:
			break;
	}

	return shift;
}
bool RenderingDeviceVulkan::format_has_stencil(DataFormat p_format) {
	// True only for the stencil-only format and the combined depth-stencil formats.
	return p_format == DATA_FORMAT_S8_UINT ||
			p_format == DATA_FORMAT_D16_UNORM_S8_UINT ||
			p_format == DATA_FORMAT_D24_UNORM_S8_UINT ||
			p_format == DATA_FORMAT_D32_SFLOAT_S8_UINT;
}
uint32_t RenderingDeviceVulkan::get_image_format_required_size(DataFormat p_format, uint32_t p_width, uint32_t p_height, uint32_t p_depth, uint32_t p_mipmaps, uint32_t *r_blockw, uint32_t *r_blockh, uint32_t *r_depth) {
	// Computes the total byte size of an image with p_mipmaps mip levels.
	// The optional outputs receive the block-aligned width/height and the depth
	// of the LAST mip level processed. Fails (returning 0) when p_mipmaps is 0.
	ERR_FAIL_COND_V(p_mipmaps == 0, 0);

	const uint32_t bytes_per_pixel = get_image_format_pixel_size(p_format);
	const uint32_t size_rshift = get_compressed_image_format_pixel_rshift(p_format);

	uint32_t block_w = 0;
	uint32_t block_h = 0;
	get_compressed_image_format_block_dimensions(p_format, block_w, block_h);

	uint32_t mip_w = p_width;
	uint32_t mip_h = p_height;
	uint32_t mip_d = p_depth;
	uint32_t total_bytes = 0;

	for (uint32_t mip = 0; mip < p_mipmaps; mip++) {
		// Round each dimension up to the next multiple of the block size.
		const uint32_t excess_w = mip_w % block_w;
		const uint32_t excess_h = mip_h % block_h;
		const uint32_t aligned_w = excess_w != 0 ? mip_w + (block_w - excess_w) : mip_w;
		const uint32_t aligned_h = excess_h != 0 ? mip_h + (block_h - excess_h) : mip_h;

		// Bytes in one depth slice of this mip, then accumulated across all slices.
		const uint32_t slice_bytes = (aligned_w * aligned_h * bytes_per_pixel) >> size_rshift;
		total_bytes += slice_bytes * mip_d;

		if (r_blockw) {
			*r_blockw = aligned_w;
		}
		if (r_blockh) {
			*r_blockh = aligned_h;
		}
		if (r_depth) {
			*r_depth = mip_d;
		}

		// Next mip: halve each dimension, never going below one block (or one slice).
		mip_w = MAX(block_w, mip_w >> 1);
		mip_h = MAX(block_h, mip_h >> 1);
		mip_d = MAX(1u, mip_d >> 1);
	}

	return total_bytes;
}
uint32_t RenderingDeviceVulkan::get_image_required_mipmaps(uint32_t p_width, uint32_t p_height, uint32_t p_depth) {
	// Counts the levels of a full mip chain, i.e. how many halvings it takes for
	// all three dimensions to reach 1 (the base level is included in the count).
	// Format and block size are irrelevant: every format can go down to 1px,
	// even when its block is larger.
	uint32_t level_count = 1;
	uint32_t cur_w = p_width;
	uint32_t cur_h = p_height;
	uint32_t cur_d = p_depth;

	while (cur_w != 1 || cur_h != 1 || cur_d != 1) {
		cur_w = MAX(1u, cur_w >> 1);
		cur_h = MAX(1u, cur_h >> 1);
		cur_d = MAX(1u, cur_d >> 1);
		level_count++;
	}

	return level_count;
}
///////////////////////
// Translation table to Vulkan compare operators, sized by RenderingDevice::COMPARE_OP_MAX.
// NOTE(review): presumably indexed by the RenderingDevice compare operator enum value,
// so the entry order must mirror that enum's declaration order - confirm.
const VkCompareOp RenderingDeviceVulkan::compare_operators[RenderingDevice::COMPARE_OP_MAX] = {
VK_COMPARE_OP_NEVER,
VK_COMPARE_OP_LESS,
VK_COMPARE_OP_EQUAL,
VK_COMPARE_OP_LESS_OR_EQUAL,
VK_COMPARE_OP_GREATER,
VK_COMPARE_OP_NOT_EQUAL,
VK_COMPARE_OP_GREATER_OR_EQUAL,
VK_COMPARE_OP_ALWAYS
};
// Translation table to Vulkan stencil operations, sized by RenderingDevice::STENCIL_OP_MAX.
// NOTE(review): presumably indexed by the RenderingDevice stencil operation enum value,
// so the entry order must mirror that enum's declaration order - confirm.
const VkStencilOp RenderingDeviceVulkan::stencil_operations[RenderingDevice::STENCIL_OP_MAX] = {
VK_STENCIL_OP_KEEP,
VK_STENCIL_OP_ZERO,
VK_STENCIL_OP_REPLACE,
VK_STENCIL_OP_INCREMENT_AND_CLAMP,
VK_STENCIL_OP_DECREMENT_AND_CLAMP,
VK_STENCIL_OP_INVERT,
VK_STENCIL_OP_INCREMENT_AND_WRAP,
VK_STENCIL_OP_DECREMENT_AND_WRAP
};
// Translation table to Vulkan sample-count flag bits (1 through 64 samples, in
// increasing order), sized by RenderingDevice::TEXTURE_SAMPLES_MAX.
// NOTE(review): presumably indexed by the RenderingDevice texture samples enum value - confirm.
const VkSampleCountFlagBits RenderingDeviceVulkan::rasterization_sample_count[RenderingDevice::TEXTURE_SAMPLES_MAX] = {
VK_SAMPLE_COUNT_1_BIT,
VK_SAMPLE_COUNT_2_BIT,
VK_SAMPLE_COUNT_4_BIT,
VK_SAMPLE_COUNT_8_BIT,
VK_SAMPLE_COUNT_16_BIT,
VK_SAMPLE_COUNT_32_BIT,
VK_SAMPLE_COUNT_64_BIT,
};
// Translation table to Vulkan logic operations, sized by RenderingDevice::LOGIC_OP_MAX.
// NOTE(review): presumably indexed by the RenderingDevice logic operation enum value,
// so the entry order must mirror that enum's declaration order - confirm.
const VkLogicOp RenderingDeviceVulkan::logic_operations[RenderingDevice::LOGIC_OP_MAX] = {
VK_LOGIC_OP_CLEAR,
VK_LOGIC_OP_AND,
VK_LOGIC_OP_AND_REVERSE,
VK_LOGIC_OP_COPY,
VK_LOGIC_OP_AND_INVERTED,
VK_LOGIC_OP_NO_OP,
VK_LOGIC_OP_XOR,
VK_LOGIC_OP_OR,
VK_LOGIC_OP_NOR,
VK_LOGIC_OP_EQUIVALENT,
VK_LOGIC_OP_INVERT,
VK_LOGIC_OP_OR_REVERSE,
VK_LOGIC_OP_COPY_INVERTED,
VK_LOGIC_OP_OR_INVERTED,
VK_LOGIC_OP_NAND,
VK_LOGIC_OP_SET
};
// Translation table to Vulkan blend factors, sized by RenderingDevice::BLEND_FACTOR_MAX.
// NOTE(review): presumably indexed by the RenderingDevice blend factor enum value,
// so the entry order must mirror that enum's declaration order - confirm.
const VkBlendFactor RenderingDeviceVulkan::blend_factors[RenderingDevice::BLEND_FACTOR_MAX] = {
VK_BLEND_FACTOR_ZERO,
VK_BLEND_FACTOR_ONE,
VK_BLEND_FACTOR_SRC_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR,
VK_BLEND_FACTOR_DST_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR,
VK_BLEND_FACTOR_SRC_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
VK_BLEND_FACTOR_DST_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA,
VK_BLEND_FACTOR_CONSTANT_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
VK_BLEND_FACTOR_CONSTANT_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA,
VK_BLEND_FACTOR_SRC_ALPHA_SATURATE,
VK_BLEND_FACTOR_SRC1_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR,
VK_BLEND_FACTOR_SRC1_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA
};
// Translation table to Vulkan blend operations, sized by RenderingDevice::BLEND_OP_MAX.
// NOTE(review): presumably indexed by the RenderingDevice blend operation enum value,
// so the entry order must mirror that enum's declaration order - confirm.
const VkBlendOp RenderingDeviceVulkan::blend_operations[RenderingDevice::BLEND_OP_MAX] = {
VK_BLEND_OP_ADD,
VK_BLEND_OP_SUBTRACT,
VK_BLEND_OP_REVERSE_SUBTRACT,
VK_BLEND_OP_MIN,
VK_BLEND_OP_MAX
};
// Translation table to Vulkan sampler address modes, sized by RenderingDevice::SAMPLER_REPEAT_MODE_MAX.
// NOTE(review): presumably indexed by the RenderingDevice sampler repeat mode enum value,
// so the entry order must mirror that enum's declaration order - confirm.
const VkSamplerAddressMode RenderingDeviceVulkan::address_modes[RenderingDevice::SAMPLER_REPEAT_MODE_MAX] = {
VK_SAMPLER_ADDRESS_MODE_REPEAT,
VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE
};
// Translation table to Vulkan sampler border colors, sized by RenderingDevice::SAMPLER_BORDER_COLOR_MAX.
// Entries alternate between the float and integer variant of each color.
// NOTE(review): presumably indexed by the RenderingDevice sampler border color enum value - confirm.
const VkBorderColor RenderingDeviceVulkan::sampler_border_colors[RenderingDevice::SAMPLER_BORDER_COLOR_MAX] = {
VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
VK_BORDER_COLOR_INT_TRANSPARENT_BLACK,
VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
VK_BORDER_COLOR_INT_OPAQUE_BLACK,
VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE,
VK_BORDER_COLOR_INT_OPAQUE_WHITE
};
// Translation table from a texture type to the underlying Vulkan image type,
// sized by RenderingDevice::TEXTURE_TYPE_MAX. texture_create() indexes it with
// p_format.texture_type. Several texture types share one Vulkan image type,
// which is why 1D and 2D appear more than once.
// NOTE(review): presumably the order is 1D, 2D, 3D, cube, 1D array, 2D array,
// cube array - confirm against the texture type enum declaration.
const VkImageType RenderingDeviceVulkan::vulkan_image_type[RenderingDevice::TEXTURE_TYPE_MAX] = {
VK_IMAGE_TYPE_1D,
VK_IMAGE_TYPE_2D,
VK_IMAGE_TYPE_3D,
VK_IMAGE_TYPE_2D,
VK_IMAGE_TYPE_1D,
VK_IMAGE_TYPE_2D,
VK_IMAGE_TYPE_2D
};
/***************************/
/**** BUFFER MANAGEMENT ****/
/***************************/
// Creates a Vulkan buffer of p_size bytes through VMA and records it in p_buffer.
//
// p_usage is forwarded as the VkBufferCreateInfo usage flags; p_mem_usage and
// p_mem_flags are forwarded to the VMA allocation request. Requests no larger
// than SMALL_ALLOCATION_MAX_SIZE are routed to the small-allocations pool of the
// memory type VMA selects for them.
//
// Returns OK on success (and adds p_size to the buffer_memory counter), or
// ERR_CANT_CREATE if vmaCreateBuffer() fails.
Error RenderingDeviceVulkan::_buffer_allocate(Buffer *p_buffer, uint32_t p_size, uint32_t p_usage, VmaMemoryUsage p_mem_usage, VmaAllocationCreateFlags p_mem_flags) {
	VkBufferCreateInfo bufferInfo = {};
	bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	bufferInfo.pNext = nullptr;
	bufferInfo.flags = 0;
	bufferInfo.size = p_size;
	bufferInfo.usage = p_usage;
	bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	bufferInfo.queueFamilyIndexCount = 0;
	bufferInfo.pQueueFamilyIndices = nullptr;

	// Value-initialize so that members not assigned below (notably `priority`)
	// are zero instead of indeterminate when the struct is handed to VMA.
	VmaAllocationCreateInfo allocInfo = {};
	allocInfo.flags = p_mem_flags;
	allocInfo.usage = p_mem_usage;
	allocInfo.requiredFlags = 0;
	allocInfo.preferredFlags = 0;
	allocInfo.memoryTypeBits = 0;
	allocInfo.pool = nullptr;
	allocInfo.pUserData = nullptr;

	if (p_size <= SMALL_ALLOCATION_MAX_SIZE) {
		// Small buffers share a dedicated pool per memory type.
		uint32_t mem_type_index = 0;
		vmaFindMemoryTypeIndexForBufferInfo(allocator, &bufferInfo, &allocInfo, &mem_type_index);
		allocInfo.pool = _find_or_create_small_allocs_pool(mem_type_index);
	}

	VkResult err = vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &p_buffer->buffer, &p_buffer->allocation, nullptr);
	ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "Can't create buffer of size: " + itos(p_size) + ", error " + itos(err) + ".");

	p_buffer->size = p_size;
	// Descriptor info covering the whole buffer.
	p_buffer->buffer_info.buffer = p_buffer->buffer;
	p_buffer->buffer_info.offset = 0;
	p_buffer->buffer_info.range = p_size;
	p_buffer->usage = p_usage;

	buffer_memory += p_size;

	return OK;
}
Error RenderingDeviceVulkan::_buffer_free(Buffer *p_buffer) {
	// A size of zero marks a buffer that was never allocated or was already freed.
	ERR_FAIL_COND_V(p_buffer->size == 0, ERR_INVALID_PARAMETER);

	// Release the Vulkan buffer together with its VMA allocation and update the
	// running total of buffer memory.
	vmaDestroyBuffer(allocator, p_buffer->buffer, p_buffer->allocation);
	buffer_memory -= p_buffer->size;

	// Reset the bookkeeping so the struct reads as "empty" from now on.
	p_buffer->size = 0;
	p_buffer->allocation = nullptr;
	p_buffer->buffer = VK_NULL_HANDLE;

	return OK;
}
// Creates one more staging block of staging_buffer_block_size bytes, usable as a
// transfer source and allocated with sequential-write host access, then inserts
// it into staging_buffer_blocks at index staging_buffer_current.
//
// The new block starts with frame_used and fill_amount set to 0.
// Returns OK on success, or ERR_CANT_CREATE if vmaCreateBuffer() fails.
Error RenderingDeviceVulkan::_insert_staging_block() {
	VkBufferCreateInfo bufferInfo = {};
	bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	bufferInfo.pNext = nullptr;
	bufferInfo.flags = 0;
	bufferInfo.size = staging_buffer_block_size;
	bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
	bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	bufferInfo.queueFamilyIndexCount = 0;
	bufferInfo.pQueueFamilyIndices = nullptr;

	// Value-initialize so that members not assigned below (notably `priority`)
	// are zero instead of indeterminate when the struct is handed to VMA.
	VmaAllocationCreateInfo allocInfo = {};
	allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
	allocInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
	allocInfo.requiredFlags = 0;
	allocInfo.preferredFlags = 0;
	allocInfo.memoryTypeBits = 0;
	allocInfo.pool = nullptr;
	allocInfo.pUserData = nullptr;

	StagingBufferBlock block;

	VkResult err = vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &block.buffer, &block.allocation, nullptr);
	ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "vmaCreateBuffer failed with error " + itos(err) + ".");

	block.frame_used = 0;
	block.fill_amount = 0;

	staging_buffer_blocks.insert(staging_buffer_current, block);
	return OK;
}
// Picks a staging block (and an offset inside it) that can receive up to p_amount bytes.
//
// p_amount:         Number of bytes the caller wants to stage.
// p_required_align: Alignment of the returned offset. Used as a modulo divisor, so it must be non-zero.
// r_alloc_offset:   Receives the byte offset inside staging_buffer_blocks[staging_buffer_current] to write at.
// r_alloc_size:     Receives the number of bytes granted. Equals p_amount unless the request was segmented.
// p_can_segment:    When true, a partial grant (a multiple of p_required_align) is allowed if the whole
//                   request does not fit in the remainder of the current block.
//
// Side effects: may advance staging_buffer_current, insert new staging blocks, call _flush() and reset
// blocks; always sets staging_buffer_used on success. fill_amount is only ever reset here, it is not
// advanced by the granted size (_buffer_update() does that after copying).
//
// Returns OK, or the error reported by _insert_staging_block().
Error RenderingDeviceVulkan::_staging_buffer_allocate(uint32_t p_amount, uint32_t p_required_align, uint32_t &r_alloc_offset, uint32_t &r_alloc_size, bool p_can_segment) {
// Determine a block to use.
r_alloc_size = p_amount;
while (true) {
r_alloc_offset = 0;
// See if we can use current block.
if (staging_buffer_blocks[staging_buffer_current].frame_used == frames_drawn) {
// We used this block this frame, let's see if there is still room.
uint32_t write_from = staging_buffer_blocks[staging_buffer_current].fill_amount;
{
// Round the write position up to the next multiple of the required alignment.
uint32_t align_remainder = write_from % p_required_align;
if (align_remainder != 0) {
write_from += p_required_align - align_remainder;
}
}
// Signed on purpose: aligning may push write_from past the end of the block.
int32_t available_bytes = int32_t(staging_buffer_block_size) - int32_t(write_from);
// Strict comparison: a request that would exactly fill the block is treated as not fitting.
if ((int32_t)p_amount < available_bytes) {
// All is good, we should be ok, all will fit.
r_alloc_offset = write_from;
} else if (p_can_segment && available_bytes >= (int32_t)p_required_align) {
// Ok all won't fit but at least we can fit a chunk.
// All is good, update what needs to be written to.
r_alloc_offset = write_from;
// Grant the largest aligned amount that fits in the remaining space.
r_alloc_size = available_bytes - (available_bytes % p_required_align);
} else {
// Can't fit it into this buffer.
// Will need to try next buffer.
staging_buffer_current = (staging_buffer_current + 1) % staging_buffer_blocks.size();
// Before doing anything, though, let's check that we didn't manage to fill all blocks.
// Possible in a single frame.
if (staging_buffer_blocks[staging_buffer_current].frame_used == frames_drawn) {
// Guess we did.. ok, let's see if we can insert a new block.
if ((uint64_t)staging_buffer_blocks.size() * staging_buffer_block_size < staging_buffer_max_size) {
// We can, so we are safe.
Error err = _insert_staging_block();
if (err) {
return err;
}
// Claim for this frame.
staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn;
} else {
// Ok, worst case scenario, all the staging buffers belong to this frame
// and this frame is not even done.
// If this is the main thread, it means the user is likely loading a lot of resources at once.
// Otherwise, the thread should just be blocked until the next frame (currently unimplemented).
if (false) { // Separate thread from render.
//block_until_next_frame()
continue;
} else {
// Flush EVERYTHING including setup commands. IF not immediate, also need to flush the draw commands.
_flush(true);
// Clear the whole staging buffer.
for (int i = 0; i < staging_buffer_blocks.size(); i++) {
staging_buffer_blocks.write[i].frame_used = 0;
staging_buffer_blocks.write[i].fill_amount = 0;
}
// Claim current.
staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn;
}
}
} else {
// Not from current frame, so continue and try again.
continue;
}
}
} else if (staging_buffer_blocks[staging_buffer_current].frame_used <= frames_drawn - frame_count) {
// This is an old block, which was already processed, let's reuse.
staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn;
staging_buffer_blocks.write[staging_buffer_current].fill_amount = 0;
} else {
// This block may still be in use, let's not touch it unless we have to, so.. can we create a new one?
if ((uint64_t)staging_buffer_blocks.size() * staging_buffer_block_size < staging_buffer_max_size) {
// We are still allowed to create a new block, so let's do that and insert it for current pos.
Error err = _insert_staging_block();
if (err) {
return err;
}
// Claim for this frame.
staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn;
} else {
// Oops, we are out of room and we can't create more.
// Let's flush older frames.
// The logic here is that if a game is loading a lot of data from the main thread, it will need to be stalled anyway.
// If loading from a separate thread, we can block that thread until next frame when more room is made (not currently implemented, though).
if (false) {
// Separate thread from render.
//block_until_next_frame()
continue; // And try again.
} else {
_flush(false);
// Walk the blocks starting at the current one (wrapping around) and reset them.
for (int i = 0; i < staging_buffer_blocks.size(); i++) {
// Clear all blocks but the ones from this frame.
int block_idx = (i + staging_buffer_current) % staging_buffer_blocks.size();
if (staging_buffer_blocks[block_idx].frame_used == frames_drawn) {
break; // Ok, we reached something from this frame, abort.
}
staging_buffer_blocks.write[block_idx].frame_used = 0;
staging_buffer_blocks.write[block_idx].fill_amount = 0;
}
// Claim for current frame.
staging_buffer_blocks.write[staging_buffer_current].frame_used = frames_drawn;
}
}
}
// All was good, break.
break;
}
// Record that the staging buffer has been handed out.
staging_buffer_used = true;
return OK;
}
Error RenderingDeviceVulkan::_buffer_update(Buffer *p_buffer, size_t p_offset, const uint8_t *p_data, size_t p_data_size, bool p_use_draw_command_buffer, uint32_t p_required_align) {
	// Uploads p_data_size bytes from p_data into p_buffer at p_offset by way of the
	// staging buffer. The staging allocator may grant less than requested, so the
	// upload is performed as a sequence of chunks until nothing is left.
	size_t bytes_left = p_data_size;
	size_t bytes_done = 0;

	while (bytes_left > 0) {
		// Reserve staging space; never ask for more than a single block can hold.
		uint32_t chunk_offset;
		uint32_t chunk_size;
		Error alloc_err = _staging_buffer_allocate(MIN(bytes_left, staging_buffer_block_size), p_required_align, chunk_offset, chunk_size);
		if (alloc_err) {
			return alloc_err;
		}

		// Map the staging block that was selected (it's CPU and coherent).
		void *mapped_block = nullptr;
		VkResult map_err = vmaMapMemory(allocator, staging_buffer_blocks[staging_buffer_current].allocation, &mapped_block);
		ERR_FAIL_COND_V_MSG(map_err, ERR_CANT_CREATE, "vmaMapMemory failed with error " + itos(map_err) + ".");

		// Write this chunk into the staging block, then unmap it again.
		memcpy(((uint8_t *)mapped_block) + chunk_offset, p_data + bytes_done, chunk_size);
		vmaUnmapMemory(allocator, staging_buffer_blocks[staging_buffer_current].allocation);

		// Record the staging -> destination copy on the requested command buffer.
		VkBufferCopy copy_region;
		copy_region.size = chunk_size;
		copy_region.srcOffset = chunk_offset;
		copy_region.dstOffset = bytes_done + p_offset;
		vkCmdCopyBuffer(p_use_draw_command_buffer ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, staging_buffer_blocks[staging_buffer_current].buffer, p_buffer->buffer, 1, &copy_region);

		// Mark the staged range as consumed and move on to the next chunk.
		staging_buffer_blocks.write[staging_buffer_current].fill_amount = chunk_offset + chunk_size;
		bytes_done += chunk_size;
		bytes_left -= chunk_size;
	}

	return OK;
}
void RenderingDeviceVulkan::_memory_barrier(VkPipelineStageFlags p_src_stage_mask, VkPipelineStageFlags p_dst_stage_mask, VkAccessFlags p_src_access, VkAccessFlags p_dst_access, bool p_sync_with_draw) {
	// Records a global memory barrier on the draw command buffer (p_sync_with_draw)
	// or on the setup command buffer of the current frame.
	// A barrier with an empty stage mask on either side is invalid, so nothing is recorded.
	if (p_src_stage_mask == 0 || p_dst_stage_mask == 0) {
		return;
	}

	VkMemoryBarrier barrier;
	barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
	barrier.pNext = nullptr;
	barrier.srcAccessMask = p_src_access;
	barrier.dstAccessMask = p_dst_access;

	vkCmdPipelineBarrier(p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, p_src_stage_mask, p_dst_stage_mask, 0, 1, &barrier, 0, nullptr, 0, nullptr);
}
void RenderingDeviceVulkan::_full_barrier(bool p_sync_with_draw) {
	// Used for debug: an all-commands to all-commands barrier in which the same
	// set of access types is used as both the source and the destination mask.
	const auto every_access =
			VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
			VK_ACCESS_INDEX_READ_BIT |
			VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
			VK_ACCESS_UNIFORM_READ_BIT |
			VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
			VK_ACCESS_SHADER_READ_BIT |
			VK_ACCESS_SHADER_WRITE_BIT |
			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
			VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
			VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
			VK_ACCESS_TRANSFER_READ_BIT |
			VK_ACCESS_TRANSFER_WRITE_BIT |
			VK_ACCESS_HOST_READ_BIT |
			VK_ACCESS_HOST_WRITE_BIT;

	_memory_barrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, every_access, every_access, p_sync_with_draw);
}
void RenderingDeviceVulkan::_buffer_memory_barrier(VkBuffer buffer, uint64_t p_from, uint64_t p_size, VkPipelineStageFlags p_src_stage_mask, VkPipelineStageFlags p_dst_stage_mask, VkAccessFlags p_src_access, VkAccessFlags p_dst_access, bool p_sync_with_draw) {
	// Records a barrier restricted to the range [p_from, p_from + p_size) of
	// `buffer`, on the draw command buffer (p_sync_with_draw) or on the setup
	// command buffer of the current frame.
	VkBufferMemoryBarrier range_barrier;
	range_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
	range_barrier.pNext = nullptr;

	// What must be made available, and to which kind of access.
	range_barrier.srcAccessMask = p_src_access;
	range_barrier.dstAccessMask = p_dst_access;

	// No queue family ownership transfer takes place.
	range_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	range_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

	// Affected buffer range.
	range_barrier.buffer = buffer;
	range_barrier.offset = p_from;
	range_barrier.size = p_size;

	vkCmdPipelineBarrier(p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, p_src_stage_mask, p_dst_stage_mask, 0, 0, nullptr, 1, &range_barrier, 0, nullptr);
}
/*****************/
/**** TEXTURE ****/
/*****************/
RID RenderingDeviceVulkan::texture_create(const TextureFormat &p_format, const TextureView &p_view, const Vector<Vector<uint8_t>> &p_data) {
_THREAD_SAFE_METHOD_
VkImageCreateInfo image_create_info;
image_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
image_create_info.pNext = nullptr;
image_create_info.flags = 0;
VkImageFormatListCreateInfoKHR format_list_create_info; // Keep out of the if, needed for creation.
Vector<VkFormat> allowed_formats; // Keep out of the if, needed for creation.
if (p_format.shareable_formats.size()) {
image_create_info.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
if (context->is_device_extension_enabled(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME)) {
for (int i = 0; i < p_format.shareable_formats.size(); i++) {
allowed_formats.push_back(vulkan_formats[p_format.shareable_formats[i]]);
}
format_list_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR;
format_list_create_info.pNext = nullptr;
format_list_create_info.viewFormatCount = allowed_formats.size();
format_list_create_info.pViewFormats = allowed_formats.ptr();
image_create_info.pNext = &format_list_create_info;
ERR_FAIL_COND_V_MSG(p_format.shareable_formats.find(p_format.format) == -1, RID(),
"If supplied a list of shareable formats, the current format must be present in the list");
ERR_FAIL_COND_V_MSG(p_view.format_override != DATA_FORMAT_MAX && p_format.shareable_formats.find(p_view.format_override) == -1, RID(),
"If supplied a list of shareable formats, the current view format override must be present in the list");
}
}
if (p_format.texture_type == TEXTURE_TYPE_CUBE || p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY) {
image_create_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
}
/*if (p_format.type == TEXTURE_TYPE_2D || p_format.type == TEXTURE_TYPE_2D_ARRAY) {
image_create_info.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
}*/
ERR_FAIL_INDEX_V(p_format.texture_type, TEXTURE_TYPE_MAX, RID());
image_create_info.imageType = vulkan_image_type[p_format.texture_type];
ERR_FAIL_COND_V_MSG(p_format.width < 1, RID(), "Width must be equal or greater than 1 for all textures");
image_create_info.format = vulkan_formats[p_format.format];
image_create_info.extent.width = p_format.width;
if (image_create_info.imageType == VK_IMAGE_TYPE_3D || image_create_info.imageType == VK_IMAGE_TYPE_2D) {
ERR_FAIL_COND_V_MSG(p_format.height < 1, RID(), "Height must be equal or greater than 1 for 2D and 3D textures");
image_create_info.extent.height = p_format.height;
} else {
image_create_info.extent.height = 1;
}
if (image_create_info.imageType == VK_IMAGE_TYPE_3D) {
ERR_FAIL_COND_V_MSG(p_format.depth < 1, RID(), "Depth must be equal or greater than 1 for 3D textures");
image_create_info.extent.depth = p_format.depth;
} else {
image_create_info.extent.depth = 1;
}
ERR_FAIL_COND_V(p_format.mipmaps < 1, RID());
image_create_info.mipLevels = p_format.mipmaps;
if (p_format.texture_type == TEXTURE_TYPE_1D_ARRAY || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY || p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY || p_format.texture_type == TEXTURE_TYPE_CUBE) {
ERR_FAIL_COND_V_MSG(p_format.array_layers < 1, RID(),
"Amount of layers must be equal or greater than 1 for arrays and cubemaps.");
ERR_FAIL_COND_V_MSG((p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY || p_format.texture_type == TEXTURE_TYPE_CUBE) && (p_format.array_layers % 6) != 0, RID(),
"Cubemap and cubemap array textures must provide a layer number that is multiple of 6");
image_create_info.arrayLayers = p_format.array_layers;
} else {
image_create_info.arrayLayers = 1;
}
ERR_FAIL_INDEX_V(p_format.samples, TEXTURE_SAMPLES_MAX, RID());
image_create_info.samples = _ensure_supported_sample_count(p_format.samples);
image_create_info.tiling = (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
// Usage.
image_create_info.usage = 0;
if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
}
if (p_format.usage_bits & TEXTURE_USAGE_CAN_UPDATE_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_FROM_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
}
if (p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_TO_BIT) {
image_create_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
}
image_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
image_create_info.queueFamilyIndexCount = 0;
image_create_info.pQueueFamilyIndices = nullptr;
image_create_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
uint32_t required_mipmaps = get_image_required_mipmaps(image_create_info.extent.width, image_create_info.extent.height, image_create_info.extent.depth);
ERR_FAIL_COND_V_MSG(required_mipmaps < image_create_info.mipLevels, RID(),
"Too many mipmaps requested for texture format and dimensions (" + itos(image_create_info.mipLevels) + "), maximum allowed: (" + itos(required_mipmaps) + ").");
if (p_data.size()) {
ERR_FAIL_COND_V_MSG(!(p_format.usage_bits & TEXTURE_USAGE_CAN_UPDATE_BIT), RID(),
"Texture needs the TEXTURE_USAGE_CAN_UPDATE_BIT usage flag in order to be updated at initialization or later");
int expected_images = image_create_info.arrayLayers;
ERR_FAIL_COND_V_MSG(p_data.size() != expected_images, RID(),
"Default supplied data for image format is of invalid length (" + itos(p_data.size()) + "), should be (" + itos(expected_images) + ").");
for (uint32_t i = 0; i < image_create_info.arrayLayers; i++) {
uint32_t required_size = get_image_format_required_size(p_format.format, image_create_info.extent.width, image_create_info.extent.height, image_create_info.extent.depth, image_create_info.mipLevels);
ERR_FAIL_COND_V_MSG((uint32_t)p_data[i].size() != required_size, RID(),
"Data for slice index " + itos(i) + " (mapped to layer " + itos(i) + ") differs in size (supplied: " + itos(p_data[i].size()) + ") than what is required by the format (" + itos(required_size) + ").");
}
}
{
// Validate that this image is supported for the intended use.
VkFormatProperties properties;
vkGetPhysicalDeviceFormatProperties(context->get_physical_device(), image_create_info.format, &properties);
VkFormatFeatureFlags flags;
String format_text = "'" + String(named_formats[p_format.format]) + "'";
if (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) {
flags = properties.linearTilingFeatures;
format_text += " (with CPU read bit)";
} else {
flags = properties.optimalTilingFeatures;
}
if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT && !(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as sampling texture.");
}
if (p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT && !(flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as color attachment.");
}
if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT && !(flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as depth-stencil attachment.");
}
if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as storage image.");
}
if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_ATOMIC_BIT && !(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT)) {
ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as atomic storage image.");
}
// Validation via VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR fails if VRS attachment is not supported.
if (p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT && p_format.format != DATA_FORMAT_R8_UINT) {
ERR_FAIL_V_MSG(RID(), "Format " + format_text + " does not support usage as VRS attachment.");
}
}
// Some view validation.
if (p_view.format_override != DATA_FORMAT_MAX) {
ERR_FAIL_INDEX_V(p_view.format_override, DATA_FORMAT_MAX, RID());
}
ERR_FAIL_INDEX_V(p_view.swizzle_r, TEXTURE_SWIZZLE_MAX, RID());
ERR_FAIL_INDEX_V(p_view.swizzle_g, TEXTURE_SWIZZLE_MAX, RID());
ERR_FAIL_INDEX_V(p_view.swizzle_b, TEXTURE_SWIZZLE_MAX, RID());
ERR_FAIL_INDEX_V(p_view.swizzle_a, TEXTURE_SWIZZLE_MAX, RID());
// Allocate memory.
uint32_t width, height;
uint32_t image_size = get_image_format_required_size(p_format.format, p_format.width, p_format.height, p_format.depth, p_format.mipmaps, &width, &height);
VmaAllocationCreateInfo allocInfo;
allocInfo.flags = (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT : 0;
allocInfo.pool = nullptr;
allocInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
allocInfo.requiredFlags = 0;
allocInfo.preferredFlags = 0;
allocInfo.memoryTypeBits = 0;
allocInfo.pUserData = nullptr;
if (image_size <= SMALL_ALLOCATION_MAX_SIZE) {
uint32_t mem_type_index = 0;
vmaFindMemoryTypeIndexForImageInfo(allocator, &image_create_info, &allocInfo, &mem_type_index);
allocInfo.pool = _find_or_create_small_allocs_pool(mem_type_index);
}
Texture texture;
VkResult err = vmaCreateImage(allocator, &image_create_info, &allocInfo, &texture.image, &texture.allocation, &texture.allocation_info);
ERR_FAIL_COND_V_MSG(err, RID(), "vmaCreateImage failed with error " + itos(err) + ".");
image_memory += texture.allocation_info.size;
texture.type = p_format.texture_type;
texture.format = p_format.format;
texture.width = image_create_info.extent.width;
texture.height = image_create_info.extent.height;
texture.depth = image_create_info.extent.depth;
texture.layers = image_create_info.arrayLayers;
texture.mipmaps = image_create_info.mipLevels;
texture.base_mipmap = 0;
texture.base_layer = 0;
texture.is_resolve_buffer = p_format.is_resolve_buffer;
texture.usage_flags = p_format.usage_bits;
texture.samples = p_format.samples;
texture.allowed_shared_formats = p_format.shareable_formats;
// Set base layout based on usage priority.
if (p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT) {
// First priority, readable.
texture.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
} else if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT) {
// Second priority, storage.
texture.layout = VK_IMAGE_LAYOUT_GENERAL;
} else if (p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
// Third priority, color or depth.
texture.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
} else if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
texture.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
} else {
texture.layout = VK_IMAGE_LAYOUT_GENERAL;
}
if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
texture.read_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
texture.barrier_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
if (format_has_stencil(p_format.format)) {
texture.barrier_aspect_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
} else {
texture.read_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
texture.barrier_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
}
texture.bound = false;
// Create view.
VkImageViewCreateInfo image_view_create_info;
image_view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
image_view_create_info.pNext = nullptr;
image_view_create_info.flags = 0;
image_view_create_info.image = texture.image;
static const VkImageViewType view_types[TEXTURE_TYPE_MAX] = {
VK_IMAGE_VIEW_TYPE_1D,
VK_IMAGE_VIEW_TYPE_2D,
VK_IMAGE_VIEW_TYPE_3D,
VK_IMAGE_VIEW_TYPE_CUBE,
VK_IMAGE_VIEW_TYPE_1D_ARRAY,
VK_IMAGE_VIEW_TYPE_2D_ARRAY,
VK_IMAGE_VIEW_TYPE_CUBE_ARRAY,
};
image_view_create_info.viewType = view_types[p_format.texture_type];
if (p_view.format_override == DATA_FORMAT_MAX) {
image_view_create_info.format = image_create_info.format;
} else {
image_view_create_info.format = vulkan_formats[p_view.format_override];
}
static const VkComponentSwizzle component_swizzles[TEXTURE_SWIZZLE_MAX] = {
VK_COMPONENT_SWIZZLE_IDENTITY,
VK_COMPONENT_SWIZZLE_ZERO,
VK_COMPONENT_SWIZZLE_ONE,
VK_COMPONENT_SWIZZLE_R,
VK_COMPONENT_SWIZZLE_G,
VK_COMPONENT_SWIZZLE_B,
VK_COMPONENT_SWIZZLE_A
};
image_view_create_info.components.r = component_swizzles[p_view.swizzle_r];
image_view_create_info.components.g = component_swizzles[p_view.swizzle_g];
image_view_create_info.components.b = component_swizzles[p_view.swizzle_b];
image_view_create_info.components.a = component_swizzles[p_view.swizzle_a];
image_view_create_info.subresourceRange.baseMipLevel = 0;
image_view_create_info.subresourceRange.levelCount = image_create_info.mipLevels;
image_view_create_info.subresourceRange.baseArrayLayer = 0;
image_view_create_info.subresourceRange.layerCount = image_create_info.arrayLayers;
if (p_format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
} else {
image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
}
err = vkCreateImageView(device, &image_view_create_info, nullptr, &texture.view);
if (err) {
vmaDestroyImage(allocator, texture.image, texture.allocation);
ERR_FAIL_V_MSG(RID(), "vkCreateImageView failed with error " + itos(err) + ".");
}
// Barrier to set layout.
{
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = 0;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
image_memory_barrier.newLayout = texture.layout;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = texture.image;
image_memory_barrier.subresourceRange.aspectMask = texture.barrier_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = 0;
image_memory_barrier.subresourceRange.levelCount = image_create_info.mipLevels;
image_memory_barrier.subresourceRange.baseArrayLayer = 0;
image_memory_barrier.subresourceRange.layerCount = image_create_info.arrayLayers;
vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
}
RID id = texture_owner.make_rid(texture);
#ifdef DEV_ENABLED
set_resource_name(id, "RID:" + itos(id.get_id()));
#endif
if (p_data.size()) {
for (uint32_t i = 0; i < image_create_info.arrayLayers; i++) {
_texture_update(id, i, p_data[i], RD::BARRIER_MASK_ALL_BARRIERS, true);
}
}
return id;
}
// Creates a new texture RID that shares the VkImage of an existing texture but has its own image view.
//
// p_view: format override and per-channel swizzles applied to the new view.
// p_with_texture: texture to share. If it is itself a share, the texture that owns the image is used instead.
//
// The new Texture entry starts as a copy of the owner's description; only `view` and `owner` are replaced.
// Returns the new RID (registered as a dependency of the owner), or an invalid RID() on any failure.
RID RenderingDeviceVulkan::texture_create_shared(const TextureView &p_view, RID p_with_texture) {
	_THREAD_SAFE_METHOD_

	Texture *src_texture = texture_owner.get_or_null(p_with_texture);
	ERR_FAIL_COND_V(!src_texture, RID());

	if (src_texture->owner.is_valid()) { // Ahh this is a share.
		p_with_texture = src_texture->owner;
		src_texture = texture_owner.get_or_null(src_texture->owner);
		ERR_FAIL_COND_V(!src_texture, RID()); // This is a bug.
	}

	// Some view validation. The swizzles index a fixed-size lookup table below, so reject
	// out-of-range values up front (same checks texture_create() performs).
	ERR_FAIL_INDEX_V(p_view.swizzle_r, TEXTURE_SWIZZLE_MAX, RID());
	ERR_FAIL_INDEX_V(p_view.swizzle_g, TEXTURE_SWIZZLE_MAX, RID());
	ERR_FAIL_INDEX_V(p_view.swizzle_b, TEXTURE_SWIZZLE_MAX, RID());
	ERR_FAIL_INDEX_V(p_view.swizzle_a, TEXTURE_SWIZZLE_MAX, RID());

	// Create view.

	Texture texture = *src_texture;

	VkImageViewCreateInfo image_view_create_info;
	image_view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
	image_view_create_info.pNext = nullptr;
	image_view_create_info.flags = 0;
	image_view_create_info.image = texture.image;

	static const VkImageViewType view_types[TEXTURE_TYPE_MAX] = {
		VK_IMAGE_VIEW_TYPE_1D,
		VK_IMAGE_VIEW_TYPE_2D,
		VK_IMAGE_VIEW_TYPE_3D,
		VK_IMAGE_VIEW_TYPE_CUBE,
		VK_IMAGE_VIEW_TYPE_1D_ARRAY,
		VK_IMAGE_VIEW_TYPE_2D_ARRAY,
		VK_IMAGE_VIEW_TYPE_CUBE_ARRAY,
	};

	image_view_create_info.viewType = view_types[texture.type];
	if (p_view.format_override == DATA_FORMAT_MAX || p_view.format_override == texture.format) {
		// No override (or an override equal to the native format): view the image as-is.
		image_view_create_info.format = vulkan_formats[texture.format];
	} else {
		ERR_FAIL_INDEX_V(p_view.format_override, DATA_FORMAT_MAX, RID());

		ERR_FAIL_COND_V_MSG(texture.allowed_shared_formats.find(p_view.format_override) == -1, RID(),
				"Format override is not in the list of allowed shareable formats for original texture.");
		image_view_create_info.format = vulkan_formats[p_view.format_override];
	}

	static const VkComponentSwizzle component_swizzles[TEXTURE_SWIZZLE_MAX] = {
		VK_COMPONENT_SWIZZLE_IDENTITY,
		VK_COMPONENT_SWIZZLE_ZERO,
		VK_COMPONENT_SWIZZLE_ONE,
		VK_COMPONENT_SWIZZLE_R,
		VK_COMPONENT_SWIZZLE_G,
		VK_COMPONENT_SWIZZLE_B,
		VK_COMPONENT_SWIZZLE_A
	};

	image_view_create_info.components.r = component_swizzles[p_view.swizzle_r];
	image_view_create_info.components.g = component_swizzles[p_view.swizzle_g];
	image_view_create_info.components.b = component_swizzles[p_view.swizzle_b];
	image_view_create_info.components.a = component_swizzles[p_view.swizzle_a];

	// The shared view covers every mipmap and layer of the owner.
	image_view_create_info.subresourceRange.baseMipLevel = 0;
	image_view_create_info.subresourceRange.levelCount = texture.mipmaps;
	image_view_create_info.subresourceRange.layerCount = texture.layers;
	image_view_create_info.subresourceRange.baseArrayLayer = 0;

	if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
		image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
	} else {
		image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	}

	// Declared at function scope because image_view_create_info.pNext may point at it
	// until vkCreateImageView() below has been called.
	VkImageViewUsageCreateInfo usage_info;
	if (context->is_device_extension_enabled(VK_KHR_MAINTENANCE_2_EXTENSION_NAME)) {
		// May need to make VK_KHR_maintenance2 mandatory and thus have Vulkan 1.1 be our minimum supported version
		// if we require setting this information. Vulkan 1.0 may simply not care.
		usage_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO;
		usage_info.pNext = nullptr;
		if (p_view.format_override != DATA_FORMAT_MAX) {
			// Need to validate usage with vulkan.
			// Storage and color-attachment usage are only kept when the override format supports them.

			usage_info.usage = 0;

			if (texture.usage_flags & TEXTURE_USAGE_SAMPLING_BIT) {
				usage_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
			}

			if (texture.usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
				if (texture_is_format_supported_for_usage(p_view.format_override, TEXTURE_USAGE_STORAGE_BIT)) {
					usage_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
				}
			}

			if (texture.usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
				if (texture_is_format_supported_for_usage(p_view.format_override, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) {
					usage_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
				}
			}

			if (texture.usage_flags & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT) {
				usage_info.usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
			}

			if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
				usage_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
			}

			if (texture.usage_flags & TEXTURE_USAGE_CAN_UPDATE_BIT) {
				usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
			}
			if (texture.usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT) {
				usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
			}

			if (texture.usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT) {
				usage_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
			}

			image_view_create_info.pNext = &usage_info;
		}
	}

	VkResult err = vkCreateImageView(device, &image_view_create_info, nullptr, &texture.view);
	ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateImageView failed with error " + itos(err) + ".");

	texture.owner = p_with_texture;
	RID id = texture_owner.make_rid(texture);
#ifdef DEV_ENABLED
	set_resource_name(id, "RID:" + itos(id.get_id()));
#endif
	_add_dependency(id, p_with_texture);

	return id;
}
// Wraps an externally created VkImage in a texture RID.
//
// p_type / p_format / p_samples: description of the external image.
// p_flags: TEXTURE_USAGE_* bits, stored as the texture's usage flags.
// p_image: the VkImage handle, passed as an integer.
// p_width / p_height / p_depth / p_layers: image dimensions and array layer count.
//
// A single-mipmap image view is created for the image and a layout transition from
// VK_IMAGE_LAYOUT_UNDEFINED to the chosen base layout is recorded on the setup command buffer.
// Returns the new RID, or an invalid RID() on failure.
RID RenderingDeviceVulkan::texture_create_from_extension(TextureType p_type, DataFormat p_format, TextureSamples p_samples, uint64_t p_flags, uint64_t p_image, uint64_t p_width, uint64_t p_height, uint64_t p_depth, uint64_t p_layers) {
	_THREAD_SAFE_METHOD_

	// p_type and p_format index fixed-size lookup tables below; reject out-of-range values first.
	ERR_FAIL_INDEX_V(p_type, TEXTURE_TYPE_MAX, RID());
	ERR_FAIL_INDEX_V(p_format, DATA_FORMAT_MAX, RID());

	// This method creates a texture object using a VkImage created by an extension, module or other external source (OpenXR uses this).
	VkImage image = (VkImage)p_image;

	Texture texture;
	texture.image = image;
	// If we leave texture.allocation as a nullptr, would that be enough to detect we don't "own" the image?
	// Also leave texture.allocation_info alone.
	// We'll set texture.view later on.
	texture.type = p_type;
	texture.format = p_format;
	texture.samples = p_samples;
	texture.width = p_width;
	texture.height = p_height;
	texture.depth = p_depth;
	texture.layers = p_layers;
	texture.mipmaps = 1;
	texture.usage_flags = p_flags;
	texture.base_mipmap = 0;
	texture.base_layer = 0;
	texture.allowed_shared_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_UNORM);
	texture.allowed_shared_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_SRGB);

	// Set base layout based on usage priority.

	if (texture.usage_flags & TEXTURE_USAGE_SAMPLING_BIT) {
		// First priority, readable.
		texture.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
	} else if (texture.usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
		// Second priority, storage.
		texture.layout = VK_IMAGE_LAYOUT_GENERAL;
	} else if (texture.usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
		// Third priority, color or depth.
		texture.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
	} else if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
		texture.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
	} else {
		texture.layout = VK_IMAGE_LAYOUT_GENERAL;
	}

	if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
		texture.read_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
		texture.barrier_aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;

		// NOTE(review): adding VK_IMAGE_ASPECT_STENCIL_BIT to the barrier aspect mask for formats
		// with a stencil component was disabled here in the original code; it is kept disabled.
		// Confirm whether combined depth-stencil external images need it.
		// if (format_has_stencil(p_format)) {
		// 	texture.barrier_aspect_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
		// }
	} else {
		texture.read_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
		texture.barrier_aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
	}

	// Create a view for us to use.

	VkImageViewCreateInfo image_view_create_info;
	image_view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
	image_view_create_info.pNext = nullptr;
	image_view_create_info.flags = 0;
	image_view_create_info.image = texture.image;

	static const VkImageViewType view_types[TEXTURE_TYPE_MAX] = {
		VK_IMAGE_VIEW_TYPE_1D,
		VK_IMAGE_VIEW_TYPE_2D,
		VK_IMAGE_VIEW_TYPE_3D,
		VK_IMAGE_VIEW_TYPE_CUBE,
		VK_IMAGE_VIEW_TYPE_1D_ARRAY,
		VK_IMAGE_VIEW_TYPE_2D_ARRAY,
		VK_IMAGE_VIEW_TYPE_CUBE_ARRAY,
	};

	image_view_create_info.viewType = view_types[texture.type];
	image_view_create_info.format = vulkan_formats[texture.format];

	static const VkComponentSwizzle component_swizzles[TEXTURE_SWIZZLE_MAX] = {
		VK_COMPONENT_SWIZZLE_IDENTITY,
		VK_COMPONENT_SWIZZLE_ZERO,
		VK_COMPONENT_SWIZZLE_ONE,
		VK_COMPONENT_SWIZZLE_R,
		VK_COMPONENT_SWIZZLE_G,
		VK_COMPONENT_SWIZZLE_B,
		VK_COMPONENT_SWIZZLE_A
	};

	// Hardcode for now, maybe make this settable from outside.
	image_view_create_info.components.r = component_swizzles[TEXTURE_SWIZZLE_R];
	image_view_create_info.components.g = component_swizzles[TEXTURE_SWIZZLE_G];
	image_view_create_info.components.b = component_swizzles[TEXTURE_SWIZZLE_B];
	image_view_create_info.components.a = component_swizzles[TEXTURE_SWIZZLE_A];

	image_view_create_info.subresourceRange.baseMipLevel = 0;
	image_view_create_info.subresourceRange.levelCount = texture.mipmaps;
	image_view_create_info.subresourceRange.baseArrayLayer = 0;
	image_view_create_info.subresourceRange.layerCount = texture.layers;
	if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
		image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
	} else {
		image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	}

	VkResult err = vkCreateImageView(device, &image_view_create_info, nullptr, &texture.view);

	if (err) {
		// The image was created externally, so it is not destroyed here on failure.
		ERR_FAIL_V_MSG(RID(), "vkCreateImageView failed with error " + itos(err) + ".");
	}

	// Barrier to set layout.
	{
		VkImageMemoryBarrier image_memory_barrier;
		image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
		image_memory_barrier.pNext = nullptr;
		image_memory_barrier.srcAccessMask = 0;
		image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
		image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
		image_memory_barrier.newLayout = texture.layout;
		image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		image_memory_barrier.image = texture.image;
		image_memory_barrier.subresourceRange.aspectMask = texture.barrier_aspect_mask;
		image_memory_barrier.subresourceRange.baseMipLevel = 0;
		image_memory_barrier.subresourceRange.levelCount = texture.mipmaps;
		image_memory_barrier.subresourceRange.baseArrayLayer = 0;
		image_memory_barrier.subresourceRange.layerCount = texture.layers;

		vkCmdPipelineBarrier(frames[frame].setup_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
	}

	RID id = texture_owner.make_rid(texture);
#ifdef DEV_ENABLED
	set_resource_name(id, "RID:" + itos(id.get_id()));
#endif

	return id;
}
// Creates a new texture RID whose image view covers only a slice (a mipmap range and a layer range)
// of an existing texture's image.
//
// p_view: format override and per-channel swizzles applied to the new view.
// p_with_texture: texture to slice. If it is itself a share, the texture that owns the image is used instead.
// p_layer / p_layers: first array layer and number of layers. When p_layers is 0 the layer count is derived
//     from p_slice_type (all layers for a 2D array slice, 6 for a cubemap slice, 1 otherwise).
// p_mipmap / p_mipmaps: first mipmap level and number of levels.
// p_slice_type: kind of view to create; must be compatible with the source texture type.
//
// Returns the new RID (registered as a dependency of the owner), or an invalid RID() on any failure.
RID RenderingDeviceVulkan::texture_create_shared_from_slice(const TextureView &p_view, RID p_with_texture, uint32_t p_layer, uint32_t p_mipmap, uint32_t p_mipmaps, TextureSliceType p_slice_type, uint32_t p_layers) {
	_THREAD_SAFE_METHOD_

	Texture *src_texture = texture_owner.get_or_null(p_with_texture);
	ERR_FAIL_COND_V(!src_texture, RID());

	if (src_texture->owner.is_valid()) { // Ahh this is a share.
		p_with_texture = src_texture->owner;
		src_texture = texture_owner.get_or_null(src_texture->owner);
		ERR_FAIL_COND_V(!src_texture, RID()); // This is a bug.
	}

	ERR_FAIL_COND_V_MSG(p_slice_type == TEXTURE_SLICE_CUBEMAP && (src_texture->type != TEXTURE_TYPE_CUBE && src_texture->type != TEXTURE_TYPE_CUBE_ARRAY), RID(),
			"Can only create a cubemap slice from a cubemap or cubemap array mipmap");

	ERR_FAIL_COND_V_MSG(p_slice_type == TEXTURE_SLICE_3D && src_texture->type != TEXTURE_TYPE_3D, RID(),
			"Can only create a 3D slice from a 3D texture");

	ERR_FAIL_COND_V_MSG(p_slice_type == TEXTURE_SLICE_2D_ARRAY && (src_texture->type != TEXTURE_TYPE_2D_ARRAY), RID(),
			"Can only create an array slice from a 2D array mipmap");

	// Some view validation. The swizzles index a fixed-size lookup table below, so reject
	// out-of-range values up front (same checks texture_create() performs).
	ERR_FAIL_INDEX_V(p_view.swizzle_r, TEXTURE_SWIZZLE_MAX, RID());
	ERR_FAIL_INDEX_V(p_view.swizzle_g, TEXTURE_SWIZZLE_MAX, RID());
	ERR_FAIL_INDEX_V(p_view.swizzle_b, TEXTURE_SWIZZLE_MAX, RID());
	ERR_FAIL_INDEX_V(p_view.swizzle_a, TEXTURE_SWIZZLE_MAX, RID());

	// Create view.

	ERR_FAIL_UNSIGNED_INDEX_V(p_mipmap, src_texture->mipmaps, RID());
	ERR_FAIL_COND_V(p_mipmap + p_mipmaps > src_texture->mipmaps, RID());
	ERR_FAIL_UNSIGNED_INDEX_V(p_layer, src_texture->layers, RID());

	int slice_layers = 1;
	if (p_layers != 0) {
		ERR_FAIL_COND_V_MSG(p_layers > 1 && p_slice_type != TEXTURE_SLICE_2D_ARRAY, RID(), "layer slicing only supported for 2D arrays");
		ERR_FAIL_COND_V_MSG(p_layer + p_layers > src_texture->layers, RID(), "layer slice is out of bounds");
		slice_layers = p_layers;
	} else if (p_slice_type == TEXTURE_SLICE_2D_ARRAY) {
		ERR_FAIL_COND_V_MSG(p_layer != 0, RID(), "layer must be 0 when obtaining a 2D array mipmap slice");
		slice_layers = src_texture->layers;
	} else if (p_slice_type == TEXTURE_SLICE_CUBEMAP) {
		slice_layers = 6;
	}

	// Start from a copy of the owner's description, then narrow it down to the slice.
	Texture texture = *src_texture;
	// Overwrites texture.width / texture.height with the values reported for mipmap level p_mipmap.
	get_image_format_required_size(texture.format, texture.width, texture.height, texture.depth, p_mipmap + 1, &texture.width, &texture.height);
	texture.mipmaps = p_mipmaps;
	texture.layers = slice_layers;
	texture.base_mipmap = p_mipmap;
	texture.base_layer = p_layer;

	VkImageViewCreateInfo image_view_create_info;
	image_view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
	image_view_create_info.pNext = nullptr;
	image_view_create_info.flags = 0;
	image_view_create_info.image = texture.image;

	// Default view type per source texture type; the cubemap, 3D and 2D array slice types override it below.
	static const VkImageViewType view_types[TEXTURE_TYPE_MAX] = {
		VK_IMAGE_VIEW_TYPE_1D,
		VK_IMAGE_VIEW_TYPE_2D,
		VK_IMAGE_VIEW_TYPE_2D,
		VK_IMAGE_VIEW_TYPE_2D,
		VK_IMAGE_VIEW_TYPE_1D,
		VK_IMAGE_VIEW_TYPE_2D,
		VK_IMAGE_VIEW_TYPE_2D,
	};

	image_view_create_info.viewType = view_types[texture.type];

	if (p_slice_type == TEXTURE_SLICE_CUBEMAP) {
		image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE;
	} else if (p_slice_type == TEXTURE_SLICE_3D) {
		image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_3D;
	} else if (p_slice_type == TEXTURE_SLICE_2D_ARRAY) {
		image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
	}

	if (p_slice_type == TEXTURE_SLICE_2D) {
		texture.type = TEXTURE_TYPE_2D;
	} else if (p_slice_type == TEXTURE_SLICE_3D) {
		texture.type = TEXTURE_TYPE_3D;
	}

	if (p_view.format_override == DATA_FORMAT_MAX || p_view.format_override == texture.format) {
		// No override (or an override equal to the native format): view the image as-is.
		image_view_create_info.format = vulkan_formats[texture.format];
	} else {
		ERR_FAIL_INDEX_V(p_view.format_override, DATA_FORMAT_MAX, RID());

		ERR_FAIL_COND_V_MSG(texture.allowed_shared_formats.find(p_view.format_override) == -1, RID(),
				"Format override is not in the list of allowed shareable formats for original texture.");
		image_view_create_info.format = vulkan_formats[p_view.format_override];
	}

	static const VkComponentSwizzle component_swizzles[TEXTURE_SWIZZLE_MAX] = {
		VK_COMPONENT_SWIZZLE_IDENTITY,
		VK_COMPONENT_SWIZZLE_ZERO,
		VK_COMPONENT_SWIZZLE_ONE,
		VK_COMPONENT_SWIZZLE_R,
		VK_COMPONENT_SWIZZLE_G,
		VK_COMPONENT_SWIZZLE_B,
		VK_COMPONENT_SWIZZLE_A
	};

	image_view_create_info.components.r = component_swizzles[p_view.swizzle_r];
	image_view_create_info.components.g = component_swizzles[p_view.swizzle_g];
	image_view_create_info.components.b = component_swizzles[p_view.swizzle_b];
	image_view_create_info.components.a = component_swizzles[p_view.swizzle_a];

	if (p_slice_type == TEXTURE_SLICE_CUBEMAP) {
		ERR_FAIL_COND_V_MSG(p_layer >= src_texture->layers, RID(),
				"Specified layer is invalid for cubemap");
		ERR_FAIL_COND_V_MSG((p_layer % 6) != 0, RID(),
				"Specified layer must be a multiple of 6.");
	}
	image_view_create_info.subresourceRange.baseMipLevel = p_mipmap;
	image_view_create_info.subresourceRange.levelCount = p_mipmaps;
	image_view_create_info.subresourceRange.layerCount = slice_layers;
	image_view_create_info.subresourceRange.baseArrayLayer = p_layer;

	if (texture.usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
		image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
	} else {
		image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	}

	VkResult err = vkCreateImageView(device, &image_view_create_info, nullptr, &texture.view);
	ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateImageView failed with error " + itos(err) + ".");

	texture.owner = p_with_texture;
	RID id = texture_owner.make_rid(texture);
#ifdef DEV_ENABLED
	set_resource_name(id, "RID:" + itos(id.get_id()));
#endif
	_add_dependency(id, p_with_texture);

	return id;
}
// Public texture upload entry point: forwards to _texture_update() with the setup queue disabled,
// so the upload is recorded on the current frame's draw command buffer.
Error RenderingDeviceVulkan::texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, BitField<BarrierMask> p_post_barrier) {
	const bool use_setup_queue = false;
	const Error result = _texture_update(p_texture, p_layer, p_data, p_post_barrier, use_setup_queue);
	return result;
}
// Copies a rectangular region out of a tightly packed source image into a tightly packed destination buffer.
//
// All coordinates and sizes are expressed in "units" of p_unit_size bytes each (callers pass either pixels
// or compressed blocks).
//
// p_src: start of the full source image; rows are p_src_full_w units wide.
// p_dst: destination buffer; receives p_src_h rows of p_src_w units each, with no padding between rows.
// p_src_x / p_src_y: top-left corner of the region inside the source image.
// p_src_w / p_src_h: size of the region.
// p_src_full_w: width of the full source image.
// p_unit_size: size of one unit in bytes.
//
// Source and destination must not overlap (both pointers are declared __restrict).
static _ALWAYS_INLINE_ void _copy_region(uint8_t const *__restrict p_src, uint8_t *__restrict p_dst, uint32_t p_src_x, uint32_t p_src_y, uint32_t p_src_w, uint32_t p_src_h, uint32_t p_src_full_w, uint32_t p_unit_size) {
	// Byte offsets are computed in size_t so they cannot wrap around at 32 bits for very large images.
	const size_t row_bytes = (size_t)p_src_w * p_unit_size;
	if (row_bytes == 0 || p_src_h == 0) {
		return; // Empty region, nothing to copy.
	}

	const size_t src_pitch = (size_t)p_src_full_w * p_unit_size;
	size_t src_offset = ((size_t)p_src_y * p_src_full_w + p_src_x) * p_unit_size;
	size_t dst_offset = 0;

	for (uint32_t y = 0; y < p_src_h; y++) {
		// Each region row is contiguous in both buffers, so it can be copied in one go.
		memcpy(p_dst + dst_offset, p_src + src_offset, row_bytes);
		src_offset += src_pitch;
		dst_offset += row_bytes;
	}
}
// Uploads p_data into one array layer of a texture, covering all of its mipmaps.
//
// p_texture: texture to update. If it is a share, the texture that owns the image is updated instead.
// p_layer: array layer to write (for cube and cube array textures the valid range is layers * 6).
// p_data: all mipmaps of the layer, tightly packed; its size must match the size reported by
//     get_image_format_required_size() for the texture.
// p_post_barrier: stages that must be synchronized with the upload once it is done.
// p_use_setup_queue: record on the frame's setup command buffer instead of its draw command buffer.
//
// The image is transitioned to VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, filled region by region through the
// staging buffer, and then transitioned back to the texture's base layout.
// Returns OK on success or an error code if validation, staging allocation or memory mapping fails.
Error RenderingDeviceVulkan::_texture_update(RID p_texture, uint32_t p_layer, const Vector<uint8_t> &p_data, BitField<BarrierMask> p_post_barrier, bool p_use_setup_queue) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V_MSG((draw_list || compute_list) && !p_use_setup_queue, ERR_INVALID_PARAMETER,
			"Updating textures is forbidden during creation of a draw or compute list");

	Texture *texture = texture_owner.get_or_null(p_texture);
	ERR_FAIL_COND_V(!texture, ERR_INVALID_PARAMETER);

	if (texture->owner != RID()) {
		p_texture = texture->owner;
		texture = texture_owner.get_or_null(texture->owner);
		ERR_FAIL_COND_V(!texture, ERR_BUG); // This is a bug.
	}

	ERR_FAIL_COND_V_MSG(texture->bound, ERR_CANT_ACQUIRE_RESOURCE,
			"Texture can't be updated while a draw list that uses it as part of a framebuffer is being created. Ensure the draw list is finalized (and that the color/depth texture using it is not set to `RenderingDevice.FINAL_ACTION_CONTINUE`) to update this texture.");

	ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_CAN_UPDATE_BIT), ERR_INVALID_PARAMETER,
			"Texture requires the `RenderingDevice.TEXTURE_USAGE_CAN_UPDATE_BIT` to be set to be updatable.");

	uint32_t layer_count = texture->layers;
	if (texture->type == TEXTURE_TYPE_CUBE || texture->type == TEXTURE_TYPE_CUBE_ARRAY) {
		layer_count *= 6;
	}
	ERR_FAIL_COND_V(p_layer >= layer_count, ERR_INVALID_PARAMETER);

	uint32_t width, height;
	uint32_t image_size = get_image_format_required_size(texture->format, texture->width, texture->height, texture->depth, texture->mipmaps, &width, &height);
	uint32_t required_size = image_size;
	uint32_t required_align = get_compressed_image_format_block_byte_size(texture->format);
	if (required_align == 1) {
		required_align = get_image_format_pixel_size(texture->format);
	}
	if ((required_align % 4) != 0) { // Alignment rules are really strange.
		required_align *= 4;
	}

	ERR_FAIL_COND_V_MSG(required_size != (uint32_t)p_data.size(), ERR_INVALID_PARAMETER,
			"Required size for texture update (" + itos(required_size) + ") does not match data supplied size (" + itos(p_data.size()) + ").");

	uint32_t region_size = texture_upload_region_size_px;

	const uint8_t *r = p_data.ptr();

	VkCommandBuffer command_buffer = p_use_setup_queue ? frames[frame].setup_command_buffer : frames[frame].draw_command_buffer;

	// Barrier to transfer.
	{
		VkImageMemoryBarrier image_memory_barrier;
		image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
		image_memory_barrier.pNext = nullptr;
		image_memory_barrier.srcAccessMask = 0;
		image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
		image_memory_barrier.oldLayout = texture->layout;
		image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

		image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		image_memory_barrier.image = texture->image;
		image_memory_barrier.subresourceRange.aspectMask = texture->barrier_aspect_mask;
		image_memory_barrier.subresourceRange.baseMipLevel = 0;
		image_memory_barrier.subresourceRange.levelCount = texture->mipmaps;
		image_memory_barrier.subresourceRange.baseArrayLayer = p_layer;
		image_memory_barrier.subresourceRange.layerCount = 1;

		vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
	}

	uint32_t mipmap_offset = 0;

	// Unpadded mipmap dimensions; width/height below come from get_image_format_required_size().
	uint32_t logic_width = texture->width;
	uint32_t logic_height = texture->height;

	for (uint32_t mm_i = 0; mm_i < texture->mipmaps; mm_i++) {
		uint32_t depth;
		// Total size of mipmaps [0, mm_i]; subtracting the previous total yields this mipmap's size.
		uint32_t image_total = get_image_format_required_size(texture->format, texture->width, texture->height, texture->depth, mm_i + 1, &width, &height, &depth);

		const uint8_t *read_ptr_mipmap = r + mipmap_offset;
		image_size = image_total - mipmap_offset;

		for (uint32_t z = 0; z < depth; z++) { // For 3D textures, depth may be > 1.
			const uint8_t *read_ptr = read_ptr_mipmap + (image_size / depth) * z;

			for (uint32_t y = 0; y < height; y += region_size) {
				for (uint32_t x = 0; x < width; x += region_size) {
					uint32_t region_w = MIN(region_size, width - x);
					uint32_t region_h = MIN(region_size, height - y);

					uint32_t region_logic_w = MIN(region_size, logic_width - x);
					uint32_t region_logic_h = MIN(region_size, logic_height - y);

					// Validate block alignment before allocating or mapping staging memory, so that
					// these early returns can never leave the staging allocation mapped.
					uint32_t block_w, block_h;
					get_compressed_image_format_block_dimensions(texture->format, block_w, block_h);

					ERR_FAIL_COND_V(region_w % block_w, ERR_BUG);
					ERR_FAIL_COND_V(region_h % block_h, ERR_BUG);

					uint32_t pixel_size = get_image_format_pixel_size(texture->format);
					uint32_t to_allocate = region_w * region_h * pixel_size;
					to_allocate >>= get_compressed_image_format_pixel_rshift(texture->format);

					uint32_t alloc_offset, alloc_size;
					Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, false);
					ERR_FAIL_COND_V(err, ERR_CANT_CREATE);

					uint8_t *write_ptr;

					{ // Map.
						void *data_ptr = nullptr;
						VkResult vkerr = vmaMapMemory(allocator, staging_buffer_blocks[staging_buffer_current].allocation, &data_ptr);
						ERR_FAIL_COND_V_MSG(vkerr, ERR_CANT_CREATE, "vmaMapMemory failed with error " + itos(vkerr) + ".");
						write_ptr = (uint8_t *)data_ptr;
						write_ptr += alloc_offset;
					}

					if (block_w != 1 || block_h != 1) {
						// Compressed image (blocks).
						// Must copy a block region.

						uint32_t block_size = get_compressed_image_format_block_byte_size(texture->format);
						// Re-create current variables in blocky format.
						uint32_t xb = x / block_w;
						uint32_t yb = y / block_h;
						uint32_t wb = width / block_w;
						//uint32_t hb = height / block_h;
						uint32_t region_wb = region_w / block_w;
						uint32_t region_hb = region_h / block_h;
						_copy_region(read_ptr, write_ptr, xb, yb, region_wb, region_hb, wb, block_size);
					} else {
						// Regular image (pixels).
						// Must copy a pixel region.
						_copy_region(read_ptr, write_ptr, x, y, region_w, region_h, width, pixel_size);
					}

					{ // Unmap.
						vmaUnmapMemory(allocator, staging_buffer_blocks[staging_buffer_current].allocation);
					}

					VkBufferImageCopy buffer_image_copy;
					buffer_image_copy.bufferOffset = alloc_offset;
					buffer_image_copy.bufferRowLength = 0; // Tightly packed.
					buffer_image_copy.bufferImageHeight = 0; // Tightly packed.

					buffer_image_copy.imageSubresource.aspectMask = texture->read_aspect_mask;
					buffer_image_copy.imageSubresource.mipLevel = mm_i;
					buffer_image_copy.imageSubresource.baseArrayLayer = p_layer;
					buffer_image_copy.imageSubresource.layerCount = 1;

					buffer_image_copy.imageOffset.x = x;
					buffer_image_copy.imageOffset.y = y;
					buffer_image_copy.imageOffset.z = z;

					buffer_image_copy.imageExtent.width = region_logic_w;
					buffer_image_copy.imageExtent.height = region_logic_h;
					buffer_image_copy.imageExtent.depth = 1;

					vkCmdCopyBufferToImage(command_buffer, staging_buffer_blocks[staging_buffer_current].buffer, texture->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &buffer_image_copy);

					staging_buffer_blocks.write[staging_buffer_current].fill_amount += alloc_size;
				}
			}
		}

		mipmap_offset = image_total;
		logic_width = MAX(1u, logic_width >> 1);
		logic_height = MAX(1u, logic_height >> 1);
	}

	// Barrier to restore layout.
	{
		uint32_t barrier_flags = 0;
		uint32_t access_flags = 0;
		if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) {
			barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
		}
		if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) {
			barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
		}
		if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) {
			barrier_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
		}
		if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) {
			barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
			access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
		}

		if (barrier_flags == 0) {
			// No stage requested: fall back to the bottom of the pipe as destination stage.
			barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
		}

		VkImageMemoryBarrier image_memory_barrier;
		image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
		image_memory_barrier.pNext = nullptr;
		image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
		image_memory_barrier.dstAccessMask = access_flags;
		image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
		image_memory_barrier.newLayout = texture->layout;
		image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		image_memory_barrier.image = texture->image;
		image_memory_barrier.subresourceRange.aspectMask = texture->barrier_aspect_mask;
		image_memory_barrier.subresourceRange.baseMipLevel = 0;
		image_memory_barrier.subresourceRange.levelCount = texture->mipmaps;
		image_memory_barrier.subresourceRange.baseArrayLayer = p_layer;
		image_memory_barrier.subresourceRange.layerCount = 1;

		vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
	}

	// First use of this texture in the current frame: reset the per-frame usage tracking.
	if (texture->used_in_frame != frames_drawn) {
		texture->used_in_raster = false;
		texture->used_in_compute = false;
		texture->used_in_frame = frames_drawn;
	}
	texture->used_in_transfer = true;

	return OK;
}
// Reads back one array layer of an image (all mipmaps) by mapping its memory and copying row by row.
//
// tex: texture description providing format, dimensions and mipmap count.
// p_image: image whose subresource layouts (offset, row pitch, depth pitch) are queried.
// p_allocation: VMA allocation backing p_image; it is mapped for the duration of the copy.
//     NOTE(review): presumably this must be host-visible memory — confirm against callers.
// p_layer: array layer to read.
// p_2d: when true, the image is read as if its depth were 1.
//
// Returns the mipmaps tightly packed one after another, or an empty vector if the memory could not be mapped.
Vector<uint8_t> RenderingDeviceVulkan::_texture_get_data_from_image(Texture *tex, VkImage p_image, VmaAllocation p_allocation, uint32_t p_layer, bool p_2d) {
	uint32_t width, height, depth;
	uint32_t image_size = get_image_format_required_size(tex->format, tex->width, tex->height, p_2d ? 1 : tex->depth, tex->mipmaps, &width, &height, &depth);

	Vector<uint8_t> image_data;
	image_data.resize(image_size);

	// A failed map would leave the pointer unusable, so check the result before reading through it.
	void *img_mem = nullptr;
	VkResult vkerr = vmaMapMemory(allocator, p_allocation, &img_mem);
	ERR_FAIL_COND_V_MSG(vkerr, Vector<uint8_t>(), "vmaMapMemory failed with error " + itos(vkerr) + ".");

	uint32_t blockw, blockh;
	get_compressed_image_format_block_dimensions(tex->format, blockw, blockh);
	uint32_t block_size = get_compressed_image_format_block_byte_size(tex->format);
	uint32_t pixel_size = get_image_format_pixel_size(tex->format);

	{
		uint8_t *w = image_data.ptrw();

		uint32_t mipmap_offset = 0;
		for (uint32_t mm_i = 0; mm_i < tex->mipmaps; mm_i++) {
			// Total size of mipmaps [0, mm_i]; subtracting the previous total yields this mipmap's size.
			uint32_t image_total = get_image_format_required_size(tex->format, tex->width, tex->height, p_2d ? 1 : tex->depth, mm_i + 1, &width, &height, &depth);

			uint8_t *write_ptr_mipmap = w + mipmap_offset;
			image_size = image_total - mipmap_offset;

			VkImageSubresource image_sub_resorce;
			image_sub_resorce.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
			image_sub_resorce.arrayLayer = p_layer;
			image_sub_resorce.mipLevel = mm_i;
			VkSubresourceLayout layout;
			vkGetImageSubresourceLayout(device, p_image, &image_sub_resorce, &layout);

			for (uint32_t z = 0; z < depth; z++) {
				uint8_t *write_ptr = write_ptr_mipmap + z * image_size / depth;
				const uint8_t *slice_read_ptr = ((uint8_t *)img_mem) + layout.offset + z * layout.depthPitch;

				// Source rows are layout.rowPitch bytes apart; destination rows are tightly packed.
				if (block_size > 1) {
					// Compressed.
					uint32_t line_width = (block_size * (width / blockw));
					for (uint32_t y = 0; y < height / blockh; y++) {
						const uint8_t *rptr = slice_read_ptr + y * layout.rowPitch;
						uint8_t *wptr = write_ptr + y * line_width;

						memcpy(wptr, rptr, line_width);
					}

				} else {
					// Uncompressed.
					for (uint32_t y = 0; y < height; y++) {
						const uint8_t *rptr = slice_read_ptr + y * layout.rowPitch;
						uint8_t *wptr = write_ptr + y * pixel_size * width;
						memcpy(wptr, rptr, (uint64_t)pixel_size * width);
					}
				}
			}

			mipmap_offset = image_total;
		}
	}

	vmaUnmapMemory(allocator, p_allocation);

	return image_data;
}
// Retrieves the contents (all mipmaps) of one layer of a texture as a byte vector.
//
// p_texture - Texture to read; it must not be bound to a draw list in progress and must have
//             TEXTURE_USAGE_CAN_COPY_FROM_BIT set.
// p_layer   - Layer to read. For cube and cube array textures the valid range is `layers * 6`.
//
// Textures created with TEXTURE_USAGE_CPU_READ_BIT are mapped and read directly. Otherwise the
// image is copied into a temporary host-accessible buffer on the draw command buffer, `_flush(true)`
// is called, and the buffer is read back.
//
// Returns an empty vector on any validation or mapping failure.
Vector<uint8_t> RenderingDeviceVulkan::texture_get_data(RID p_texture, uint32_t p_layer) {
	_THREAD_SAFE_METHOD_

	Texture *tex = texture_owner.get_or_null(p_texture);
	ERR_FAIL_COND_V(!tex, Vector<uint8_t>());

	ERR_FAIL_COND_V_MSG(tex->bound, Vector<uint8_t>(),
			"Texture can't be retrieved while a draw list that uses it as part of a framebuffer is being created. Ensure the draw list is finalized (and that the color/depth texture using it is not set to `RenderingDevice.FINAL_ACTION_CONTINUE`) to retrieve this texture.");
	ERR_FAIL_COND_V_MSG(!(tex->usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT), Vector<uint8_t>(),
			"Texture requires the `RenderingDevice.TEXTURE_USAGE_CAN_COPY_FROM_BIT` to be set to be retrieved.");

	uint32_t layer_count = tex->layers;
	if (tex->type == TEXTURE_TYPE_CUBE || tex->type == TEXTURE_TYPE_CUBE_ARRAY) {
		layer_count *= 6;
	}
	ERR_FAIL_COND_V(p_layer >= layer_count, Vector<uint8_t>());

	if (tex->usage_flags & TEXTURE_USAGE_CPU_READ_BIT) {
		// Does not need anything fancy, map and read.
		return _texture_get_data_from_image(tex, tex->image, tex->allocation, p_layer);
	} else {
		// Compute total image size (all mipmaps).
		uint32_t buffer_size = get_image_format_required_size(tex->format, tex->width, tex->height, tex->depth, tex->mipmaps);

		// Allocate the temporary readback buffer.
		VkCommandBuffer command_buffer = frames[frame].draw_command_buffer; // Makes more sense to retrieve.
		Buffer tmp_buffer;
		_buffer_allocate(&tmp_buffer, buffer_size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST, VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT);

		{ // Source image barrier: move the layer to the transfer-source layout.
			VkImageMemoryBarrier image_memory_barrier;
			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
			image_memory_barrier.pNext = nullptr;
			image_memory_barrier.srcAccessMask = 0;
			image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
			image_memory_barrier.oldLayout = tex->layout;
			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;

			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.image = tex->image;
			image_memory_barrier.subresourceRange.aspectMask = tex->barrier_aspect_mask;
			image_memory_barrier.subresourceRange.baseMipLevel = 0;
			image_memory_barrier.subresourceRange.levelCount = tex->mipmaps;
			image_memory_barrier.subresourceRange.baseArrayLayer = p_layer;
			image_memory_barrier.subresourceRange.layerCount = 1;

			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
		}

		uint32_t computed_w = tex->width;
		uint32_t computed_h = tex->height;
		uint32_t computed_d = tex->depth;

		uint32_t prev_size = 0;
		uint32_t offset = 0;
		for (uint32_t i = 0; i < tex->mipmaps; i++) {
			VkBufferImageCopy buffer_image_copy;

			// The size for `i + 1` mipmaps is used as a running total; the difference with the
			// previous total is how far the buffer offset advances for this mipmap.
			uint32_t image_size = get_image_format_required_size(tex->format, tex->width, tex->height, tex->depth, i + 1);
			uint32_t size = image_size - prev_size;
			prev_size = image_size;

			buffer_image_copy.bufferOffset = offset;
			buffer_image_copy.bufferImageHeight = 0;
			buffer_image_copy.bufferRowLength = 0;
			buffer_image_copy.imageSubresource.aspectMask = tex->read_aspect_mask;
			buffer_image_copy.imageSubresource.baseArrayLayer = p_layer;
			buffer_image_copy.imageSubresource.layerCount = 1;
			buffer_image_copy.imageSubresource.mipLevel = i;
			buffer_image_copy.imageOffset.x = 0;
			buffer_image_copy.imageOffset.y = 0;
			buffer_image_copy.imageOffset.z = 0;
			buffer_image_copy.imageExtent.width = computed_w;
			buffer_image_copy.imageExtent.height = computed_h;
			buffer_image_copy.imageExtent.depth = computed_d;

			vkCmdCopyImageToBuffer(command_buffer, tex->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, tmp_buffer.buffer, 1, &buffer_image_copy);

			// Each following mipmap halves every dimension, clamped to 1.
			computed_w = MAX(1u, computed_w >> 1);
			computed_h = MAX(1u, computed_h >> 1);
			computed_d = MAX(1u, computed_d >> 1);
			offset += size;
		}

		{ // Restore src: return the layer to its regular layout.
			VkImageMemoryBarrier image_memory_barrier;
			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
			image_memory_barrier.pNext = nullptr;
			image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
			image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
			if (tex->usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
				image_memory_barrier.dstAccessMask |= VK_ACCESS_SHADER_WRITE_BIT;
			}
			image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
			image_memory_barrier.newLayout = tex->layout;
			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.image = tex->image;
			image_memory_barrier.subresourceRange.aspectMask = tex->barrier_aspect_mask;
			image_memory_barrier.subresourceRange.baseMipLevel = 0;
			image_memory_barrier.subresourceRange.levelCount = tex->mipmaps;
			image_memory_barrier.subresourceRange.baseArrayLayer = p_layer;
			image_memory_barrier.subresourceRange.layerCount = 1;

			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
		}

		// NOTE(review): presumably this submits the recorded commands and waits for them, so the
		// copy has finished before the buffer is mapped - confirm against _flush().
		_flush(true);

		void *buffer_mem;
		VkResult vkerr = vmaMapMemory(allocator, tmp_buffer.allocation, &buffer_mem);
		if (vkerr) {
			// Release the temporary buffer before the error return below; otherwise it would leak.
			_buffer_free(&tmp_buffer);
		}
		ERR_FAIL_COND_V_MSG(vkerr, Vector<uint8_t>(), "vmaMapMemory failed with error " + itos(vkerr) + ".");

		Vector<uint8_t> buffer_data;
		{
			buffer_data.resize(buffer_size);
			uint8_t *w = buffer_data.ptrw();
			memcpy(w, buffer_mem, buffer_size);
		}

		vmaUnmapMemory(allocator, tmp_buffer.allocation);

		_buffer_free(&tmp_buffer);

		return buffer_data;
	}
}
// Tells whether the texture behind `p_texture` references a valid owner RID.
// Fails with `false` when the RID does not resolve to a texture.
bool RenderingDeviceVulkan::texture_is_shared(RID p_texture) {
	_THREAD_SAFE_METHOD_

	Texture *tex = texture_owner.get_or_null(p_texture);
	ERR_FAIL_COND_V(!tex, false);

	const bool has_owner = tex->owner.is_valid();
	return has_owner;
}
// Tells whether `p_texture` is a RID owned by this device's texture owner.
bool RenderingDeviceVulkan::texture_is_valid(RID p_texture) {
	const bool is_owned = texture_owner.owns(p_texture);
	return is_owned;
}
// Builds a TextureFormat description from the stored properties of `p_texture`.
// Fails with a default-constructed TextureFormat when the RID does not resolve to a texture.
RD::TextureFormat RenderingDeviceVulkan::texture_get_format(RID p_texture) {
	_THREAD_SAFE_METHOD_

	Texture *tex = texture_owner.get_or_null(p_texture);
	ERR_FAIL_COND_V(!tex, TextureFormat());

	TextureFormat description;

	// Kind of texture and pixel format.
	description.texture_type = tex->type;
	description.format = tex->format;
	description.samples = tex->samples;

	// Dimensions.
	description.width = tex->width;
	description.height = tex->height;
	description.depth = tex->depth;
	description.array_layers = tex->layers;
	description.mipmaps = tex->mipmaps;

	// Usage and sharing information.
	description.usage_bits = tex->usage_flags;
	description.shareable_formats = tex->allowed_shared_formats;
	description.is_resolve_buffer = tex->is_resolve_buffer;

	return description;
}
// Returns the stored width and height of `p_texture`.
// Fails with a default-constructed Size2i when the RID does not resolve to a texture.
Size2i RenderingDeviceVulkan::texture_size(RID p_texture) {
	_THREAD_SAFE_METHOD_

	Texture *tex = texture_owner.get_or_null(p_texture);
	ERR_FAIL_COND_V(!tex, Size2i());

	const Size2i extent(tex->width, tex->height);
	return extent;
}
// Returns the VkImage handle of `p_texture` converted to a 64-bit integer.
// Fails with 0 when the RID does not resolve to a texture.
uint64_t RenderingDeviceVulkan::texture_get_native_handle(RID p_texture) {
	_THREAD_SAFE_METHOD_

	Texture *tex = texture_owner.get_or_null(p_texture);
	ERR_FAIL_COND_V(!tex, 0);

	const uint64_t native_handle = (uint64_t)tex->image;
	return native_handle;
}
// Records a copy of a region from one texture subresource to another on the draw command buffer.
//
// p_from_texture / p_to_texture - Source and destination textures. Neither may be bound to a draw
//                                 list in progress; the source needs TEXTURE_USAGE_CAN_COPY_FROM_BIT
//                                 and the destination TEXTURE_USAGE_CAN_COPY_TO_BIT.
// p_from / p_to                 - Origin of the region in the source / destination mipmap.
// p_size                        - Extent of the region; it must fit inside both mipmaps.
// p_src_mipmap / p_dst_mipmap   - Mipmap level read from / written to.
// p_src_layer / p_dst_layer     - Layer read from / written to (cube textures count `layers * 6`).
// p_post_barrier                - Stages that will access the textures afterwards; used to build the
//                                 barriers that restore both textures to their regular layouts.
//
// Returns OK on success, or ERR_INVALID_PARAMETER if any validation fails.
Error RenderingDeviceVulkan::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer, BitField<BarrierMask> p_post_barrier) {
	_THREAD_SAFE_METHOD_

	Texture *src_tex = texture_owner.get_or_null(p_from_texture);
	ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER);

	ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER,
			"Source texture can't be copied while a draw list that uses it as part of a framebuffer is being created. Ensure the draw list is finalized (and that the color/depth texture using it is not set to `RenderingDevice.FINAL_ACTION_CONTINUE`) to copy this texture.");
	ERR_FAIL_COND_V_MSG(!(src_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT), ERR_INVALID_PARAMETER,
			"Source texture requires the `RenderingDevice.TEXTURE_USAGE_CAN_COPY_FROM_BIT` to be set to be retrieved.");

	uint32_t src_layer_count = src_tex->layers;
	uint32_t src_width, src_height, src_depth;
	get_image_format_required_size(src_tex->format, src_tex->width, src_tex->height, src_tex->depth, p_src_mipmap + 1, &src_width, &src_height, &src_depth);
	if (src_tex->type == TEXTURE_TYPE_CUBE || src_tex->type == TEXTURE_TYPE_CUBE_ARRAY) {
		src_layer_count *= 6;
	}

	ERR_FAIL_COND_V(p_from.x < 0 || p_from.x + p_size.x > src_width, ERR_INVALID_PARAMETER);
	ERR_FAIL_COND_V(p_from.y < 0 || p_from.y + p_size.y > src_height, ERR_INVALID_PARAMETER);
	ERR_FAIL_COND_V(p_from.z < 0 || p_from.z + p_size.z > src_depth, ERR_INVALID_PARAMETER);
	ERR_FAIL_COND_V(p_src_mipmap >= src_tex->mipmaps, ERR_INVALID_PARAMETER);
	ERR_FAIL_COND_V(p_src_layer >= src_layer_count, ERR_INVALID_PARAMETER);

	Texture *dst_tex = texture_owner.get_or_null(p_to_texture);
	ERR_FAIL_COND_V(!dst_tex, ERR_INVALID_PARAMETER);

	ERR_FAIL_COND_V_MSG(dst_tex->bound, ERR_INVALID_PARAMETER,
			"Destination texture can't be copied while a draw list that uses it as part of a framebuffer is being created. Ensure the draw list is finalized (and that the color/depth texture using it is not set to `RenderingDevice.FINAL_ACTION_CONTINUE`) to copy this texture.");
	ERR_FAIL_COND_V_MSG(!(dst_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT), ERR_INVALID_PARAMETER,
			"Destination texture requires the `RenderingDevice.TEXTURE_USAGE_CAN_COPY_TO_BIT` to be set to be retrieved.");

	uint32_t dst_layer_count = dst_tex->layers;
	uint32_t dst_width, dst_height, dst_depth;
	get_image_format_required_size(dst_tex->format, dst_tex->width, dst_tex->height, dst_tex->depth, p_dst_mipmap + 1, &dst_width, &dst_height, &dst_depth);
	if (dst_tex->type == TEXTURE_TYPE_CUBE || dst_tex->type == TEXTURE_TYPE_CUBE_ARRAY) {
		dst_layer_count *= 6;
	}

	ERR_FAIL_COND_V(p_to.x < 0 || p_to.x + p_size.x > dst_width, ERR_INVALID_PARAMETER);
	ERR_FAIL_COND_V(p_to.y < 0 || p_to.y + p_size.y > dst_height, ERR_INVALID_PARAMETER);
	ERR_FAIL_COND_V(p_to.z < 0 || p_to.z + p_size.z > dst_depth, ERR_INVALID_PARAMETER);
	ERR_FAIL_COND_V(p_dst_mipmap >= dst_tex->mipmaps, ERR_INVALID_PARAMETER);
	ERR_FAIL_COND_V(p_dst_layer >= dst_layer_count, ERR_INVALID_PARAMETER);

	ERR_FAIL_COND_V_MSG(src_tex->read_aspect_mask != dst_tex->read_aspect_mask, ERR_INVALID_PARAMETER,
			"Source and destination texture must be of the same type (color or depth).");

	VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;

	{
		// PRE Copy the image.

		{ // Source: move the copied subresource to the transfer-source layout.
			VkImageMemoryBarrier image_memory_barrier;
			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
			image_memory_barrier.pNext = nullptr;
			image_memory_barrier.srcAccessMask = 0;
			image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
			image_memory_barrier.oldLayout = src_tex->layout;
			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;

			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.image = src_tex->image;
			image_memory_barrier.subresourceRange.aspectMask = src_tex->barrier_aspect_mask;
			image_memory_barrier.subresourceRange.baseMipLevel = p_src_mipmap;
			image_memory_barrier.subresourceRange.levelCount = 1;
			image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer;
			image_memory_barrier.subresourceRange.layerCount = 1;

			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
		}
		{ // Dest: move the written subresource to the transfer-destination layout.
			VkImageMemoryBarrier image_memory_barrier;
			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
			image_memory_barrier.pNext = nullptr;
			image_memory_barrier.srcAccessMask = 0;
			image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
			image_memory_barrier.oldLayout = dst_tex->layout;
			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.image = dst_tex->image;
			image_memory_barrier.subresourceRange.aspectMask = dst_tex->read_aspect_mask;
			image_memory_barrier.subresourceRange.baseMipLevel = p_dst_mipmap;
			image_memory_barrier.subresourceRange.levelCount = 1;
			image_memory_barrier.subresourceRange.baseArrayLayer = p_dst_layer;
			image_memory_barrier.subresourceRange.layerCount = 1;

			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
		}

		// COPY.

		{
			VkImageCopy image_copy_region;
			image_copy_region.srcSubresource.aspectMask = src_tex->read_aspect_mask;
			image_copy_region.srcSubresource.baseArrayLayer = p_src_layer;
			image_copy_region.srcSubresource.layerCount = 1;
			image_copy_region.srcSubresource.mipLevel = p_src_mipmap;
			image_copy_region.srcOffset.x = p_from.x;
			image_copy_region.srcOffset.y = p_from.y;
			image_copy_region.srcOffset.z = p_from.z;

			image_copy_region.dstSubresource.aspectMask = dst_tex->read_aspect_mask;
			image_copy_region.dstSubresource.baseArrayLayer = p_dst_layer;
			image_copy_region.dstSubresource.layerCount = 1;
			image_copy_region.dstSubresource.mipLevel = p_dst_mipmap;
			image_copy_region.dstOffset.x = p_to.x;
			image_copy_region.dstOffset.y = p_to.y;
			image_copy_region.dstOffset.z = p_to.z;

			image_copy_region.extent.width = p_size.x;
			image_copy_region.extent.height = p_size.y;
			image_copy_region.extent.depth = p_size.z;

			vkCmdCopyImage(command_buffer, src_tex->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_tex->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy_region);
		}

		// RESTORE LAYOUT for SRC and DST.

		// Translate the requested post-barrier mask into Vulkan stage and access flags.
		uint32_t barrier_flags = 0;
		uint32_t access_flags = 0;
		if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) {
			barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
		}
		if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) {
			barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
		}
		if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) {
			barrier_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
		}
		if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) {
			barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
			access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
		}

		if (barrier_flags == 0) {
			barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
		}

		{ // Restore src.
			VkImageMemoryBarrier image_memory_barrier;
			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
			image_memory_barrier.pNext = nullptr;
			image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
			image_memory_barrier.dstAccessMask = access_flags;
			image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
			image_memory_barrier.newLayout = src_tex->layout;
			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.image = src_tex->image;
			image_memory_barrier.subresourceRange.aspectMask = src_tex->barrier_aspect_mask;
			image_memory_barrier.subresourceRange.baseMipLevel = p_src_mipmap;
			// Only the single level transitioned by the source pre-barrier is in the transfer layout,
			// so only that level is restored.
			image_memory_barrier.subresourceRange.levelCount = 1;
			image_memory_barrier.subresourceRange.baseArrayLayer = p_src_layer;
			image_memory_barrier.subresourceRange.layerCount = 1;

			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
		}

		{ // Make dst readable.

			VkImageMemoryBarrier image_memory_barrier;
			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
			image_memory_barrier.pNext = nullptr;
			image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
			image_memory_barrier.dstAccessMask = access_flags;
			image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
			image_memory_barrier.newLayout = dst_tex->layout;

			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.image = dst_tex->image;
			// Must address exactly the subresource that the destination pre-barrier transitioned:
			// same aspect, and the destination (not source) mipmap and layer.
			image_memory_barrier.subresourceRange.aspectMask = dst_tex->read_aspect_mask;
			image_memory_barrier.subresourceRange.baseMipLevel = p_dst_mipmap;
			image_memory_barrier.subresourceRange.levelCount = 1;
			image_memory_barrier.subresourceRange.baseArrayLayer = p_dst_layer;
			image_memory_barrier.subresourceRange.layerCount = 1;

			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
		}
	}

	// Track that the destination was written by a transfer during this frame.
	if (dst_tex->used_in_frame != frames_drawn) {
		dst_tex->used_in_raster = false;
		dst_tex->used_in_compute = false;
		dst_tex->used_in_frame = frames_drawn;
	}
	dst_tex->used_in_transfer = true;

	return OK;
}
// Records a multisample resolve from `p_from_texture` into `p_to_texture` on the draw command buffer.
//
// p_from_texture - Multisampled 2D source texture with TEXTURE_USAGE_CAN_COPY_FROM_BIT.
// p_to_texture   - Single-sampled 2D destination texture with TEXTURE_USAGE_CAN_COPY_TO_BIT.
// p_post_barrier - Stages that will access the textures afterwards; used to build the barriers
//                  that restore both textures to their regular layouts.
//
// Both textures must have the same format, the same dimensions and the same read aspect, and
// neither may be bound to a draw list in progress.
//
// Returns OK on success, or ERR_INVALID_PARAMETER if any validation fails.
Error RenderingDeviceVulkan::texture_resolve_multisample(RID p_from_texture, RID p_to_texture, BitField<BarrierMask> p_post_barrier) {
	_THREAD_SAFE_METHOD_

	Texture *src_tex = texture_owner.get_or_null(p_from_texture);
	ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER);

	ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER,
			"Source texture can't be copied while a draw list that uses it as part of a framebuffer is being created. Ensure the draw list is finalized (and that the color/depth texture using it is not set to `RenderingDevice.FINAL_ACTION_CONTINUE`) to copy this texture.");
	ERR_FAIL_COND_V_MSG(!(src_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_FROM_BIT), ERR_INVALID_PARAMETER,
			"Source texture requires the `RenderingDevice.TEXTURE_USAGE_CAN_COPY_FROM_BIT` to be set to be retrieved.");

	ERR_FAIL_COND_V_MSG(src_tex->type != TEXTURE_TYPE_2D, ERR_INVALID_PARAMETER, "Source texture must be 2D (or a slice of a 3D/Cube texture)");
	ERR_FAIL_COND_V_MSG(src_tex->samples == TEXTURE_SAMPLES_1, ERR_INVALID_PARAMETER, "Source texture must be multisampled.");

	Texture *dst_tex = texture_owner.get_or_null(p_to_texture);
	ERR_FAIL_COND_V(!dst_tex, ERR_INVALID_PARAMETER);

	ERR_FAIL_COND_V_MSG(dst_tex->bound, ERR_INVALID_PARAMETER,
			"Destination texture can't be copied while a draw list that uses it as part of a framebuffer is being created. Ensure the draw list is finalized (and that the color/depth texture using it is not set to `RenderingDevice.FINAL_ACTION_CONTINUE`) to copy this texture.");
	ERR_FAIL_COND_V_MSG(!(dst_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT), ERR_INVALID_PARAMETER,
			"Destination texture requires the `RenderingDevice.TEXTURE_USAGE_CAN_COPY_TO_BIT` to be set to be retrieved.");

	ERR_FAIL_COND_V_MSG(dst_tex->type != TEXTURE_TYPE_2D, ERR_INVALID_PARAMETER, "Destination texture must be 2D (or a slice of a 3D/Cube texture).");
	ERR_FAIL_COND_V_MSG(dst_tex->samples != TEXTURE_SAMPLES_1, ERR_INVALID_PARAMETER, "Destination texture must not be multisampled.");
	ERR_FAIL_COND_V_MSG(src_tex->format != dst_tex->format, ERR_INVALID_PARAMETER, "Source and Destination textures must be the same format.");
	// A mismatch in ANY dimension must be rejected, hence `||` between the comparisons.
	ERR_FAIL_COND_V_MSG(src_tex->width != dst_tex->width || src_tex->height != dst_tex->height || src_tex->depth != dst_tex->depth, ERR_INVALID_PARAMETER, "Source and Destination textures must have the same dimensions.");

	ERR_FAIL_COND_V_MSG(src_tex->read_aspect_mask != dst_tex->read_aspect_mask, ERR_INVALID_PARAMETER,
			"Source and destination texture must be of the same type (color or depth).");

	VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;

	{
		// PRE Copy the image.

		{ // Source: move to the transfer-source layout.
			VkImageMemoryBarrier image_memory_barrier;
			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
			image_memory_barrier.pNext = nullptr;
			image_memory_barrier.srcAccessMask = 0;
			image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
			image_memory_barrier.oldLayout = src_tex->layout;
			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;

			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.image = src_tex->image;
			image_memory_barrier.subresourceRange.aspectMask = src_tex->barrier_aspect_mask;
			image_memory_barrier.subresourceRange.baseMipLevel = src_tex->base_mipmap;
			image_memory_barrier.subresourceRange.levelCount = 1;
			image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer;
			image_memory_barrier.subresourceRange.layerCount = 1;

			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
		}
		{ // Dest: move to the transfer-destination layout.
			VkImageMemoryBarrier image_memory_barrier;
			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
			image_memory_barrier.pNext = nullptr;
			image_memory_barrier.srcAccessMask = 0;
			image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
			image_memory_barrier.oldLayout = dst_tex->layout;
			image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.image = dst_tex->image;
			image_memory_barrier.subresourceRange.aspectMask = dst_tex->read_aspect_mask;
			image_memory_barrier.subresourceRange.baseMipLevel = dst_tex->base_mipmap;
			image_memory_barrier.subresourceRange.levelCount = 1;
			image_memory_barrier.subresourceRange.baseArrayLayer = dst_tex->base_layer;
			image_memory_barrier.subresourceRange.layerCount = 1;

			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
		}

		// COPY.

		{
			VkImageResolve image_copy_region;
			image_copy_region.srcSubresource.aspectMask = src_tex->read_aspect_mask;
			image_copy_region.srcSubresource.baseArrayLayer = src_tex->base_layer;
			image_copy_region.srcSubresource.layerCount = 1;
			image_copy_region.srcSubresource.mipLevel = src_tex->base_mipmap;
			image_copy_region.srcOffset.x = 0;
			image_copy_region.srcOffset.y = 0;
			image_copy_region.srcOffset.z = 0;

			image_copy_region.dstSubresource.aspectMask = dst_tex->read_aspect_mask;
			image_copy_region.dstSubresource.baseArrayLayer = dst_tex->base_layer;
			image_copy_region.dstSubresource.layerCount = 1;
			image_copy_region.dstSubresource.mipLevel = dst_tex->base_mipmap;
			image_copy_region.dstOffset.x = 0;
			image_copy_region.dstOffset.y = 0;
			image_copy_region.dstOffset.z = 0;

			image_copy_region.extent.width = src_tex->width;
			image_copy_region.extent.height = src_tex->height;
			image_copy_region.extent.depth = src_tex->depth;

			vkCmdResolveImage(command_buffer, src_tex->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_tex->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy_region);
		}

		// RESTORE LAYOUT for SRC and DST.

		// Translate the requested post-barrier mask into Vulkan stage and access flags.
		uint32_t barrier_flags = 0;
		uint32_t access_flags = 0;
		if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) {
			barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
		}
		if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) {
			barrier_flags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
		}
		if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) {
			barrier_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
			access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
		}
		if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) {
			barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
			access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
		}

		if (barrier_flags == 0) {
			barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
		}

		{ // Restore src.
			VkImageMemoryBarrier image_memory_barrier;
			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
			image_memory_barrier.pNext = nullptr;
			image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
			image_memory_barrier.dstAccessMask = access_flags;
			image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
			image_memory_barrier.newLayout = src_tex->layout;
			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.image = src_tex->image;
			image_memory_barrier.subresourceRange.aspectMask = src_tex->barrier_aspect_mask;
			image_memory_barrier.subresourceRange.baseMipLevel = src_tex->base_mipmap;
			image_memory_barrier.subresourceRange.levelCount = 1;
			image_memory_barrier.subresourceRange.baseArrayLayer = src_tex->base_layer;
			image_memory_barrier.subresourceRange.layerCount = 1;

			// The source stage argument is a pipeline stage mask, so the transfer *stage* bit is
			// passed here rather than an access flag.
			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
		}

		{ // Make dst readable.

			VkImageMemoryBarrier image_memory_barrier;
			image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
			image_memory_barrier.pNext = nullptr;
			image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
			image_memory_barrier.dstAccessMask = access_flags;
			image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
			image_memory_barrier.newLayout = dst_tex->layout;

			image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			image_memory_barrier.image = dst_tex->image;
			// Same aspect as the destination pre-barrier, so both address the same subresource.
			image_memory_barrier.subresourceRange.aspectMask = dst_tex->read_aspect_mask;
			image_memory_barrier.subresourceRange.baseMipLevel = dst_tex->base_mipmap;
			image_memory_barrier.subresourceRange.levelCount = 1;
			image_memory_barrier.subresourceRange.baseArrayLayer = dst_tex->base_layer;
			image_memory_barrier.subresourceRange.layerCount = 1;

			vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, barrier_flags, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
		}
	}

	return OK;
}
// Records a clear of a range of mipmaps and layers of a texture to a solid color.
//
// p_texture                  - Texture to clear; it must not be bound to a draw list in progress
//                              and must have TEXTURE_USAGE_CAN_COPY_TO_BIT set.
// p_color                    - Color written to every texel of the range.
// p_base_mipmap / p_mipmaps  - First mipmap and number of mipmaps, relative to the texture's base mipmap.
// p_base_layer / p_layers    - First layer and number of layers, relative to the texture's base layer.
// p_post_barrier             - Stages that will access the texture afterwards.
//
// Returns OK on success, or ERR_INVALID_PARAMETER if any validation fails.
Error RenderingDeviceVulkan::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers, BitField<BarrierMask> p_post_barrier) {
	_THREAD_SAFE_METHOD_

	Texture *src_tex = texture_owner.get_or_null(p_texture);
	ERR_FAIL_COND_V(!src_tex, ERR_INVALID_PARAMETER);

	ERR_FAIL_COND_V_MSG(src_tex->bound, ERR_INVALID_PARAMETER,
			"Source texture can't be cleared while a draw list that uses it as part of a framebuffer is being created. Ensure the draw list is finalized (and that the color/depth texture using it is not set to `RenderingDevice.FINAL_ACTION_CONTINUE`) to clear this texture.");

	ERR_FAIL_COND_V(p_layers == 0, ERR_INVALID_PARAMETER);
	ERR_FAIL_COND_V(p_mipmaps == 0, ERR_INVALID_PARAMETER);

	ERR_FAIL_COND_V_MSG(!(src_tex->usage_flags & TEXTURE_USAGE_CAN_COPY_TO_BIT), ERR_INVALID_PARAMETER,
			"Source texture requires the `RenderingDevice.TEXTURE_USAGE_CAN_COPY_TO_BIT` to be set to be cleared.");

	uint32_t src_layer_count = src_tex->layers;
	if (src_tex->type == TEXTURE_TYPE_CUBE || src_tex->type == TEXTURE_TYPE_CUBE_ARRAY) {
		src_layer_count *= 6;
	}

	ERR_FAIL_COND_V(p_base_mipmap + p_mipmaps > src_tex->mipmaps, ERR_INVALID_PARAMETER);
	ERR_FAIL_COND_V(p_base_layer + p_layers > src_layer_count, ERR_INVALID_PARAMETER);

	VkCommandBuffer cmd = frames[frame].draw_command_buffer;

	// A texture already in the general layout is cleared in place; anything else is cleared in
	// the transfer-destination layout and switched back afterwards.
	VkImageLayout clear_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
	if (src_tex->layout == VK_IMAGE_LAYOUT_GENERAL) {
		clear_layout = VK_IMAGE_LAYOUT_GENERAL;
	}

	// The subresource range is shared by both barriers and by the clear command itself.
	VkImageSubresourceRange range;
	range.aspectMask = src_tex->read_aspect_mask;
	range.baseMipLevel = src_tex->base_mipmap + p_base_mipmap;
	range.levelCount = p_mipmaps;
	range.baseArrayLayer = src_tex->base_layer + p_base_layer;
	range.layerCount = p_layers;

	// NOTE: The stages/accesses that are valid for a given owner could perhaps be a property of
	// the owner (here and in places like _get_buffer_from_owner).
	const VkPipelineStageFlags prior_stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
	VkAccessFlags prior_access = VK_ACCESS_SHADER_READ_BIT;
	if (src_tex->usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
		// Storage textures may also have been written by shaders.
		prior_access |= VK_ACCESS_SHADER_WRITE_BIT;
	}

	{ // Barrier from the previous accesses, changing the layout when the clear layout differs.
		VkImageMemoryBarrier pre_clear = {};
		pre_clear.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
		pre_clear.pNext = nullptr;
		pre_clear.srcAccessMask = prior_access;
		pre_clear.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
		pre_clear.oldLayout = src_tex->layout;
		pre_clear.newLayout = clear_layout;
		pre_clear.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		pre_clear.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		pre_clear.image = src_tex->image;
		pre_clear.subresourceRange = range;

		vkCmdPipelineBarrier(cmd, prior_stages, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &pre_clear);
	}

	VkClearColorValue clear_value;
	clear_value.float32[0] = p_color.r;
	clear_value.float32[1] = p_color.g;
	clear_value.float32[2] = p_color.b;
	clear_value.float32[3] = p_color.a;

	vkCmdClearColorImage(cmd, src_tex->image, clear_layout, &clear_value, 1, &range);

	{ // Barrier towards the accesses that follow the clear, restoring the layout when it was changed.
		const bool wants_compute = p_post_barrier.has_flag(BARRIER_MASK_COMPUTE);
		const bool wants_vertex = p_post_barrier.has_flag(BARRIER_MASK_VERTEX);
		const bool wants_fragment = p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT);
		const bool wants_transfer = p_post_barrier.has_flag(BARRIER_MASK_TRANSFER);

		uint32_t post_stages = 0;
		uint32_t post_access = 0;
		if (wants_compute) {
			post_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
		}
		if (wants_vertex) {
			post_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
		}
		if (wants_fragment) {
			post_stages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
		}
		if (wants_compute || wants_vertex || wants_fragment) {
			// Every shader stage contributes the same pair of access bits.
			post_access |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
		}
		if (wants_transfer) {
			post_stages |= VK_PIPELINE_STAGE_TRANSFER_BIT;
			post_access |= VK_ACCESS_TRANSFER_WRITE_BIT;
		}
		if (post_stages == 0) {
			// Nothing was requested: fall back to the bottom of the pipe.
			post_stages = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
		}

		VkImageMemoryBarrier post_clear = {};
		post_clear.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
		post_clear.pNext = nullptr;
		post_clear.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
		post_clear.dstAccessMask = post_access;
		post_clear.oldLayout = clear_layout;
		post_clear.newLayout = src_tex->layout;
		post_clear.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		post_clear.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		post_clear.image = src_tex->image;
		post_clear.subresourceRange = range;

		vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, post_stages, 0, 0, nullptr, 0, nullptr, 1, &post_clear);
	}

	// Track that the texture was written by a transfer during this frame.
	if (src_tex->used_in_frame != frames_drawn) {
		src_tex->used_in_frame = frames_drawn;
		src_tex->used_in_raster = false;
		src_tex->used_in_compute = false;
	}
	src_tex->used_in_transfer = true;

	return OK;
}
// Reports whether `p_format` can be used for every usage requested in `p_usage`,
// based on the format features returned by the physical device.
bool RenderingDeviceVulkan::texture_is_format_supported_for_usage(DataFormat p_format, BitField<RenderingDevice::TextureUsageBits> p_usage) const {
	ERR_FAIL_INDEX_V(p_format, DATA_FORMAT_MAX, false);

	_THREAD_SAFE_METHOD_

	// Query what the device can do with this format.
	VkFormatProperties format_properties;
	vkGetPhysicalDeviceFormatProperties(context->get_physical_device(), vulkan_formats[p_format], &format_properties);

	// CPU-readable textures are checked against the linear tiling features, everything else against optimal tiling.
	const VkFormatFeatureFlags supported_features = p_usage.has_flag(TEXTURE_USAGE_CPU_READ_BIT) ? format_properties.linearTilingFeatures : format_properties.optimalTilingFeatures;

	// Each requested usage needs its matching format feature bit.
	struct UsageRequirement {
		TextureUsageBits usage;
		VkFormatFeatureFlags feature;
	};
	const UsageRequirement requirements[] = {
		{ TEXTURE_USAGE_SAMPLING_BIT, VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT },
		{ TEXTURE_USAGE_COLOR_ATTACHMENT_BIT, VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT },
		{ TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT },
		{ TEXTURE_USAGE_STORAGE_BIT, VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT },
		{ TEXTURE_USAGE_STORAGE_ATOMIC_BIT, VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT },
	};
	for (const UsageRequirement &requirement : requirements) {
		if (p_usage.has_flag(requirement.usage) && (supported_features & requirement.feature) == 0) {
			return false;
		}
	}

	// Validation via VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR fails if VRS attachment is not supported,
	// so the VRS usage is only checked against the expected format instead.
	const bool wants_vrs = p_usage.has_flag(TEXTURE_USAGE_VRS_ATTACHMENT_BIT);
	return !(wants_vrs && p_format != DATA_FORMAT_R8_UINT);
}
/********************/
/**** ATTACHMENT ****/
/********************/
// Builds a VkRenderPass for the given attachment formats and subpass layout.
//
// p_attachments: attachment formats. Entries whose usage_flags equal AttachmentFormat::UNUSED_ATTACHMENT
//                are skipped and remapped to VK_ATTACHMENT_UNUSED.
// p_passes: one FramebufferPass per subpass. Must contain at least one pass.
// p_initial_action / p_final_action: select load/store ops and initial/final layouts for attachments
//                without the depth-stencil usage bit.
// p_initial_depth_action / p_final_depth_action: same, for attachments with the depth-stencil usage bit.
// p_view_count: number of views. Values above 1 require multiview support from the context.
// r_samples: optional output, receives one TextureSamples entry per subpass.
//
// Returns VK_NULL_HANDLE if validation fails or if vkCreateRenderPass2KHR returns an error.
VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentFormat> &p_attachments, const Vector<FramebufferPass> &p_passes, InitialAction p_initial_action, FinalAction p_final_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, uint32_t p_view_count, Vector<TextureSamples> *r_samples) {
	// The code below indexes p_passes[0] and p_passes[p_passes.size() - 1], so an empty pass list can't be handled.
	ERR_FAIL_COND_V_MSG(p_passes.size() == 0, VK_NULL_HANDLE, "At least one pass is required to create a render pass.");

	// Set up dependencies from/to external equivalent to the default (implicit) one, and then amend them.
	const VkAccessFlags default_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
			VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
			VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | // From Section 7.1 of Vulkan API Spec v1.1.148.
			VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR;

	VkPipelineStageFlags reading_stages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
	// NOTE: These external dependencies are amended below but are currently not passed to Vulkan
	// (see the commented-out assignment near the render pass create info).
	VkSubpassDependency2KHR dependencies[2] = {
		{ VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR, nullptr, VK_SUBPASS_EXTERNAL, 0, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, default_access_mask, 0, 0 },
		{ VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR, nullptr, 0, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, default_access_mask, 0, 0, 0 }
	};
	VkSubpassDependency2KHR &dependency_from_external = dependencies[0];
	VkSubpassDependency2KHR &dependency_to_external = dependencies[1];
	LocalVector<int32_t> attachment_last_pass;
	attachment_last_pass.resize(p_attachments.size());

	if (p_view_count > 1) {
		const VulkanContext::MultiviewCapabilities capabilities = context->get_multiview_capabilities();

		// This only works with multiview!
		ERR_FAIL_COND_V_MSG(!capabilities.is_supported, VK_NULL_HANDLE, "Multiview not supported");

		// Make sure we limit this to the number of views we support.
		ERR_FAIL_COND_V_MSG(p_view_count > capabilities.max_view_count, VK_NULL_HANDLE, "Hardware does not support requested number of views for Multiview render pass");
	}

	// These are only used if we use multiview but we need to define them in scope.
	const uint32_t view_mask = (1 << p_view_count) - 1;
	const uint32_t correlation_mask = (1 << p_view_count) - 1;

	Vector<VkAttachmentDescription2KHR> attachments;
	Vector<int> attachment_remap;

	for (int i = 0; i < p_attachments.size(); i++) {
		// Set this for every attachment (skipped ones included) so the per-pass checks below never read an unset entry.
		attachment_last_pass[i] = -1;

		if (p_attachments[i].usage_flags == AttachmentFormat::UNUSED_ATTACHMENT) {
			attachment_remap.push_back(VK_ATTACHMENT_UNUSED);
			continue;
		}

		ERR_FAIL_INDEX_V(p_attachments[i].format, DATA_FORMAT_MAX, VK_NULL_HANDLE);
		ERR_FAIL_INDEX_V(p_attachments[i].samples, TEXTURE_SAMPLES_MAX, VK_NULL_HANDLE);
		ERR_FAIL_COND_V_MSG(!(p_attachments[i].usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_INPUT_ATTACHMENT_BIT | TEXTURE_USAGE_VRS_ATTACHMENT_BIT)),
				VK_NULL_HANDLE, "Texture format for index (" + itos(i) + ") requires an attachment (color, depth-stencil, input or VRS) bit set.");

		VkAttachmentDescription2KHR description = {};
		description.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR;
		description.pNext = nullptr;
		description.flags = 0;
		description.format = vulkan_formats[p_attachments[i].format];
		description.samples = _ensure_supported_sample_count(p_attachments[i].samples);

		bool is_sampled = p_attachments[i].usage_flags & TEXTURE_USAGE_SAMPLING_BIT;
		bool is_storage = p_attachments[i].usage_flags & TEXTURE_USAGE_STORAGE_BIT;
		bool is_depth = p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;

		// We can setup a framebuffer where we write to our VRS texture to set it up.
		// We make the assumption here that if our texture is actually used as our VRS attachment,
		// it is used as such for each subpass. This is fairly certain seeing the restrictions on subpasses.
		bool is_vrs = (p_attachments[i].usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && i == p_passes[0].vrs_attachment;

		if (is_vrs) {
			// For VRS we only read, there is no writing to this texture.
			description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
			description.initialLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
			description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
		} else {
			// For each UNDEFINED, assume the prior use was a *read*, as we'd be discarding the output of a write.
			// Also, each UNDEFINED will do an immediate layout transition (write), s.t. we must ensure execution synchronization vs
			// the read. If this is a performance issue, one could track the actual last accessor of each resource, adding only that
			// stage.

			switch (is_depth ? p_initial_depth_action : p_initial_action) {
				case INITIAL_ACTION_CLEAR_REGION:
				case INITIAL_ACTION_CLEAR: {
					if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
						description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
						description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
					} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
						description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
						description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
						dependency_from_external.srcStageMask |= reading_stages;
					} else {
						description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
						description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; // Don't care what is there.
						dependency_from_external.srcStageMask |= reading_stages;
					}
				} break;
				case INITIAL_ACTION_KEEP: {
					if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
						description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
						description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
					} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
						description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
						description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
						dependency_from_external.srcStageMask |= reading_stages;
					} else {
						description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
						description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; // Don't care what is there.
						dependency_from_external.srcStageMask |= reading_stages;
					}
				} break;
				case INITIAL_ACTION_DROP: {
					if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
						description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
						description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
					} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
						description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
						description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; // Don't care what is there.
						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
						dependency_from_external.srcStageMask |= reading_stages;
					} else {
						description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
						description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; // Don't care what is there.
						dependency_from_external.srcStageMask |= reading_stages;
					}
				} break;
				case INITIAL_ACTION_CLEAR_REGION_CONTINUE:
				case INITIAL_ACTION_CONTINUE: {
					if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
						description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
						description.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
					} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
						description.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
						description.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
					} else {
						description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
						description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; // Don't care what is there.
						dependency_from_external.srcStageMask |= reading_stages;
					}
				} break;
				default: {
					ERR_FAIL_V(VK_NULL_HANDLE); // Should never reach here.
				}
			}
		}

		// Determine whether this attachment is referenced by the last pass; if not, its final action is forced to discard.
		bool used_last = false;

		{
			int last_pass = p_passes.size() - 1;

			if (is_depth) {
				// Likely missing depth resolve?
				if (p_passes[last_pass].depth_attachment == i) {
					used_last = true;
				}
			} else if (is_vrs) {
				if (p_passes[last_pass].vrs_attachment == i) {
					used_last = true;
				}
			} else {
				if (p_passes[last_pass].resolve_attachments.size()) {
					// If using resolve attachments, check resolve attachments.
					for (int j = 0; j < p_passes[last_pass].resolve_attachments.size(); j++) {
						if (p_passes[last_pass].resolve_attachments[j] == i) {
							used_last = true;
							break;
						}
					}
				}
				if (!used_last) {
					for (int j = 0; j < p_passes[last_pass].color_attachments.size(); j++) {
						if (p_passes[last_pass].color_attachments[j] == i) {
							used_last = true;
							break;
						}
					}
				}
			}

			if (!used_last) {
				for (int j = 0; j < p_passes[last_pass].preserve_attachments.size(); j++) {
					if (p_passes[last_pass].preserve_attachments[j] == i) {
						used_last = true;
						break;
					}
				}
			}
		}

		FinalAction final_action = p_final_action;
		FinalAction final_depth_action = p_final_depth_action;

		if (!used_last) {
			if (is_depth) {
				final_depth_action = FINAL_ACTION_DISCARD;
			} else {
				final_action = FINAL_ACTION_DISCARD;
			}
		}

		if (is_vrs) {
			// We don't change our VRS texture during this process.

			description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
			description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
			description.finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;

			// TODO: Do we need to update our external dependency?
			// update_external_dependency_for_store(dependency_to_external, is_sampled, is_storage, false);
		} else {
			switch (is_depth ? final_depth_action : final_action) {
				case FINAL_ACTION_READ: {
					if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
						description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
						description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
						description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
						update_external_dependency_for_store(dependency_to_external, is_sampled, is_storage, false);
					} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
						description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
						description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
						description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
						update_external_dependency_for_store(dependency_to_external, is_sampled, is_storage, true);
					} else {
						// NOTE(review): This branch assigns the *load* ops while the equivalent branches of the other
						// final actions assign the *store* ops; the store ops keep their zero-initialized values here.
						// Possibly a copy-paste slip -- confirm the intended store op before changing it.
						description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
						description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
						description.finalLayout = VK_IMAGE_LAYOUT_UNDEFINED; // Don't care what is there.
						// TODO: What does this mean about the next usage (and thus appropriate dependency masks.
					}
				} break;
				case FINAL_ACTION_DISCARD: {
					if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
						description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
						description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
						description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
					} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
						description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
						description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
						description.finalLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
					} else {
						description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
						description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
						description.finalLayout = VK_IMAGE_LAYOUT_UNDEFINED; // Don't care what is there.
					}
				} break;
				case FINAL_ACTION_CONTINUE: {
					if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
						description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
						description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
						description.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
					} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
						description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
						description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
						description.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
					} else {
						description.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
						description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
						description.finalLayout = VK_IMAGE_LAYOUT_UNDEFINED; // Don't care what is there.
					}

				} break;
				default: {
					ERR_FAIL_V(VK_NULL_HANDLE); // Should never reach here.
				}
			}
		}

		// Map the caller's attachment index to its index in the compacted Vulkan attachment list.
		attachment_remap.push_back(attachments.size());
		attachments.push_back(description);
	}

	// Per-subpass storage. These arrays must stay alive until the render pass is created,
	// since the subpass descriptions point into them.
	LocalVector<VkSubpassDescription2KHR> subpasses;
	LocalVector<LocalVector<VkAttachmentReference2KHR>> color_reference_array;
	LocalVector<LocalVector<VkAttachmentReference2KHR>> input_reference_array;
	LocalVector<LocalVector<VkAttachmentReference2KHR>> resolve_reference_array;
	LocalVector<LocalVector<uint32_t>> preserve_reference_array;
	LocalVector<VkAttachmentReference2KHR> depth_reference_array;
	LocalVector<VkAttachmentReference2KHR> vrs_reference_array;
	LocalVector<VkFragmentShadingRateAttachmentInfoKHR> vrs_attachment_info_array;

	subpasses.resize(p_passes.size());
	color_reference_array.resize(p_passes.size());
	input_reference_array.resize(p_passes.size());
	resolve_reference_array.resize(p_passes.size());
	preserve_reference_array.resize(p_passes.size());
	depth_reference_array.resize(p_passes.size());
	vrs_reference_array.resize(p_passes.size());
	vrs_attachment_info_array.resize(p_passes.size());

	LocalVector<VkSubpassDependency2KHR> subpass_dependencies;

	for (int i = 0; i < p_passes.size(); i++) {
		const FramebufferPass *pass = &p_passes[i];

		LocalVector<VkAttachmentReference2KHR> &color_references = color_reference_array[i];

		TextureSamples texture_samples = TEXTURE_SAMPLES_1;
		bool is_multisample_first = true;
		void *subpass_nextptr = nullptr;

		// Color attachments.
		for (int j = 0; j < pass->color_attachments.size(); j++) {
			int32_t attachment = pass->color_attachments[j];
			VkAttachmentReference2KHR reference;
			reference.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
			reference.pNext = nullptr;
			if (attachment == FramebufferPass::ATTACHMENT_UNUSED) {
				reference.attachment = VK_ATTACHMENT_UNUSED;
				reference.layout = VK_IMAGE_LAYOUT_UNDEFINED;
			} else {
				ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), color attachment (" + itos(j) + ").");
				ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it's marked as depth, but it's not usable as color attachment.");
				ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass.");

				if (is_multisample_first) {
					texture_samples = p_attachments[attachment].samples;
					is_multisample_first = false;
				} else {
					ERR_FAIL_COND_V_MSG(texture_samples != p_attachments[attachment].samples, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), if an attachment is marked as multisample, all of them should be multisample and use the same number of samples.");
				}
				reference.attachment = attachment_remap[attachment];
				reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
				attachment_last_pass[attachment] = i;
			}
			reference.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
			color_references.push_back(reference);
		}

		// Input attachments.
		LocalVector<VkAttachmentReference2KHR> &input_references = input_reference_array[i];

		for (int j = 0; j < pass->input_attachments.size(); j++) {
			int32_t attachment = pass->input_attachments[j];
			VkAttachmentReference2KHR reference;
			reference.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
			reference.pNext = nullptr;
			if (attachment == FramebufferPass::ATTACHMENT_UNUSED) {
				reference.attachment = VK_ATTACHMENT_UNUSED;
				reference.layout = VK_IMAGE_LAYOUT_UNDEFINED;
			} else {
				ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), input attachment (" + itos(j) + ").");
				ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it isn't marked as an input texture.");
				ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass.");
				reference.attachment = attachment_remap[attachment];
				reference.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
				attachment_last_pass[attachment] = i;
			}
			reference.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
			input_references.push_back(reference);
		}

		// Resolve attachments.
		LocalVector<VkAttachmentReference2KHR> &resolve_references = resolve_reference_array[i];

		if (pass->resolve_attachments.size() > 0) {
			ERR_FAIL_COND_V_MSG(pass->resolve_attachments.size() != pass->color_attachments.size(), VK_NULL_HANDLE, "The amount of resolve attachments (" + itos(pass->resolve_attachments.size()) + ") must match the number of color attachments (" + itos(pass->color_attachments.size()) + ").");
			ERR_FAIL_COND_V_MSG(texture_samples == TEXTURE_SAMPLES_1, VK_NULL_HANDLE, "Resolve attachments specified, but color attachments are not multisample.");
		}
		for (int j = 0; j < pass->resolve_attachments.size(); j++) {
			int32_t attachment = pass->resolve_attachments[j];
			VkAttachmentReference2KHR reference;
			reference.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
			reference.pNext = nullptr;
			if (attachment == FramebufferPass::ATTACHMENT_UNUSED) {
				reference.attachment = VK_ATTACHMENT_UNUSED;
				reference.layout = VK_IMAGE_LAYOUT_UNDEFINED;
			} else {
				ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), resolve attachment (" + itos(j) + ").");
				ERR_FAIL_COND_V_MSG(pass->color_attachments[j] == FramebufferPass::ATTACHMENT_UNUSED, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), resolve attachment (" + itos(j) + "), the respective color attachment is marked as unused.");
				ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), resolve attachment, it isn't marked as a color texture.");
				ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass.");
				bool multisample = p_attachments[attachment].samples > TEXTURE_SAMPLES_1;
				ERR_FAIL_COND_V_MSG(multisample, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), resolve attachments can't be multisample.");
				reference.attachment = attachment_remap[attachment];
				reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; // VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
				attachment_last_pass[attachment] = i;
			}
			reference.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
			resolve_references.push_back(reference);
		}

		// Depth-stencil attachment.
		VkAttachmentReference2KHR &depth_stencil_reference = depth_reference_array[i];
		depth_stencil_reference.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
		depth_stencil_reference.pNext = nullptr;

		if (pass->depth_attachment != FramebufferPass::ATTACHMENT_UNUSED) {
			int32_t attachment = pass->depth_attachment;
			ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer depth format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), depth attachment.");
			ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT), VK_NULL_HANDLE, "Invalid framebuffer depth format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it's marked as depth, but it's not a depth attachment.");
			ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer depth format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass.");
			depth_stencil_reference.attachment = attachment_remap[attachment];
			depth_stencil_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
			depth_stencil_reference.aspectMask = VK_IMAGE_ASPECT_NONE;
			attachment_last_pass[attachment] = i;

			if (is_multisample_first) {
				texture_samples = p_attachments[attachment].samples;
				is_multisample_first = false;
			} else {
				ERR_FAIL_COND_V_MSG(texture_samples != p_attachments[attachment].samples, VK_NULL_HANDLE, "Invalid framebuffer depth format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), if an attachment is marked as multisample, all of them should be multisample and use the same number of samples including the depth.");
			}

		} else {
			depth_stencil_reference.attachment = VK_ATTACHMENT_UNUSED;
			depth_stencil_reference.layout = VK_IMAGE_LAYOUT_UNDEFINED;
		}

		// VRS attachment, chained into the subpass description through pNext when supported and requested.
		if (context->get_vrs_capabilities().attachment_vrs_supported && pass->vrs_attachment != FramebufferPass::ATTACHMENT_UNUSED) {
			int32_t attachment = pass->vrs_attachment;
			ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer VRS format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), VRS attachment.");
			ERR_FAIL_COND_V_MSG(!(p_attachments[attachment].usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT), VK_NULL_HANDLE, "Invalid framebuffer VRS format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it's marked as VRS, but it's not a VRS attachment.");
			ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer VRS attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass.");

			VkAttachmentReference2KHR &vrs_reference = vrs_reference_array[i];
			vrs_reference.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
			vrs_reference.pNext = nullptr;
			vrs_reference.attachment = attachment_remap[attachment];
			vrs_reference.layout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;
			vrs_reference.aspectMask = VK_IMAGE_ASPECT_NONE;

			Size2i texel_size = context->get_vrs_capabilities().texel_size;

			VkFragmentShadingRateAttachmentInfoKHR &vrs_attachment_info = vrs_attachment_info_array[i];
			vrs_attachment_info.sType = VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR;
			vrs_attachment_info.pNext = nullptr;
			vrs_attachment_info.pFragmentShadingRateAttachment = &vrs_reference;
			vrs_attachment_info.shadingRateAttachmentTexelSize = { uint32_t(texel_size.x), uint32_t(texel_size.y) };

			attachment_last_pass[attachment] = i;

			subpass_nextptr = &vrs_attachment_info;
		}

		// Preserve attachments.
		LocalVector<uint32_t> &preserve_references = preserve_reference_array[i];

		for (int j = 0; j < pass->preserve_attachments.size(); j++) {
			int32_t attachment = pass->preserve_attachments[j];

			ERR_FAIL_COND_V_MSG(attachment == FramebufferPass::ATTACHMENT_UNUSED, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + "). Preserve attachments can't be unused.");

			ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + ").");

			if (attachment_last_pass[attachment] != i) {
				// Preserve can still be used to keep depth or color from being discarded after use.
				attachment_last_pass[attachment] = i;
				preserve_references.push_back(attachment);
			}
		}

		VkSubpassDescription2KHR &subpass = subpasses[i];
		subpass.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR;
		subpass.pNext = subpass_nextptr;
		subpass.flags = 0;
		subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
		if (p_view_count == 1) {
			// VUID-VkSubpassDescription2-multiview-06558: If the multiview feature is not enabled, viewMask must be 0.
			subpass.viewMask = 0;
		} else {
			subpass.viewMask = view_mask;
		}
		subpass.inputAttachmentCount = input_references.size();
		if (input_references.size()) {
			subpass.pInputAttachments = input_references.ptr();
		} else {
			subpass.pInputAttachments = nullptr;
		}
		subpass.colorAttachmentCount = color_references.size();
		if (color_references.size()) {
			subpass.pColorAttachments = color_references.ptr();
		} else {
			subpass.pColorAttachments = nullptr;
		}
		if (depth_stencil_reference.attachment != VK_ATTACHMENT_UNUSED) {
			subpass.pDepthStencilAttachment = &depth_stencil_reference;
		} else {
			subpass.pDepthStencilAttachment = nullptr;
		}

		if (resolve_references.size()) {
			subpass.pResolveAttachments = resolve_references.ptr();
		} else {
			subpass.pResolveAttachments = nullptr;
		}

		subpass.preserveAttachmentCount = preserve_references.size();
		if (preserve_references.size()) {
			subpass.pPreserveAttachments = preserve_references.ptr();
		} else {
			subpass.pPreserveAttachments = nullptr;
		}

		if (r_samples) {
			r_samples->push_back(texture_samples);
		}

		if (i > 0) {
			// Chain each subpass to the previous one.
			VkSubpassDependency2KHR dependency;
			dependency.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR;
			dependency.pNext = nullptr;
			dependency.srcSubpass = i - 1;
			dependency.dstSubpass = i;
			dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
			dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
			dependency.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
			dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
			dependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
			dependency.viewOffset = 0;
			subpass_dependencies.push_back(dependency);
		}
		/*
		// NOTE: Big Mallet Approach -- any layout transition causes a full barrier.
		if (reference.layout != description.initialLayout) {
			// NOTE: This should be smarter based on the texture's knowledge of its previous role.
			dependency_from_external.srcStageMask |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
			dependency_from_external.srcAccessMask |= VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
		}
		if (reference.layout != description.finalLayout) {
			// NOTE: This should be smarter based on the texture's knowledge of its subsequent role.
			dependency_to_external.dstStageMask |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
			dependency_to_external.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
		}
		*/
	}

	VkRenderPassCreateInfo2KHR render_pass_create_info;
	render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR;
	render_pass_create_info.pNext = nullptr;
	render_pass_create_info.flags = 0;

	render_pass_create_info.attachmentCount = attachments.size();
	render_pass_create_info.pAttachments = attachments.ptr();
	render_pass_create_info.subpassCount = subpasses.size();
	render_pass_create_info.pSubpasses = subpasses.ptr();
	// Commenting this because it seems it just avoids raster and compute to work at the same time.
	// Other barriers seem to be protecting the render pass fine.
	// render_pass_create_info.dependencyCount = 2;
	// render_pass_create_info.pDependencies = dependencies;

	render_pass_create_info.dependencyCount = subpass_dependencies.size();
	if (subpass_dependencies.size()) {
		render_pass_create_info.pDependencies = subpass_dependencies.ptr();
	} else {
		render_pass_create_info.pDependencies = nullptr;
	}

	if (p_view_count == 1) {
		// VUID-VkRenderPassCreateInfo2-viewMask-03057: If the VkSubpassDescription2::viewMask member of all elements of pSubpasses is 0, correlatedViewMaskCount must be 0.
		render_pass_create_info.correlatedViewMaskCount = 0;
		render_pass_create_info.pCorrelatedViewMasks = nullptr;
	} else {
		render_pass_create_info.correlatedViewMaskCount = 1;
		render_pass_create_info.pCorrelatedViewMasks = &correlation_mask;
	}

	Vector<uint32_t> view_masks;
	VkRenderPassMultiviewCreateInfo render_pass_multiview_create_info;

	if ((p_view_count > 1) && !context->supports_renderpass2()) {
		// This is only required when using vkCreateRenderPass, we add it if vkCreateRenderPass2KHR is not supported
		// resulting this in being passed to our vkCreateRenderPass fallback.

		// Set view masks for each subpass.
		for (uint32_t i = 0; i < subpasses.size(); i++) {
			view_masks.push_back(view_mask);
		}

		render_pass_multiview_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO;
		render_pass_multiview_create_info.pNext = nullptr;
		render_pass_multiview_create_info.subpassCount = subpasses.size();
		render_pass_multiview_create_info.pViewMasks = view_masks.ptr();
		render_pass_multiview_create_info.dependencyCount = 0;
		render_pass_multiview_create_info.pViewOffsets = nullptr;
		render_pass_multiview_create_info.correlationMaskCount = 1;
		render_pass_multiview_create_info.pCorrelationMasks = &correlation_mask;

		render_pass_create_info.pNext = &render_pass_multiview_create_info;
	}

	VkRenderPass render_pass;
	VkResult res = context->vkCreateRenderPass2KHR(device, &render_pass_create_info, nullptr, &render_pass);
	ERR_FAIL_COND_V_MSG(res, VK_NULL_HANDLE, "vkCreateRenderPass2KHR failed with error " + itos(res) + ".");

	return render_pass;
}
// Builds a single-pass framebuffer format from a flat attachment list.
// An attachment flagged for depth/stencil usage becomes the pass' depth attachment
// (if several are flagged, the last one wins); every other attachment is appended,
// in order, to the pass' color attachments.
RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_create(const Vector<AttachmentFormat> &p_format, uint32_t p_view_count) {
	FramebufferPass single_pass;

	const int attachment_count = p_format.size();
	for (int idx = 0; idx < attachment_count; idx++) {
		const bool is_depth_stencil = (p_format[idx].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) != 0;
		if (!is_depth_stencil) {
			single_pass.color_attachments.push_back(idx);
			continue;
		}
		single_pass.depth_attachment = idx;
	}

	// The multipass variant does the actual work (caching + render pass creation).
	Vector<FramebufferPass> pass_list;
	pass_list.push_back(single_pass);
	return framebuffer_format_create_multipass(p_format, pass_list, p_view_count);
}
// Returns the framebuffer format ID matching the given attachments, passes and view count.
// Formats are cached by (attachments, passes, view count): an existing entry is returned as-is,
// otherwise a render pass is created, registered under a new ID and that ID is returned.
// Returns INVALID_ID when the render pass could not be created.
RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_create_multipass(const Vector<AttachmentFormat> &p_attachments, const Vector<FramebufferPass> &p_passes, uint32_t p_view_count) {
	_THREAD_SAFE_METHOD_

	FramebufferFormatKey key;
	key.attachments = p_attachments;
	key.passes = p_passes;
	key.view_count = p_view_count;

	const RBMap<FramebufferFormatKey, FramebufferFormatID>::Element *cached = framebuffer_format_cache.find(key);
	if (cached != nullptr) {
		// Already known, reuse it.
		return cached->get();
	}

	// Actions don't matter for this use case.
	Vector<TextureSamples> samples;
	VkRenderPass render_pass = _render_pass_create(p_attachments, p_passes, INITIAL_ACTION_CLEAR, FINAL_ACTION_READ, INITIAL_ACTION_CLEAR, FINAL_ACTION_READ, p_view_count, &samples);
	if (render_pass == VK_NULL_HANDLE) {
		// Was likely invalid.
		return INVALID_ID;
	}

	// The ID is the cache size *before* insertion, tagged with the framebuffer-format ID type.
	const FramebufferFormatID type_bits = FramebufferFormatID(ID_TYPE_FRAMEBUFFER_FORMAT) << FramebufferFormatID(ID_BASE_SHIFT);
	FramebufferFormatID id = FramebufferFormatID(framebuffer_format_cache.size()) | type_bits;
	const RBMap<FramebufferFormatKey, FramebufferFormatID>::Element *inserted = framebuffer_format_cache.insert(key, id);

	FramebufferFormat fb_format;
	fb_format.E = inserted;
	fb_format.render_pass = render_pass;
	fb_format.pass_samples = samples;
	fb_format.view_count = p_view_count;
	framebuffer_formats[id] = fb_format;
	return id;
}
// Returns the framebuffer format ID used by framebuffers that have no attachments.
// The format is cached under a key holding a single default FramebufferPass; when it is
// not cached yet, an attachment-less render pass with one graphics subpass is created.
// Returns INVALID_ID when the render pass could not be created.
// NOTE(review): p_samples is only stored when the format is first created; the cache key
// does not include it, so later calls with a different sample count get the cached format.
RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_create_empty(TextureSamples p_samples) {
	// Guard the shared format cache, like framebuffer_format_create_multipass() does.
	_THREAD_SAFE_METHOD_

	FramebufferFormatKey key;
	key.passes.push_back(FramebufferPass());

	const RBMap<FramebufferFormatKey, FramebufferFormatID>::Element *E = framebuffer_format_cache.find(key);
	if (E) {
		// Exists, return.
		return E->get();
	}

	VkSubpassDescription2KHR subpass;
	subpass.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR;
	subpass.pNext = nullptr;
	subpass.flags = 0;
	subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
	subpass.viewMask = 0;
	subpass.inputAttachmentCount = 0; // Unsupported for now.
	subpass.pInputAttachments = nullptr;
	subpass.colorAttachmentCount = 0;
	subpass.pColorAttachments = nullptr;
	subpass.pDepthStencilAttachment = nullptr;
	subpass.pResolveAttachments = nullptr;
	subpass.preserveAttachmentCount = 0;
	subpass.pPreserveAttachments = nullptr;

	VkRenderPassCreateInfo2KHR render_pass_create_info;
	render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR;
	render_pass_create_info.pNext = nullptr;
	render_pass_create_info.flags = 0;
	render_pass_create_info.attachmentCount = 0;
	render_pass_create_info.pAttachments = nullptr;
	render_pass_create_info.subpassCount = 1;
	render_pass_create_info.pSubpasses = &subpass;
	render_pass_create_info.dependencyCount = 0;
	render_pass_create_info.pDependencies = nullptr;
	render_pass_create_info.correlatedViewMaskCount = 0;
	render_pass_create_info.pCorrelatedViewMasks = nullptr;

	VkRenderPass render_pass;
	VkResult res = context->vkCreateRenderPass2KHR(device, &render_pass_create_info, nullptr, &render_pass);
	// Report failure with INVALID_ID (not 0) so callers comparing against INVALID_ID notice it.
	ERR_FAIL_COND_V_MSG(res, INVALID_ID, "vkCreateRenderPass2KHR for empty fb failed with error " + itos(res) + ".");

	if (render_pass == VK_NULL_HANDLE) { // Was likely invalid.
		return INVALID_ID;
	}

	// The ID is the cache size before insertion, tagged with the framebuffer-format ID type.
	FramebufferFormatID id = FramebufferFormatID(framebuffer_format_cache.size()) | (FramebufferFormatID(ID_TYPE_FRAMEBUFFER_FORMAT) << FramebufferFormatID(ID_BASE_SHIFT));
	E = framebuffer_format_cache.insert(key, id);

	FramebufferFormat fb_format;
	fb_format.E = E;
	fb_format.render_pass = render_pass;
	fb_format.pass_samples.push_back(p_samples);
	framebuffer_formats[id] = fb_format;
	return id;
}
// Looks up the sample count recorded for pass p_pass of the given framebuffer format.
// Falls back to TEXTURE_SAMPLES_1 (after reporting an error) when the format is unknown
// or the pass index is out of range.
RenderingDevice::TextureSamples RenderingDeviceVulkan::framebuffer_format_get_texture_samples(FramebufferFormatID p_format, uint32_t p_pass) {
	HashMap<FramebufferFormatID, FramebufferFormat>::Iterator E = framebuffer_formats.find(p_format);
	ERR_FAIL_COND_V(!E, TEXTURE_SAMPLES_1);
	ERR_FAIL_COND_V(p_pass >= uint32_t(E->value.pass_samples.size()), TEXTURE_SAMPLES_1);

	const Vector<TextureSamples> &samples_per_pass = E->value.pass_samples;
	return samples_per_pass[p_pass];
}
/***********************/
/**** RENDER TARGET ****/
/***********************/
// Creates a framebuffer without any attachments, of the given size.
// The format comes from framebuffer_format_create_empty(); when p_format_check is not
// INVALID_FORMAT_ID, it must match that format. Returns an empty RID on any failure.
RID RenderingDeviceVulkan::framebuffer_create_empty(const Size2i &p_size, TextureSamples p_samples, FramebufferFormatID p_format_check) {
	_THREAD_SAFE_METHOD_

	Framebuffer framebuffer;
	framebuffer.format_id = framebuffer_format_create_empty(p_samples);
	// Never register a framebuffer whose format could not be created.
	ERR_FAIL_COND_V_MSG(framebuffer.format_id == INVALID_FORMAT_ID, RID(), "Failed to create the empty framebuffer format.");
	ERR_FAIL_COND_V(p_format_check != INVALID_FORMAT_ID && framebuffer.format_id != p_format_check, RID());
	framebuffer.size = p_size;
	framebuffer.view_count = 1;

	RID id = framebuffer_owner.make_rid(framebuffer);
#ifdef DEV_ENABLED
	set_resource_name(id, "RID:" + itos(id.get_id()));
#endif
	return id;
}
// Creates a single-pass framebuffer from a list of texture RIDs.
// Each valid texture is classified by its usage flags: depth/stencil, VRS, resolve buffer,
// or plain color. RIDs that do not resolve to a texture are recorded as unused color slots.
// Every valid texture must have exactly p_view_count layers.
RID RenderingDeviceVulkan::framebuffer_create(const Vector<RID> &p_texture_attachments, FramebufferFormatID p_format_check, uint32_t p_view_count) {
	_THREAD_SAFE_METHOD_

	FramebufferPass pass;

	const int attachment_count = p_texture_attachments.size();
	for (int i = 0; i < attachment_count; i++) {
		Texture *texture = texture_owner.get_or_null(p_texture_attachments[i]);
		ERR_FAIL_COND_V_MSG(texture && texture->layers != p_view_count, RID(), "Layers of our texture doesn't match view count for this framebuffer");

		if (!texture) {
			// Keep the slot so attachment indices stay aligned with the input list.
			pass.color_attachments.push_back(int32_t(FramebufferPass::ATTACHMENT_UNUSED));
			continue;
		}

		if (texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
			pass.depth_attachment = i;
		} else if (texture->usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) {
			pass.vrs_attachment = i;
		} else if (texture->is_resolve_buffer) {
			pass.resolve_attachments.push_back(i);
		} else {
			pass.color_attachments.push_back(i);
		}
	}

	Vector<FramebufferPass> pass_list;
	pass_list.push_back(pass);
	return framebuffer_create_multipass(p_texture_attachments, pass_list, p_format_check, p_view_count);
}
// Creates a framebuffer for the given textures and passes.
// Builds one AttachmentFormat per texture (invalid RIDs become unused attachments), checks that
// all textures share the size of the first valid one (VRS attachments after the first are exempt),
// resolves the matching framebuffer format and registers the framebuffer, which is then made
// dependent on every valid attachment. Returns an empty RID on failure.
RID RenderingDeviceVulkan::framebuffer_create_multipass(const Vector<RID> &p_texture_attachments, const Vector<FramebufferPass> &p_passes, FramebufferFormatID p_format_check, uint32_t p_view_count) {
	_THREAD_SAFE_METHOD_

	const int attachment_count = p_texture_attachments.size();

	Vector<AttachmentFormat> attachments;
	attachments.resize(attachment_count);

	Size2i size;
	bool size_set = false;
	for (int i = 0; i < attachment_count; i++) {
		AttachmentFormat af;
		Texture *texture = texture_owner.get_or_null(p_texture_attachments[i]);
		if (texture) {
			ERR_FAIL_COND_V_MSG(texture->layers != p_view_count, RID(), "Layers of our texture doesn't match view count for this framebuffer");

			const bool is_vrs = (texture->usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) != 0;
			if (!size_set) {
				// The first valid texture defines the framebuffer size.
				size.width = texture->width;
				size.height = texture->height;
				size_set = true;
			} else if (!is_vrs) {
				// If this is not the first attachment, a VRS-flagged texture is assumed to be the VRS attachment.
				// In that case the texture will be 1/16th the size of the color attachment, so only
				// non-VRS textures go through the size check.
				ERR_FAIL_COND_V_MSG((uint32_t)size.width != texture->width || (uint32_t)size.height != texture->height, RID(),
						"All textures in a framebuffer should be the same size.");
			}

			af.format = texture->format;
			af.samples = texture->samples;
			af.usage_flags = texture->usage_flags;
		} else {
			af.usage_flags = AttachmentFormat::UNUSED_ATTACHMENT;
		}
		attachments.write[i] = af;
	}

	ERR_FAIL_COND_V_MSG(!size_set, RID(), "All attachments unused.");

	FramebufferFormatID format_id = framebuffer_format_create_multipass(attachments, p_passes, p_view_count);
	if (format_id == INVALID_ID) {
		return RID();
	}

	ERR_FAIL_COND_V_MSG(p_format_check != INVALID_ID && format_id != p_format_check, RID(),
			"The format used to check this framebuffer differs from the intended framebuffer format.");

	Framebuffer framebuffer;
	framebuffer.view_count = p_view_count;
	framebuffer.size = size;
	framebuffer.texture_ids = p_texture_attachments;
	framebuffer.format_id = format_id;

	RID id = framebuffer_owner.make_rid(framebuffer);
#ifdef DEV_ENABLED
	set_resource_name(id, "RID:" + itos(id.get_id()));
#endif

	// Register a dependency between the framebuffer and each valid attachment.
	for (int i = 0; i < attachment_count; i++) {
		const RID &attachment = p_texture_attachments[i];
		if (!attachment.is_valid()) {
			continue;
		}
		_add_dependency(id, attachment);
	}

	return id;
}
// Returns the format ID stored in the given framebuffer, or INVALID_ID (with an error)
// when the RID does not refer to a framebuffer.
RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_get_format(RID p_framebuffer) {
	_THREAD_SAFE_METHOD_

	const Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_framebuffer);
	ERR_FAIL_COND_V(!framebuffer, INVALID_ID);

	const FramebufferFormatID stored_format = framebuffer->format_id;
	return stored_format;
}
// Tells whether p_framebuffer is a RID currently owned by the framebuffer owner.
bool RenderingDeviceVulkan::framebuffer_is_valid(RID p_framebuffer) const {
	_THREAD_SAFE_METHOD_

	const bool is_owned = framebuffer_owner.owns(p_framebuffer);
	return is_owned;
}
// Stores an invalidation callback and its user data on the given framebuffer.
// Reports an error and does nothing when the RID is not a framebuffer.
void RenderingDeviceVulkan::framebuffer_set_invalidation_callback(RID p_framebuffer, InvalidationCallback p_callback, void *p_userdata) {
	_THREAD_SAFE_METHOD_

	Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_framebuffer);
	ERR_FAIL_COND(!framebuffer);

	// Both fields are set together so the callback always sees its matching user data.
	framebuffer->invalidated_callback_userdata = p_userdata;
	framebuffer->invalidated_callback = p_callback;
}
/*****************/
/**** SAMPLER ****/
/*****************/
// Creates a Vulkan sampler from a SamplerState description and returns its RID.
// Enum members of p_state that index lookup tables are range-checked first; an out-of-range
// value, or a vkCreateSampler failure, yields an empty RID.
// Anisotropy is only enabled when requested AND reported as supported by the physical device.
RID RenderingDeviceVulkan::sampler_create(const SamplerState &p_state) {
	_THREAD_SAFE_METHOD_

	// Validate all table indices up front (same order as they are consumed below).
	ERR_FAIL_INDEX_V(p_state.repeat_u, SAMPLER_REPEAT_MODE_MAX, RID());
	ERR_FAIL_INDEX_V(p_state.repeat_v, SAMPLER_REPEAT_MODE_MAX, RID());
	ERR_FAIL_INDEX_V(p_state.repeat_w, SAMPLER_REPEAT_MODE_MAX, RID());
	ERR_FAIL_INDEX_V(p_state.compare_op, COMPARE_OP_MAX, RID());
	ERR_FAIL_INDEX_V(p_state.border_color, SAMPLER_BORDER_COLOR_MAX, RID());

	const bool linear_mag = p_state.mag_filter == SAMPLER_FILTER_LINEAR;
	const bool linear_min = p_state.min_filter == SAMPLER_FILTER_LINEAR;
	const bool linear_mip = p_state.mip_filter == SAMPLER_FILTER_LINEAR;

	VkSamplerCreateInfo info;
	info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
	info.pNext = nullptr;
	info.flags = 0;

	// Filtering.
	info.magFilter = linear_mag ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
	info.minFilter = linear_min ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
	info.mipmapMode = linear_mip ? VK_SAMPLER_MIPMAP_MODE_LINEAR : VK_SAMPLER_MIPMAP_MODE_NEAREST;

	// Addressing.
	info.addressModeU = address_modes[p_state.repeat_u];
	info.addressModeV = address_modes[p_state.repeat_v];
	info.addressModeW = address_modes[p_state.repeat_w];
	info.borderColor = sampler_border_colors[p_state.border_color];
	info.unnormalizedCoordinates = p_state.unnormalized_uvw;

	// LOD.
	info.mipLodBias = p_state.lod_bias;
	info.minLod = p_state.min_lod;
	info.maxLod = p_state.max_lod;

	// Anisotropy.
	info.anisotropyEnable = p_state.use_anisotropy && context->get_physical_device_features().samplerAnisotropy;
	info.maxAnisotropy = p_state.anisotropy_max;

	// Comparison.
	info.compareEnable = p_state.enable_compare;
	info.compareOp = compare_operators[p_state.compare_op];

	VkSampler sampler;
	VkResult res = vkCreateSampler(device, &info, nullptr, &sampler);
	ERR_FAIL_COND_V_MSG(res, RID(), "vkCreateSampler failed with error " + itos(res) + ".");

	RID id = sampler_owner.make_rid(sampler);
#ifdef DEV_ENABLED
	set_resource_name(id, "RID:" + itos(id.get_id()));
#endif
	return id;
}
// Reports whether textures of p_format can be sampled with the given filter.
// Nearest filtering is always accepted; linear filtering requires the format's optimal-tiling
// features to include VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; any other filter is rejected.
bool RenderingDeviceVulkan::sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_sampler_filter) const {
	ERR_FAIL_INDEX_V(p_format, DATA_FORMAT_MAX, false);

	_THREAD_SAFE_METHOD_

	// Validate that this image is supported for the intended filtering.
	VkFormatProperties properties;
	vkGetPhysicalDeviceFormatProperties(context->get_physical_device(), vulkan_formats[p_format], &properties);

	if (p_sampler_filter == RD::SAMPLER_FILTER_NEAREST) {
		return true;
	}
	if (p_sampler_filter != RD::SAMPLER_FILTER_LINEAR) {
		return false;
	}
	return (properties.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT) != 0;
}
/**********************/
/**** VERTEX ARRAY ****/
/**********************/
// Allocates a vertex buffer of p_size_bytes and optionally uploads initial data.
// When p_data is not empty it must be exactly p_size_bytes long; it is copied into the buffer and
// a transfer-write -> vertex-attribute-read barrier is issued for the uploaded range.
// p_use_as_storage additionally flags the buffer as a storage buffer.
RID RenderingDeviceVulkan::vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data, bool p_use_as_storage) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID());

	const uint32_t base_usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
	const uint32_t usage = p_use_as_storage ? (base_usage | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) : base_usage;

	Buffer buffer;
	_buffer_allocate(&buffer, p_size_bytes, usage, VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, 0);

	if (!p_data.is_empty()) {
		const uint64_t upload_size = p_data.size();
		_buffer_update(&buffer, 0, p_data.ptr(), upload_size);
		_buffer_memory_barrier(buffer.buffer, 0, upload_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, false);
	}

	RID id = vertex_buffer_owner.make_rid(buffer);
#ifdef DEV_ENABLED
	set_resource_name(id, "RID:" + itos(id.get_id()));
#endif
	return id;
}
// Returns the vertex format ID for a list of vertex attributes, creating and caching it on first use.
// Identical descriptions always map to the same cached ID.
// NOTE(review): the previous comment stated the ID is "internally reference counted" and must be
// freed as many times as it was allocated; no reference counting is visible in this function.
//
// Attribute i is bound to binding i. Returns INVALID_ID when two attributes share a location or
// when an attribute's data format has no vertex size. Attributes whose format is out of range
// are reported and skipped.
RenderingDevice::VertexFormatID RenderingDeviceVulkan::vertex_format_create(const Vector<VertexAttribute> &p_vertex_formats) {
	_THREAD_SAFE_METHOD_

	VertexDescriptionKey key;
	key.vertex_formats = p_vertex_formats;

	VertexFormatID *idptr = vertex_format_cache.getptr(key);
	if (idptr) {
		return *idptr;
	}

	// Validate the whole description BEFORE allocating anything, so that an early
	// failure cannot leak the binding/attribute arrays allocated below.
	HashSet<int> used_locations;
	for (int i = 0; i < p_vertex_formats.size(); i++) {
		ERR_CONTINUE(p_vertex_formats[i].format >= DATA_FORMAT_MAX);
		ERR_FAIL_COND_V(used_locations.has(p_vertex_formats[i].location), INVALID_ID);

		ERR_FAIL_COND_V_MSG(get_format_vertex_size(p_vertex_formats[i].format) == 0, INVALID_ID,
				"Data format for attachment (" + itos(i) + "), '" + named_formats[p_vertex_formats[i].format] + "', is not valid for a vertex array.");

		used_locations.insert(p_vertex_formats[i].location);
	}

	// Does not exist, create one and cache it.
	VertexDescriptionCache vdcache;
	vdcache.bindings = memnew_arr(VkVertexInputBindingDescription, p_vertex_formats.size());
	vdcache.attributes = memnew_arr(VkVertexInputAttributeDescription, p_vertex_formats.size());

	for (int i = 0; i < p_vertex_formats.size(); i++) {
		if (p_vertex_formats[i].format >= DATA_FORMAT_MAX) {
			// Already reported during validation. Zero the entry instead of leaving it
			// uninitialized, since the arrays are referenced in full by create_info.
			vdcache.bindings[i] = VkVertexInputBindingDescription{};
			vdcache.attributes[i] = VkVertexInputAttributeDescription{};
			continue;
		}

		vdcache.bindings[i].binding = i;
		vdcache.bindings[i].stride = p_vertex_formats[i].stride;
		vdcache.bindings[i].inputRate = p_vertex_formats[i].frequency == VERTEX_FREQUENCY_INSTANCE ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
		vdcache.attributes[i].binding = i;
		vdcache.attributes[i].location = p_vertex_formats[i].location;
		vdcache.attributes[i].format = vulkan_formats[p_vertex_formats[i].format];
		vdcache.attributes[i].offset = p_vertex_formats[i].offset;
	}

	vdcache.create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
	vdcache.create_info.pNext = nullptr;
	vdcache.create_info.flags = 0;

	vdcache.create_info.vertexAttributeDescriptionCount = p_vertex_formats.size();
	vdcache.create_info.pVertexAttributeDescriptions = vdcache.attributes;

	vdcache.create_info.vertexBindingDescriptionCount = p_vertex_formats.size();
	vdcache.create_info.pVertexBindingDescriptions = vdcache.bindings;
	vdcache.vertex_formats = p_vertex_formats;

	// The ID is the cache size before insertion, tagged with the vertex-format ID type.
	VertexFormatID id = VertexFormatID(vertex_format_cache.size()) | (VertexFormatID(ID_TYPE_VERTEX_FORMAT) << ID_BASE_SHIFT);
	vertex_format_cache[key] = id;
	vertex_formats[id] = vdcache;
	return id;
}
// Creates a vertex array binding one vertex buffer per attribute of p_vertex_format.
// p_src_buffers must hold exactly one vertex buffer RID per attribute. p_offsets is either empty
// (all offsets are zero) or holds one offset per buffer.
// Per-vertex attributes are checked to fit p_vertex_count vertices inside their buffer; per-instance
// attributes limit vertex_array.max_instances_allowed to what their buffer can hold.
// The resulting array depends on every source buffer. Returns an empty RID on failure.
RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const Vector<RID> &p_src_buffers, const Vector<uint64_t> &p_offsets) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V(!vertex_formats.has(p_vertex_format), RID());
	const VertexDescriptionCache &vd = vertex_formats[p_vertex_format];

	ERR_FAIL_COND_V(vd.vertex_formats.size() != p_src_buffers.size(), RID());

	for (int i = 0; i < p_src_buffers.size(); i++) {
		ERR_FAIL_COND_V(!vertex_buffer_owner.owns(p_src_buffers[i]), RID());
	}

	VertexArray vertex_array;

	if (p_offsets.is_empty()) {
		vertex_array.offsets.resize_zeroed(p_src_buffers.size());
	} else {
		ERR_FAIL_COND_V(p_offsets.size() != p_src_buffers.size(), RID());
		vertex_array.offsets = p_offsets;
	}

	vertex_array.vertex_count = p_vertex_count;
	vertex_array.description = p_vertex_format;
	vertex_array.max_instances_allowed = 0xFFFFFFFF; // By default as many as you want.
	for (int i = 0; i < p_src_buffers.size(); i++) {
		Buffer *buffer = vertex_buffer_owner.get_or_null(p_src_buffers[i]);

		// Validate with buffer.
		{
			const VertexAttribute &atf = vd.vertex_formats[i];

			uint32_t element_size = get_format_vertex_size(atf.format);
			ERR_FAIL_COND_V(element_size == 0, RID()); // Should never happens since this was prevalidated.

			if (atf.frequency == VERTEX_FREQUENCY_VERTEX) {
				// Validate size for regular drawing.
				uint64_t total_size = uint64_t(atf.stride) * (p_vertex_count - 1) + atf.offset + element_size;
				ERR_FAIL_COND_V_MSG(total_size > buffer->size, RID(),
						"Attachment (" + itos(i) + ") will read past the end of the buffer.");

			} else {
				// Validate size for instances drawing.
				// The offset is checked first: subtracting an offset larger than the buffer
				// would wrap around and make the buffer look huge.
				ERR_FAIL_COND_V_MSG(atf.offset > buffer->size || uint64_t(buffer->size - atf.offset) < element_size, RID(),
						"Attachment (" + itos(i) + ") uses instancing, but it's just too small.");

				uint64_t available = buffer->size - atf.offset;

				// A zero stride re-reads the same element for every instance, so it puts no
				// limit on the instance count (and must not be used as a divisor).
				if (atf.stride > 0) {
					uint32_t instances_allowed = available / atf.stride;
					vertex_array.max_instances_allowed = MIN(instances_allowed, vertex_array.max_instances_allowed);
				}
			}
		}

		vertex_array.buffers.push_back(buffer->buffer);
	}

	RID id = vertex_array_owner.make_rid(vertex_array);
	for (int i = 0; i < p_src_buffers.size(); i++) {
		_add_dependency(id, p_src_buffers[i]);
	}

	return id;
}
// Allocates an index buffer holding p_index_count indices of the given format (16 or 32 bit)
// and optionally uploads initial data, which must match the computed byte size.
// In DEBUG_ENABLED builds with initial data, max_index is the largest index found (restart
// indices are ignored when p_use_restart_indices is set); otherwise it is 0xFFFFFFFF.
RID RenderingDeviceVulkan::index_buffer_create(uint32_t p_index_count, IndexBufferFormat p_format, const Vector<uint8_t> &p_data, bool p_use_restart_indices) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V(p_index_count == 0, RID());

	const bool is_16_bit = (p_format == INDEX_BUFFER_FORMAT_UINT16);

	IndexBuffer index_buffer;
	index_buffer.index_type = is_16_bit ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
	index_buffer.supports_restart_indices = p_use_restart_indices;
	index_buffer.index_count = p_index_count;
	uint32_t size_bytes = p_index_count * (is_16_bit ? 2 : 4);
#ifdef DEBUG_ENABLED
	if (p_data.size()) {
		index_buffer.max_index = 0;
		ERR_FAIL_COND_V_MSG((uint32_t)p_data.size() != size_bytes, RID(),
				"Default index buffer initializer array size (" + itos(p_data.size()) + ") does not match format required size (" + itos(size_bytes) + ").");
		const uint8_t *r = p_data.ptr();
		if (is_16_bit) {
			const uint16_t *index16 = (const uint16_t *)r;
			for (uint32_t i = 0; i < p_index_count; i++) {
				const bool is_restart = p_use_restart_indices && index16[i] == 0xFFFF;
				if (!is_restart) { // Restart indices are ignored.
					index_buffer.max_index = MAX(index16[i], index_buffer.max_index);
				}
			}
		} else {
			const uint32_t *index32 = (const uint32_t *)r;
			for (uint32_t i = 0; i < p_index_count; i++) {
				const bool is_restart = p_use_restart_indices && index32[i] == 0xFFFFFFFF;
				if (!is_restart) { // Restart indices are ignored.
					index_buffer.max_index = MAX(index32[i], index_buffer.max_index);
				}
			}
		}
	} else {
		index_buffer.max_index = 0xFFFFFFFF;
	}
#else
	index_buffer.max_index = 0xFFFFFFFF;
#endif
	_buffer_allocate(&index_buffer, size_bytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, 0);

	if (!p_data.is_empty()) {
		const uint64_t upload_size = p_data.size();
		_buffer_update(&index_buffer, 0, p_data.ptr(), upload_size);
		_buffer_memory_barrier(index_buffer.buffer, 0, upload_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_INDEX_READ_BIT, false);
	}

	RID id = index_buffer_owner.make_rid(index_buffer);
#ifdef DEV_ENABLED
	set_resource_name(id, "RID:" + itos(id.get_id()));
#endif
	return id;
}
// Creates an index array covering p_index_count indices of p_index_buffer, starting at p_index_offset.
// The range must be non-empty and lie entirely inside the index buffer; the array inherits the
// buffer's max index, index type and restart-index support, and depends on the buffer.
// Returns an empty RID on failure.
RID RenderingDeviceVulkan::index_array_create(RID p_index_buffer, uint32_t p_index_offset, uint32_t p_index_count) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V(!index_buffer_owner.owns(p_index_buffer), RID());

	IndexBuffer *index_buffer = index_buffer_owner.get_or_null(p_index_buffer);

	ERR_FAIL_COND_V(p_index_count == 0, RID());
	// Written without "offset + count" so that a huge offset cannot wrap around in 32 bits
	// and slip past the bounds check.
	ERR_FAIL_COND_V(p_index_offset > index_buffer->index_count || p_index_count > index_buffer->index_count - p_index_offset, RID());

	IndexArray index_array;
	index_array.max_index = index_buffer->max_index;
	index_array.buffer = index_buffer->buffer;
	index_array.offset = p_index_offset;
	index_array.indices = p_index_count;
	index_array.index_type = index_buffer->index_type;
	index_array.supports_restart_indices = index_buffer->supports_restart_indices;

	RID id = index_array_owner.make_rid(index_array);
	_add_dependency(id, p_index_buffer);
	return id;
}
/****************/
/**** SHADER ****/
/****************/
// Display names for uniform types, indexed by the uniform type value (used in debug output).
static const char *shader_uniform_names[RenderingDevice::UNIFORM_TYPE_MAX] = {
	"Sampler",
	"CombinedSampler",
	"Texture",
	"Image",
	"TextureBuffer",
	"SamplerTextureBuffer",
	"ImageBuffer",
	"UniformBuffer",
	"StorageBuffer",
	"InputAttachment"
};
// Vulkan stage flag for each shader stage, indexed by stage index.
// Callers in this file test bit `i` of a stage mask and, when set, OR in shader_stage_masks[i].
static VkShaderStageFlagBits shader_stage_masks[RenderingDevice::SHADER_STAGE_MAX] = {
VK_SHADER_STAGE_VERTEX_BIT,
VK_SHADER_STAGE_FRAGMENT_BIT,
VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
VK_SHADER_STAGE_COMPUTE_BIT,
};
// Builds a multi-line, human-readable listing of a shader's uniforms (one line per uniform).
// When p_set is >= 0 only that set is listed; a negative p_set lists every set.
// Returns an empty string (with an error) when p_shader is not a valid shader.
String RenderingDeviceVulkan::_shader_uniform_debug(RID p_shader, int p_set) {
	String ret;
	const Shader *shader = shader_owner.get_or_null(p_shader);
	ERR_FAIL_COND_V(!shader, String());

	const int set_count = shader->sets.size();
	for (int set_idx = 0; set_idx < set_count; set_idx++) {
		const bool filtered_out = p_set >= 0 && set_idx != p_set;
		if (filtered_out) {
			continue;
		}

		const int uniform_count = shader->sets[set_idx].uniform_info.size();
		for (int uniform_idx = 0; uniform_idx < uniform_count; uniform_idx++) {
			const UniformInfo &ui = shader->sets[set_idx].uniform_info[uniform_idx];

			// Lines are separated by, not terminated with, a newline.
			if (!ret.is_empty()) {
				ret += "\n";
			}

			const String line = "Set: " + itos(set_idx) + " Binding: " + itos(ui.binding) + " Type: " + shader_uniform_names[ui.type] + " Writable: " + (ui.writable ? "Y" : "N") + " Length: " + itos(ui.length);
			ret += line;
		}
	}
	return ret;
}
// Version 1: initial.
// Version 2: Added shader name.
// Version 3: Added writable.
#define SHADER_BINARY_VERSION 3

// Returns the cache key for shader binaries produced by this backend: a fixed "Vulkan-SV"
// prefix followed by SHADER_BINARY_VERSION, so bumping the version changes the key.
String RenderingDeviceVulkan::shader_get_binary_cache_key() const {
	const String version_suffix = itos(SHADER_BINARY_VERSION);
	return "Vulkan-SV" + version_suffix;
}
// One uniform binding as stored in the shader binary.
// Instances are copied byte-for-byte (memcpy) into the binary and read back through a pointer cast,
// so the field order and sizes are part of the binary format.
struct RenderingDeviceVulkanShaderBinaryDataBinding {
uint32_t type; // Uniform type, stored as its integer value.
uint32_t binding; // Binding index inside the set.
uint32_t stages; // Stage mask of the uniform; bit k selects shader_stage_masks[k] when loading.
uint32_t length; // Size of arrays (in total elements), or ubos (in bytes * total elements).
uint32_t writable; // Writable flag from reflection, stored as an integer.
};
// One specialization constant as stored in the shader binary.
// Instances are copied byte-for-byte (memcpy) into the binary, so the layout is part of the format.
struct RenderingDeviceVulkanShaderBinarySpecializationConstant {
uint32_t type; // Constant type, stored as its integer value.
uint32_t constant_id; // Specialization constant ID.
// Default value; the writer in this file always stores it through int_value.
union {
uint32_t int_value;
float float_value;
bool bool_value;
};
uint32_t stage_flags; // Stage mask of the constant, stored as an integer.
};
// Fixed-size header of the shader binary, written right after the magic, version and header-size words.
// It is copied byte-for-byte (memcpy) into the binary and read back through a pointer cast,
// so the field order and sizes are part of the binary format.
struct RenderingDeviceVulkanShaderBinaryData {
uint32_t vertex_input_mask; // Copied from SPIR-V reflection.
uint32_t fragment_output_mask; // Copied from SPIR-V reflection.
uint32_t specialization_constants_count; // Number of specialization constant records in the binary.
uint32_t is_compute; // Non-zero for compute shaders.
uint32_t compute_local_size[3]; // Copied from SPIR-V reflection.
uint32_t set_count; // Number of uniform sets serialized after the shader name.
uint32_t push_constant_size; // Copied from SPIR-V reflection.
uint32_t push_constant_vk_stages_mask; // OR of the shader_stage_masks entries for every stage using push constants.
uint32_t stage_count; // Number of compiled stages stored at the end of the binary.
uint32_t shader_name_len; // Length of the UTF-8 shader name; the name is padded to a multiple of 4 bytes in the binary.
};
// Serializes a set of SPIR-V stages, plus their reflection data, into a "GSBD" shader binary.
//
// Binary layout, in order:
//   - "GSBD" magic, SHADER_BINARY_VERSION, sizeof(RenderingDeviceVulkanShaderBinaryData).
//   - The RenderingDeviceVulkanShaderBinaryData header.
//   - The UTF-8 shader name, padded to a multiple of 4 bytes.
//   - Per uniform set: the binding count, then that many RenderingDeviceVulkanShaderBinaryDataBinding.
//   - Every RenderingDeviceVulkanShaderBinarySpecializationConstant.
//   - Per stage: stage id, SMOL-V size, zstd size (0 = zstd not used), then the payload padded to 4 bytes.
//
// Each stage is SMOL-V encoded and then zstd compressed; zstd is only kept when it is actually smaller.
// Returns an empty vector when reflection fails, when there are more uniform sets than the hardware
// supports, when a stage cannot be encoded, or when the written size does not match the computed size.
Vector<uint8_t> RenderingDeviceVulkan::shader_compile_binary_from_spirv(const Vector<ShaderStageSPIRVData> &p_spirv, const String &p_shader_name) {
	SpirvReflectionData spirv_data;
	if (_reflect_spirv(p_spirv, spirv_data) != OK) {
		return Vector<uint8_t>();
	}

	ERR_FAIL_COND_V_MSG((uint32_t)spirv_data.uniforms.size() > limits.maxBoundDescriptorSets, Vector<uint8_t>(),
			"Number of uniform sets is larger than what is supported by the hardware (" + itos(limits.maxBoundDescriptorSets) + ").");

	// Collect reflection data into binary data.
	// Value-initialized on purpose: the struct is copied verbatim into the binary and
	// push_constant_vk_stages_mask is accumulated with |=.
	RenderingDeviceVulkanShaderBinaryData binary_data{};
	Vector<Vector<RenderingDeviceVulkanShaderBinaryDataBinding>> uniform_info; // Set bindings.
	Vector<RenderingDeviceVulkanShaderBinarySpecializationConstant> specialization_constants;
	{
		binary_data.vertex_input_mask = spirv_data.vertex_input_mask;
		binary_data.fragment_output_mask = spirv_data.fragment_output_mask;
		binary_data.is_compute = spirv_data.is_compute;
		binary_data.compute_local_size[0] = spirv_data.compute_local_size[0];
		binary_data.compute_local_size[1] = spirv_data.compute_local_size[1];
		binary_data.compute_local_size[2] = spirv_data.compute_local_size[2];
		binary_data.push_constant_size = spirv_data.push_constant_size;
		for (uint32_t i = 0; i < SHADER_STAGE_MAX; i++) {
			if (spirv_data.push_constant_stages_mask.has_flag((ShaderStage)(1 << i))) {
				binary_data.push_constant_vk_stages_mask |= shader_stage_masks[i];
			}
		}

		for (const Vector<SpirvReflectionData::Uniform> &spirv_set : spirv_data.uniforms) {
			Vector<RenderingDeviceVulkanShaderBinaryDataBinding> set_bindings;
			for (const SpirvReflectionData::Uniform &spirv_uniform : spirv_set) {
				RenderingDeviceVulkanShaderBinaryDataBinding binding{};
				binding.type = (uint32_t)spirv_uniform.type;
				binding.binding = spirv_uniform.binding;
				binding.stages = (uint32_t)spirv_uniform.stages_mask;
				binding.length = spirv_uniform.length;
				binding.writable = (uint32_t)spirv_uniform.writable;
				set_bindings.push_back(binding);
			}
			uniform_info.push_back(set_bindings);
		}

		for (const SpirvReflectionData::SpecializationConstant &spirv_sc : spirv_data.specialization_constants) {
			RenderingDeviceVulkanShaderBinarySpecializationConstant spec_constant{};
			spec_constant.type = (uint32_t)spirv_sc.type;
			spec_constant.constant_id = spirv_sc.constant_id;
			spec_constant.int_value = spirv_sc.int_value;
			spec_constant.stage_flags = (uint32_t)spirv_sc.stages_mask;
			specialization_constants.push_back(spec_constant);
		}
	}

	Vector<Vector<uint8_t>> compressed_stages;
	Vector<uint32_t> smolv_size;
	Vector<uint32_t> zstd_size; // If 0, zstd not used.

	uint32_t stages_binary_size = 0;

	bool strip_debug = false;

	for (int i = 0; i < p_spirv.size(); i++) {
		smolv::ByteArray smolv;
		if (!smolv::Encode(p_spirv[i].spir_v.ptr(), p_spirv[i].spir_v.size(), smolv, strip_debug ? smolv::kEncodeFlagStripDebugInfo : 0)) {
			ERR_FAIL_V_MSG(Vector<uint8_t>(), "Error compressing shader stage :" + String(shader_stage_names[p_spirv[i].shader_stage]));
		} else {
			smolv_size.push_back(smolv.size());
			{ // zstd.
				Vector<uint8_t> zstd;
				zstd.resize(Compression::get_max_compressed_buffer_size(smolv.size(), Compression::MODE_ZSTD));
				int dst_size = Compression::compress(zstd.ptrw(), &smolv[0], smolv.size(), Compression::MODE_ZSTD);

				if (dst_size > 0 && (uint32_t)dst_size < smolv.size()) {
					zstd_size.push_back(dst_size);
					zstd.resize(dst_size);
					compressed_stages.push_back(zstd);
				} else {
					// zstd did not help: store the raw SMOL-V payload instead.
					Vector<uint8_t> smv;
					smv.resize(smolv.size());
					memcpy(smv.ptrw(), &smolv[0], smolv.size());
					zstd_size.push_back(0); // Not using zstd.
					compressed_stages.push_back(smv);
				}
			}
		}
		// Each stage payload is padded to a multiple of 4 bytes in the binary.
		uint32_t s = compressed_stages[i].size();
		if (s % 4 != 0) {
			s += 4 - (s % 4);
		}
		stages_binary_size += s;
	}

	// Counts are taken from the vectors that are actually serialized below.
	binary_data.specialization_constants_count = specialization_constants.size();
	binary_data.set_count = uniform_info.size();
	binary_data.stage_count = p_spirv.size();

	CharString shader_name_utf = p_shader_name.utf8();

	binary_data.shader_name_len = shader_name_utf.length();

	uint32_t total_size = sizeof(uint32_t) * 3; // Header + version + main datasize;.
	total_size += sizeof(RenderingDeviceVulkanShaderBinaryData);

	total_size += binary_data.shader_name_len;

	if ((binary_data.shader_name_len % 4) != 0) { // Alignment rules are really strange.
		total_size += 4 - (binary_data.shader_name_len % 4);
	}

	for (int i = 0; i < uniform_info.size(); i++) {
		total_size += sizeof(uint32_t);
		total_size += uniform_info[i].size() * sizeof(RenderingDeviceVulkanShaderBinaryDataBinding);
	}

	total_size += sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant) * specialization_constants.size();

	total_size += compressed_stages.size() * sizeof(uint32_t) * 3; // Sizes.
	total_size += stages_binary_size;

	Vector<uint8_t> ret;
	// Zero the buffer: the alignment padding after the name and after each stage is never
	// written below, and must not leak uninitialized bytes into the (cached) binary.
	ret.resize_zeroed(total_size);
	{
		uint32_t offset = 0;
		uint8_t *binptr = ret.ptrw();
		binptr[0] = 'G';
		binptr[1] = 'S';
		binptr[2] = 'B';
		binptr[3] = 'D'; // Godot Shader Binary Data.
		offset += 4;
		encode_uint32(SHADER_BINARY_VERSION, binptr + offset);
		offset += sizeof(uint32_t);
		encode_uint32(sizeof(RenderingDeviceVulkanShaderBinaryData), binptr + offset);
		offset += sizeof(uint32_t);
		memcpy(binptr + offset, &binary_data, sizeof(RenderingDeviceVulkanShaderBinaryData));
		offset += sizeof(RenderingDeviceVulkanShaderBinaryData);
		memcpy(binptr + offset, shader_name_utf.ptr(), binary_data.shader_name_len);
		offset += binary_data.shader_name_len;

		if ((binary_data.shader_name_len % 4) != 0) { // Alignment rules are really strange.
			offset += 4 - (binary_data.shader_name_len % 4);
		}

		for (int i = 0; i < uniform_info.size(); i++) {
			int count = uniform_info[i].size();
			encode_uint32(count, binptr + offset);
			offset += sizeof(uint32_t);
			if (count > 0) {
				memcpy(binptr + offset, uniform_info[i].ptr(), sizeof(RenderingDeviceVulkanShaderBinaryDataBinding) * count);
				offset += sizeof(RenderingDeviceVulkanShaderBinaryDataBinding) * count;
			}
		}

		if (specialization_constants.size()) {
			memcpy(binptr + offset, specialization_constants.ptr(), sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant) * specialization_constants.size());
			offset += sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant) * specialization_constants.size();
		}

		for (int i = 0; i < compressed_stages.size(); i++) {
			encode_uint32(p_spirv[i].shader_stage, binptr + offset);
			offset += sizeof(uint32_t);
			encode_uint32(smolv_size[i], binptr + offset);
			offset += sizeof(uint32_t);
			encode_uint32(zstd_size[i], binptr + offset);
			offset += sizeof(uint32_t);
			memcpy(binptr + offset, compressed_stages[i].ptr(), compressed_stages[i].size());

			uint32_t s = compressed_stages[i].size();

			if (s % 4 != 0) {
				s += 4 - (s % 4);
			}

			offset += s;
		}

		// Sanity check: everything accounted for in total_size must have been written.
		ERR_FAIL_COND_V(offset != (uint32_t)ret.size(), Vector<uint8_t>());
	}

	return ret;
}
// Creates a shader from a serialized shader binary and registers it in shader_owner.
//
// The binary is consumed in this order:
//   - 4-byte magic "GSBD", uint32 version (must equal SHADER_BINARY_VERSION), uint32 header size.
//   - A RenderingDeviceVulkanShaderBinaryData header.
//   - The optional shader name (UTF-8), padded to a multiple of 4 bytes.
//   - For every set: a uint32 binding count followed by that many RenderingDeviceVulkanShaderBinaryDataBinding.
//   - The specialization constants.
//   - For every stage: uint32 stage, uint32 smolv size, uint32 zstd size, then the payload padded to 4 bytes.
//
// Every region is bounds-checked before it is read, so a truncated or corrupt binary fails cleanly.
// Returns an empty RID if the binary is malformed or any Vulkan object could not be created; Vulkan
// objects created before the failure are destroyed.
RID RenderingDeviceVulkan::shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary) {
	const uint8_t *binptr = p_shader_binary.ptr();
	uint32_t binsize = p_shader_binary.size();

	uint32_t read_offset = 0;
	// Consistency check.
	ERR_FAIL_COND_V(binsize < sizeof(uint32_t) * 3 + sizeof(RenderingDeviceVulkanShaderBinaryData), RID());
	ERR_FAIL_COND_V(binptr[0] != 'G' || binptr[1] != 'S' || binptr[2] != 'B' || binptr[3] != 'D', RID());

	uint32_t bin_version = decode_uint32(binptr + 4);
	ERR_FAIL_COND_V(bin_version != SHADER_BINARY_VERSION, RID());

	uint32_t bin_data_size = decode_uint32(binptr + 8);

	const RenderingDeviceVulkanShaderBinaryData &binary_data = *(reinterpret_cast<const RenderingDeviceVulkanShaderBinaryData *>(binptr + 12));

	Shader::PushConstant push_constant;
	push_constant.size = binary_data.push_constant_size;
	push_constant.vk_stages_mask = binary_data.push_constant_vk_stages_mask;

	uint32_t vertex_input_mask = binary_data.vertex_input_mask;

	uint32_t fragment_output_mask = binary_data.fragment_output_mask;

	bool is_compute = binary_data.is_compute;

	const uint32_t compute_local_size[3] = { binary_data.compute_local_size[0], binary_data.compute_local_size[1], binary_data.compute_local_size[2] };

	read_offset += sizeof(uint32_t) * 3 + bin_data_size;

	String name;

	if (binary_data.shader_name_len) {
		// The name must lie entirely inside the buffer before it is parsed.
		ERR_FAIL_COND_V(uint64_t(read_offset) + binary_data.shader_name_len > binsize, RID());
		name.parse_utf8((const char *)(binptr + read_offset), binary_data.shader_name_len);
		read_offset += binary_data.shader_name_len;
		if ((binary_data.shader_name_len % 4) != 0) { // The name is padded so the following data stays 4-byte aligned.
			read_offset += 4 - (binary_data.shader_name_len % 4);
		}
	}

	Vector<Vector<VkDescriptorSetLayoutBinding>> set_bindings;
	Vector<Vector<UniformInfo>> uniform_info;

	set_bindings.resize(binary_data.set_count);
	uniform_info.resize(binary_data.set_count);

	for (uint32_t i = 0; i < binary_data.set_count; i++) {
		ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) >= binsize, RID());
		uint32_t set_count = decode_uint32(binptr + read_offset);
		read_offset += sizeof(uint32_t);
		const RenderingDeviceVulkanShaderBinaryDataBinding *set_ptr = reinterpret_cast<const RenderingDeviceVulkanShaderBinaryDataBinding *>(binptr + read_offset);
		// Computed in 64 bits so a huge binding count cannot wrap around and slip past the bounds check.
		uint64_t set_size = uint64_t(set_count) * sizeof(RenderingDeviceVulkanShaderBinaryDataBinding);
		ERR_FAIL_COND_V(read_offset + set_size >= binsize, RID());

		for (uint32_t j = 0; j < set_count; j++) {
			UniformInfo info;
			info.type = UniformType(set_ptr[j].type);
			info.writable = set_ptr[j].writable;
			info.length = set_ptr[j].length;
			info.binding = set_ptr[j].binding;
			info.stages = set_ptr[j].stages;

			VkDescriptorSetLayoutBinding layout_binding;
			layout_binding.pImmutableSamplers = nullptr;
			layout_binding.binding = set_ptr[j].binding;
			layout_binding.descriptorCount = 1;
			layout_binding.stageFlags = 0;
			// Translate the per-stage bit mask stored in the binary into Vulkan stage flags.
			for (uint32_t k = 0; k < SHADER_STAGE_MAX; k++) {
				if (set_ptr[j].stages & (1 << k)) {
					layout_binding.stageFlags |= shader_stage_masks[k];
				}
			}

			// For the array-capable types the stored length is the descriptor count;
			// the remaining types keep the default count of 1.
			switch (info.type) {
				case UNIFORM_TYPE_SAMPLER: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
					layout_binding.descriptorCount = set_ptr[j].length;
				} break;
				case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
					layout_binding.descriptorCount = set_ptr[j].length;
				} break;
				case UNIFORM_TYPE_TEXTURE: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
					layout_binding.descriptorCount = set_ptr[j].length;
				} break;
				case UNIFORM_TYPE_IMAGE: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
					layout_binding.descriptorCount = set_ptr[j].length;
				} break;
				case UNIFORM_TYPE_TEXTURE_BUFFER: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
					layout_binding.descriptorCount = set_ptr[j].length;
				} break;
				case UNIFORM_TYPE_IMAGE_BUFFER: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
				} break;
				case UNIFORM_TYPE_UNIFORM_BUFFER: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
				} break;
				case UNIFORM_TYPE_STORAGE_BUFFER: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
				} break;
				case UNIFORM_TYPE_INPUT_ATTACHMENT: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
				} break;
				default: {
					ERR_FAIL_V(RID());
				}
			}

			set_bindings.write[i].push_back(layout_binding);
			uniform_info.write[i].push_back(info);
		}

		// Safe to narrow: the check above guarantees the sum stays below binsize.
		read_offset += uint32_t(set_size);
	}

	ERR_FAIL_COND_V(read_offset + binary_data.specialization_constants_count * sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant) >= binsize, RID());

	Vector<Shader::SpecializationConstant> specialization_constants;

	for (uint32_t i = 0; i < binary_data.specialization_constants_count; i++) {
		const RenderingDeviceVulkanShaderBinarySpecializationConstant &src_sc = *(reinterpret_cast<const RenderingDeviceVulkanShaderBinarySpecializationConstant *>(binptr + read_offset));
		Shader::SpecializationConstant sc;
		sc.constant.int_value = src_sc.int_value;
		sc.constant.type = PipelineSpecializationConstantType(src_sc.type);
		sc.constant.constant_id = src_sc.constant_id;
		sc.stage_flags = src_sc.stage_flags;
		specialization_constants.push_back(sc);

		read_offset += sizeof(RenderingDeviceVulkanShaderBinarySpecializationConstant);
	}

	Vector<Vector<uint8_t>> stage_spirv_data;
	Vector<ShaderStage> stage_type;

	for (uint32_t i = 0; i < binary_data.stage_count; i++) {
		ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) * 3 >= binsize, RID());
		uint32_t stage = decode_uint32(binptr + read_offset);
		read_offset += sizeof(uint32_t);
		uint32_t smolv_size = decode_uint32(binptr + read_offset);
		read_offset += sizeof(uint32_t);
		uint32_t zstd_size = decode_uint32(binptr + read_offset);
		read_offset += sizeof(uint32_t);

		// The stage value indexes shader_stage_names and shader_stage_masks below, so reject anything out of range.
		ERR_FAIL_COND_V(stage >= SHADER_STAGE_MAX, RID());

		// Size of the payload as stored in the binary (compressed size when zstd was used).
		uint32_t buf_size = (zstd_size > 0) ? zstd_size : smolv_size;

		// Payloads are padded to 4 bytes. Validate the padded extent BEFORE reading any payload byte,
		// in 64 bits so neither the padding nor the addition can wrap around.
		uint64_t padded_buf_size = buf_size;
		if (padded_buf_size % 4 != 0) {
			padded_buf_size += 4 - (padded_buf_size % 4);
		}
		ERR_FAIL_COND_V(uint64_t(read_offset) + padded_buf_size > binsize, RID());

		Vector<uint8_t> smolv;
		const uint8_t *src_smolv = nullptr;

		if (zstd_size > 0) {
			// Decompress to smolv.
			smolv.resize(smolv_size);
			int dec_smolv_size = Compression::decompress(smolv.ptrw(), smolv.size(), binptr + read_offset, zstd_size, Compression::MODE_ZSTD);
			ERR_FAIL_COND_V(dec_smolv_size != (int32_t)smolv_size, RID());
			src_smolv = smolv.ptr();
		} else {
			src_smolv = binptr + read_offset;
		}

		Vector<uint8_t> spirv;
		uint32_t spirv_size = smolv::GetDecodedBufferSize(src_smolv, smolv_size);
		spirv.resize(spirv_size);
		if (!smolv::Decode(src_smolv, smolv_size, spirv.ptrw(), spirv_size)) {
			ERR_FAIL_V_MSG(RID(), "Malformed smolv input uncompressing shader stage:" + String(shader_stage_names[stage]));
		}
		stage_spirv_data.push_back(spirv);
		stage_type.push_back(ShaderStage(stage));

		// Safe to narrow: the check above guarantees the sum does not exceed binsize.
		read_offset += uint32_t(padded_buf_size);
	}

	// The whole binary must have been consumed, no more and no less.
	ERR_FAIL_COND_V(read_offset != binsize, RID());

	// All good, let's create modules.

	_THREAD_SAFE_METHOD_

	Shader shader;

	shader.vertex_input_mask = vertex_input_mask;
	shader.fragment_output_mask = fragment_output_mask;
	shader.push_constant = push_constant;
	shader.is_compute = is_compute;
	shader.compute_local_size[0] = compute_local_size[0];
	shader.compute_local_size[1] = compute_local_size[1];
	shader.compute_local_size[2] = compute_local_size[2];
	shader.specialization_constants = specialization_constants;
	shader.name = name;

	String error_text;

	// From here on failures are recorded in success/error_text instead of returning immediately,
	// so that the Vulkan objects created so far can be destroyed at the end.
	bool success = true;
	for (int i = 0; i < stage_spirv_data.size(); i++) {
		VkShaderModuleCreateInfo shader_module_create_info;
		shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
		shader_module_create_info.pNext = nullptr;
		shader_module_create_info.flags = 0;
		shader_module_create_info.codeSize = stage_spirv_data[i].size();
		const uint8_t *r = stage_spirv_data[i].ptr();
		shader_module_create_info.pCode = (const uint32_t *)r;

		VkShaderModule module;
		VkResult res = vkCreateShaderModule(device, &shader_module_create_info, nullptr, &module);
		if (res) {
			success = false;
			error_text = "Error (" + itos(res) + ") creating shader module for stage: " + String(shader_stage_names[stage_type[i]]);
			break;
		}

		VkPipelineShaderStageCreateInfo shader_stage;
		shader_stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
		shader_stage.pNext = nullptr;
		shader_stage.flags = 0;
		shader_stage.stage = shader_stage_masks[stage_type[i]];
		shader_stage.module = module;
		shader_stage.pName = "main";
		shader_stage.pSpecializationInfo = nullptr;

		shader.pipeline_stages.push_back(shader_stage);
	}

	// Proceed to create descriptor sets.

	if (success) {
		for (int i = 0; i < set_bindings.size(); i++) {
			// Empty ones are fine if they were not used according to spec (binding count will be 0).
			VkDescriptorSetLayoutCreateInfo layout_create_info;
			layout_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
			layout_create_info.pNext = nullptr;
			layout_create_info.flags = 0;
			layout_create_info.bindingCount = set_bindings[i].size();
			layout_create_info.pBindings = set_bindings[i].ptr();

			VkDescriptorSetLayout layout;
			VkResult res = vkCreateDescriptorSetLayout(device, &layout_create_info, nullptr, &layout);
			if (res) {
				error_text = "Error (" + itos(res) + ") creating descriptor set layout for set " + itos(i);
				success = false;
				break;
			}

			Shader::Set set;
			set.descriptor_set_layout = layout;
			set.uniform_info = uniform_info[i];
			// Sort and hash.
			set.uniform_info.sort();

			uint32_t format = 0; // No format, default.

			if (set.uniform_info.size()) {
				// Has data, needs an actual format.
				UniformSetFormat usformat;
				usformat.uniform_info = set.uniform_info;
				RBMap<UniformSetFormat, uint32_t>::Element *E = uniform_set_format_cache.find(usformat);
				if (E) {
					format = E->get();
				} else {
					// First time this layout is seen: assign the next free format ID (IDs start at 1).
					format = uniform_set_format_cache.size() + 1;
					uniform_set_format_cache.insert(usformat, format);
				}
			}

			shader.sets.push_back(set);
			shader.set_formats.push_back(format);
		}
	}

	if (success) {
		// Create pipeline layout.
		VkPipelineLayoutCreateInfo pipeline_layout_create_info;
		pipeline_layout_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
		pipeline_layout_create_info.pNext = nullptr;
		pipeline_layout_create_info.flags = 0;
		pipeline_layout_create_info.setLayoutCount = shader.sets.size();

		Vector<VkDescriptorSetLayout> layouts;
		layouts.resize(shader.sets.size());

		for (int i = 0; i < layouts.size(); i++) {
			layouts.write[i] = shader.sets[i].descriptor_set_layout;
		}

		pipeline_layout_create_info.pSetLayouts = layouts.ptr();
		// Needs to be declared in this outer scope, otherwise it may not outlive its assignment
		// to pipeline_layout_create_info.
		VkPushConstantRange push_constant_range;
		if (push_constant.size) {
			push_constant_range.stageFlags = push_constant.vk_stages_mask;
			push_constant_range.offset = 0;
			push_constant_range.size = push_constant.size;

			pipeline_layout_create_info.pushConstantRangeCount = 1;
			pipeline_layout_create_info.pPushConstantRanges = &push_constant_range;
		} else {
			pipeline_layout_create_info.pushConstantRangeCount = 0;
			pipeline_layout_create_info.pPushConstantRanges = nullptr;
		}

		VkResult err = vkCreatePipelineLayout(device, &pipeline_layout_create_info, nullptr, &shader.pipeline_layout);

		if (err) {
			error_text = "Error (" + itos(err) + ") creating pipeline layout.";
			success = false;
		}
	}

	if (!success) {
		// Clean up if failed.
		for (int i = 0; i < shader.pipeline_stages.size(); i++) {
			vkDestroyShaderModule(device, shader.pipeline_stages[i].module, nullptr);
		}

		for (int i = 0; i < shader.sets.size(); i++) {
			vkDestroyDescriptorSetLayout(device, shader.sets[i].descriptor_set_layout, nullptr);
		}

		ERR_FAIL_V_MSG(RID(), error_text);
	}

	RID id = shader_owner.make_rid(shader);
#ifdef DEV_ENABLED
	set_resource_name(id, "RID:" + itos(id.get_id()));
#endif
	return id;
}
// Returns the vertex input mask stored in the shader identified by p_shader,
// or 0 (with an error report) when p_shader does not resolve to a shader.
uint32_t RenderingDeviceVulkan::shader_get_vertex_input_attribute_mask(RID p_shader) {
	_THREAD_SAFE_METHOD_

	// Resolve the RID; get_or_null() yields nullptr for an unknown shader.
	const Shader *shader = shader_owner.get_or_null(p_shader);
	ERR_FAIL_COND_V(!shader, 0);

	return shader->vertex_input_mask;
}
/******************/
/**** UNIFORMS ****/
/******************/
// Allocates a uniform buffer of p_size_bytes and, when p_data is not empty, uploads it.
// p_data must either be empty or hold exactly p_size_bytes bytes.
// Returns an empty RID when the size check or the allocation fails.
RID RenderingDeviceVulkan::uniform_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID());

	Buffer buffer;
	Error err = _buffer_allocate(
			&buffer,
			p_size_bytes,
			VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
			VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
			0);
	ERR_FAIL_COND_V(err != OK, RID());

	const uint64_t upload_size = p_data.size();
	if (upload_size) {
		// Copy the initial contents, then make the transfer write visible to uniform reads
		// in the vertex, fragment and compute stages.
		_buffer_update(&buffer, 0, p_data.ptr(), upload_size);
		_buffer_memory_barrier(
				buffer.buffer,
				0,
				upload_size,
				VK_PIPELINE_STAGE_TRANSFER_BIT,
				VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
				VK_ACCESS_TRANSFER_WRITE_BIT,
				VK_ACCESS_UNIFORM_READ_BIT,
				false);
	}

	RID id = uniform_buffer_owner.make_rid(buffer);
#ifdef DEV_ENABLED
	set_resource_name(id, "RID:" + itos(id.get_id()));
#endif
	return id;
}
// Allocates a storage buffer of p_size_bytes and, when p_data is not empty, uploads it.
// p_data must either be empty or hold exactly p_size_bytes bytes.
// STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT in p_usage additionally requests indirect-buffer usage.
// Returns an empty RID when the size check or the allocation fails.
RID RenderingDeviceVulkan::storage_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data, BitField<StorageBufferUsage> p_usage) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID());

	// Base usage: transfer source/destination plus storage buffer.
	uint32_t usage_bits = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
	if (p_usage.has_flag(STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT)) {
		usage_bits |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
	}

	Buffer buffer;
	Error err = _buffer_allocate(&buffer, p_size_bytes, usage_bits, VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, 0);
	ERR_FAIL_COND_V(err != OK, RID());

	const uint64_t upload_size = p_data.size();
	if (upload_size) {
		// Copy the initial contents, then make the transfer write visible to shader reads and
		// writes in the vertex, fragment and compute stages.
		_buffer_update(&buffer, 0, p_data.ptr(), upload_size);
		_buffer_memory_barrier(
				buffer.buffer,
				0,
				upload_size,
				VK_PIPELINE_STAGE_TRANSFER_BIT,
				VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
				VK_ACCESS_TRANSFER_WRITE_BIT,
				VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
				false);
	}

	return storage_buffer_owner.make_rid(buffer);
}
// Creates a texture (uniform texel) buffer holding p_size_elements elements of p_format,
// optionally uploads p_data, and creates the buffer view covering the whole buffer.
// p_data must either be empty or match the computed byte size.
// Returns an empty RID when the format has no element size, the size check fails,
// the allocation fails or the view cannot be created (the buffer is freed in that last case).
RID RenderingDeviceVulkan::texture_buffer_create(uint32_t p_size_elements, DataFormat p_format, const Vector<uint8_t> &p_data) {
	_THREAD_SAFE_METHOD_

	uint32_t element_size = get_format_vertex_size(p_format);
	ERR_FAIL_COND_V_MSG(element_size == 0, RID(), "Format requested is not supported for texture buffers");

	// Widen before multiplying so the total byte size is computed in 64 bits.
	uint64_t size_bytes = uint64_t(element_size) * p_size_elements;
	ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != size_bytes, RID());

	TextureBuffer texture_buffer;
	Error err = _buffer_allocate(
			&texture_buffer.buffer,
			size_bytes,
			VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
			VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
			0);
	ERR_FAIL_COND_V(err != OK, RID());

	const uint64_t upload_size = p_data.size();
	if (upload_size) {
		// Copy the initial contents, then make the transfer write visible to shader reads
		// in the vertex, fragment and compute stages.
		_buffer_update(&texture_buffer.buffer, 0, p_data.ptr(), upload_size);
		_buffer_memory_barrier(
				texture_buffer.buffer.buffer,
				0,
				upload_size,
				VK_PIPELINE_STAGE_TRANSFER_BIT,
				VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
				VK_ACCESS_TRANSFER_WRITE_BIT,
				VK_ACCESS_SHADER_READ_BIT,
				false);
	}

	// Describe a view over the entire buffer, interpreted with the requested format.
	VkBufferViewCreateInfo view_create_info;
	view_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
	view_create_info.pNext = nullptr;
	view_create_info.flags = 0;
	view_create_info.format = vulkan_formats[p_format];
	view_create_info.buffer = texture_buffer.buffer.buffer;
	view_create_info.offset = 0;
	view_create_info.range = size_bytes;

	texture_buffer.view = VK_NULL_HANDLE;

	VkResult view_result = vkCreateBufferView(device, &view_create_info, nullptr, &texture_buffer.view);
	if (view_result) {
		// Do not leak the buffer allocated above.
		_buffer_free(&texture_buffer.buffer);
		ERR_FAIL_V_MSG(RID(), "Unable to create buffer view, error " + itos(view_result) + ".");
	}

	// Allocate the view.
	RID id = texture_buffer_owner.make_rid(texture_buffer);
#ifdef DEV_ENABLED
	set_resource_name(id, "RID:" + itos(id.get_id()));
#endif
	return id;
}
// Returns a descriptor pool suitable for p_key with its usage counter incremented.
// An existing pool for the key is reused while its usage is below max_descriptors_per_pool;
// otherwise a new pool sized for max_descriptors_per_pool sets is created and registered.
// Returns nullptr (with an error report) if vkCreateDescriptorPool fails.
RenderingDeviceVulkan::DescriptorPool *RenderingDeviceVulkan::_descriptor_pool_allocate(const DescriptorPoolKey &p_key) {
	if (!descriptor_pools.has(p_key)) {
		descriptor_pools[p_key] = HashSet<DescriptorPool *>();
	}

	// Reuse the first pool for this key that still has room.
	for (DescriptorPool *candidate : descriptor_pools[p_key]) {
		if (candidate->usage < max_descriptors_per_pool) {
			candidate->usage++;
			return candidate;
		}
	}

	// Create a new one.
	DescriptorPool *pool = memnew(DescriptorPool);
	pool->usage = 0;

	VkDescriptorPoolCreateInfo descriptor_pool_create_info;
	descriptor_pool_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
	descriptor_pool_create_info.pNext = nullptr;
	descriptor_pool_create_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; // Can't think how somebody may NOT need this flag.
	descriptor_pool_create_info.maxSets = max_descriptors_per_pool;

	Vector<VkDescriptorPoolSize> sizes;

	// Vulkan wants the total number of descriptors of each type the pool may hand out, so every
	// type used by the key contributes (descriptors per set) * (sets per pool). Unused types are skipped.
	const auto add_pool_size = [&sizes, this](VkDescriptorType p_vk_type, uint32_t p_count_per_set) {
		if (p_count_per_set == 0) {
			return;
		}
		VkDescriptorPoolSize s;
		s.type = p_vk_type;
		s.descriptorCount = p_count_per_set * max_descriptors_per_pool;
		sizes.push_back(s);
	};

	add_pool_size(VK_DESCRIPTOR_TYPE_SAMPLER, p_key.uniform_type[UNIFORM_TYPE_SAMPLER]);
	add_pool_size(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, p_key.uniform_type[UNIFORM_TYPE_SAMPLER_WITH_TEXTURE]);
	add_pool_size(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, p_key.uniform_type[UNIFORM_TYPE_TEXTURE]);
	add_pool_size(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, p_key.uniform_type[UNIFORM_TYPE_IMAGE]);
	// Texture buffers and sampler-with-texture-buffers share the uniform texel buffer descriptor type.
	add_pool_size(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, p_key.uniform_type[UNIFORM_TYPE_TEXTURE_BUFFER] + p_key.uniform_type[UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER]);
	add_pool_size(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, p_key.uniform_type[UNIFORM_TYPE_IMAGE_BUFFER]);
	add_pool_size(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, p_key.uniform_type[UNIFORM_TYPE_UNIFORM_BUFFER]);
	add_pool_size(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, p_key.uniform_type[UNIFORM_TYPE_STORAGE_BUFFER]);
	add_pool_size(VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, p_key.uniform_type[UNIFORM_TYPE_INPUT_ATTACHMENT]);

	descriptor_pool_create_info.poolSizeCount = sizes.size();
	descriptor_pool_create_info.pPoolSizes = sizes.ptr();

	VkResult res = vkCreateDescriptorPool(device, &descriptor_pool_create_info, nullptr, &pool->pool);
	if (res) {
		// Release the bookkeeping object before reporting the failure.
		memdelete(pool);
		ERR_FAIL_COND_V_MSG(res, nullptr, "vkCreateDescriptorPool failed with error " + itos(res) + ".");
	}

	descriptor_pools[p_key].insert(pool);

	pool->usage++;
	return pool;
}
void RenderingDeviceVulkan::_descriptor_pool_free(const DescriptorPoolKey &p_key, DescriptorPool *p_pool) {
#ifdef DEBUG_ENABLED
ERR_FAIL_COND(!descriptor_pools[p_key].has(p_pool));
#endif
ERR_FAIL_COND(p_pool->usage == 0);
p_pool->usage--;
if (p_pool->usage == 0) {
vkDestroyDescriptorPool(device, p_pool->pool, nullptr);
descriptor_pools[p_key].erase(p_pool);
memdelete(p_pool);
if (descriptor_pools[p_key].is_empty()) {
descriptor_pools.erase(p_key);
}
}
}
RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms, RID p_shader, uint32_t p_shader_set) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V(p_uniforms.size() == 0, RID());
Shader *shader = shader_owner.get_or_null(p_shader);
ERR_FAIL_COND_V(!shader, RID());
ERR_FAIL_COND_V_MSG(p_shader_set >= (uint32_t)shader->sets.size() || shader->sets[p_shader_set].uniform_info.size() == 0, RID(),
"Desired set (" + itos(p_shader_set) + ") not used by shader.");
// See that all sets in shader are satisfied.
const Shader::Set &set = shader->sets[p_shader_set];
uint32_t uniform_count = p_uniforms.size();
const Uniform *uniforms = p_uniforms.ptr();
uint32_t set_uniform_count = set.uniform_info.size();
const UniformInfo *set_uniforms = set.uniform_info.ptr();
Vector<VkWriteDescriptorSet> writes;
DescriptorPoolKey pool_key;
// To keep them alive until update call.
List<Vector<VkDescriptorBufferInfo>> buffer_infos;
List<Vector<VkBufferView>> buffer_views;
List<Vector<VkDescriptorImageInfo>> image_infos;
// Used for verification to make sure a uniform set does not use a framebuffer bound texture.
LocalVector<UniformSet::AttachableTexture> attachable_textures;
Vector<Texture *> mutable_sampled_textures;
Vector<Texture *> mutable_storage_textures;
for (uint32_t i = 0; i < set_uniform_count; i++) {
const UniformInfo &set_uniform = set_uniforms[i];
int uniform_idx = -1;
for (int j = 0; j < (int)uniform_count; j++) {
if (uniforms[j].binding == set_uniform.binding) {
uniform_idx = j;
}
}
ERR_FAIL_COND_V_MSG(uniform_idx == -1, RID(),
"All the shader bindings for the given set must be covered by the uniforms provided. Binding (" + itos(set_uniform.binding) + "), set (" + itos(p_shader_set) + ") was not provided.");
const Uniform &uniform = uniforms[uniform_idx];
ERR_FAIL_COND_V_MSG(uniform.uniform_type != set_uniform.type, RID(),
"Mismatch uniform type for binding (" + itos(set_uniform.binding) + "), set (" + itos(p_shader_set) + "). Expected '" + shader_uniform_names[set_uniform.type] + "', supplied: '" + shader_uniform_names[uniform.uniform_type] + "'.");
VkWriteDescriptorSet write; // Common header.
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write.pNext = nullptr;
write.dstSet = VK_NULL_HANDLE; // Will assign afterwards when everything is valid.
write.dstBinding = set_uniform.binding;
write.dstArrayElement = 0;
write.descriptorCount = 0;
write.descriptorType = VK_DESCRIPTOR_TYPE_MAX_ENUM; // Invalid value.
write.pImageInfo = nullptr;
write.pBufferInfo = nullptr;
write.pTexelBufferView = nullptr;
uint32_t type_size = 1;
switch (uniform.uniform_type) {
case UNIFORM_TYPE_SAMPLER: {
if (uniform.get_id_count() != (uint32_t)set_uniform.length) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "Sampler (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") sampler elements, so it should be provided equal number of sampler IDs to satisfy it (IDs provided: " + itos(uniform.get_id_count()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "Sampler (binding: " + itos(uniform.binding) + ") should provide one ID referencing a sampler (IDs provided: " + itos(uniform.get_id_count()) + ").");
}
}
Vector<VkDescriptorImageInfo> image_info;
for (uint32_t j = 0; j < uniform.get_id_count(); j++) {
VkSampler *sampler = sampler_owner.get_or_null(uniform.get_id(j));
ERR_FAIL_COND_V_MSG(!sampler, RID(), "Sampler (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid sampler.");
VkDescriptorImageInfo img_info;
img_info.sampler = *sampler;
img_info.imageView = VK_NULL_HANDLE;
img_info.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
image_info.push_back(img_info);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.get_id_count();
write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
write.pBufferInfo = nullptr;
write.pTexelBufferView = nullptr;
type_size = uniform.get_id_count();
} break;
case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
if (uniform.get_id_count() != (uint32_t)set_uniform.length * 2) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "SamplerTexture (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") sampler&texture elements, so it should provided twice the amount of IDs (sampler,texture pairs) to satisfy it (IDs provided: " + itos(uniform.get_id_count()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "SamplerTexture (binding: " + itos(uniform.binding) + ") should provide two IDs referencing a sampler and then a texture (IDs provided: " + itos(uniform.get_id_count()) + ").");
}
}
Vector<VkDescriptorImageInfo> image_info;
for (uint32_t j = 0; j < uniform.get_id_count(); j += 2) {
VkSampler *sampler = sampler_owner.get_or_null(uniform.get_id(j + 0));
ERR_FAIL_COND_V_MSG(!sampler, RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ", index " + itos(j + 1) + ") is not a valid sampler.");
Texture *texture = texture_owner.get_or_null(uniform.get_id(j + 1));
ERR_FAIL_COND_V_MSG(!texture, RID(), "Texture (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture.");
ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT), RID(),
"Texture (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_SAMPLING_BIT usage flag set in order to be used as uniform.");
VkDescriptorImageInfo img_info;
img_info.sampler = *sampler;
img_info.imageView = texture->view;
if (texture->usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_INPUT_ATTACHMENT_BIT)) {
UniformSet::AttachableTexture attachable_texture;
attachable_texture.bind = set_uniform.binding;
attachable_texture.texture = texture->owner.is_valid() ? texture->owner : uniform.get_id(j + 1);
attachable_textures.push_back(attachable_texture);
}
if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
// Can also be used as storage, add to mutable sampled.
mutable_sampled_textures.push_back(texture);
}
DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner));
img_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_info.push_back(img_info);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.get_id_count() / 2;
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
write.pBufferInfo = nullptr;
write.pTexelBufferView = nullptr;
type_size = uniform.get_id_count() / 2;
} break;
case UNIFORM_TYPE_TEXTURE: {
if (uniform.get_id_count() != (uint32_t)set_uniform.length) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "Texture (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") textures, so it should be provided equal number of texture IDs to satisfy it (IDs provided: " + itos(uniform.get_id_count()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "Texture (binding: " + itos(uniform.binding) + ") should provide one ID referencing a texture (IDs provided: " + itos(uniform.get_id_count()) + ").");
}
}
Vector<VkDescriptorImageInfo> image_info;
for (uint32_t j = 0; j < uniform.get_id_count(); j++) {
Texture *texture = texture_owner.get_or_null(uniform.get_id(j));
ERR_FAIL_COND_V_MSG(!texture, RID(), "Texture (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture.");
ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT), RID(),
"Texture (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_SAMPLING_BIT usage flag set in order to be used as uniform.");
VkDescriptorImageInfo img_info;
img_info.sampler = VK_NULL_HANDLE;
img_info.imageView = texture->view;
if (texture->usage_flags & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_INPUT_ATTACHMENT_BIT)) {
UniformSet::AttachableTexture attachable_texture;
attachable_texture.bind = set_uniform.binding;
attachable_texture.texture = texture->owner.is_valid() ? texture->owner : uniform.get_id(j);
attachable_textures.push_back(attachable_texture);
}
if (texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT) {
// Can also be used as storage, add to mutable sampled.
mutable_sampled_textures.push_back(texture);
}
DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner));
img_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_info.push_back(img_info);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.get_id_count();
write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
write.pBufferInfo = nullptr;
write.pTexelBufferView = nullptr;
type_size = uniform.get_id_count();
} break;
case UNIFORM_TYPE_IMAGE: {
if (uniform.get_id_count() != (uint32_t)set_uniform.length) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "Image (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") textures, so it should be provided equal number of texture IDs to satisfy it (IDs provided: " + itos(uniform.get_id_count()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "Image (binding: " + itos(uniform.binding) + ") should provide one ID referencing a texture (IDs provided: " + itos(uniform.get_id_count()) + ").");
}
}
Vector<VkDescriptorImageInfo> image_info;
for (uint32_t j = 0; j < uniform.get_id_count(); j++) {
Texture *texture = texture_owner.get_or_null(uniform.get_id(j));
ERR_FAIL_COND_V_MSG(!texture, RID(),
"Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture.");
ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT), RID(),
"Image (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_STORAGE_BIT usage flag set in order to be used as uniform.");
VkDescriptorImageInfo img_info;
img_info.sampler = VK_NULL_HANDLE;
img_info.imageView = texture->view;
if (texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT) {
// Can also be used as storage, add to mutable sampled.
mutable_storage_textures.push_back(texture);
}
DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner));
img_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
image_info.push_back(img_info);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.get_id_count();
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
write.pBufferInfo = nullptr;
write.pTexelBufferView = nullptr;
type_size = uniform.get_id_count();
} break;
case UNIFORM_TYPE_TEXTURE_BUFFER: {
if (uniform.get_id_count() != (uint32_t)set_uniform.length) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "Buffer (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") texture buffer elements, so it should be provided equal number of texture buffer IDs to satisfy it (IDs provided: " + itos(uniform.get_id_count()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "Buffer (binding: " + itos(uniform.binding) + ") should provide one ID referencing a texture buffer (IDs provided: " + itos(uniform.get_id_count()) + ").");
}
}
Vector<VkDescriptorBufferInfo> buffer_info;
Vector<VkBufferView> buffer_view;
for (uint32_t j = 0; j < uniform.get_id_count(); j++) {
TextureBuffer *buffer = texture_buffer_owner.get_or_null(uniform.get_id(j));
ERR_FAIL_COND_V_MSG(!buffer, RID(), "Texture Buffer (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture buffer.");
buffer_info.push_back(buffer->buffer.buffer_info);
buffer_view.push_back(buffer->view);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.get_id_count();
write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
write.pImageInfo = nullptr;
write.pBufferInfo = buffer_infos.push_back(buffer_info)->get().ptr();
write.pTexelBufferView = buffer_views.push_back(buffer_view)->get().ptr();
type_size = uniform.get_id_count();
} break;
case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
if (uniform.get_id_count() != (uint32_t)set_uniform.length * 2) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") sampler buffer elements, so it should provided twice the amount of IDs (sampler,buffer pairs) to satisfy it (IDs provided: " + itos(uniform.get_id_count()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ") should provide two IDs referencing a sampler and then a texture buffer (IDs provided: " + itos(uniform.get_id_count()) + ").");
}
}
Vector<VkDescriptorImageInfo> image_info;
Vector<VkDescriptorBufferInfo> buffer_info;
Vector<VkBufferView> buffer_view;
for (uint32_t j = 0; j < uniform.get_id_count(); j += 2) {
VkSampler *sampler = sampler_owner.get_or_null(uniform.get_id(j + 0));
ERR_FAIL_COND_V_MSG(!sampler, RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ", index " + itos(j + 1) + ") is not a valid sampler.");
TextureBuffer *buffer = texture_buffer_owner.get_or_null(uniform.get_id(j + 1));
VkDescriptorImageInfo img_info;
img_info.sampler = *sampler;
img_info.imageView = VK_NULL_HANDLE;
img_info.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
image_info.push_back(img_info);
ERR_FAIL_COND_V_MSG(!buffer, RID(), "SamplerBuffer (binding: " + itos(uniform.binding) + ", index " + itos(j + 1) + ") is not a valid texture buffer.");
buffer_info.push_back(buffer->buffer.buffer_info);
buffer_view.push_back(buffer->view);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.get_id_count() / 2;
write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
write.pBufferInfo = buffer_infos.push_back(buffer_info)->get().ptr();
write.pTexelBufferView = buffer_views.push_back(buffer_view)->get().ptr();
type_size = uniform.get_id_count() / 2;
} break;
case UNIFORM_TYPE_IMAGE_BUFFER: {
// Todo.
} break;
case UNIFORM_TYPE_UNIFORM_BUFFER: {
ERR_FAIL_COND_V_MSG(uniform.get_id_count() != 1, RID(),
"Uniform buffer supplied (binding: " + itos(uniform.binding) + ") must provide one ID (" + itos(uniform.get_id_count()) + " provided).");
Buffer *buffer = uniform_buffer_owner.get_or_null(uniform.get_id(0));
ERR_FAIL_COND_V_MSG(!buffer, RID(), "Uniform buffer supplied (binding: " + itos(uniform.binding) + ") is invalid.");
ERR_FAIL_COND_V_MSG(buffer->size != (uint32_t)set_uniform.length, RID(),
"Uniform buffer supplied (binding: " + itos(uniform.binding) + ") size (" + itos(buffer->size) + " does not match size of shader uniform: (" + itos(set_uniform.length) + ").");
write.dstArrayElement = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
write.pImageInfo = nullptr;
write.pBufferInfo = &buffer->buffer_info;
write.pTexelBufferView = nullptr;
} break;
case UNIFORM_TYPE_STORAGE_BUFFER: {
ERR_FAIL_COND_V_MSG(uniform.get_id_count() != 1, RID(),
"Storage buffer supplied (binding: " + itos(uniform.binding) + ") must provide one ID (" + itos(uniform.get_id_count()) + " provided).");
Buffer *buffer = nullptr;
if (storage_buffer_owner.owns(uniform.get_id(0))) {
buffer = storage_buffer_owner.get_or_null(uniform.get_id(0));
} else if (vertex_buffer_owner.owns(uniform.get_id(0))) {
buffer = vertex_buffer_owner.get_or_null(uniform.get_id(0));
ERR_FAIL_COND_V_MSG(!(buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), RID(), "Vertex buffer supplied (binding: " + itos(uniform.binding) + ") was not created with storage flag.");
}
ERR_FAIL_COND_V_MSG(!buffer, RID(), "Storage buffer supplied (binding: " + itos(uniform.binding) + ") is invalid.");
// If 0, then it's sized on link time.
ERR_FAIL_COND_V_MSG(set_uniform.length > 0 && buffer->size != (uint32_t)set_uniform.length, RID(),
"Storage buffer supplied (binding: " + itos(uniform.binding) + ") size (" + itos(buffer->size) + " does not match size of shader uniform: (" + itos(set_uniform.length) + ").");
write.dstArrayElement = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
write.pImageInfo = nullptr;
write.pBufferInfo = &buffer->buffer_info;
write.pTexelBufferView = nullptr;
} break;
case UNIFORM_TYPE_INPUT_ATTACHMENT: {
ERR_FAIL_COND_V_MSG(shader->is_compute, RID(), "InputAttachment (binding: " + itos(uniform.binding) + ") supplied for compute shader (this is not allowed).");
if (uniform.get_id_count() != (uint32_t)set_uniform.length) {
if (set_uniform.length > 1) {
ERR_FAIL_V_MSG(RID(), "InputAttachment (binding: " + itos(uniform.binding) + ") is an array of (" + itos(set_uniform.length) + ") textures, so it should be provided equal number of texture IDs to satisfy it (IDs provided: " + itos(uniform.get_id_count()) + ").");
} else {
ERR_FAIL_V_MSG(RID(), "InputAttachment (binding: " + itos(uniform.binding) + ") should provide one ID referencing a texture (IDs provided: " + itos(uniform.get_id_count()) + ").");
}
}
Vector<VkDescriptorImageInfo> image_info;
for (uint32_t j = 0; j < uniform.get_id_count(); j++) {
Texture *texture = texture_owner.get_or_null(uniform.get_id(j));
ERR_FAIL_COND_V_MSG(!texture, RID(),
"InputAttachment (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") is not a valid texture.");
ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT), RID(),
"InputAttachment (binding: " + itos(uniform.binding) + ", index " + itos(j) + ") needs the TEXTURE_USAGE_SAMPLING_BIT usage flag set in order to be used as uniform.");
VkDescriptorImageInfo img_info;
img_info.sampler = VK_NULL_HANDLE;
img_info.imageView = texture->view;
DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner));
img_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_info.push_back(img_info);
}
write.dstArrayElement = 0;
write.descriptorCount = uniform.get_id_count();
write.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
write.pImageInfo = image_infos.push_back(image_info)->get().ptr();
write.pBufferInfo = nullptr;
write.pTexelBufferView = nullptr;
type_size = uniform.get_id_count();
} break;
default: {
}
}
writes.push_back(write);
ERR_FAIL_COND_V_MSG(pool_key.uniform_type[set_uniform.type] == MAX_DESCRIPTOR_POOL_ELEMENT, RID(),
"Uniform set reached the limit of bindings for the same type (" + itos(MAX_DESCRIPTOR_POOL_ELEMENT) + ").");
pool_key.uniform_type[set_uniform.type] += type_size;
}
// Need a descriptor pool.
DescriptorPool *pool = _descriptor_pool_allocate(pool_key);
ERR_FAIL_COND_V(!pool, RID());
VkDescriptorSetAllocateInfo descriptor_set_allocate_info;
descriptor_set_allocate_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
descriptor_set_allocate_info.pNext = nullptr;
descriptor_set_allocate_info.descriptorPool = pool->pool;
descriptor_set_allocate_info.descriptorSetCount = 1;
descriptor_set_allocate_info.pSetLayouts = &shader->sets[p_shader_set].descriptor_set_layout;
VkDescriptorSet descriptor_set;
VkResult res = vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, &descriptor_set);
if (res) {
_descriptor_pool_free(pool_key, pool); // Meh.
ERR_FAIL_V_MSG(RID(), "Cannot allocate descriptor sets, error " + itos(res) + ".");
}
UniformSet uniform_set;
uniform_set.pool = pool;
uniform_set.pool_key = pool_key;
uniform_set.descriptor_set = descriptor_set;
uniform_set.format = shader->set_formats[p_shader_set];
uniform_set.attachable_textures = attachable_textures;
uniform_set.mutable_sampled_textures = mutable_sampled_textures;
uniform_set.mutable_storage_textures = mutable_storage_textures;
uniform_set.shader_set = p_shader_set;
uniform_set.shader_id = p_shader;
RID id = uniform_set_owner.make_rid(uniform_set);
#ifdef DEV_ENABLED
set_resource_name(id, "RID:" + itos(id.get_id()));
#endif
// Add dependencies.
_add_dependency(id, p_shader);
for (uint32_t i = 0; i < uniform_count; i++) {
const Uniform &uniform = uniforms[i];
int id_count = uniform.get_id_count();
for (int j = 0; j < id_count; j++) {
_add_dependency(id, uniform.get_id(j));
}
}
// Write the contents.
if (writes.size()) {
for (int i = 0; i < writes.size(); i++) {
writes.write[i].dstSet = descriptor_set;
}
vkUpdateDescriptorSets(device, writes.size(), writes.ptr(), 0, nullptr);
}
return id;
}
// Reports whether `p_uniform_set` is currently tracked by `uniform_set_owner`.
// Returns the result of the owner's `owns()` query unchanged.
bool RenderingDeviceVulkan::uniform_set_is_valid(RID p_uniform_set) {
	const bool is_owned = uniform_set_owner.owns(p_uniform_set);
	return is_owned;
}
// Stores an invalidation callback and its user data pointer on the uniform set
// identified by `p_uniform_set`.
//
// Fails (through ERR_FAIL_COND) without touching anything when the RID does not
// resolve to a uniform set.
void RenderingDeviceVulkan::uniform_set_set_invalidation_callback(RID p_uniform_set, InvalidationCallback p_callback, void *p_userdata) {
	UniformSet *us = uniform_set_owner.get_or_null(p_uniform_set);
	ERR_FAIL_COND(!us);

	// Both fields are always written together so they stay paired.
	UniformSet &target = *us;
	target.invalidated_callback_userdata = p_userdata;
	target.invalidated_callback = p_callback;
}
// Writes `p_size` bytes from `p_data` into the buffer `p_buffer`, starting at byte
// `p_offset`, and optionally records a barrier afterwards.
//
// Returns:
// - ERR_INVALID_PARAMETER if a draw list or compute list is being created, if
//   `p_buffer` does not resolve to a buffer, or if the written range would end
//   past the end of the buffer.
// - Whatever error `_buffer_update()` reports, if it fails.
// - OK otherwise.
//
// `p_post_barrier` selects the stages/accesses the write must be made visible to;
// RD::BARRIER_MASK_NO_BARRIER skips the trailing barrier entirely.
Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField<BarrierMask> p_post_barrier) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER,
			"Updating buffers is forbidden during creation of a draw list");
	ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER,
			"Updating buffers is forbidden during creation of a compute list");

	VkPipelineStageFlags dst_stage_mask = 0;
	VkAccessFlags dst_access = 0;
	if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) {
		// Protect subsequent updates.
		dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
		dst_access = VK_ACCESS_TRANSFER_WRITE_BIT;
	}

	// NOTE(review): dst_stage_mask / dst_access are handed to the lookup as well;
	// presumably it adds the bits matching the buffer type - confirm against its definition.
	Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access, p_post_barrier);
	if (!buffer) {
		ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type.");
	}

	// Compute the end of the range in 64 bits: `p_offset + p_size` evaluated in
	// 32 bits can wrap around and make an out-of-range write look valid.
	const uint64_t write_end = uint64_t(p_offset) + uint64_t(p_size);
	ERR_FAIL_COND_V_MSG(write_end > uint64_t(buffer->size), ERR_INVALID_PARAMETER,
			"Attempted to write buffer (" + itos(int64_t(write_end - uint64_t(buffer->size))) + " bytes) past the end.");

	// No barrier should be needed here.
	// _buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, true);

	// NOTE(review): p_post_barrier is forwarded as the last argument of
	// _buffer_update(); confirm that parameter's meaning against its declaration.
	Error err = _buffer_update(buffer, p_offset, (uint8_t *)p_data, p_size, p_post_barrier);
	if (err) {
		return err;
	}

#ifdef FORCE_FULL_BARRIER
	_full_barrier(true);
#else
	if (dst_stage_mask == 0) {
		// No destination stage was requested; fall back to the end of the pipeline.
		dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
	}

	if (p_post_barrier != RD::BARRIER_MASK_NO_BARRIER) {
		_buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true);
	}
#endif
	return err;
}
// Fills `p_size` bytes of the buffer `p_buffer`, starting at byte `p_offset`, with
// zeros by recording a vkCmdFillBuffer on the current frame's draw command buffer,
// followed by a buffer memory barrier.
//
// Returns ERR_INVALID_PARAMETER if `p_size` is not a multiple of four, if a draw
// list or compute list is being created, if `p_buffer` does not resolve to a
// buffer, or if the range would end past the end of the buffer. Returns OK otherwise.
//
// NOTE(review): the Vulkan specification also requires vkCmdFillBuffer's dstOffset
// to be a multiple of 4; `p_offset` is not validated for that here.
Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField<BarrierMask> p_post_barrier) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V_MSG((p_size % 4) != 0, ERR_INVALID_PARAMETER,
			"Size must be a multiple of four");
	ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER,
			"Updating buffers is forbidden during creation of a draw list");
	ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER,
			"Updating buffers is forbidden during creation of a compute list");

	VkPipelineStageFlags dst_stage_mask = 0;
	VkAccessFlags dst_access = 0;
	if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) {
		// Protect subsequent updates.
		dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
		dst_access = VK_ACCESS_TRANSFER_WRITE_BIT;
	}

	Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access, p_post_barrier);
	if (!buffer) {
		ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type.");
	}

	// Compute the end of the range in 64 bits: `p_offset + p_size` evaluated in
	// 32 bits can wrap around and make an out-of-range fill look valid.
	const uint64_t clear_end = uint64_t(p_offset) + uint64_t(p_size);
	ERR_FAIL_COND_V_MSG(clear_end > uint64_t(buffer->size), ERR_INVALID_PARAMETER,
			"Attempted to write buffer (" + itos(int64_t(clear_end - uint64_t(buffer->size))) + " bytes) past the end.");

	// Should not be needed.
	// _buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, p_post_barrier);

	vkCmdFillBuffer(frames[frame].draw_command_buffer, buffer->buffer, p_offset, p_size, 0);

#ifdef FORCE_FULL_BARRIER
	_full_barrier(true);
#else
	if (dst_stage_mask == 0) {
		// No destination stage was requested; fall back to the end of the pipeline.
		dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
	}

	// The fill above was recorded on the draw command buffer, so the barrier is
	// requested with the trailing flag set, like the other buffer operations do.
	// (The stage mask used to be passed in this position; it is never zero here,
	// so the effective value is unchanged.)
	_buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true);
#endif
	return OK;
}
// Reads back the contents of the buffer `p_buffer` into a byte vector.
//
// `p_offset` is the first byte to read. `p_size` is the number of bytes to read;
// passing 0 reads everything from `p_offset` to the end of the buffer.
//
// The data is copied into a temporary host-accessible buffer on the current
// frame's draw command buffer, everything is flushed, and the temporary buffer
// is then mapped and copied into the returned vector.
//
// Returns an empty vector (after reporting an error) if `p_buffer` does not
// resolve to a buffer, if the requested range does not fit inside the buffer, or
// if mapping the temporary buffer fails.
Vector<uint8_t> RenderingDeviceVulkan::buffer_get_data(RID p_buffer, uint32_t p_offset, uint32_t p_size) {
	_THREAD_SAFE_METHOD_

	// It could be this buffer was just created.
	VkPipelineStageFlags src_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
	VkAccessFlags src_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT;
	// Get the vulkan buffer and the potential stage/access possible.
	Buffer *buffer = _get_buffer_from_owner(p_buffer, src_stage_mask, src_access_mask, BARRIER_MASK_ALL_BARRIERS);
	if (!buffer) {
		ERR_FAIL_V_MSG(Vector<uint8_t>(), "Buffer is either invalid or this type of buffer can't be retrieved. Only Index and Vertex buffers allow retrieving.");
	}

	// Validate the requested range before recording any command.
	// The checks are written with a subtraction so they cannot wrap around the
	// way `p_size + p_offset` can in 32 bits.
	ERR_FAIL_COND_V_MSG(p_offset >= buffer->size, Vector<uint8_t>(),
			"Offset is larger than the buffer.");
	const uint32_t available_size = buffer->size - p_offset;
	if (!p_size) {
		// "Everything" means everything that is left after the offset; using the
		// full buffer size here would read past the end when p_offset > 0.
		p_size = available_size;
	} else {
		ERR_FAIL_COND_V_MSG(p_size > available_size, Vector<uint8_t>(),
				"Size is larger than the buffer.");
	}

	// Make sure no one is using the buffer -- the "true" gets us to the same command buffer as below.
	_buffer_memory_barrier(buffer->buffer, 0, buffer->size, src_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, src_access_mask, VK_ACCESS_TRANSFER_READ_BIT, true);

	VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;

	// NOTE(review): the result of _buffer_allocate() is not checked here; confirm
	// whether it can fail and should be reported.
	Buffer tmp_buffer;
	_buffer_allocate(&tmp_buffer, p_size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_HOST, VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT);

	VkBufferCopy region;
	region.srcOffset = p_offset;
	region.dstOffset = 0;
	region.size = p_size;
	vkCmdCopyBuffer(command_buffer, buffer->buffer, tmp_buffer.buffer, 1, &region); // Dst buffer is in CPU, but I wonder if src buffer needs a barrier for this.

	// Flush everything so memory can be safely mapped.
	_flush(true);

	void *buffer_mem = nullptr;
	VkResult vkerr = vmaMapMemory(allocator, tmp_buffer.allocation, &buffer_mem);
	if (vkerr) {
		// Release the temporary buffer before bailing out so it is not leaked.
		_buffer_free(&tmp_buffer);
		ERR_FAIL_V_MSG(Vector<uint8_t>(), "vmaMapMemory failed with error " + itos(vkerr) + ".");
	}

	Vector<uint8_t> buffer_data;
	{
		buffer_data.resize(p_size);
		uint8_t *w = buffer_data.ptrw();
		memcpy(w, buffer_mem, p_size);
	}

	vmaUnmapMemory(allocator, tmp_buffer.allocation);
	_buffer_free(&tmp_buffer);

	return buffer_data;
}
/*************************/
/**** RENDER PIPELINE ****/
/*************************/
RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, BitField<PipelineDynamicStateFlags> p_dynamic_state_flags, uint32_t p_for_render_pass, const Vector<PipelineSpecializationConstant> &p_specialization_constants) {
_THREAD_SAFE_METHOD_
// Needs a shader.
Shader *shader = shader_owner.get_or_null(p_shader);
ERR_FAIL_COND_V(!shader, RID());
ERR_FAIL_COND_V_MSG(shader->is_compute, RID(),
"Compute shaders can't be used in render pipelines");
if (p_framebuffer_format == INVALID_ID) {
// If nothing provided, use an empty one (no attachments).
p_framebuffer_format = framebuffer_format_create(Vector<AttachmentFormat>());
}
ERR_FAIL_COND_V(!framebuffer_formats.has(p_framebuffer_format), RID());
const FramebufferFormat &fb_format = framebuffer_formats[p_framebuffer_format];
{ // Validate shader vs framebuffer.
ERR_FAIL_COND_V_MSG(p_for_render_pass >= uint32_t(fb_format.E->key().passes.size()), RID(), "Render pass requested for pipeline creation (" + itos(p_for_render_pass) + ") is out of bounds");
const FramebufferPass &pass = fb_format.E->key().passes[p_for_render_pass];
uint32_t output_mask = 0;
for (int i = 0; i < pass.color_attachments.size(); i++) {
if (pass.color_attachments[i] != FramebufferPass::ATTACHMENT_UNUSED) {
output_mask |= 1 << i;
}
}
ERR_FAIL_COND_V_MSG(shader->fragment_output_mask != output_mask, RID(),
"Mismatch fragment shader output mask (" + itos(shader->fragment_output_mask) + ") and framebuffer color output mask (" + itos(output_mask) + ") when binding both in render pipeline.");
}
// Vertex.
VkPipelineVertexInputStateCreateInfo pipeline_vertex_input_state_create_info;
if (p_vertex_format != INVALID_ID) {
// Uses vertices, else it does not.
ERR_FAIL_COND_V(!vertex_formats.has(p_vertex_format), RID());
const VertexDescriptionCache &vd = vertex_formats[p_vertex_format];
pipeline_vertex_input_state_create_info = vd.create_info;
// Validate with inputs.
for (uint32_t i = 0; i < 32; i++) {
if (!(shader->vertex_input_mask & (1UL << i))) {
continue;
}
bool found = false;
for (int j = 0; j < vd.vertex_formats.size(); j++) {
if (vd.vertex_formats[j].location == i) {
found = true;
}
}
ERR_FAIL_COND_V_MSG(!found, RID(),
"Shader vertex input location (" + itos(i) + ") not provided in vertex input description for pipeline creation.");
}
} else {
// Does not use vertices.
pipeline_vertex_input_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
pipeline_vertex_input_state_create_info.pNext = nullptr;
pipeline_vertex_input_state_create_info.flags = 0;
pipeline_vertex_input_state_create_info.vertexBindingDescriptionCount = 0;
pipeline_vertex_input_state_create_info.pVertexBindingDescriptions = nullptr;
pipeline_vertex_input_state_create_info.vertexAttributeDescriptionCount = 0;
pipeline_vertex_input_state_create_info.pVertexAttributeDescriptions = nullptr;
ERR_FAIL_COND_V_MSG(shader->vertex_input_mask != 0, RID(),
"Shader contains vertex inputs, but no vertex input description was provided for pipeline creation.");
}
// Input assembly.
ERR_FAIL_INDEX_V(p_render_primitive, RENDER_PRIMITIVE_MAX, RID());
VkPipelineInputAssemblyStateCreateInfo input_assembly_create_info;
input_assembly_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
input_assembly_create_info.pNext = nullptr;
input_assembly_create_info.flags = 0;
static const VkPrimitiveTopology topology_list[RENDER_PRIMITIVE_MAX] = {
VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
VK_PRIMITIVE_TOPOLOGY_LINE_STRIP,
VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
VK_PRIMITIVE_TOPOLOGY_PATCH_LIST
};
input_assembly_create_info.topology = topology_list[p_render_primitive];
input_assembly_create_info.primitiveRestartEnable = (p_render_primitive == RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX);
// Tessellation.
VkPipelineTessellationStateCreateInfo tessellation_create_info;
tessellation_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
tessellation_create_info.pNext = nullptr;
tessellation_create_info.flags = 0;
ERR_FAIL_COND_V(limits.maxTessellationPatchSize > 0 && (p_rasterization_state.patch_control_points < 1 || p_rasterization_state.patch_control_points > limits.maxTessellationPatchSize), RID());
tessellation_create_info.patchControlPoints = p_rasterization_state.patch_control_points;
VkPipelineViewportStateCreateInfo viewport_state_create_info;
viewport_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
viewport_state_create_info.pNext = nullptr;
viewport_state_create_info.flags = 0;
viewport_state_create_info.viewportCount = 1; // If VR extensions are supported at some point, this will have to be customizable in the framebuffer format.
viewport_state_create_info.pViewports = nullptr;
viewport_state_create_info.scissorCount = 1;
viewport_state_create_info.pScissors = nullptr;
// Rasterization.
VkPipelineRasterizationStateCreateInfo rasterization_state_create_info;
rasterization_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rasterization_state_create_info.pNext = nullptr;
rasterization_state_create_info.flags = 0;
rasterization_state_create_info.depthClampEnable = p_rasterization_state.enable_depth_clamp;
rasterization_state_create_info.rasterizerDiscardEnable = p_rasterization_state.discard_primitives;
rasterization_state_create_info.polygonMode = (p_rasterization_state.wireframe ? VK_POLYGON_MODE_LINE : VK_POLYGON_MODE_FILL);
static const VkCullModeFlags cull_mode[3] = {
VK_CULL_MODE_NONE,
VK_CULL_MODE_FRONT_BIT,
VK_CULL_MODE_BACK_BIT
};
ERR_FAIL_INDEX_V(p_rasterization_state.cull_mode, 3, RID());
rasterization_state_create_info.cullMode = cull_mode[p_rasterization_state.cull_mode];
rasterization_state_create_info.frontFace = (p_rasterization_state.front_face == POLYGON_FRONT_FACE_CLOCKWISE ? VK_FRONT_FACE_CLOCKWISE : VK_FRONT_FACE_COUNTER_CLOCKWISE);
rasterization_state_create_info.depthBiasEnable = p_rasterization_state.depth_bias_enabled;
rasterization_state_create_info.depthBiasConstantFactor = p_rasterization_state.depth_bias_constant_factor;
rasterization_state_create_info.depthBiasClamp = p_rasterization_state.depth_bias_clamp;
rasterization_state_create_info.depthBiasSlopeFactor = p_rasterization_state.depth_bias_slope_factor;
rasterization_state_create_info.lineWidth = p_rasterization_state.line_width;
// Multisample.
VkPipelineMultisampleStateCreateInfo multisample_state_create_info;
multisample_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
multisample_state_create_info.pNext = nullptr;
multisample_state_create_info.flags = 0;
multisample_state_create_info.rasterizationSamples = _ensure_supported_sample_count(p_multisample_state.sample_count);
multisample_state_create_info.sampleShadingEnable = p_multisample_state.enable_sample_shading;
multisample_state_create_info.minSampleShading = p_multisample_state.min_sample_shading;
Vector<VkSampleMask> sample_mask;
if (p_multisample_state.sample_mask.size()) {
// Use sample mask.
const int rasterization_sample_mask_expected_size[TEXTURE_SAMPLES_MAX] = {
1, 2, 4, 8, 16, 32, 64
};
ERR_FAIL_COND_V(rasterization_sample_mask_expected_size[p_multisample_state.sample_count] != p_multisample_state.sample_mask.size(), RID());
sample_mask.resize(p_multisample_state.sample_mask.size());
for (int i = 0; i < p_multisample_state.sample_mask.size(); i++) {
VkSampleMask mask = p_multisample_state.sample_mask[i];
sample_mask.push_back(mask);
}
multisample_state_create_info.pSampleMask = sample_mask.ptr();
} else {
multisample_state_create_info.pSampleMask = nullptr;
}
multisample_state_create_info.alphaToCoverageEnable = p_multisample_state.enable_alpha_to_coverage;
multisample_state_create_info.alphaToOneEnable = p_multisample_state.enable_alpha_to_one;
// Depth stencil.
VkPipelineDepthStencilStateCreateInfo depth_stencil_state_create_info;
depth_stencil_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
depth_stencil_state_create_info.pNext = nullptr;
depth_stencil_state_create_info.flags = 0;
depth_stencil_state_create_info.depthTestEnable = p_depth_stencil_state.enable_depth_test;
depth_stencil_state_create_info.depthWriteEnable = p_depth_stencil_state.enable_depth_write;
ERR_FAIL_INDEX_V(p_depth_stencil_state.depth_compare_operator, COMPARE_OP_MAX, RID());
depth_stencil_state_create_info.depthCompareOp = compare_operators[p_depth_stencil_state.depth_compare_operator];
depth_stencil_state_create_info.depthBoundsTestEnable = p_depth_stencil_state.enable_depth_range;
depth_stencil_state_create_info.stencilTestEnable = p_depth_stencil_state.enable_stencil;
ERR_FAIL_INDEX_V(p_depth_stencil_state.front_op.fail, STENCIL_OP_MAX, RID());
depth_stencil_state_create_info.front.failOp = stencil_operations[p_depth_stencil_state.front_op.fail];
ERR_FAIL_INDEX_V(p_depth_stencil_state.front_op.pass, STENCIL_OP_MAX, RID());
depth_stencil_state_create_info.front.passOp = stencil_operations[p_depth_stencil_state.front_op.pass];
ERR_FAIL_INDEX_V(p_depth_stencil_state.front_op.depth_fail, STENCIL_OP_MAX, RID());
depth_stencil_state_create_info.front.depthFailOp = stencil_operations[p_depth_stencil_state.front_op.depth_fail];
ERR_FAIL_INDEX_V(p_depth_stencil_state.front_op.compare, COMPARE_OP_MAX, RID());
depth_stencil_state_create_info.front.compareOp = compare_operators[p_depth_stencil_state.front_op.compare];
depth_stencil_state_create_info.front.compareMask = p_depth_stencil_state.front_op.compare_mask;
depth_stencil_state_create_info.front.writeMask = p_depth_stencil_state.front_op.write_mask;
depth_stencil_state_create_info.front.reference = p_depth_stencil_state.front_op.reference;
ERR_FAIL_INDEX_V(p_depth_stencil_state.back_op.fail, STENCIL_OP_MAX, RID());
depth_stencil_state_create_info.back.failOp = stencil_operations[p_depth_stencil_state.back_op.fail];
ERR_FAIL_INDEX_V(p_depth_stencil_state.back_op.pass, STENCIL_OP_MAX, RID());
depth_stencil_state_create_info.back.passOp = stencil_operations[p_depth_stencil_state.back_op.pass];
ERR_FAIL_INDEX_V(p_depth_stencil_state.back_op.depth_fail, STENCIL_OP_MAX, RID());
depth_stencil_state_create_info.back.depthFailOp = stencil_operations[p_depth_stencil_state.back_op.depth_fail];
ERR_FAIL_INDEX_V(p_depth_stencil_state.back_op.compare, COMPARE_OP_MAX, RID());
depth_stencil_state_create_info.back.compareOp = compare_operators[p_depth_stencil_state.back_op.compare];
depth_stencil_state_create_info.back.compareMask = p_depth_stencil_state.back_op.compare_mask;
depth_stencil_state_create_info.back.writeMask = p_depth_stencil_state.back_op.write_mask;
depth_stencil_state_create_info.back.reference = p_depth_stencil_state.back_op.reference;
depth_stencil_state_create_info.minDepthBounds = p_depth_stencil_state.depth_range_min;
depth_stencil_state_create_info.maxDepthBounds = p_depth_stencil_state.depth_range_max;
// Blend state.
VkPipelineColorBlendStateCreateInfo color_blend_state_create_info;
color_blend_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
color_blend_state_create_info.pNext = nullptr;
color_blend_state_create_info.flags = 0;
color_blend_state_create_info.logicOpEnable = p_blend_state.enable_logic_op;
ERR_FAIL_INDEX_V(p_blend_state.logic_op, LOGIC_OP_MAX, RID());
color_blend_state_create_info.logicOp = logic_operations[p_blend_state.logic_op];
Vector<VkPipelineColorBlendAttachmentState> attachment_states;
{
const FramebufferPass &pass = fb_format.E->key().passes[p_for_render_pass];
attachment_states.resize(pass.color_attachments.size());
ERR_FAIL_COND_V(p_blend_state.attachments.size() < pass.color_attachments.size(), RID());
for (int i = 0; i < pass.color_attachments.size(); i++) {
VkPipelineColorBlendAttachmentState state;
if (pass.color_attachments[i] == FramebufferPass::ATTACHMENT_UNUSED) {
state.blendEnable = false;
state.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO;
state.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO;
state.colorBlendOp = VK_BLEND_OP_ADD;
state.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
state.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
state.alphaBlendOp = VK_BLEND_OP_ADD;
state.colorWriteMask = 0;
} else {
state.blendEnable = p_blend_state.attachments[i].enable_blend;
ERR_FAIL_INDEX_V(p_blend_state.attachments[i].src_color_blend_factor, BLEND_FACTOR_MAX, RID());
state.srcColorBlendFactor = blend_factors[p_blend_state.attachments[i].src_color_blend_factor];
ERR_FAIL_INDEX_V(p_blend_state.attachments[i].dst_color_blend_factor, BLEND_FACTOR_MAX, RID());
state.dstColorBlendFactor = blend_factors[p_blend_state.attachments[i].dst_color_blend_factor];
ERR_FAIL_INDEX_V(p_blend_state.attachments[i].color_blend_op, BLEND_OP_MAX, RID());
state.colorBlendOp = blend_operations[p_blend_state.attachments[i].color_blend_op];
ERR_FAIL_INDEX_V(p_blend_state.attachments[i].src_alpha_blend_factor, BLEND_FACTOR_MAX, RID());
state.srcAlphaBlendFactor = blend_factors[p_blend_state.attachments[i].src_alpha_blend_factor];
ERR_FAIL_INDEX_V(p_blend_state.attachments[i].dst_alpha_blend_factor, BLEND_FACTOR_MAX, RID());
state.dstAlphaBlendFactor = blend_factors[p_blend_state.attachments[i].dst_alpha_blend_factor];
ERR_FAIL_INDEX_V(p_blend_state.attachments[i].alpha_blend_op, BLEND_OP_MAX, RID());
state.alphaBlendOp = blend_operations[p_blend_state.attachments[i].alpha_blend_op];
state.colorWriteMask = 0;
if (p_blend_state.attachments[i].write_r) {
state.colorWriteMask |= VK_COLOR_COMPONENT_R_BIT;
}
if (p_blend_state.attachments[i].write_g) {
state.colorWriteMask |= VK_COLOR_COMPONENT_G_BIT;
}
if (p_blend_state.attachments[i].write_b) {
state.colorWriteMask |= VK_COLOR_COMPONENT_B_BIT;
}
if (p_blend_state.attachments[i].write_a) {
state.colorWriteMask |= VK_COLOR_COMPONENT_A_BIT;
}
}
attachment_states.write[i] = state;
}
}
color_blend_state_create_info.attachmentCount = attachment_states.size();
color_blend_state_create_info.pAttachments = attachment_states.ptr();
color_blend_state_create_info.blendConstants[0] = p_blend_state.blend_constant.r;
color_blend_state_create_info.blendConstants[1] = p_blend_state.blend_constant.g;
color_blend_state_create_info.blendConstants[2] = p_blend_state.blend_constant.b;
color_blend_state_create_info.blendConstants[3] = p_blend_state.blend_constant.a;
// Dynamic state.
VkPipelineDynamicStateCreateInfo dynamic_state_create_info;
dynamic_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
dynamic_state_create_info.pNext = nullptr;
dynamic_state_create_info.flags = 0;
Vector<VkDynamicState> dynamic_states; // Vulkan is weird.
dynamic_states.push_back(VK_DYNAMIC_STATE_VIEWPORT); // Viewport and scissor are always dynamic.
dynamic_states.push_back(VK_DYNAMIC_STATE_SCISSOR);
if (p_dynamic_state_flags.has_flag(DYNAMIC_STATE_LINE_WIDTH)) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_WIDTH);
}
if (p_dynamic_state_flags.has_flag(DYNAMIC_STATE_DEPTH_BIAS)) {
dynamic_states.push_back(VK_DYNAMIC_STATE_DEPTH_BIAS);
}
if (p_dynamic_state_flags.has_flag(DYNAMIC_STATE_BLEND_CONSTANTS)) {
dynamic_states.push_back(VK_DYNAMIC_STATE_BLEND_CONSTANTS);
}
if (p_dynamic_state_flags.has_flag(DYNAMIC_STATE_DEPTH_BOUNDS)) {
dynamic_states.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS);
}
if (p_dynamic_state_flags.has_flag(DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
dynamic_states.push_back(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK);
}
if (p_dynamic_state_flags.has_flag(DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
dynamic_states.push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK);
}
if (p_dynamic_state_flags.has_flag(DYNAMIC_STATE_STENCIL_REFERENCE)) {
dynamic_states.push_back(VK_DYNAMIC_STATE_STENCIL_REFERENCE);
}
dynamic_state_create_info.dynamicStateCount = dynamic_states.size();
dynamic_state_create_info.pDynamicStates = dynamic_states.ptr();
void *graphics_pipeline_nextptr = nullptr;
VkPipelineFragmentShadingRateStateCreateInfoKHR vrs_create_info;
if (context->get_vrs_capabilities().attachment_vrs_supported) {
// If VRS is used, this defines how the different VRS types are combined.
// combinerOps[0] decides how we use the output of pipeline and primitive (drawcall) VRS.
// combinerOps[1] decides how we use the output of combinerOps[0] and our attachment VRS.
vrs_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR;
vrs_create_info.pNext = nullptr;
vrs_create_info.fragmentSize = { 4, 4 };
vrs_create_info.combinerOps[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; // We don't use pipeline/primitive VRS so this really doesn't matter.
vrs_create_info.combinerOps[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR; // Always use the outcome of attachment VRS if enabled.
graphics_pipeline_nextptr = &vrs_create_info;
}
// Finally, pipeline create info.
VkGraphicsPipelineCreateInfo graphics_pipeline_create_info;
graphics_pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
graphics_pipeline_create_info.pNext = graphics_pipeline_nextptr;
graphics_pipeline_create_info.flags = 0;
Vector<VkPipelineShaderStageCreateInfo> pipeline_stages = shader->pipeline_stages;
Vector<VkSpecializationInfo> specialization_info;
Vector<Vector<VkSpecializationMapEntry>> specialization_map_entries;
Vector<uint32_t> specialization_constant_data;
if (shader->specialization_constants.size()) {
specialization_constant_data.resize(shader->specialization_constants.size());
uint32_t *data_ptr = specialization_constant_data.ptrw();
specialization_info.resize(pipeline_stages.size());
specialization_map_entries.resize(pipeline_stages.size());
for (int i = 0; i < shader->specialization_constants.size(); i++) {
// See if overridden.
const Shader::SpecializationConstant &sc = shader->specialization_constants[i];
data_ptr[i] = sc.constant.int_value; // Just copy the 32 bits.
for (int j = 0; j < p_specialization_constants.size(); j++) {
const PipelineSpecializationConstant &psc = p_specialization_constants[j];
if (psc.constant_id == sc.constant.constant_id) {
ERR_FAIL_COND_V_MSG(psc.type != sc.constant.type, RID(), "Specialization constant provided for id (" + itos(sc.constant.constant_id) + ") is of the wrong type.");
data_ptr[i] = psc.int_value;
break;
}
}
VkSpecializationMapEntry entry;
entry.constantID = sc.constant.constant_id;
entry.offset = i * sizeof(uint32_t);
entry.size = sizeof(uint32_t);
for (int j = 0; j < SHADER_STAGE_MAX; j++) {
if (sc.stage_flags & (1 << j)) {
VkShaderStageFlagBits stage = shader_stage_masks[j];
for (int k = 0; k < pipeline_stages.size(); k++) {
if (pipeline_stages[k].stage == stage) {
specialization_map_entries.write[k].push_back(entry);
}
}
}
}
}
for (int i = 0; i < pipeline_stages.size(); i++) {
if (specialization_map_entries[i].size()) {
specialization_info.write[i].dataSize = specialization_constant_data.size() * sizeof(uint32_t);
specialization_info.write[i].pData = data_ptr;
specialization_info.write[i].mapEntryCount = specialization_map_entries[i].size();
specialization_info.write[i].pMapEntries = specialization_map_entries[i].ptr();
pipeline_stages.write[i].pSpecializationInfo = specialization_info.ptr() + i;
}
}
}
graphics_pipeline_create_info.stageCount = pipeline_stages.size();
graphics_pipeline_create_info.pStages = pipeline_stages.ptr();
graphics_pipeline_create_info.pVertexInputState = &pipeline_vertex_input_state_create_info;
graphics_pipeline_create_info.pInputAssemblyState = &input_assembly_create_info;
graphics_pipeline_create_info.pTessellationState = &tessellation_create_info;
graphics_pipeline_create_info.pViewportState = &viewport_state_create_info;
graphics_pipeline_create_info.pRasterizationState = &rasterization_state_create_info;
graphics_pipeline_create_info.pMultisampleState = &multisample_state_create_info;
graphics_pipeline_create_info.pDepthStencilState = &depth_stencil_state_create_info;
graphics_pipeline_create_info.pColorBlendState = &color_blend_state_create_info;
graphics_pipeline_create_info.pDynamicState = &dynamic_state_create_info;
graphics_pipeline_create_info.layout = shader->pipeline_layout;
graphics_pipeline_create_info.renderPass = fb_format.render_pass;
graphics_pipeline_create_info.subpass = p_for_render_pass;
graphics_pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
graphics_pipeline_create_info.basePipelineIndex = 0;
RenderPipeline pipeline;
VkResult err = vkCreateGraphicsPipelines(device, pipelines_cache.cache_object, 1, &graphics_pipeline_create_info, nullptr, &pipeline.pipeline);
ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateGraphicsPipelines failed with error " + itos(err) + " for shader '" + shader->name + "'.");
if (pipelines_cache.cache_object != VK_NULL_HANDLE) {
_update_pipeline_cache();
}
pipeline.set_formats = shader->set_formats;
pipeline.push_constant_stages_mask = shader->push_constant.vk_stages_mask;
pipeline.pipeline_layout = shader->pipeline_layout;
pipeline.shader = p_shader;
pipeline.push_constant_size = shader->push_constant.size;
#ifdef DEBUG_ENABLED
pipeline.validation.dynamic_state = p_dynamic_state_flags;
pipeline.validation.framebuffer_format = p_framebuffer_format;
pipeline.validation.render_pass = p_for_render_pass;
pipeline.validation.vertex_format = p_vertex_format;
pipeline.validation.uses_restart_indices = input_assembly_create_info.primitiveRestartEnable;
static const uint32_t primitive_divisor[RENDER_PRIMITIVE_MAX] = {
1, 2, 1, 1, 1, 3, 1, 1, 1, 1, 1
};
pipeline.validation.primitive_divisor = primitive_divisor[p_render_primitive];
static const uint32_t primitive_minimum[RENDER_PRIMITIVE_MAX] = {
1,
2,
2,
2,
2,
3,
3,
3,
3,
3,
1,
};
pipeline.validation.primitive_minimum = primitive_minimum[p_render_primitive];
#endif
// Create ID to associate with this pipeline.
RID id = render_pipeline_owner.make_rid(pipeline);
#ifdef DEV_ENABLED
set_resource_name(id, "RID:" + itos(id.get_id()));
#endif
// Now add all the dependencies.
_add_dependency(id, p_shader);
return id;
}
// Reports whether p_pipeline is currently registered in render_pipeline_owner.
bool RenderingDeviceVulkan::render_pipeline_is_valid(RID p_pipeline) {
	_THREAD_SAFE_METHOD_

	const bool is_registered = render_pipeline_owner.owns(p_pipeline);
	return is_registered;
}
/**************************/
/**** COMPUTE PIPELINE ****/
/**************************/
// Creates a Vulkan compute pipeline for p_shader and registers it, returning its RID.
//
// Every specialization constant declared by the shader is handed to Vulkan as a 32-bit value:
// the shader's own default, unless p_specialization_constants contains an entry with the same
// constant_id, in which case that entry's value is used (its type must match the declaration).
//
// Returns an empty RID() if the shader is missing, is not a compute shader, an override has
// the wrong type, or vkCreateComputePipelines() reports an error.
RID RenderingDeviceVulkan::compute_pipeline_create(RID p_shader, const Vector<PipelineSpecializationConstant> &p_specialization_constants) {
	_THREAD_SAFE_METHOD_

	// A compute shader is mandatory.
	Shader *shader = shader_owner.get_or_null(p_shader);
	ERR_FAIL_COND_V(!shader, RID());
	ERR_FAIL_COND_V_MSG(!shader->is_compute, RID(),
			"Non-compute shaders can't be used in compute pipelines");

	// Pipeline description: the shader's first stage and its layout, no base pipeline.
	VkComputePipelineCreateInfo pipeline_info;
	pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
	pipeline_info.pNext = nullptr;
	pipeline_info.flags = 0;
	pipeline_info.stage = shader->pipeline_stages[0];
	pipeline_info.layout = shader->pipeline_layout;
	pipeline_info.basePipelineHandle = VK_NULL_HANDLE;
	pipeline_info.basePipelineIndex = 0;

	// Declared at function scope: pipeline_info points into these until the pipeline is created.
	VkSpecializationInfo spec_info;
	Vector<VkSpecializationMapEntry> spec_entries;
	Vector<uint32_t> spec_values;

	const int declared_count = shader->specialization_constants.size();
	if (declared_count) {
		spec_values.resize(declared_count);
		uint32_t *values = spec_values.ptrw();

		for (int slot = 0; slot < declared_count; slot++) {
			const Shader::SpecializationConstant &sc = shader->specialization_constants[slot];

			// Start from the shader default (raw 32-bit copy), then look for a caller override.
			values[slot] = sc.constant.int_value;
			for (int k = 0; k < p_specialization_constants.size(); k++) {
				const PipelineSpecializationConstant &psc = p_specialization_constants[k];
				if (psc.constant_id != sc.constant.constant_id) {
					continue;
				}
				ERR_FAIL_COND_V_MSG(psc.type != sc.constant.type, RID(), "Specialization constant provided for id (" + itos(sc.constant.constant_id) + ") is of the wrong type.");
				values[slot] = psc.int_value;
				break;
			}

			// Each constant occupies one uint32_t slot in the data blob.
			VkSpecializationMapEntry map_entry;
			map_entry.constantID = sc.constant.constant_id;
			map_entry.offset = slot * sizeof(uint32_t);
			map_entry.size = sizeof(uint32_t);
			spec_entries.push_back(map_entry);
		}

		spec_info.dataSize = spec_values.size() * sizeof(uint32_t);
		spec_info.pData = values;
		spec_info.mapEntryCount = spec_entries.size();
		spec_info.pMapEntries = spec_entries.ptr();

		pipeline_info.stage.pSpecializationInfo = &spec_info;
	}

	ComputePipeline pipeline;
	VkResult err = vkCreateComputePipelines(device, pipelines_cache.cache_object, 1, &pipeline_info, nullptr, &pipeline.pipeline);
	ERR_FAIL_COND_V_MSG(err, RID(), "vkCreateComputePipelines failed with error " + itos(err) + ".");

	if (pipelines_cache.cache_object != VK_NULL_HANDLE) {
		_update_pipeline_cache();
	}

	// Copy from the shader what is needed later without looking the shader up again.
	pipeline.shader = p_shader;
	pipeline.pipeline_layout = shader->pipeline_layout;
	pipeline.set_formats = shader->set_formats;
	pipeline.push_constant_size = shader->push_constant.size;
	pipeline.push_constant_stages_mask = shader->push_constant.vk_stages_mask;
	for (int axis = 0; axis < 3; axis++) {
		pipeline.local_group_size[axis] = shader->compute_local_size[axis];
	}

	// Create ID to associate with this pipeline.
	RID id = compute_pipeline_owner.make_rid(pipeline);
#ifdef DEV_ENABLED
	set_resource_name(id, "RID:" + itos(id.get_id()));
#endif
	// The pipeline depends on its shader.
	_add_dependency(id, p_shader);
	return id;
}
// Reports whether p_pipeline is currently registered in compute_pipeline_owner.
bool RenderingDeviceVulkan::compute_pipeline_is_valid(RID p_pipeline) {
	const bool is_registered = compute_pipeline_owner.owns(p_pipeline);
	return is_registered;
}
/****************/
/**** SCREEN ****/
/****************/
// Returns the width the Vulkan context reports for window p_screen.
// Fails with -1 when this is a local device, since those have no screen.
int RenderingDeviceVulkan::screen_get_width(DisplayServer::WindowID p_screen) const {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V_MSG(local_device.is_valid(), -1, "Local devices have no screen");

	const int window_width = context->window_get_width(p_screen);
	return window_width;
}
// Returns the height the Vulkan context reports for window p_screen.
// Fails with -1 when this is a local device, since those have no screen.
int RenderingDeviceVulkan::screen_get_height(DisplayServer::WindowID p_screen) const {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V_MSG(local_device.is_valid(), -1, "Local devices have no screen");

	const int window_height = context->window_get_height(p_screen);
	return window_height;
}
// Returns the framebuffer format ID describing the screen: a single, 1-sample color attachment
// whose data format corresponds to the VkFormat reported by the Vulkan context.
// Fails with INVALID_ID on local devices, or when the screen's VkFormat has no entry in
// vulkan_formats.
RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::screen_get_framebuffer_format() const {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V_MSG(local_device.is_valid(), INVALID_ID, "Local devices have no screen");

	// Linear reverse lookup VkFormat -> DataFormat. Very hacky, but not used often per frame so I guess ok.
	const VkFormat screen_vk_format = context->get_screen_format();
	DataFormat format = DATA_FORMAT_MAX;
	int candidate = 0;
	while (format == DATA_FORMAT_MAX && candidate < DATA_FORMAT_MAX) {
		if (vulkan_formats[candidate] == screen_vk_format) {
			format = DataFormat(candidate); // First match wins and ends the scan.
		}
		candidate++;
	}
	ERR_FAIL_COND_V(format == DATA_FORMAT_MAX, INVALID_ID);

	AttachmentFormat color_target;
	color_target.usage_flags = TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;
	color_target.samples = TEXTURE_SAMPLES_1;
	color_target.format = format;

	Vector<AttachmentFormat> screen_attachment;
	screen_attachment.push_back(color_target);

	// framebuffer_format_create() is non-const, hence the const_cast.
	RenderingDeviceVulkan *mutable_self = const_cast<RenderingDeviceVulkan *>(this);
	return mutable_self->framebuffer_format_create(screen_attachment);
}
/*******************/
/**** DRAW LIST ****/
/*******************/
// Begins a draw list that renders straight into the framebuffer of window p_screen.
//
// The window's render pass is begun on the current frame's draw command buffer with
// p_clear_color as its single clear value, and viewport and scissor are set to cover the
// whole window.
//
// Returns the (non-split) draw list ID, or INVALID_ID when this is a local device, another
// draw/compute list is already active, or the window has no valid swapchain.
RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin_for_screen(DisplayServer::WindowID p_screen, const Color &p_clear_color) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V_MSG(local_device.is_valid(), INVALID_ID, "Local devices have no screen");
	ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time.");
	ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time.");

	VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;

	if (!context->window_is_valid_swapchain(p_screen)) {
		return INVALID_ID;
	}

	Size2i size = Size2i(context->window_get_width(p_screen), context->window_get_height(p_screen));

	_draw_list_allocate(Rect2i(Vector2i(), size), 0, 0);
#ifdef DEBUG_ENABLED
	draw_list_framebuffer_format = screen_get_framebuffer_format();
#endif
	draw_list_subpass_count = 1;
	// Start at the first subpass, as draw_list_begin() and draw_list_begin_split() do.
	// Without this reset, an index left over from an earlier draw list would still be in
	// effect while recording the screen draw list.
	draw_list_current_subpass = 0;

	VkRenderPassBeginInfo render_pass_begin;
	render_pass_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
	render_pass_begin.pNext = nullptr;
	render_pass_begin.renderPass = context->window_get_render_pass(p_screen);
	render_pass_begin.framebuffer = context->window_get_framebuffer(p_screen);

	render_pass_begin.renderArea.extent.width = size.width;
	render_pass_begin.renderArea.extent.height = size.height;
	render_pass_begin.renderArea.offset.x = 0;
	render_pass_begin.renderArea.offset.y = 0;

	render_pass_begin.clearValueCount = 1;

	VkClearValue clear_value;
	clear_value.color.float32[0] = p_clear_color.r;
	clear_value.color.float32[1] = p_clear_color.g;
	clear_value.color.float32[2] = p_clear_color.b;
	clear_value.color.float32[3] = p_clear_color.a;

	render_pass_begin.pClearValues = &clear_value;

	vkCmdBeginRenderPass(command_buffer, &render_pass_begin, VK_SUBPASS_CONTENTS_INLINE);

	// Same values screen_get_width()/screen_get_height() would return here (the local-device
	// case was rejected above), without re-taking the lock and re-querying the context.
	const uint32_t size_x = size.width;
	const uint32_t size_y = size.height;

	VkViewport viewport;
	viewport.x = 0;
	viewport.y = 0;
	viewport.width = size_x;
	viewport.height = size_y;
	viewport.minDepth = 0;
	viewport.maxDepth = 1.0;

	vkCmdSetViewport(command_buffer, 0, 1, &viewport);

	VkRect2D scissor;
	scissor.offset.x = 0;
	scissor.offset.y = 0;
	scissor.extent.width = size_x;
	scissor.extent.height = size_y;

	vkCmdSetScissor(command_buffer, 0, 1, &scissor);

	return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT;
}
// Looks up (creating and caching on first use) the VkFramebuffer/VkRenderPass "version" of
// p_framebuffer that matches the given initial/final actions and the framebuffer's view count.
//
// On success returns OK and writes the version's framebuffer, render pass and subpass count
// to r_framebuffer, r_render_pass and r_subpass_count.
// Returns ERR_BUG when a non-VRS attachment's size differs from the framebuffer size, and
// ERR_CANT_CREATE when vkCreateFramebuffer() fails; the out parameters are untouched then.
Error RenderingDeviceVulkan::_draw_list_setup_framebuffer(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, VkFramebuffer *r_framebuffer, VkRenderPass *r_render_pass, uint32_t *r_subpass_count) {
	Framebuffer::VersionKey vk;
	vk.initial_color_action = p_initial_color_action;
	vk.final_color_action = p_final_color_action;
	vk.initial_depth_action = p_initial_depth_action;
	vk.final_depth_action = p_final_depth_action;
	vk.view_count = p_framebuffer->view_count;

	if (!p_framebuffer->framebuffers.has(vk)) {
		// Need to create this version.

		// Gather and validate the attachments BEFORE creating any Vulkan object, so that a
		// validation failure cannot leak the render pass created below.
		Vector<VkImageView> attachments;
		for (int i = 0; i < p_framebuffer->texture_ids.size(); i++) {
			Texture *texture = texture_owner.get_or_null(p_framebuffer->texture_ids[i]);
			if (texture) {
				attachments.push_back(texture->view);
				if (!(texture->usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT)) { // VRS attachment will be a different size.
					ERR_FAIL_COND_V(texture->width != p_framebuffer->size.width, ERR_BUG);
					ERR_FAIL_COND_V(texture->height != p_framebuffer->size.height, ERR_BUG);
				}
			}
		}

		Framebuffer::Version version;

		version.render_pass = _render_pass_create(framebuffer_formats[p_framebuffer->format_id].E->key().attachments, framebuffer_formats[p_framebuffer->format_id].E->key().passes, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_framebuffer->view_count);

		VkFramebufferCreateInfo framebuffer_create_info;
		framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
		framebuffer_create_info.pNext = nullptr;
		framebuffer_create_info.flags = 0;
		framebuffer_create_info.renderPass = version.render_pass;
		framebuffer_create_info.attachmentCount = attachments.size();
		framebuffer_create_info.pAttachments = attachments.ptr();
		framebuffer_create_info.width = p_framebuffer->size.width;
		framebuffer_create_info.height = p_framebuffer->size.height;
		framebuffer_create_info.layers = 1;

		VkResult err = vkCreateFramebuffer(device, &framebuffer_create_info, nullptr, &version.framebuffer);
		if (err) {
			// The render pass was created solely for this version, which is never stored on
			// failure; release it instead of leaking it.
			vkDestroyRenderPass(device, version.render_pass, nullptr);
		}
		ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "vkCreateFramebuffer failed with error " + itos(err) + ".");

		version.subpass_count = framebuffer_formats[p_framebuffer->format_id].E->key().passes.size();

		p_framebuffer->framebuffers.insert(vk, version);
	}
	const Framebuffer::Version &version = p_framebuffer->framebuffers[vk];
	*r_framebuffer = version.framebuffer;
	*r_render_pass = version.render_pass;
	*r_subpass_count = version.subpass_count;

	return OK;
}
// Records the beginning of a render pass for `framebuffer` into `command_buffer`.
//
// Steps, in order:
//  1. Fill VkRenderPassBeginInfo with a render area covering the whole framebuffer
//     (viewport_offset / viewport_size are only referenced by the commented-out code below).
//  2. Build one VkClearValue per valid framebuffer texture.
//  3. For every entry of p_storage_textures that has both the STORAGE and SAMPLING usage bits,
//     record an image barrier moving it to VK_IMAGE_LAYOUT_GENERAL and remember it in
//     draw_list_storage_textures.
//  4. Call vkCmdBeginRenderPass() and flag all valid framebuffer textures as bound.
//
// Returns OK, or ERR_BUG if the clear color index check fails.
// NOTE(review): p_initial_color_action and p_initial_depth_action are not read in this function.
Error RenderingDeviceVulkan::_draw_list_render_pass_begin(Framebuffer *framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i viewport_offset, Point2i viewport_size, VkFramebuffer vkframebuffer, VkRenderPass render_pass, VkCommandBuffer command_buffer, VkSubpassContents subpass_contents, const Vector<RID> &p_storage_textures) {
VkRenderPassBeginInfo render_pass_begin;
render_pass_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin.pNext = nullptr;
render_pass_begin.renderPass = render_pass;
render_pass_begin.framebuffer = vkframebuffer;
/*
* Given how API works, it makes sense to always fully operate on the whole framebuffer.
* This allows better continue operations for operations like shadowmapping.
render_pass_begin.renderArea.extent.width = viewport_size.width;
render_pass_begin.renderArea.extent.height = viewport_size.height;
render_pass_begin.renderArea.offset.x = viewport_offset.x;
render_pass_begin.renderArea.offset.y = viewport_offset.y;
*/
render_pass_begin.renderArea.extent.width = framebuffer->size.width;
render_pass_begin.renderArea.extent.height = framebuffer->size.height;
render_pass_begin.renderArea.offset.x = 0;
render_pass_begin.renderArea.offset.y = 0;
// Sized for the worst case (every texture valid); only the first clear_values_count entries are passed to Vulkan.
Vector<VkClearValue> clear_values;
clear_values.resize(framebuffer->texture_ids.size());
int clear_values_count = 0;
{
// Index of the next entry of p_clear_colors to consume.
int color_index = 0;
for (int i = 0; i < framebuffer->texture_ids.size(); i++) {
VkClearValue clear_value;
Texture *texture = texture_owner.get_or_null(framebuffer->texture_ids[i]);
if (!texture) {
// Invalid texture: no clear value is emitted, but a clear color slot is still skipped.
color_index++;
continue;
}
if (color_index < p_clear_colors.size() && texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
// Color attachment with a clear color still available: consume the next one.
ERR_FAIL_INDEX_V(color_index, p_clear_colors.size(), ERR_BUG); // A bug.
Color clear_color = p_clear_colors[color_index];
clear_value.color.float32[0] = clear_color.r;
clear_value.color.float32[1] = clear_color.g;
clear_value.color.float32[2] = clear_color.b;
clear_value.color.float32[3] = clear_color.a;
color_index++;
} else if (texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
clear_value.depthStencil.depth = p_clear_depth;
clear_value.depthStencil.stencil = p_clear_stencil;
} else {
// Anything else (including color attachments once p_clear_colors is exhausted) gets a zero color.
clear_value.color.float32[0] = 0;
clear_value.color.float32[1] = 0;
clear_value.color.float32[2] = 0;
clear_value.color.float32[3] = 0;
}
clear_values.write[clear_values_count++] = clear_value;
}
}
render_pass_begin.clearValueCount = clear_values_count;
render_pass_begin.pClearValues = clear_values.ptr();
// Storage textures: barriers are recorded before vkCmdBeginRenderPass() below.
for (int i = 0; i < p_storage_textures.size(); i++) {
Texture *texture = texture_owner.get_or_null(p_storage_textures[i]);
if (!texture) {
continue;
}
ERR_CONTINUE_MSG(!(texture->usage_flags & TEXTURE_USAGE_STORAGE_BIT), "Supplied storage texture " + itos(i) + " for draw list is not set to be used for storage.");
// Only textures that can also be sampled get a layout transition (and are tracked in draw_list_storage_textures).
if (texture->usage_flags & TEXTURE_USAGE_SAMPLING_BIT) {
// Must change layout to general.
VkImageMemoryBarrier image_memory_barrier;
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
image_memory_barrier.oldLayout = texture->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = texture->image;
image_memory_barrier.subresourceRange.aspectMask = texture->read_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = texture->base_mipmap;
image_memory_barrier.subresourceRange.levelCount = texture->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = texture->base_layer;
image_memory_barrier.subresourceRange.layerCount = texture->layers;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier);
// Keep the tracked layout in sync with the barrier just recorded.
texture->layout = VK_IMAGE_LAYOUT_GENERAL;
draw_list_storage_textures.push_back(p_storage_textures[i]);
}
}
vkCmdBeginRenderPass(command_buffer, &render_pass_begin, subpass_contents);
// Mark textures as bound.
draw_list_bound_textures.clear();
// Any final action other than FINAL_ACTION_CONTINUE sets the corresponding unbind flag.
draw_list_unbind_color_textures = p_final_color_action != FINAL_ACTION_CONTINUE;
draw_list_unbind_depth_textures = p_final_depth_action != FINAL_ACTION_CONTINUE;
for (int i = 0; i < framebuffer->texture_ids.size(); i++) {
Texture *texture = texture_owner.get_or_null(framebuffer->texture_ids[i]);
if (!texture) {
continue;
}
texture->bound = true;
draw_list_bound_textures.push_back(framebuffer->texture_ids[i]);
}
return OK;
}
// Records a vkCmdClearAttachments() into p_draw_list's command buffer that clears the rectangle
// (p_viewport_offset, p_viewport_size), layer 0, of the framebuffer's attachments.
//
// - When p_clear_color is set, textures with the COLOR_ATTACHMENT usage bit are cleared with
//   entries taken from p_clear_colors.
// - When p_clear_depth is set, textures with the DEPTH_STENCIL_ATTACHMENT usage bit are cleared
//   with p_depth / p_stencil (the stencil aspect only if the texture format has stencil).
// - Any other valid texture is reported through ERR_CONTINUE and skipped.
//
// If no attachment ends up selected, nothing is recorded.
void RenderingDeviceVulkan::_draw_list_insert_clear_region(DrawList *p_draw_list, Framebuffer *p_framebuffer, Point2i p_viewport_offset, Point2i p_viewport_size, bool p_clear_color, const Vector<Color> &p_clear_colors, bool p_clear_depth, float p_depth, uint32_t p_stencil) {
	Vector<VkClearAttachment> clear_attachments;
	int color_index = 0; // Value written to VkClearAttachment::colorAttachment for the next color clear.
	int texture_index = 0; // Index into p_clear_colors; also advanced for invalid textures.
	for (int i = 0; i < p_framebuffer->texture_ids.size(); i++) {
		Texture *texture = texture_owner.get_or_null(p_framebuffer->texture_ids[i]);

		if (!texture) {
			texture_index++;
			continue;
		}

		VkClearAttachment clear_at = {};
		if (p_clear_color && texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
			Color clear_color = p_clear_colors[texture_index++];
			clear_at.clearValue.color.float32[0] = clear_color.r;
			clear_at.clearValue.color.float32[1] = clear_color.g;
			clear_at.clearValue.color.float32[2] = clear_color.b;
			clear_at.clearValue.color.float32[3] = clear_color.a;
			clear_at.colorAttachment = color_index++;
			clear_at.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		} else if (p_clear_depth && texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
			clear_at.clearValue.depthStencil.depth = p_depth;
			clear_at.clearValue.depthStencil.stencil = p_stencil;
			clear_at.colorAttachment = 0;
			clear_at.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
			if (format_has_stencil(texture->format)) {
				clear_at.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
			}
		} else {
			ERR_CONTINUE(true);
		}
		clear_attachments.push_back(clear_at);
	}

	// vkCmdClearAttachments() requires attachmentCount > 0; with nothing to clear,
	// recording the command would be invalid API usage.
	if (clear_attachments.size() == 0) {
		return;
	}

	VkClearRect cr;
	cr.baseArrayLayer = 0;
	cr.layerCount = 1;
	cr.rect.offset.x = p_viewport_offset.x;
	cr.rect.offset.y = p_viewport_offset.y;
	cr.rect.extent.width = p_viewport_size.width;
	cr.rect.extent.height = p_viewport_size.height;

	vkCmdClearAttachments(p_draw_list->command_buffer, clear_attachments.size(), clear_attachments.ptr(), 1, &cr);
}
// Begins a single (non-split) draw list rendering into p_framebuffer.
//
// p_region optionally restricts rendering to a sub-rectangle; it must lie entirely inside the
// framebuffer. An empty Rect2, or one equal to the full framebuffer rectangle, means "whole
// framebuffer". With a custom region, the INITIAL_ACTION_CLEAR_REGION* actions are turned into
// KEEP/CONTINUE plus an explicit clear of just that region after the render pass has begun.
//
// When colors are going to be cleared, p_clear_color_values must contain exactly one entry per
// color attachment counted below.
//
// Returns the draw list ID, or INVALID_ID when another list is active, the framebuffer is
// invalid, the region is not contained in the framebuffer, the clear color count is wrong, or
// the render pass could not be set up.
RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector<RID> &p_storage_textures) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time.");
	ERR_FAIL_COND_V_MSG(compute_list != nullptr && !compute_list->state.allow_draw_overlap, INVALID_ID, "Only one draw/compute list can be active at the same time.");

	Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_framebuffer);
	ERR_FAIL_COND_V(!framebuffer, INVALID_ID);

	Point2i viewport_offset;
	Point2i viewport_size = framebuffer->size;
	bool needs_clear_color = false;
	bool needs_clear_depth = false;

	if (p_region != Rect2() && p_region != Rect2(Vector2(), viewport_size)) { // Check custom region.
		Rect2i viewport(viewport_offset, viewport_size);
		Rect2i regioni = p_region;
		// All four bounds must hold. The negation has to apply to the whole conjunction;
		// negating only the first comparison would let most out-of-bounds regions through.
		const bool region_inside_viewport = (regioni.position.x >= viewport.position.x) && (regioni.position.y >= viewport.position.y) &&
				((regioni.position.x + regioni.size.x) <= (viewport.position.x + viewport.size.x)) &&
				((regioni.position.y + regioni.size.y) <= (viewport.position.y + viewport.size.y));
		if (!region_inside_viewport) {
			ERR_FAIL_V_MSG(INVALID_ID, "When supplying a custom region, it must be contained within the framebuffer rectangle");
		}

		viewport_offset = regioni.position;
		viewport_size = regioni.size;

		// Region clears are done manually after the render pass begins, so the render pass
		// itself must preserve the existing contents.
		if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) {
			needs_clear_color = true;
			p_initial_color_action = INITIAL_ACTION_CONTINUE;
		}
		if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION_CONTINUE) {
			needs_clear_depth = true;
			p_initial_depth_action = INITIAL_ACTION_CONTINUE;
		}
		if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION) {
			needs_clear_color = true;
			p_initial_color_action = INITIAL_ACTION_KEEP;
		}
		if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION) {
			needs_clear_depth = true;
			p_initial_depth_action = INITIAL_ACTION_KEEP;
		}
	}

	if (p_initial_color_action == INITIAL_ACTION_CLEAR || needs_clear_color) { // Check clear values.
		int color_count = 0;
		for (int i = 0; i < framebuffer->texture_ids.size(); i++) {
			Texture *texture = texture_owner.get_or_null(framebuffer->texture_ids[i]);
			// We only check for our VRS usage bit if this is not the first texture id.
			// If it is the first we're likely populating our VRS texture.
			// Bit dirty but...
			if (!texture || (!(texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) && !(i != 0 && texture->usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT))) {
				// Resolve buffers do not take a clear color.
				if (!texture || !texture->is_resolve_buffer) {
					color_count++;
				}
			}
		}
		ERR_FAIL_COND_V_MSG(p_clear_color_values.size() != color_count, INVALID_ID, "Clear color values supplied (" + itos(p_clear_color_values.size()) + ") differ from the amount required for framebuffer color attachments (" + itos(color_count) + ").");
	}

	VkFramebuffer vkframebuffer;
	VkRenderPass render_pass;

	Error err = _draw_list_setup_framebuffer(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, &vkframebuffer, &render_pass, &draw_list_subpass_count);
	ERR_FAIL_COND_V(err != OK, INVALID_ID);

	VkCommandBuffer command_buffer = frames[frame].draw_command_buffer;
	err = _draw_list_render_pass_begin(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, viewport_offset, viewport_size, vkframebuffer, render_pass, command_buffer, VK_SUBPASS_CONTENTS_INLINE, p_storage_textures);

	if (err != OK) {
		return INVALID_ID;
	}

	draw_list_render_pass = render_pass;
	draw_list_vkframebuffer = vkframebuffer;

	_draw_list_allocate(Rect2i(viewport_offset, viewport_size), 0, 0);
#ifdef DEBUG_ENABLED
	draw_list_framebuffer_format = framebuffer->format_id;
#endif
	draw_list_current_subpass = 0;

	if (needs_clear_color || needs_clear_depth) {
		_draw_list_insert_clear_region(draw_list, framebuffer, viewport_offset, viewport_size, needs_clear_color, p_clear_color_values, needs_clear_depth, p_clear_depth, p_clear_stencil);
	}

	VkViewport viewport;
	viewport.x = viewport_offset.x;
	viewport.y = viewport_offset.y;
	viewport.width = viewport_size.width;
	viewport.height = viewport_size.height;
	viewport.minDepth = 0;
	viewport.maxDepth = 1.0;

	vkCmdSetViewport(command_buffer, 0, 1, &viewport);

	VkRect2D scissor;
	scissor.offset.x = viewport_offset.x;
	scissor.offset.y = viewport_offset.y;
	scissor.extent.width = viewport_size.width;
	scissor.extent.height = viewport_size.height;

	vkCmdSetScissor(command_buffer, 0, 1, &scissor);

	return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT;
}
// Begins a draw list on p_framebuffer that is split into p_splits lists, each with its own
// command buffer; the render pass is begun with VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS.
// The p_splits resulting IDs are written to r_split_ids.
//
// p_region optionally restricts rendering to a sub-rectangle; it must lie entirely inside the
// framebuffer. An empty Rect2, or one equal to the full framebuffer rectangle, means "whole
// framebuffer". With a custom region, INITIAL_ACTION_CLEAR_REGION is turned into KEEP plus an
// explicit clear of just that region, recorded into the first split.
// NOTE(review): unlike draw_list_begin(), INITIAL_ACTION_CLEAR_REGION_CONTINUE is not remapped
// here, and the clear color count ignores VRS/resolve attachments - confirm this is intended.
//
// Returns OK on success; ERR_BUSY when another list is active, ERR_INVALID_DECLARATION for
// p_splits < 1 or an invalid framebuffer, ERR_INVALID_PARAMETER for a bad region or clear
// color count, ERR_CANT_CREATE when the render pass cannot be set up, or the error reported
// by _draw_list_allocate().
Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, DrawListID *r_split_ids, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector<RID> &p_storage_textures) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V_MSG(draw_list != nullptr, ERR_BUSY, "Only one draw list can be active at the same time.");
	ERR_FAIL_COND_V_MSG(compute_list != nullptr && !compute_list->state.allow_draw_overlap, ERR_BUSY, "Only one draw/compute list can be active at the same time.");

	ERR_FAIL_COND_V(p_splits < 1, ERR_INVALID_DECLARATION);

	Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_framebuffer);
	ERR_FAIL_COND_V(!framebuffer, ERR_INVALID_DECLARATION);

	Point2i viewport_offset;
	Point2i viewport_size = framebuffer->size;

	bool needs_clear_color = false;
	bool needs_clear_depth = false;

	if (p_region != Rect2() && p_region != Rect2(Vector2(), viewport_size)) { // Check custom region.
		Rect2i viewport(viewport_offset, viewport_size);
		Rect2i regioni = p_region;
		// All four bounds must hold. The negation has to apply to the whole conjunction;
		// negating only the first comparison would let most out-of-bounds regions through.
		const bool region_inside_viewport = (regioni.position.x >= viewport.position.x) && (regioni.position.y >= viewport.position.y) &&
				((regioni.position.x + regioni.size.x) <= (viewport.position.x + viewport.size.x)) &&
				((regioni.position.y + regioni.size.y) <= (viewport.position.y + viewport.size.y));
		if (!region_inside_viewport) {
			ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "When supplying a custom region, it must be contained within the framebuffer rectangle");
		}

		viewport_offset = regioni.position;
		viewport_size = regioni.size;

		// Region clears are done manually after the render pass begins, so the render pass
		// itself must keep the existing contents.
		if (p_initial_color_action == INITIAL_ACTION_CLEAR_REGION) {
			needs_clear_color = true;
			p_initial_color_action = INITIAL_ACTION_KEEP;
		}
		if (p_initial_depth_action == INITIAL_ACTION_CLEAR_REGION) {
			needs_clear_depth = true;
			p_initial_depth_action = INITIAL_ACTION_KEEP;
		}
	}

	if (p_initial_color_action == INITIAL_ACTION_CLEAR || needs_clear_color) { // Check clear values.

		int color_count = 0;
		for (int i = 0; i < framebuffer->texture_ids.size(); i++) {
			Texture *texture = texture_owner.get_or_null(framebuffer->texture_ids[i]);

			if (!texture || !(texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
				color_count++;
			}
		}

		ERR_FAIL_COND_V_MSG(p_clear_color_values.size() != color_count, ERR_INVALID_PARAMETER,
				"Clear color values supplied (" + itos(p_clear_color_values.size()) + ") differ from the amount required for framebuffer (" + itos(color_count) + ").");
	}

	VkFramebuffer vkframebuffer;
	VkRenderPass render_pass;

	Error err = _draw_list_setup_framebuffer(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, &vkframebuffer, &render_pass, &draw_list_subpass_count);
	ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);

	VkCommandBuffer frame_command_buffer = frames[frame].draw_command_buffer;
	err = _draw_list_render_pass_begin(framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, viewport_offset, viewport_size, vkframebuffer, render_pass, frame_command_buffer, VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS, p_storage_textures);

	if (err != OK) {
		return ERR_CANT_CREATE;
	}

	draw_list_current_subpass = 0;

#ifdef DEBUG_ENABLED
	draw_list_framebuffer_format = framebuffer->format_id;
#endif
	draw_list_render_pass = render_pass;
	draw_list_vkframebuffer = vkframebuffer;

	err = _draw_list_allocate(Rect2i(viewport_offset, viewport_size), p_splits, 0);
	if (err != OK) {
		return err;
	}

	if (needs_clear_color || needs_clear_depth) {
		_draw_list_insert_clear_region(&draw_list[0], framebuffer, viewport_offset, viewport_size, needs_clear_color, p_clear_color_values, needs_clear_depth, p_clear_depth, p_clear_stencil);
	}

	// Every split gets the same viewport and scissor on its own command buffer.
	for (uint32_t i = 0; i < p_splits; i++) {
		VkViewport viewport;
		viewport.x = viewport_offset.x;
		viewport.y = viewport_offset.y;
		viewport.width = viewport_size.width;
		viewport.height = viewport_size.height;
		viewport.minDepth = 0;
		viewport.maxDepth = 1.0;

		vkCmdSetViewport(draw_list[i].command_buffer, 0, 1, &viewport);

		VkRect2D scissor;
		scissor.offset.x = viewport_offset.x;
		scissor.offset.y = viewport_offset.y;
		scissor.extent.width = viewport_size.width;
		scissor.extent.height = viewport_size.height;

		vkCmdSetScissor(draw_list[i].command_buffer, 0, 1, &scissor);
		// Split IDs carry the split index in their low bits.
		r_split_ids[i] = (int64_t(ID_TYPE_SPLIT_DRAW_LIST) << ID_BASE_SHIFT) + i;
	}

	return OK;
}
// Resolves a DrawListID to the DrawList it designates, or nullptr if it designates none.
//
// - The single-list ID (ID_TYPE_DRAW_LIST << ID_BASE_SHIFT) resolves to draw_list, but only
//   while the active list is not split.
// - A split-list ID (type ID_TYPE_SPLIT_DRAW_LIST in the bits above ID_BASE_SHIFT, split index
//   in the bits below) resolves to draw_list[index], but only while the active list is split
//   and the index is below draw_list_count.
// - Negative IDs, any other ID, or no active draw list at all yield nullptr.
RenderingDeviceVulkan::DrawList *RenderingDeviceVulkan::_get_draw_list_ptr(DrawListID p_id) {
	if (p_id < 0 || !draw_list) {
		return nullptr;
	}

	const bool is_single_list_id = p_id == (int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT);
	if (is_single_list_id) {
		return draw_list_split ? nullptr : draw_list;
	}

	const bool is_split_list_id = (p_id >> DrawListID(ID_BASE_SHIFT)) == ID_TYPE_SPLIT_DRAW_LIST;
	if (!is_split_list_id || !draw_list_split) {
		return nullptr;
	}

	// Keep only the bits below ID_BASE_SHIFT: the split index.
	const DrawListID index_mask = (DrawListID(1) << DrawListID(ID_BASE_SHIFT)) - 1;
	const uint64_t index = p_id & index_mask;
	return index < draw_list_count ? &draw_list[index] : nullptr;
}
// Records vkCmdSetBlendConstants() with the four components of p_color into the command
// buffer of draw list p_list. Does nothing (after reporting an error) when p_list does not
// resolve to a draw list or, in debug builds, when that list is no longer active.
void RenderingDeviceVulkan::draw_list_set_blend_constants(DrawListID p_list, const Color &p_color) {
	DrawList *dl = _get_draw_list_ptr(p_list);
	ERR_FAIL_COND(!dl);

#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif

	VkCommandBuffer target_buffer = dl->command_buffer;
	vkCmdSetBlendConstants(target_buffer, p_color.components);
}
// Binds render pipeline p_render_pipeline on draw list p_list.
//
// Binding the pipeline that is already current on the list is a no-op. Otherwise the pipeline
// is bound on the list's command buffer and, when its shader differs from the one previously
// bound, the list's uniform set slots are re-validated against the new pipeline's set formats
// and the push constant stage mask is updated. In debug builds the list's validation state is
// refreshed from the pipeline as well.
//
// Fails (after reporting an error) if p_list or p_render_pipeline is invalid or, in debug
// builds, if the list is no longer active or the framebuffer format/subpass check fails.
void RenderingDeviceVulkan::draw_list_bind_render_pipeline(DrawListID p_list, RID p_render_pipeline) {
DrawList *dl = _get_draw_list_ptr(p_list);
ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
const RenderPipeline *pipeline = render_pipeline_owner.get_or_null(p_render_pipeline);
ERR_FAIL_COND(!pipeline);
#ifdef DEBUG_ENABLED
// NOTE(review): with `&&` this only fails when BOTH the framebuffer format and the subpass
// index differ; a pipeline mismatching just one of them is accepted. Confirm whether `||`
// was intended.
ERR_FAIL_COND(pipeline->validation.framebuffer_format != draw_list_framebuffer_format && pipeline->validation.render_pass != draw_list_current_subpass);
#endif
if (p_render_pipeline == dl->state.pipeline) {
return; // Redundant state, return.
}
dl->state.pipeline = p_render_pipeline;
dl->state.pipeline_layout = pipeline->pipeline_layout;
vkCmdBindPipeline(dl->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline);
if (dl->state.pipeline_shader != pipeline->shader) {
// Shader changed, so descriptor sets may become incompatible.
// Go through ALL sets, and unbind them (and all those above) if the format is different.
uint32_t pcount = pipeline->set_formats.size(); // Formats count in this pipeline.
// Temporarily widen set_count so the second loop below also covers slots used only by the previous pipeline.
dl->state.set_count = MAX(dl->state.set_count, pcount);
const uint32_t *pformats = pipeline->set_formats.ptr(); // Pipeline set formats.
bool sets_valid = true; // Once invalid, all above become invalid.
for (uint32_t i = 0; i < pcount; i++) {
// If a part of the format is different, invalidate it (and the rest).
if (!sets_valid || dl->state.sets[i].pipeline_expected_format != pformats[i]) {
dl->state.sets[i].bound = false;
dl->state.sets[i].pipeline_expected_format = pformats[i];
sets_valid = false;
}
}
for (uint32_t i = pcount; i < dl->state.set_count; i++) {
// Unbind the ones above (not used) if exist.
dl->state.sets[i].bound = false;
}
dl->state.set_count = pcount; // Update set count.
// The push constant stage mask is only refreshed for pipelines that actually use push constants.
if (pipeline->push_constant_size) {
dl->state.pipeline_push_constant_stages = pipeline->push_constant_stages_mask;
#ifdef DEBUG_ENABLED
// Push constants must be supplied again for the new shader.
dl->validation.pipeline_push_constant_supplied = false;
#endif
}
dl->state.pipeline_shader = pipeline->shader;
}
#ifdef DEBUG_ENABLED
// Update render pass pipeline info.
dl->validation.pipeline_active = true;
dl->validation.pipeline_dynamic_state = pipeline->validation.dynamic_state;
dl->validation.pipeline_vertex_format = pipeline->validation.vertex_format;
dl->validation.pipeline_uses_restart_indices = pipeline->validation.uses_restart_indices;
dl->validation.pipeline_primitive_divisor = pipeline->validation.primitive_divisor;
dl->validation.pipeline_primitive_minimum = pipeline->validation.primitive_minimum;
dl->validation.pipeline_push_constant_size = pipeline->push_constant_size;
#endif
}
// Assigns a uniform set to slot p_index of the given draw list. The descriptor set is not
// bound in Vulkan here; the slot is only recorded and flagged as needing a (re)bind.
// Mutable storage textures of the set are tagged as used by raster work in this frame.
void RenderingDeviceVulkan::draw_list_bind_uniform_set(DrawListID p_list, RID p_uniform_set, uint32_t p_index) {
#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index >= MAX_UNIFORM_SETS,
			"Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(limits.maxBoundDescriptorSets) + ").");
#endif
	DrawList *dl = _get_draw_list_ptr(p_list);
	ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
	const UniformSet *uniform_set = uniform_set_owner.get_or_null(p_uniform_set);
	ERR_FAIL_COND(!uniform_set);

	// NOTE(review): the count is raised to p_index rather than p_index + 1 - confirm this is intended.
	if (dl->state.set_count < p_index) {
		dl->state.set_count = p_index;
	}

	// Record the set in its slot; clearing 'bound' forces a rebind at draw time.
	dl->state.sets[p_index].uniform_set = p_uniform_set;
	dl->state.sets[p_index].uniform_set_format = uniform_set->format;
	dl->state.sets[p_index].descriptor_set = uniform_set->descriptor_set;
	dl->state.sets[p_index].bound = false;

	const uint32_t storage_texture_count = uniform_set->mutable_storage_textures.size();
	if (storage_texture_count > 0) {
		Texture **storage_textures = const_cast<UniformSet *>(uniform_set)->mutable_storage_textures.ptrw();
		for (uint32_t t = 0; t < storage_texture_count; t++) {
			Texture *tex = storage_textures[t];
			// First use in this frame: reset the other usage flags.
			if (tex->used_in_frame != frames_drawn) {
				tex->used_in_frame = frames_drawn;
				tex->used_in_transfer = false;
				tex->used_in_compute = false;
			}
			tex->used_in_raster = true;
		}
	}

#ifdef DEBUG_ENABLED
	{ // Reject textures that are also bound as attachments of the current framebuffer.
		uint32_t attachable_count = uniform_set->attachable_textures.size();
		const UniformSet::AttachableTexture *attachable_ptr = uniform_set->attachable_textures.ptr();
		uint32_t bound_count = draw_list_bound_textures.size();
		const RID *bound_ptr = draw_list_bound_textures.ptr();
		for (uint32_t i = 0; i < attachable_count; i++) {
			for (uint32_t j = 0; j < bound_count; j++) {
				ERR_FAIL_COND_MSG(attachable_ptr[i].texture == bound_ptr[j],
						"Attempted to use the same texture in framebuffer attachment and a uniform (set: " + itos(p_index) + ", binding: " + itos(attachable_ptr[i].bind) + "), this is not allowed.");
			}
		}
	}
#endif
}
// Binds the vertex buffers of a vertex array to the given draw list.
// Nothing is recorded if that vertex array is already the one stored in the list state.
void RenderingDeviceVulkan::draw_list_bind_vertex_array(DrawListID p_list, RID p_vertex_array) {
DrawList *dl = _get_draw_list_ptr(p_list);
ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
const VertexArray *vertex_array = vertex_array_owner.get_or_null(p_vertex_array);
ERR_FAIL_COND(!vertex_array);
if (dl->state.vertex_array == p_vertex_array) {
return; // Already set.
}
dl->state.vertex_array = p_vertex_array;
#ifdef DEBUG_ENABLED
// Cached so draw_list_draw() can check the bound format and instance limit against the pipeline.
dl->validation.vertex_format = vertex_array->description;
dl->validation.vertex_max_instances_allowed = vertex_array->max_instances_allowed;
#endif
// Stored outside the debug guard: draw_list_draw() uses it as the vertex count of non-indexed draws.
dl->validation.vertex_array_size = vertex_array->vertex_count;
// All buffers of the array are bound in one call, starting at binding 0.
vkCmdBindVertexBuffers(dl->command_buffer, 0, vertex_array->buffers.size(), vertex_array->buffers.ptr(), vertex_array->offsets.ptr());
}
// Binds the index buffer of an index array to the given draw list.
// Nothing is recorded if that index array is already the one stored in the list state.
void RenderingDeviceVulkan::draw_list_bind_index_array(DrawListID p_list, RID p_index_array) {
DrawList *dl = _get_draw_list_ptr(p_list);
ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
const IndexArray *index_array = index_array_owner.get_or_null(p_index_array);
ERR_FAIL_COND(!index_array);
if (dl->state.index_array == p_index_array) {
return; // Already set.
}
dl->state.index_array = p_index_array;
#ifdef DEBUG_ENABLED
dl->validation.index_array_max_index = index_array->max_index;
#endif
// Stored outside the debug guard: draw_list_draw() passes these to vkCmdDrawIndexed()
// as the index count and the first index.
dl->validation.index_array_size = index_array->indices;
dl->validation.index_array_offset = index_array->offset;
// The buffer itself is bound at byte offset 0; the array's offset is applied at draw time.
vkCmdBindIndexBuffer(dl->command_buffer, index_array->buffer, 0, index_array->index_type);
}
// Records a line width change into the command buffer of the given draw list.
// Fails with an error (and records nothing) if p_list does not resolve to a draw list.
void RenderingDeviceVulkan::draw_list_set_line_width(DrawListID p_list, float p_width) {
DrawList *dl = _get_draw_list_ptr(p_list);
ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
// Debug builds also reject lists whose validation state is no longer active.
ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif
vkCmdSetLineWidth(dl->command_buffer, p_width);
}
// Uploads push constant data for the pipeline currently bound to the given draw list.
// The data is written at offset 0 for the stages cached when the pipeline was bound.
// Debug builds require p_data_size to equal the size declared by the bound pipeline.
void RenderingDeviceVulkan::draw_list_set_push_constant(DrawListID p_list, const void *p_data, uint32_t p_data_size) {
	DrawList *dl = _get_draw_list_ptr(p_list);
	ERR_FAIL_COND(!dl);

#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
	ERR_FAIL_COND_MSG(p_data_size != dl->validation.pipeline_push_constant_size,
			"This render pipeline requires (" + itos(dl->validation.pipeline_push_constant_size) + ") bytes of push constant data, supplied: (" + itos(p_data_size) + ")");
#endif

	vkCmdPushConstants(dl->command_buffer, dl->state.pipeline_layout, dl->state.pipeline_push_constant_stages, 0, p_data_size, p_data);

#ifdef DEBUG_ENABLED
	// Lets draw_list_draw() verify that the push constant was provided.
	dl->validation.pipeline_push_constant_supplied = true;
#endif
}
// Records a draw call into the given draw list.
//
// p_use_indices: draw with the bound index array (vkCmdDrawIndexed) instead of plain vertices.
// p_instances: instance count passed to the draw command.
// p_procedural_vertices: when > 0 (non-indexed draws only), the vertex count to draw without
//     a vertex array; otherwise the vertex count of the bound vertex array is used.
//
// Descriptor sets expected by the pipeline and not yet bound are bound right before the draw.
// Debug builds validate the pipeline, vertex format, push constants, uniform set formats and
// primitive counts first, and fail with an error instead of recording an invalid draw.
void RenderingDeviceVulkan::draw_list_draw(DrawListID p_list, bool p_use_indices, uint32_t p_instances, uint32_t p_procedural_vertices) {
	DrawList *dl = _get_draw_list_ptr(p_list);
	ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif

#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(!dl->validation.pipeline_active,
			"No render pipeline was set before attempting to draw.");
	if (dl->validation.pipeline_vertex_format != INVALID_ID) {
		// Pipeline uses vertices, validate format.
		ERR_FAIL_COND_MSG(dl->validation.vertex_format == INVALID_ID,
				"No vertex array was bound, and render pipeline expects vertices.");
		// Make sure format is right.
		ERR_FAIL_COND_MSG(dl->validation.pipeline_vertex_format != dl->validation.vertex_format,
				"The vertex format used to create the pipeline does not match the vertex format bound.");
		// Make sure number of instances is valid.
		ERR_FAIL_COND_MSG(p_instances > dl->validation.vertex_max_instances_allowed,
				"Number of instances requested (" + itos(p_instances) + ") is larger than the maximum number supported by the bound vertex array (" + itos(dl->validation.vertex_max_instances_allowed) + ").");
	}

	if (dl->validation.pipeline_push_constant_size > 0) {
		// Using push constants, check that they were supplied.
		ERR_FAIL_COND_MSG(!dl->validation.pipeline_push_constant_supplied,
				"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
	}
#endif

	// Bind descriptor sets.
	for (uint32_t i = 0; i < dl->state.set_count; i++) {
		if (dl->state.sets[i].pipeline_expected_format == 0) {
			continue; // Nothing expected by this pipeline.
		}
#ifdef DEBUG_ENABLED
		if (dl->state.sets[i].pipeline_expected_format != dl->state.sets[i].uniform_set_format) {
			if (dl->state.sets[i].uniform_set_format == 0) {
				ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline");
			} else if (uniform_set_owner.owns(dl->state.sets[i].uniform_set)) {
				UniformSet *us = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set);
				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(dl->state.pipeline_shader));
			} else {
				// The RID recorded for this slot is no longer owned, so only the pipeline side can be described.
				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(dl->state.pipeline_shader));
			}
		}
#endif
		if (!dl->state.sets[i].bound) {
			// All good, see if this requires re-binding.
			vkCmdBindDescriptorSets(dl->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, dl->state.pipeline_layout, i, 1, &dl->state.sets[i].descriptor_set, 0, nullptr);
			dl->state.sets[i].bound = true;
		}
	}

	if (p_use_indices) {
#ifdef DEBUG_ENABLED
		ERR_FAIL_COND_MSG(p_procedural_vertices > 0,
				"Procedural vertices can't be used together with indices.");

		ERR_FAIL_COND_MSG(!dl->validation.index_array_size,
				"Draw command requested indices, but no index buffer was set.");

		ERR_FAIL_COND_MSG(dl->validation.pipeline_uses_restart_indices != dl->validation.index_buffer_uses_restart_indices,
				"The usage of restart indices in index buffer does not match the render primitive in the pipeline.");
#endif
		// Index count and first index come from the index array recorded at bind time.
		uint32_t to_draw = dl->validation.index_array_size;

#ifdef DEBUG_ENABLED
		ERR_FAIL_COND_MSG(to_draw < dl->validation.pipeline_primitive_minimum,
				"Too few indices (" + itos(to_draw) + ") for the render primitive set in the render pipeline (" + itos(dl->validation.pipeline_primitive_minimum) + ").");

		ERR_FAIL_COND_MSG((to_draw % dl->validation.pipeline_primitive_divisor) != 0,
				"Index amount (" + itos(to_draw) + ") must be a multiple of the amount of indices required by the render primitive (" + itos(dl->validation.pipeline_primitive_divisor) + ").");
#endif
		vkCmdDrawIndexed(dl->command_buffer, to_draw, p_instances, dl->validation.index_array_offset, 0, 0);
	} else {
		uint32_t to_draw;

		if (p_procedural_vertices > 0) {
#ifdef DEBUG_ENABLED
			ERR_FAIL_COND_MSG(dl->validation.pipeline_vertex_format != INVALID_ID,
					"Procedural vertices requested, but pipeline expects a vertex array.");
#endif
			to_draw = p_procedural_vertices;
		} else {
#ifdef DEBUG_ENABLED
			ERR_FAIL_COND_MSG(dl->validation.pipeline_vertex_format == INVALID_ID,
					"Draw command lacks indices, but pipeline format does not use vertices.");
#endif
			to_draw = dl->validation.vertex_array_size;
		}

#ifdef DEBUG_ENABLED
		ERR_FAIL_COND_MSG(to_draw < dl->validation.pipeline_primitive_minimum,
				"Too few vertices (" + itos(to_draw) + ") for the render primitive set in the render pipeline (" + itos(dl->validation.pipeline_primitive_minimum) + ").");

		ERR_FAIL_COND_MSG((to_draw % dl->validation.pipeline_primitive_divisor) != 0,
				"Vertex amount (" + itos(to_draw) + ") must be a multiple of the amount of vertices required by the render primitive (" + itos(dl->validation.pipeline_primitive_divisor) + ").");
#endif

		vkCmdDraw(dl->command_buffer, to_draw, p_instances, 0, 0);
	}
}
// Sets the scissor of the given draw list to p_rect, which is interpreted relative to the
// list's viewport origin and clipped to the viewport. If nothing is left after clipping
// (zero area), no scissor command is recorded.
void RenderingDeviceVulkan::draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect) {
	DrawList *dl = _get_draw_list_ptr(p_list);
	ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif

	// Move the rectangle into framebuffer space, then clamp it to the viewport.
	Rect2i clipped = p_rect;
	clipped.position += dl->viewport.position;
	clipped = dl->viewport.intersection(clipped);

	if (clipped.get_area() == 0) {
		return;
	}

	VkRect2D scissor;
	scissor.extent.width = clipped.size.width;
	scissor.extent.height = clipped.size.height;
	scissor.offset.x = clipped.position.x;
	scissor.offset.y = clipped.position.y;

	vkCmdSetScissor(dl->command_buffer, 0, 1, &scissor);
}
// Resets the scissor of the given draw list so that it covers the list's whole viewport.
void RenderingDeviceVulkan::draw_list_disable_scissor(DrawListID p_list) {
	DrawList *dl = _get_draw_list_ptr(p_list);
	ERR_FAIL_COND(!dl);
#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
#endif

	const Rect2i &full_area = dl->viewport;

	VkRect2D scissor;
	scissor.extent.width = full_area.size.width;
	scissor.extent.height = full_area.size.height;
	scissor.offset.x = full_area.position.x;
	scissor.offset.y = full_area.position.y;

	vkCmdSetScissor(dl->command_buffer, 0, 1, &scissor);
}
// Returns the current value of draw_list_current_subpass, the counter that the
// draw_list_switch_to_next_pass*() methods increment.
// NOTE(review): read without _THREAD_SAFE_METHOD_, unlike the methods that modify it - confirm intended.
uint32_t RenderingDeviceVulkan::draw_list_get_current_pass() {
return draw_list_current_subpass;
}
// Ends the current draw list, advances the render pass to its next subpass and allocates a
// new non-split draw list that keeps the previous list's viewport.
// Returns the ID of the new draw list, or INVALID_ID if no draw list is active, if the
// current subpass is already the last one, or if the new list could not be allocated.
RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_switch_to_next_pass() {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V(draw_list == nullptr, INVALID_ID);
	// This function returns a draw list ID, so report failure with INVALID_ID like the check above.
	ERR_FAIL_COND_V(draw_list_current_subpass >= draw_list_subpass_count - 1, INVALID_ID);

	draw_list_current_subpass++;

	// Free the old list first, remembering its viewport for the list of the next subpass.
	Rect2i viewport;
	_draw_list_free(&viewport);

	vkCmdNextSubpass(frames[frame].draw_command_buffer, VK_SUBPASS_CONTENTS_INLINE);

	Error err = _draw_list_allocate(viewport, 0, draw_list_current_subpass);
	ERR_FAIL_COND_V(err != OK, INVALID_ID);

	return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT;
}
// Ends the current draw list, advances the render pass to its next subpass and allocates
// p_splits split draw lists that keep the previous list's viewport.
// On success, r_split_ids[0..p_splits) receives the IDs of the new split lists and OK is returned.
// Returns ERR_INVALID_PARAMETER if no draw list is active or the current subpass is already the
// last one, or the error reported by _draw_list_allocate() if the new lists cannot be created
// (in which case r_split_ids is left untouched).
Error RenderingDeviceVulkan::draw_list_switch_to_next_pass_split(uint32_t p_splits, DrawListID *r_split_ids) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V(draw_list == nullptr, ERR_INVALID_PARAMETER);
	ERR_FAIL_COND_V(draw_list_current_subpass >= draw_list_subpass_count - 1, ERR_INVALID_PARAMETER);

	draw_list_current_subpass++;

	// Free the old list first, remembering its viewport for the lists of the next subpass.
	Rect2i viewport;
	_draw_list_free(&viewport);

	vkCmdNextSubpass(frames[frame].draw_command_buffer, VK_SUBPASS_CONTENTS_INLINE);

	// Do not hand out IDs for lists that were never created.
	Error err = _draw_list_allocate(viewport, p_splits, draw_list_current_subpass);
	if (err != OK) {
		return err;
	}

	for (uint32_t i = 0; i < p_splits; i++) {
		r_split_ids[i] = (int64_t(ID_TYPE_SPLIT_DRAW_LIST) << ID_BASE_SHIFT) + i;
	}

	return OK;
}
// Allocates the active draw list(s).
//
// p_viewport: viewport stored in every allocated list.
// p_splits: 0 allocates a single list recording into the frame's draw command buffer;
//     N > 0 allocates N split lists, each recording into its own secondary command buffer
//     (command pools/buffers are created on demand and kept for reuse).
// p_subpass: subpass index written into the inheritance info of the secondary buffers.
//
// On success the class mutex is left locked; _draw_list_free() releases it.
// On failure ERR_CANT_CREATE is returned, draw_list is not left pointing at a list, and the
// mutex is released again, since nothing will call _draw_list_free() for a list that was
// never created.
Error RenderingDeviceVulkan::_draw_list_allocate(const Rect2i &p_viewport, uint32_t p_splits, uint32_t p_subpass) {
	// Lock while draw_list is active.
	_THREAD_SAFE_LOCK_

	if (p_splits == 0) {
		draw_list = memnew(DrawList);
		draw_list->command_buffer = frames[frame].draw_command_buffer;
		draw_list->viewport = p_viewport;
		draw_list_count = 0;
		draw_list_split = false;
	} else {
		if (p_splits > (uint32_t)split_draw_list_allocators.size()) {
			uint32_t from = split_draw_list_allocators.size();
			split_draw_list_allocators.resize(p_splits);
			for (uint32_t i = from; i < p_splits; i++) {
				VkCommandPoolCreateInfo cmd_pool_info;
				cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
				cmd_pool_info.pNext = nullptr;
				cmd_pool_info.queueFamilyIndex = context->get_graphics_queue_family_index();
				cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;

				VkResult res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &split_draw_list_allocators.write[i].command_pool);
				if (res) {
					// Entries from i onwards were never initialized; drop them so a later
					// call creates them again instead of using them.
					split_draw_list_allocators.resize(i);
					_THREAD_SAFE_UNLOCK_
					ERR_FAIL_V_MSG(ERR_CANT_CREATE, "vkCreateCommandPool failed with error " + itos(res) + ".");
				}

				for (int j = 0; j < frame_count; j++) {
					VkCommandBuffer command_buffer;

					VkCommandBufferAllocateInfo cmdbuf;
					// No command buffer exists, create it.
					cmdbuf.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
					cmdbuf.pNext = nullptr;
					cmdbuf.commandPool = split_draw_list_allocators[i].command_pool;
					cmdbuf.level = VK_COMMAND_BUFFER_LEVEL_SECONDARY;
					cmdbuf.commandBufferCount = 1;

					VkResult err = vkAllocateCommandBuffers(device, &cmdbuf, &command_buffer);
					if (err) {
						// Entry i is incomplete: destroy its pool (which also frees the buffers
						// already allocated from it) and drop it together with the untouched
						// entries above it.
						vkDestroyCommandPool(device, split_draw_list_allocators[i].command_pool, nullptr);
						split_draw_list_allocators.resize(i);
						_THREAD_SAFE_UNLOCK_
						ERR_FAIL_V_MSG(ERR_CANT_CREATE, "vkAllocateCommandBuffers failed with error " + itos(err) + ".");
					}

					split_draw_list_allocators.write[i].command_buffers.push_back(command_buffer);
				}
			}
		}
		draw_list = memnew_arr(DrawList, p_splits);
		draw_list_count = p_splits;
		draw_list_split = true;

		for (uint32_t i = 0; i < p_splits; i++) {
			// Take a command buffer and initialize it.
			VkCommandBuffer command_buffer = split_draw_list_allocators[i].command_buffers[frame];

			VkCommandBufferInheritanceInfo inheritance_info;
			inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
			inheritance_info.pNext = nullptr;
			inheritance_info.renderPass = draw_list_render_pass;
			inheritance_info.subpass = p_subpass;
			inheritance_info.framebuffer = draw_list_vkframebuffer;
			inheritance_info.occlusionQueryEnable = false;
			inheritance_info.queryFlags = 0; // ?
			inheritance_info.pipelineStatistics = 0;

			VkCommandBufferBeginInfo cmdbuf_begin;
			cmdbuf_begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
			cmdbuf_begin.pNext = nullptr;
			cmdbuf_begin.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT | VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
			cmdbuf_begin.pInheritanceInfo = &inheritance_info;

			VkResult res = vkResetCommandBuffer(command_buffer, 0);
			if (res) {
				memdelete_arr(draw_list);
				draw_list = nullptr;
				_THREAD_SAFE_UNLOCK_
				ERR_FAIL_V_MSG(ERR_CANT_CREATE, "vkResetCommandBuffer failed with error " + itos(res) + ".");
			}

			res = vkBeginCommandBuffer(command_buffer, &cmdbuf_begin);
			if (res) {
				memdelete_arr(draw_list);
				draw_list = nullptr;
				_THREAD_SAFE_UNLOCK_
				ERR_FAIL_V_MSG(ERR_CANT_CREATE, "vkBeginCommandBuffer failed with error " + itos(res) + ".");
			}

			draw_list[i].command_buffer = command_buffer;
			draw_list[i].viewport = p_viewport;
		}
	}

	return OK;
}
// Releases the active draw list(s) and unlocks the class mutex taken by _draw_list_allocate().
// For split lists, every secondary command buffer is ended and all of them are executed from
// the frame's draw command buffer before the array is freed.
// r_last_viewport (optional): receives the viewport of the single list, or for split lists
// the viewport of list 0, overwritten by each later list that has viewport_set.
void RenderingDeviceVulkan::_draw_list_free(Rect2i *r_last_viewport) {
	if (!draw_list_split) {
		if (r_last_viewport) {
			*r_last_viewport = draw_list->viewport;
		}
		// A single list has nothing to submit; just delete it.
		memdelete(draw_list);
	} else {
		// Collect the secondary command buffers so they can be executed in one call.
		VkCommandBuffer *secondary_buffers = (VkCommandBuffer *)alloca(sizeof(VkCommandBuffer) * draw_list_count);
		for (uint32_t split = 0; split < draw_list_count; split++) {
			DrawList &list = draw_list[split];
			vkEndCommandBuffer(list.command_buffer);
			secondary_buffers[split] = list.command_buffer;
			if (r_last_viewport && (split == 0 || list.viewport_set)) {
				*r_last_viewport = list.viewport;
			}
		}

		vkCmdExecuteCommands(frames[frame].draw_command_buffer, draw_list_count, secondary_buffers);
		memdelete_arr(draw_list);
	}
	draw_list = nullptr;

	// Draw_list is no longer active.
	_THREAD_SAFE_UNLOCK_
}
// Finishes the active draw list: frees the list(s), ends the render pass, releases the
// framebuffer textures that were flagged to be unbound, and records a pipeline barrier.
// p_post_barrier selects the destination stages/accesses of that barrier. Storage textures
// collected in draw_list_storage_textures are transitioned to SHADER_READ_ONLY_OPTIMAL.
void RenderingDeviceVulkan::draw_list_end(BitField<BarrierMask> p_post_barrier) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_MSG(!draw_list, "Immediate draw list is already inactive.");

	_draw_list_free();

	vkCmdEndRenderPass(frames[frame].draw_command_buffer);

	// Release attachment textures according to the unbind flags, then forget them.
	for (int i = 0; i < draw_list_bound_textures.size(); i++) {
		Texture *texture = texture_owner.get_or_null(draw_list_bound_textures[i]);
		ERR_CONTINUE(!texture); // Wtf.
		const bool release_color = draw_list_unbind_color_textures && (texture->usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT);
		const bool release_depth = draw_list_unbind_depth_textures && (texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
		if (release_color || release_depth) {
			texture->bound = false;
		}
	}
	draw_list_bound_textures.clear();

	// Destination side of the barrier, built from the requested post-barrier mask.
	uint32_t dst_stage = 0;
	uint32_t dst_access = 0;
	if (p_post_barrier.has_flag(BARRIER_MASK_COMPUTE)) {
		dst_stage |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
		dst_access |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
	}
	if (p_post_barrier.has_flag(BARRIER_MASK_VERTEX)) {
		dst_stage |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT /*| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT*/;
		dst_access |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT /*| VK_ACCESS_INDIRECT_COMMAND_READ_BIT*/;
	}
	if (p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT)) {
		dst_stage |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT /*| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT*/;
		dst_access |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT /*| VK_ACCESS_INDIRECT_COMMAND_READ_BIT*/;
	}
	if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) {
		dst_stage |= VK_PIPELINE_STAGE_TRANSFER_BIT;
		dst_access |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
	}
	if (dst_stage == 0) {
		// A stage mask of 0 is not passed on; fall back to bottom of pipe.
		dst_stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
	}

	// Source side: attachment writes, plus shader writes when storage textures were used.
	const uint32_t image_barrier_count = draw_list_storage_textures.size();
	VkImageMemoryBarrier *image_barriers = nullptr;
	uint32_t src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
	uint32_t src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
	if (image_barrier_count) {
		image_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * image_barrier_count);
		src_stage |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
		src_access |= VK_ACCESS_SHADER_WRITE_BIT;
	}

	// One image barrier per storage texture, moving it to the sampled (read only) layout.
	for (uint32_t b = 0; b < image_barrier_count; b++) {
		Texture *texture = texture_owner.get_or_null(draw_list_storage_textures[b]);

		VkImageMemoryBarrier &barrier = image_barriers[b];
		barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
		barrier.pNext = nullptr;
		barrier.srcAccessMask = src_access;
		barrier.dstAccessMask = dst_access;
		barrier.oldLayout = texture->layout;
		barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
		barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		barrier.image = texture->image;
		barrier.subresourceRange.aspectMask = texture->read_aspect_mask;
		barrier.subresourceRange.baseMipLevel = texture->base_mipmap;
		barrier.subresourceRange.levelCount = texture->mipmaps;
		barrier.subresourceRange.baseArrayLayer = texture->base_layer;
		barrier.subresourceRange.layerCount = texture->layers;

		// Keep the tracked layout in sync with the transition being recorded.
		texture->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
	}
	draw_list_storage_textures.clear();

	// To ensure proper synchronization, we must make sure rendering is done before:
	// * Some buffer is copied.
	// * Another render pass happens (since we may be done).
	if (image_barrier_count > 0 || p_post_barrier != BARRIER_MASK_NO_BARRIER) {
		VkMemoryBarrier mem_barrier;
		mem_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
		mem_barrier.pNext = nullptr;
		mem_barrier.srcAccessMask = src_access;
		mem_barrier.dstAccessMask = dst_access;

		vkCmdPipelineBarrier(frames[frame].draw_command_buffer, src_stage, dst_stage, 0, 1, &mem_barrier, 0, nullptr, image_barrier_count, image_barriers);
	}

#ifdef FORCE_FULL_BARRIER
	_full_barrier(true);
#endif
}
/***********************/
/**** COMPUTE LISTS ****/
/***********************/
// Starts a compute list that records into the current frame's draw command buffer.
// p_allow_draw_overlap: when false, the call fails if a draw list is currently allocated.
// Returns ID_TYPE_COMPUTE_LIST on success, or INVALID_ID if a conflicting draw list or
// another compute list already exists.
RenderingDevice::ComputeListID RenderingDeviceVulkan::compute_list_begin(bool p_allow_draw_overlap) {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_V_MSG(!p_allow_draw_overlap && draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same time.");
ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time.");
// Lock while compute_list is active.
// (This manual lock is taken in addition to the scoped _THREAD_SAFE_METHOD_ above; the
// matching unlock is not in this function.)
_THREAD_SAFE_LOCK_
compute_list = memnew(ComputeList);
compute_list->command_buffer = frames[frame].draw_command_buffer;
compute_list->state.allow_draw_overlap = p_allow_draw_overlap;
// There is a single compute list, so its ID is just the type constant.
return ID_TYPE_COMPUTE_LIST;
}
// Binds a compute pipeline to the active compute list and refreshes the cached list state
// that depends on it: pipeline layout, the descriptor set formats the pipeline expects,
// push constant stages and the local group size. Binding the current pipeline is a no-op.
void RenderingDeviceVulkan::compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline) {
	// Must be called within a compute list, the class mutex is locked during that time

	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
	ERR_FAIL_COND(!compute_list);

	ComputeList *cl = compute_list;

	const ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_compute_pipeline);
	ERR_FAIL_COND(!pipeline);

	if (p_compute_pipeline == cl->state.pipeline) {
		return; // Same pipeline as before, nothing to record.
	}

	cl->state.pipeline = p_compute_pipeline;
	cl->state.pipeline_layout = pipeline->pipeline_layout;
	vkCmdBindPipeline(cl->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline);

	if (cl->state.pipeline_shader != pipeline->shader) {
		// A different shader may expect different set formats, so the sets recorded so far
		// have to be re-checked against what this pipeline expects.
		const uint32_t format_count = pipeline->set_formats.size();
		const uint32_t *formats = pipeline->set_formats.ptr();
		const uint32_t tracked_count = MAX(cl->state.set_count, format_count);

		// Sets stay bound only while every lower set index still has a matching format;
		// the first mismatch invalidates that set and all the ones above it.
		bool prefix_compatible = true;
		for (uint32_t set = 0; set < format_count; set++) {
			if (prefix_compatible && cl->state.sets[set].pipeline_expected_format == formats[set]) {
				continue;
			}
			cl->state.sets[set].bound = false;
			cl->state.sets[set].pipeline_expected_format = formats[set];
			prefix_compatible = false;
		}

		// Sets beyond what this pipeline uses are simply marked unbound.
		for (uint32_t set = format_count; set < tracked_count; set++) {
			cl->state.sets[set].bound = false;
		}

		cl->state.set_count = format_count;

		if (pipeline->push_constant_size) {
			cl->state.pipeline_push_constant_stages = pipeline->push_constant_stages_mask;
#ifdef DEBUG_ENABLED
			// A new shader needs its push constant supplied again.
			cl->validation.pipeline_push_constant_supplied = false;
#endif
		}

		cl->state.pipeline_shader = pipeline->shader;
		for (int axis = 0; axis < 3; axis++) {
			cl->state.local_group_size[axis] = pipeline->local_group_size[axis];
		}
	}

#ifdef DEBUG_ENABLED
	// Snapshot the pipeline properties that later validation compares against.
	cl->validation.pipeline_active = true;
	cl->validation.pipeline_push_constant_size = pipeline->push_constant_size;
#endif
}
// Assigns a uniform set to slot p_index of the active compute list and records the image
// layout transitions its mutable textures need before compute shaders access them:
// mutable sampled textures are moved to SHADER_READ_ONLY_OPTIMAL and mutable storage
// textures to GENERAL. The descriptor set itself is only recorded and flagged for (re)binding.
void RenderingDeviceVulkan::compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) {
// Must be called within a compute list, the class mutex is locked during that time
ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
ERR_FAIL_COND(!compute_list);
ComputeList *cl = compute_list;
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(p_index >= limits.maxBoundDescriptorSets || p_index >= MAX_UNIFORM_SETS,
"Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(limits.maxBoundDescriptorSets) + ").");
#endif
#ifdef DEBUG_ENABLED
ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
#endif
UniformSet *uniform_set = uniform_set_owner.get_or_null(p_uniform_set);
ERR_FAIL_COND(!uniform_set);
// NOTE(review): the count is raised to p_index rather than p_index + 1 - confirm this is intended.
if (p_index > cl->state.set_count) {
cl->state.set_count = p_index;
}
cl->state.sets[p_index].descriptor_set = uniform_set->descriptor_set; // Update set pointer.
cl->state.sets[p_index].bound = false; // Needs rebind.
cl->state.sets[p_index].uniform_set_format = uniform_set->format;
cl->state.sets[p_index].uniform_set = p_uniform_set;
uint32_t textures_to_sampled_count = uniform_set->mutable_sampled_textures.size();
uint32_t textures_to_storage_count = uniform_set->mutable_storage_textures.size();
Texture **textures_to_sampled = uniform_set->mutable_sampled_textures.ptrw();
// Scratch array sized for the worst case: one barrier per mutable texture of either kind.
VkImageMemoryBarrier *texture_barriers = nullptr;
if (textures_to_sampled_count + textures_to_storage_count) {
texture_barriers = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * (textures_to_sampled_count + textures_to_storage_count));
}
uint32_t texture_barrier_count = 0;
// Accumulated over both loops below; used as the source stage mask of the single barrier call.
uint32_t src_stage_flags = 0;
// Sampled textures: anything not already in the read only layout gets a transition to it.
for (uint32_t i = 0; i < textures_to_sampled_count; i++) {
if (textures_to_sampled[i]->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++];
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
image_memory_barrier.oldLayout = textures_to_sampled[i]->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = textures_to_sampled[i]->image;
image_memory_barrier.subresourceRange.aspectMask = textures_to_sampled[i]->read_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = textures_to_sampled[i]->base_mipmap;
image_memory_barrier.subresourceRange.levelCount = textures_to_sampled[i]->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_sampled[i]->base_layer;
image_memory_barrier.subresourceRange.layerCount = textures_to_sampled[i]->layers;
// Keep the tracked layout in sync with the transition being recorded.
textures_to_sampled[i]->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
// The texture is being put in sampled layout right now, so it is dropped from the set
// of textures that still need to go back to that layout.
cl->state.textures_to_sampled_layout.erase(textures_to_sampled[i]);
}
// First use in this frame: reset the other usage flags before tagging compute use.
if (textures_to_sampled[i]->used_in_frame != frames_drawn) {
textures_to_sampled[i]->used_in_frame = frames_drawn;
textures_to_sampled[i]->used_in_transfer = false;
textures_to_sampled[i]->used_in_raster = false;
}
textures_to_sampled[i]->used_in_compute = true;
}
Texture **textures_to_storage = uniform_set->mutable_storage_textures.ptrw();
// Storage textures: anything not already in GENERAL layout gets a transition to it. The
// source stage/access of the barrier come from how the texture was used earlier this frame.
for (uint32_t i = 0; i < textures_to_storage_count; i++) {
if (textures_to_storage[i]->layout != VK_IMAGE_LAYOUT_GENERAL) {
uint32_t src_access_flags = 0;
if (textures_to_storage[i]->used_in_frame == frames_drawn) {
if (textures_to_storage[i]->used_in_compute) {
src_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (textures_to_storage[i]->used_in_raster) {
src_stage_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
src_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
}
if (textures_to_storage[i]->used_in_transfer) {
src_stage_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
}
// The barrier recorded below covers those earlier uses, so the flags are cleared.
textures_to_storage[i]->used_in_compute = false;
textures_to_storage[i]->used_in_raster = false;
textures_to_storage[i]->used_in_transfer = false;
} else {
// Not used yet in this frame: no earlier access to wait on, just stamp the frame.
src_access_flags = 0;
textures_to_storage[i]->used_in_compute = false;
textures_to_storage[i]->used_in_raster = false;
textures_to_storage[i]->used_in_transfer = false;
textures_to_storage[i]->used_in_frame = frames_drawn;
}
VkImageMemoryBarrier &image_memory_barrier = texture_barriers[texture_barrier_count++];
image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_memory_barrier.pNext = nullptr;
image_memory_barrier.srcAccessMask = src_access_flags;
image_memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
image_memory_barrier.oldLayout = textures_to_storage[i]->layout;
image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_memory_barrier.image = textures_to_storage[i]->image;
image_memory_barrier.subresourceRange.aspectMask = textures_to_storage[i]->read_aspect_mask;
image_memory_barrier.subresourceRange.baseMipLevel = textures_to_storage[i]->base_mipmap;
image_memory_barrier.subresourceRange.levelCount = textures_to_storage[i]->mipmaps;
image_memory_barrier.subresourceRange.baseArrayLayer = textures_to_storage[i]->base_layer;
image_memory_barrier.subresourceRange.layerCount = textures_to_storage[i]->layers;
// Keep the tracked layout in sync with the transition being recorded.
textures_to_storage[i]->layout = VK_IMAGE_LAYOUT_GENERAL;
cl->state.textures_to_sampled_layout.insert(textures_to_storage[i]); // Needs to go back to sampled layout afterwards.
}
}
// All collected transitions are recorded with one barrier call targeting the compute stage.
if (texture_barrier_count) {
if (src_stage_flags == 0) {
// No earlier stage was identified to wait on; use top of pipe as the source stage.
src_stage_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
}
vkCmdPipelineBarrier(cl->command_buffer, src_stage_flags, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, texture_barrier_count, texture_barriers);
}
// Disabled validation, compiled out by the preprocessor.
#if 0
{ // Validate that textures bound are not attached as framebuffer bindings.
uint32_t attachable_count = uniform_set->attachable_textures.size();
const RID *attachable_ptr = uniform_set->attachable_textures.ptr();
uint32_t bound_count = draw_list_bound_textures.size();
const RID *bound_ptr = draw_list_bound_textures.ptr();
for (uint32_t i = 0; i < attachable_count; i++) {
for (uint32_t j = 0; j < bound_count; j++) {
ERR_FAIL_COND_MSG(attachable_ptr[i] == bound_ptr[j],
"Attempted to use the same texture in framebuffer attachment and a uniform set, this is not allowed.");
}
}
}
#endif
}
// Records a push constant update in the command buffer of the active compute list.
// p_data / p_data_size are forwarded to vkCmdPushConstants starting at offset 0.
// In debug builds the call is rejected if the list is no longer active or if the
// size differs from the one the bound pipeline expects.
void RenderingDeviceVulkan::compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size) {
	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
	ERR_FAIL_COND(!compute_list);

	ComputeList *list = compute_list;

#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(!list->validation.active, "Submitted Compute Lists can no longer be modified.");
	ERR_FAIL_COND_MSG(p_data_size != list->validation.pipeline_push_constant_size,
			"This compute pipeline requires (" + itos(list->validation.pipeline_push_constant_size) + ") bytes of push constant data, supplied: (" + itos(p_data_size) + ")");
#endif

	const uint32_t push_offset = 0;
	vkCmdPushConstants(list->command_buffer, list->state.pipeline_layout, list->state.pipeline_push_constant_stages, push_offset, p_data_size, p_data);

#ifdef DEBUG_ENABLED
	// Flag read back by the dispatch functions when they validate push constant usage.
	list->validation.pipeline_push_constant_supplied = true;
#endif
}
// Records a direct dispatch of p_x_groups * p_y_groups * p_z_groups workgroups in the
// active compute list. Every uniform set the current pipeline expects and that is not
// flagged as bound is bound right before the dispatch is recorded.
// Debug builds additionally validate the group counts against the device limits, the
// list/pipeline state, push constant usage and the uniform set formats.
void RenderingDeviceVulkan::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
	// Must be called within a compute list, the class mutex is locked during that time

	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
	ERR_FAIL_COND(!compute_list);

	ComputeList *list = compute_list;

#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(p_x_groups == 0, "Dispatch amount of X compute groups (" + itos(p_x_groups) + ") is zero.");
	ERR_FAIL_COND_MSG(p_z_groups == 0, "Dispatch amount of Z compute groups (" + itos(p_z_groups) + ") is zero.");
	ERR_FAIL_COND_MSG(p_y_groups == 0, "Dispatch amount of Y compute groups (" + itos(p_y_groups) + ") is zero.");
	ERR_FAIL_COND_MSG(p_x_groups > limits.maxComputeWorkGroupCount[0],
			"Dispatch amount of X compute groups (" + itos(p_x_groups) + ") is larger than device limit (" + itos(limits.maxComputeWorkGroupCount[0]) + ")");
	ERR_FAIL_COND_MSG(p_y_groups > limits.maxComputeWorkGroupCount[1],
			"Dispatch amount of Y compute groups (" + itos(p_y_groups) + ") is larger than device limit (" + itos(limits.maxComputeWorkGroupCount[1]) + ")");
	ERR_FAIL_COND_MSG(p_z_groups > limits.maxComputeWorkGroupCount[2],
			"Dispatch amount of Z compute groups (" + itos(p_z_groups) + ") is larger than device limit (" + itos(limits.maxComputeWorkGroupCount[2]) + ")");

	ERR_FAIL_COND_MSG(!list->validation.active, "Submitted Compute Lists can no longer be modified.");
	ERR_FAIL_COND_MSG(!list->validation.pipeline_active, "No compute pipeline was set before attempting to draw.");

	// A pipeline that declares push constants must have received them.
	if (list->validation.pipeline_push_constant_size > 0) {
		ERR_FAIL_COND_MSG(!list->validation.pipeline_push_constant_supplied,
				"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
	}
#endif

	// Bind descriptor sets.
	for (uint32_t set_index = 0; set_index < list->state.set_count; set_index++) {
		auto &set_state = list->state.sets[set_index];

		if (set_state.pipeline_expected_format == 0) {
			continue; // Nothing expected by this pipeline.
		}

#ifdef DEBUG_ENABLED
		if (set_state.pipeline_expected_format != set_state.uniform_set_format) {
			if (set_state.uniform_set_format == 0) {
				ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(set_index) + ") at the time of drawing, which are required by the pipeline");
			} else if (uniform_set_owner.owns(set_state.uniform_set)) {
				UniformSet *supplied_set = uniform_set_owner.get_or_null(set_state.uniform_set);
				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(set_index) + "):\n" + _shader_uniform_debug(supplied_set->shader_id, supplied_set->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(list->state.pipeline_shader));
			} else {
				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(set_index) + ", which was was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(list->state.pipeline_shader));
			}
		}
#endif

		if (set_state.bound) {
			continue; // Already bound, no need to record the bind again.
		}

		// All good, this one requires (re-)binding.
		vkCmdBindDescriptorSets(list->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, list->state.pipeline_layout, set_index, 1, &set_state.descriptor_set, 0, nullptr);
		set_state.bound = true;
	}

	vkCmdDispatch(list->command_buffer, p_x_groups, p_y_groups, p_z_groups);
}
// Dispatches enough workgroups to cover the requested amount of threads on each axis.
// The group count per axis is the thread count divided by the pipeline's local group
// size, rounded up; the actual recording is delegated to compute_list_dispatch().
void RenderingDeviceVulkan::compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads) {
	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
	ERR_FAIL_COND(!compute_list);

	ComputeList *list = compute_list;

#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(p_x_threads == 0, "Dispatch amount of X compute threads (" + itos(p_x_threads) + ") is zero.");
	ERR_FAIL_COND_MSG(p_y_threads == 0, "Dispatch amount of Y compute threads (" + itos(p_y_threads) + ") is zero.");
	ERR_FAIL_COND_MSG(p_z_threads == 0, "Dispatch amount of Z compute threads (" + itos(p_z_threads) + ") is zero.");

	ERR_FAIL_COND_MSG(!list->validation.pipeline_active, "No compute pipeline was set before attempting to draw.");

	// A pipeline that declares push constants must have received them.
	if (list->validation.pipeline_push_constant_size > 0) {
		ERR_FAIL_COND_MSG(!list->validation.pipeline_push_constant_supplied,
				"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
	}
#endif

	// Rounded-up division: (threads - 1) / group_size + 1.
	const uint32_t x_groups = (p_x_threads - 1) / list->state.local_group_size[0] + 1;
	const uint32_t y_groups = (p_y_threads - 1) / list->state.local_group_size[1] + 1;
	const uint32_t z_groups = (p_z_threads - 1) / list->state.local_group_size[2] + 1;

	compute_list_dispatch(p_list, x_groups, y_groups, z_groups);
}
// Records an indirect compute dispatch whose group counts are read by the GPU from
// p_buffer at byte offset p_offset.
// p_buffer must be a storage buffer created with indirect usage, and the 12 bytes
// starting at p_offset must lie inside the buffer.
// Every uniform set the current pipeline expects and that is not flagged as bound is
// bound right before the dispatch is recorded.
void RenderingDeviceVulkan::compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) {
	ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST);
	ERR_FAIL_COND(!compute_list);

	ComputeList *cl = compute_list;
	Buffer *buffer = storage_buffer_owner.get_or_null(p_buffer);
	ERR_FAIL_COND(!buffer);

	ERR_FAIL_COND_MSG(!(buffer->usage & VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT), "Buffer provided was not created to do indirect dispatch.");

	// Do the sum in 64 bits: with a 32-bit p_offset close to UINT32_MAX, `p_offset + 12`
	// would wrap around and an out-of-range offset would slip through this check.
	ERR_FAIL_COND_MSG(uint64_t(p_offset) + 12 > buffer->size, "Offset provided (+12) is past the end of buffer.");

#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_MSG(!cl->validation.active, "Submitted Compute Lists can no longer be modified.");
	ERR_FAIL_COND_MSG(!cl->validation.pipeline_active, "No compute pipeline was set before attempting to draw.");

	if (cl->validation.pipeline_push_constant_size > 0) {
		// Using push constants, check that they were supplied.
		ERR_FAIL_COND_MSG(!cl->validation.pipeline_push_constant_supplied,
				"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
	}
#endif

	// Bind descriptor sets.
	for (uint32_t i = 0; i < cl->state.set_count; i++) {
		if (cl->state.sets[i].pipeline_expected_format == 0) {
			continue; // Nothing expected by this pipeline.
		}
#ifdef DEBUG_ENABLED
		if (cl->state.sets[i].pipeline_expected_format != cl->state.sets[i].uniform_set_format) {
			if (cl->state.sets[i].uniform_set_format == 0) {
				ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline");
			} else if (uniform_set_owner.owns(cl->state.sets[i].uniform_set)) {
				UniformSet *us = uniform_set_owner.get_or_null(cl->state.sets[i].uniform_set);
				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(cl->state.pipeline_shader));
			} else {
				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(cl->state.pipeline_shader));
			}
		}
#endif
		if (!cl->state.sets[i].bound) {
			// All good, see if this requires re-binding.
			vkCmdBindDescriptorSets(cl->command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, cl->state.pipeline_layout, i, 1, &cl->state.sets[i].descriptor_set, 0, nullptr);
			cl->state.sets[i].bound = true;
		}
	}

	vkCmdDispatchIndirect(cl->command_buffer, buffer->buffer, p_offset);
}
// Inserts a compute-to-compute barrier in the active compute list: the destination
// stage is the compute shader stage and the destination access is shader reads.
// p_list is not inspected here.
void RenderingDeviceVulkan::compute_list_add_barrier(ComputeListID p_list) {
	// Must be called within a compute list, the class mutex is locked during that time

	const uint32_t dst_stage_flags = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
	const uint32_t dst_access_flags = VK_ACCESS_SHADER_READ_BIT;
	_compute_list_add_barrier(BARRIER_MASK_COMPUTE, dst_stage_flags, dst_access_flags);
}
// Records the barrier that follows compute work in the active compute list.
// Every texture tracked in textures_to_sampled_layout that is not already in the
// shader-read-only layout gets an image barrier transitioning it there, and its
// per-frame usage flags are reset if they were last updated on a previous frame.
// p_barrier_flags: destination pipeline stages; when non-zero a global memory barrier
//                  (shader writes -> p_access_flags) is issued together with the image barriers.
// p_access_flags:  destination access mask for both the memory and the image barriers.
// p_post_barrier is not read by this function.
void RenderingDeviceVulkan::_compute_list_add_barrier(BitField<BarrierMask> p_post_barrier, uint32_t p_barrier_flags, uint32_t p_access_flags) {
	ERR_FAIL_COND(!compute_list);

	// Reserve room for the worst case (one barrier per tracked texture).
	const uint32_t tracked_texture_count = compute_list->state.textures_to_sampled_layout.size();
	VkImageMemoryBarrier *transitions = nullptr;
	if (tracked_texture_count) {
		transitions = (VkImageMemoryBarrier *)alloca(sizeof(VkImageMemoryBarrier) * tracked_texture_count);
	}

	uint32_t transition_count = 0; // We'll count how many we end up issuing.

	for (Texture *texture : compute_list->state.textures_to_sampled_layout) {
		if (texture->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
			VkImageMemoryBarrier &transition = transitions[transition_count++];
			transition.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
			transition.pNext = nullptr;
			transition.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
			transition.dstAccessMask = p_access_flags;
			transition.oldLayout = texture->layout;
			transition.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
			transition.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			transition.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
			transition.image = texture->image;
			transition.subresourceRange.aspectMask = texture->read_aspect_mask;
			transition.subresourceRange.baseMipLevel = texture->base_mipmap;
			transition.subresourceRange.levelCount = texture->mipmaps;
			transition.subresourceRange.baseArrayLayer = texture->base_layer;
			transition.subresourceRange.layerCount = texture->layers;

			// Keep the tracked layout in sync with the transition just queued.
			texture->layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
		}

		// Usage flags are per frame: reset them when they belong to an older frame.
		if (texture->used_in_frame != frames_drawn) {
			texture->used_in_transfer = false;
			texture->used_in_raster = false;
			texture->used_in_compute = false;
			texture->used_in_frame = frames_drawn;
		}
	}

	if (p_barrier_flags) {
		// Global memory barrier plus whatever layout transitions were collected.
		VkMemoryBarrier global_barrier;
		global_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
		global_barrier.pNext = nullptr;
		global_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
		global_barrier.dstAccessMask = p_access_flags;
		vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, p_barrier_flags, 0, 1, &global_barrier, 0, nullptr, transition_count, transitions);
	} else if (transition_count) {
		// No destination stages requested: only the layout transitions are recorded.
		vkCmdPipelineBarrier(compute_list->command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, transition_count, transitions);
	}

#ifdef FORCE_FULL_BARRIER
	_full_barrier(true);
#endif
}
// Finishes the active compute list.
// p_post_barrier selects which kinds of later work must wait for this compute work;
// each selected kind contributes destination stages and access bits to the barrier
// recorded by _compute_list_add_barrier(). The list is then deleted and the class
// lock is released.
void RenderingDeviceVulkan::compute_list_end(BitField<BarrierMask> p_post_barrier) {
	ERR_FAIL_COND(!compute_list);

	uint32_t dst_stage_flags = 0;
	uint32_t dst_access_flags = 0;

	const bool wait_compute = p_post_barrier.has_flag(BARRIER_MASK_COMPUTE);
	const bool wait_vertex = p_post_barrier.has_flag(BARRIER_MASK_VERTEX);
	const bool wait_fragment = p_post_barrier.has_flag(BARRIER_MASK_FRAGMENT);
	const bool wait_transfer = p_post_barrier.has_flag(BARRIER_MASK_TRANSFER);

	if (wait_compute) {
		dst_stage_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
		dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
	}
	if (wait_vertex) {
		dst_stage_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
		dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
	}
	if (wait_fragment) {
		dst_stage_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
		dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
	}
	if (wait_transfer) {
		dst_stage_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
		dst_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
	}

	_compute_list_add_barrier(p_post_barrier, dst_stage_flags, dst_access_flags);

	memdelete(compute_list);
	compute_list = nullptr;

	// Compute_list is no longer active.
	_THREAD_SAFE_UNLOCK_
}
// Inserts a memory barrier between previously recorded work (p_from) and later work (p_to).
// p_from: kinds of work whose writes must be made available. Only COMPUTE, FRAGMENT and
//         TRANSFER contribute source stages/accesses; an empty mask means top-of-pipe.
// p_to:   kinds of work that must wait. COMPUTE, VERTEX, FRAGMENT and TRANSFER contribute
//         destination stages/accesses; an empty mask means bottom-of-pipe.
// If a non-empty mask contains only bits that map to no stage here, the same
// top/bottom-of-pipe fallback is applied so that a zero stage mask is never passed on.
void RenderingDeviceVulkan::barrier(BitField<BarrierMask> p_from, BitField<BarrierMask> p_to) {
	uint32_t src_barrier_flags = 0;
	uint32_t src_access_flags = 0;

	if (p_from == 0) {
		src_barrier_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
	} else {
		if (p_from.has_flag(BARRIER_MASK_COMPUTE)) {
			src_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
			src_access_flags |= VK_ACCESS_SHADER_WRITE_BIT;
		}
		if (p_from.has_flag(BARRIER_MASK_FRAGMENT)) {
			src_barrier_flags |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
			src_access_flags |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
		}
		if (p_from.has_flag(BARRIER_MASK_TRANSFER)) {
			src_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
			src_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
		}
		if (src_barrier_flags == 0) {
			// None of the bits in p_from maps to a source stage (e.g. only BARRIER_MASK_VERTEX
			// was given). A zero source stage mask is not valid for a pipeline barrier.
			src_barrier_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
		}
	}

	uint32_t dst_barrier_flags = 0;
	uint32_t dst_access_flags = 0;

	if (p_to == 0) {
		dst_barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
	} else {
		if (p_to.has_flag(BARRIER_MASK_COMPUTE)) {
			dst_barrier_flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
			dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
		}
		if (p_to.has_flag(BARRIER_MASK_VERTEX)) {
			dst_barrier_flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
			dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
		}
		if (p_to.has_flag(BARRIER_MASK_FRAGMENT)) {
			dst_barrier_flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
			dst_access_flags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
		}
		if (p_to.has_flag(BARRIER_MASK_TRANSFER)) {
			dst_barrier_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
			dst_access_flags |= VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT;
		}
		if (dst_barrier_flags == 0) {
			// Same reasoning as for the source side: never hand over an empty stage mask.
			dst_barrier_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
		}
	}

	_memory_barrier(src_barrier_flags, dst_barrier_flags, src_access_flags, dst_access_flags, true);
}
// Issues a full barrier through _full_barrier(true).
// Builds without DEBUG_ENABLED print an error first, since this call is meant for
// debugging only; the barrier is still issued afterwards.
void RenderingDeviceVulkan::full_barrier() {
#ifndef DEBUG_ENABLED
	ERR_PRINT("Full barrier is debug-only, should not be used in production");
#endif

	_full_barrier(true);
}
// Disabled code (compiled out with `#if 0`), kept for reference only.
// It sketches how a set of draw lists recorded as secondary command buffers would be
// executed inside a render pass on the frame's command buffer.
// NOTE(review): the body references identifiers that are not declared in the signature
// (p_screen, p_clear_color, p_format_check, p_primary), so it would not compile if it
// were re-enabled as-is.
#if 0
void RenderingDeviceVulkan::draw_list_render_secondary_to_framebuffer(ID p_framebuffer, ID *p_draw_lists, uint32_t p_draw_list_count, InitialAction p_initial_action, FinalAction p_final_action, const Vector<Variant> &p_clear_colors) {
VkCommandBuffer frame_cmdbuf = frames[frame].frame_buffer;
ERR_FAIL_COND(!frame_cmdbuf);
// Describe the render pass: full screen area, a single clear value.
VkRenderPassBeginInfo render_pass_begin;
render_pass_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin.pNext = nullptr;
render_pass_begin.renderPass = context->get_render_pass();
render_pass_begin.framebuffer = context->get_frame_framebuffer(frame);
render_pass_begin.renderArea.extent.width = context->get_screen_width(p_screen);
render_pass_begin.renderArea.extent.height = context->get_screen_height(p_screen);
render_pass_begin.renderArea.offset.x = 0;
render_pass_begin.renderArea.offset.y = 0;
render_pass_begin.clearValueCount = 1;
VkClearValue clear_value;
clear_value.color.float32[0] = p_clear_color.r;
clear_value.color.float32[1] = p_clear_color.g;
clear_value.color.float32[2] = p_clear_color.b;
clear_value.color.float32[3] = p_clear_color.a;
render_pass_begin.pClearValues = &clear_value;
// The pass contents come exclusively from secondary command buffers.
vkCmdBeginRenderPass(frame_cmdbuf, &render_pass_begin, VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS);
ID screen_format = screen_get_framebuffer_format();
{
// Collect the command buffer of every valid, format-compatible draw list.
VkCommandBuffer *command_buffers = (VkCommandBuffer *)alloca(sizeof(VkCommandBuffer) * p_draw_list_count);
uint32_t command_buffer_count = 0;
for (uint32_t i = 0; i < p_draw_list_count; i++) {
DrawList *dl = _get_draw_list_ptr(p_draw_lists[i]);
ERR_CONTINUE_MSG(!dl, "Draw list index (" + itos(i) + ") is not a valid draw list ID.");
ERR_CONTINUE_MSG(dl->validation.framebuffer_format != p_format_check,
"Draw list index (" + itos(i) + ") is created with a framebuffer format incompatible with this render pass.");
if (dl->validation.active) {
// Needs to be closed, so close it.
vkEndCommandBuffer(dl->command_buffer);
dl->validation.active = false;
}
command_buffers[command_buffer_count++] = dl->command_buffer;
}
print_line("to draw: " + itos(command_buffer_count));
vkCmdExecuteCommands(p_primary, command_buffer_count, command_buffers);
}
vkCmdEndRenderPass(frame_cmdbuf);
}
#endif
// Releases the resource identified by p_id.
// The RID is removed from its owner immediately, while a copy of the underlying data
// (when there is any to destroy) is queued in the current frame's dispose lists so it
// is destroyed the next time this frame index is processed.
// Framebuffers and uniform sets that registered an invalidation callback get it invoked.
// An RID that no owner recognizes only produces an error message.
void RenderingDeviceVulkan::_free_internal(RID p_id) {
#ifdef DEV_ENABLED
	// Forget the debug name, but keep a copy for the error message below.
	String resource_name;
	if (resource_names.has(p_id)) {
		resource_name = resource_names[p_id];
		resource_names.erase(p_id);
	}
#endif

	// Push everything so it's disposed of next time this frame index is processed (means, it's safe to do it).
	if (texture_owner.owns(p_id)) {
		Texture *texture = texture_owner.get_or_null(p_id);
		frames[frame].textures_to_dispose_of.push_back(*texture);
		texture_owner.free(p_id);
	} else if (framebuffer_owner.owns(p_id)) {
		Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_id);
		frames[frame].framebuffers_to_dispose_of.push_back(*framebuffer);

		if (framebuffer->invalidated_callback != nullptr) {
			framebuffer->invalidated_callback(framebuffer->invalidated_callback_userdata);
		}

		framebuffer_owner.free(p_id);
	} else if (sampler_owner.owns(p_id)) {
		VkSampler *sampler = sampler_owner.get_or_null(p_id);
		frames[frame].samplers_to_dispose_of.push_back(*sampler);
		sampler_owner.free(p_id);
	} else if (vertex_buffer_owner.owns(p_id)) {
		Buffer *vertex_buffer = vertex_buffer_owner.get_or_null(p_id);
		frames[frame].buffers_to_dispose_of.push_back(*vertex_buffer);
		vertex_buffer_owner.free(p_id);
	} else if (vertex_array_owner.owns(p_id)) {
		// Nothing is queued for disposal here, only the RID is released.
		vertex_array_owner.free(p_id);
	} else if (index_buffer_owner.owns(p_id)) {
		IndexBuffer *index_buffer = index_buffer_owner.get_or_null(p_id);
		// Queue a plain Buffer carrying only the fields copied from the index buffer.
		Buffer b;
		b.allocation = index_buffer->allocation;
		b.buffer = index_buffer->buffer;
		b.size = index_buffer->size;
		b.buffer_info = {};
		frames[frame].buffers_to_dispose_of.push_back(b);
		index_buffer_owner.free(p_id);
	} else if (index_array_owner.owns(p_id)) {
		// Nothing is queued for disposal here, only the RID is released.
		index_array_owner.free(p_id);
	} else if (shader_owner.owns(p_id)) {
		Shader *shader = shader_owner.get_or_null(p_id);
		frames[frame].shaders_to_dispose_of.push_back(*shader);
		shader_owner.free(p_id);
	} else if (uniform_buffer_owner.owns(p_id)) {
		Buffer *uniform_buffer = uniform_buffer_owner.get_or_null(p_id);
		frames[frame].buffers_to_dispose_of.push_back(*uniform_buffer);
		uniform_buffer_owner.free(p_id);
	} else if (texture_buffer_owner.owns(p_id)) {
		TextureBuffer *texture_buffer = texture_buffer_owner.get_or_null(p_id);
		frames[frame].buffers_to_dispose_of.push_back(texture_buffer->buffer);
		frames[frame].buffer_views_to_dispose_of.push_back(texture_buffer->view);
		texture_buffer_owner.free(p_id);
	} else if (storage_buffer_owner.owns(p_id)) {
		Buffer *storage_buffer = storage_buffer_owner.get_or_null(p_id);
		frames[frame].buffers_to_dispose_of.push_back(*storage_buffer);
		storage_buffer_owner.free(p_id);
	} else if (uniform_set_owner.owns(p_id)) {
		UniformSet *uniform_set = uniform_set_owner.get_or_null(p_id);
		frames[frame].uniform_sets_to_dispose_of.push_back(*uniform_set);

		// Copy the callback data out before releasing the RID: once free() has run,
		// `uniform_set` points to an entry the owner has released and must not be read.
		// The callback is still invoked after the RID is gone, as before.
		auto invalidated_callback = uniform_set->invalidated_callback;
		auto invalidated_callback_userdata = uniform_set->invalidated_callback_userdata;

		uniform_set_owner.free(p_id);

		if (invalidated_callback != nullptr) {
			invalidated_callback(invalidated_callback_userdata);
		}
	} else if (render_pipeline_owner.owns(p_id)) {
		RenderPipeline *pipeline = render_pipeline_owner.get_or_null(p_id);
		frames[frame].render_pipelines_to_dispose_of.push_back(*pipeline);
		render_pipeline_owner.free(p_id);
	} else if (compute_pipeline_owner.owns(p_id)) {
		ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_id);
		frames[frame].compute_pipelines_to_dispose_of.push_back(*pipeline);
		compute_pipeline_owner.free(p_id);
	} else {
#ifdef DEV_ENABLED
		ERR_PRINT("Attempted to free invalid ID: " + itos(p_id.get_id()) + " " + resource_name);
#else
		ERR_PRINT("Attempted to free invalid ID: " + itos(p_id.get_id()));
#endif
	}
}
// Public entry point to release a resource: takes the class lock, frees whatever
// depends on p_id and finally frees p_id itself.
void RenderingDeviceVulkan::free(RID p_id) {
	_THREAD_SAFE_METHOD_

	// Dependencies go first (recursively), to avoid potential API problems.
	_free_dependencies(p_id);

	_free_internal(p_id);
}
// Assigns a debug name to the Vulkan object(s) behind p_id.
// The full list of resources that can be named is in the VkObjectType enum.
// We just expose the resources that are owned and can be accessed easily.
// An RID that none of the checked owners recognizes only produces an error message.
// With DEV_ENABLED the name is also remembered in resource_names.
void RenderingDeviceVulkan::set_resource_name(RID p_id, const String p_name) {
	if (texture_owner.owns(p_id)) {
		Texture *tex = texture_owner.get_or_null(p_id);
		const bool owns_image = tex->owner.is_null();
		if (owns_image) {
			// Don't set the source texture's name when calling on a texture view.
			context->set_object_name(VK_OBJECT_TYPE_IMAGE, uint64_t(tex->image), p_name);
		}
		context->set_object_name(VK_OBJECT_TYPE_IMAGE_VIEW, uint64_t(tex->view), p_name + " View");
	} else if (framebuffer_owner.owns(p_id)) {
		//Framebuffer *framebuffer = framebuffer_owner.get_or_null(p_id);
		// Not implemented for now as the relationship between Framebuffer and RenderPass is very complex.
	} else if (sampler_owner.owns(p_id)) {
		VkSampler *sampler_handle = sampler_owner.get_or_null(p_id);
		context->set_object_name(VK_OBJECT_TYPE_SAMPLER, uint64_t(*sampler_handle), p_name);
	} else if (vertex_buffer_owner.owns(p_id)) {
		Buffer *vb = vertex_buffer_owner.get_or_null(p_id);
		context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(vb->buffer), p_name);
	} else if (index_buffer_owner.owns(p_id)) {
		IndexBuffer *ib = index_buffer_owner.get_or_null(p_id);
		context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(ib->buffer), p_name);
	} else if (shader_owner.owns(p_id)) {
		Shader *sh = shader_owner.get_or_null(p_id);
		context->set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, uint64_t(sh->pipeline_layout), p_name + " Pipeline Layout");
		// Every descriptor set layout of the shader gets the plain name.
		for (int set_idx = 0; set_idx < sh->sets.size(); set_idx++) {
			context->set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, uint64_t(sh->sets[set_idx].descriptor_set_layout), p_name);
		}
	} else if (uniform_buffer_owner.owns(p_id)) {
		Buffer *ub = uniform_buffer_owner.get_or_null(p_id);
		context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(ub->buffer), p_name);
	} else if (texture_buffer_owner.owns(p_id)) {
		TextureBuffer *tb = texture_buffer_owner.get_or_null(p_id);
		context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(tb->buffer.buffer), p_name);
		context->set_object_name(VK_OBJECT_TYPE_BUFFER_VIEW, uint64_t(tb->view), p_name + " View");
	} else if (storage_buffer_owner.owns(p_id)) {
		Buffer *sb = storage_buffer_owner.get_or_null(p_id);
		context->set_object_name(VK_OBJECT_TYPE_BUFFER, uint64_t(sb->buffer), p_name);
	} else if (uniform_set_owner.owns(p_id)) {
		UniformSet *us = uniform_set_owner.get_or_null(p_id);
		context->set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET, uint64_t(us->descriptor_set), p_name);
	} else if (render_pipeline_owner.owns(p_id)) {
		RenderPipeline *render_pipeline = render_pipeline_owner.get_or_null(p_id);
		context->set_object_name(VK_OBJECT_TYPE_PIPELINE, uint64_t(render_pipeline->pipeline), p_name);
		context->set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, uint64_t(render_pipeline->pipeline_layout), p_name + " Layout");
	} else if (compute_pipeline_owner.owns(p_id)) {
		ComputePipeline *compute_pipeline = compute_pipeline_owner.get_or_null(p_id);
		context->set_object_name(VK_OBJECT_TYPE_PIPELINE, uint64_t(compute_pipeline->pipeline), p_name);
		context->set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, uint64_t(compute_pipeline->pipeline_layout), p_name + " Layout");
	} else {
		ERR_PRINT("Attempted to name invalid ID: " + itos(p_id.get_id()));
		return;
	}

#ifdef DEV_ENABLED
	// Keep the name around; it is reported when an invalid free is attempted.
	resource_names[p_id] = p_name;
#endif
}
// Opens a label region named p_label_name with color p_color on the current frame's
// draw command buffer. Takes the class lock.
void RenderingDeviceVulkan::draw_command_begin_label(String p_label_name, const Color p_color) {
	_THREAD_SAFE_METHOD_

	VkCommandBuffer draw_cmd = frames[frame].draw_command_buffer;
	context->command_begin_label(draw_cmd, p_label_name, p_color);
}
// Inserts a single label named p_label_name with color p_color into the current
// frame's draw command buffer. Takes the class lock.
void RenderingDeviceVulkan::draw_command_insert_label(String p_label_name, const Color p_color) {
	_THREAD_SAFE_METHOD_

	VkCommandBuffer draw_cmd = frames[frame].draw_command_buffer;
	context->command_insert_label(draw_cmd, p_label_name, p_color);
}
void RenderingDeviceVulkan::draw_command_end_label() {
_THREAD_SAFE_METHOD_
context->command_end_label(frames[frame].draw_command_buffer);
}
// Returns the device vendor name as reported by the Vulkan context.
String RenderingDeviceVulkan::get_device_vendor_name() const {
	String vendor_name = context->get_device_vendor_name();
	return vendor_name;
}
// Returns the device name as reported by the Vulkan context.
String RenderingDeviceVulkan::get_device_name() const {
	String device_name = context->get_device_name();
	return device_name;
}
// Returns the device type as reported by the Vulkan context.
RenderingDevice::DeviceType RenderingDeviceVulkan::get_device_type() const {
	const RenderingDevice::DeviceType device_type = context->get_device_type();
	return device_type;
}
// Returns the device API version string as reported by the Vulkan context.
String RenderingDeviceVulkan::get_device_api_version() const {
	String api_version = context->get_device_api_version();
	return api_version;
}
// Returns the device pipeline cache UUID string as reported by the Vulkan context.
String RenderingDeviceVulkan::get_device_pipeline_cache_uuid() const {
	String cache_uuid = context->get_device_pipeline_cache_uuid();
	return cache_uuid;
}
void RenderingDeviceVulkan::_finalize_command_bufers() {
if (draw_list) {
ERR_PRINT("Found open draw list at the end of the frame, this should never happen (further drawing will likely not work).");
}
if (compute_list) {
ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work).");
}
{ // Complete the setup buffer (that needs to be processed before anything else).
vkEndCommandBuffer(frames[frame].setup_command_buffer);
vkEndCommandBuffer(frames[frame].draw_command_buffer);
}
}
void RenderingDeviceVulkan::_begin_frame() {
// Erase pending resources.
_free_pending_resources(frame);
// Create setup command buffer and set as the setup buffer.
{
VkCommandBufferBeginInfo cmdbuf_begin;
cmdbuf_begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cmdbuf_begin.pNext = nullptr;
cmdbuf_begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
cmdbuf_begin.pInheritanceInfo = nullptr;
VkResult err = vkResetCommandBuffer(frames[frame].setup_command_buffer, 0);
ERR_FAIL_COND_MSG(err, "vkResetCommandBuffer failed with error " + itos(err) + ".");
err = vkBeginCommandBuffer(frames[frame].setup_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
err = vkBeginCommandBuffer(frames[frame].draw_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
if (local_device.is_null()) {
context->append_command_buffer(frames[frame].draw_command_buffer);
context->set_setup_buffer(frames[frame].setup_command_buffer); // Append now so it's added before everything else.
}
}
// Advance current frame.
frames_drawn++;
// Advance staging buffer if used.
if (staging_buffer_used) {
staging_buffer_current = (staging_buffer_current + 1) % staging_buffer_blocks.size();
staging_buffer_used = false;
}
if (frames[frame].timestamp_count) {
vkGetQueryPoolResults(device, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count, sizeof(uint64_t) * max_timestamp_query_elements, frames[frame].timestamp_result_values.ptr(), sizeof(uint64_t), VK_QUERY_RESULT_64_BIT);
vkCmdResetQueryPool(frames[frame].setup_command_buffer, frames[frame].timestamp_pool, 0, frames[frame].timestamp_count);
SWAP(frames[frame].timestamp_names, frames[frame].timestamp_result_names);
SWAP(frames[frame].timestamp_cpu_values, frames[frame].timestamp_cpu_result_values);
}
frames[frame].timestamp_result_count = frames[frame].timestamp_count;
frames[frame].timestamp_count = 0;
frames[frame].index = Engine::get_singleton()->get_frames_drawn();
}
// Maps the requested sample count to one that is supported by both color and depth
// framebuffer attachments according to the device limits.
// Returns the requested count when supported, otherwise the closest lower supported
// count, and VK_SAMPLE_COUNT_1_BIT when nothing above it qualifies.
VkSampleCountFlagBits RenderingDeviceVulkan::_ensure_supported_sample_count(TextureSamples p_requested_sample_count) const {
	const VkSampleCountFlags supported_counts = limits.framebufferColorSampleCounts & limits.framebufferDepthSampleCounts;

	// Start at the requested count and halve it until a supported one is found.
	// Anything that would end at 1 sample falls through to the final return.
	for (VkSampleCountFlagBits candidate = rasterization_sample_count[p_requested_sample_count];
			candidate > VK_SAMPLE_COUNT_1_BIT;
			candidate = (VkSampleCountFlagBits)(candidate >> 1)) {
		if (supported_counts & candidate) {
			return candidate;
		}
	}

	return VK_SAMPLE_COUNT_1_BIT;
}
// Ends the current frame's command buffers, asks the context to swap buffers and
// starts recording on the next frame slot. Not available on local devices.
void RenderingDeviceVulkan::swap_buffers() {
	ERR_FAIL_COND_MSG(local_device.is_valid(), "Local devices can't swap buffers.");
	_THREAD_SAFE_METHOD_

	_finalize_command_bufers();

	screen_prepared = false;

	// Swap buffers.
	context->swap_buffers();

	// Move on to the next frame slot, wrapping around at frame_count.
	frame = (frame + 1) % frame_count;

	_begin_frame();
}
void RenderingDeviceVulkan::submit() {
_THREAD_SAFE_METHOD_
ERR_FAIL_COND_MSG(local_device.is_null(), "Only local devices can submit and sync.");
ERR_FAIL_COND_MSG(local_device_processing, "device already submitted, call sync to wait until done.");
_finalize_command_bufers();
VkCommandBuffer command_buffers[2] = { frames[frame].setup_command_buffer, frames[frame].draw_command_buffer };
context->local_device_push_command_buffers(local_device, command_buffers, 2);
local_device_processing = true;
}
// Local devices only: waits on the local device through the context after a submit(),
// then begins a new frame and clears the "processing" flag so submit() can be
// called again.
void RenderingDeviceVulkan::sync() {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_MSG(local_device.is_null(), "Only local devices can submit and sync.");
	ERR_FAIL_COND_MSG(!local_device_processing, "sync can only be called after a submit");

	context->local_device_sync(local_device);

	_begin_frame();

	local_device_processing = false;
}
// Returns the VMA pool used for small allocations of memory type p_mem_type_index,
// creating it on first use.
// The result of the creation attempt is cached even when it fails, in which case the
// cached (and returned) pool is VK_NULL_HANDLE and an error is reported.
VmaPool RenderingDeviceVulkan::_find_or_create_small_allocs_pool(uint32_t p_mem_type_index) {
	if (small_allocs_pools.has(p_mem_type_index)) {
		return small_allocs_pools[p_mem_type_index];
	}

	print_verbose("Creating VMA small objects pool for memory type index " + itos(p_mem_type_index));

	VmaPoolCreateInfo pool_info;
	pool_info.memoryTypeIndex = p_mem_type_index;
	pool_info.flags = 0;
	pool_info.blockSize = 0;
	pool_info.minBlockCount = 0;
	pool_info.maxBlockCount = SIZE_MAX;
	pool_info.priority = 0.5f;
	pool_info.minAllocationAlignment = 0;
	pool_info.pMemoryAllocateNext = nullptr;

	VmaPool new_pool = VK_NULL_HANDLE;
	const VkResult create_result = vmaCreatePool(allocator, &pool_info, &new_pool);

	// Don't try to create it again if failed the first time.
	small_allocs_pools[p_mem_type_index] = new_pool;

	ERR_FAIL_COND_V_MSG(create_result, new_pool, "vmaCreatePool failed with error " + itos(create_result) + ".");
	return new_pool;
}
void RenderingDeviceVulkan::_free_pending_resources(int p_frame) {
// Free in dependency usage order, so nothing weird happens.
// Pipelines.
while (frames[p_frame].render_pipelines_to_dispose_of.front()) {
RenderPipeline *pipeline = &frames[p_frame].render_pipelines_to_dispose_of.front()->get();
vkDestroyPipeline(device, pipeline->pipeline, nullptr);
frames[p_frame].render_pipelines_to_dispose_of.pop_front();
}
while (frames[p_frame].compute_pipelines_to_dispose_of.front()) {
ComputePipeline *pipeline = &frames[p_frame].compute_pipelines_to_dispose_of.front()->get();
vkDestroyPipeline(device, pipeline->pipeline, nullptr);
frames[p_frame].compute_pipelines_to_dispose_of.pop_front();
}
// Uniform sets.
while (frames[p_frame].uniform_sets_to_dispose_of.front()) {
UniformSet *uniform_set = &frames[p_frame].uniform_sets_to_dispose_of.front()->get();
vkFreeDescriptorSets(device, uniform_set->pool->pool, 1, &uniform_set->descriptor_set);
_descriptor_pool_free(uniform_set->pool_key, uniform_set->pool);
frames[p_frame].uniform_sets_to_dispose_of.pop_front();
}
// Buffer views.
while (frames[p_frame].buffer_views_to_dispose_of.front()) {
VkBufferView buffer_view = frames[p_frame].buffer_views_to_dispose_of.front()->get();
vkDestroyBufferView(device, buffer_view, nullptr);
frames[p_frame].buffer_views_to_dispose_of.pop_front();
}
// Shaders.
while (frames[p_frame].shaders_to_dispose_of.front()) {
Shader *shader = &frames[p_frame].shaders_to_dispose_of.front()->get();
// Descriptor set layout for each set.
for (int i = 0; i < shader->sets.size(); i++) {
vkDestroyDescriptorSetLayout(device, shader->sets[i].descriptor_set_layout, nullptr);
}
// Pipeline layout.
vkDestroyPipelineLayout(device, shader->pipeline_layout, nullptr);
// Shaders themselves.
for (int i = 0; i < shader->pipeline_stages.size(); i++) {
vkDestroyShaderModule(device, shader->pipeline_stages[i].module, nullptr);
}
frames[p_frame].shaders_to_dispose_of.pop_front();
}
// Samplers.
while (frames[p_frame].samplers_to_dispose_of.front()) {
VkSampler sampler = frames[p_frame].samplers_to_dispose_of.front()->get();
vkDestroySampler(device, sampler, nullptr);
frames[p_frame].samplers_to_dispose_of.pop_front();
}
// Framebuffers.
while (frames[p_frame].framebuffers_to_dispose_of.front()) {
Framebuffer *framebuffer = &frames[p_frame].framebuffers_to_dispose_of.front()->get();
for (const KeyValue<Framebuffer::VersionKey, Framebuffer::Version> &E : framebuffer->framebuffers) {
// First framebuffer, then render pass because it depends on it.
vkDestroyFramebuffer(device, E.value.framebuffer, nullptr);
vkDestroyRenderPass(device, E.value.render_pass, nullptr);
}
frames[p_frame].framebuffers_to_dispose_of.pop_front();
}
// Textures.
while (frames[p_frame].textures_to_dispose_of.front()) {
Texture *texture = &frames[p_frame].textures_to_dispose_of.front()->get();
if (texture->bound) {
WARN_PRINT("Deleted a texture while it was bound.");
}
vkDestroyImageView(device, texture->view, nullptr);
if (texture->owner.is_null()) {
// Actually owns the image and the allocation too.
image_memory -= texture->allocation_info.size;
vmaDestroyImage(allocator, texture->image, texture->allocation);
}
frames[p_frame].textures_to_dispose_of.pop_front();
}
// Buffers.
while (frames[p_frame].buffers_to_dispose_of.front()) {
_buffer_free(&frames[p_frame].buffers_to_dispose_of.front()->get());
frames[p_frame].buffers_to_dispose_of.pop_front();
}
}
// Asks the context to prepare its buffers and then records that the screen
// has been prepared.
void RenderingDeviceVulkan::prepare_screen_for_drawing() {
	_THREAD_SAFE_METHOD_

	context->prepare_buffers();

	screen_prepared = true;
}
// Reports the frame delay, which is the number of frames this device cycles through.
uint32_t RenderingDeviceVulkan::get_frame_delay() const {
	const uint32_t delay = frame_count;
	return delay;
}
// Returns the memory usage for the requested category.
// Buffers and textures come from the internally tracked counters; any other
// category queries VMA for the total allocated bytes.
uint64_t RenderingDeviceVulkan::get_memory_usage(MemoryType p_type) const {
	switch (p_type) {
		case MEMORY_BUFFERS:
			return buffer_memory;
		case MEMORY_TEXTURES:
			return image_memory;
		default: {
			VmaTotalStatistics vma_stats;
			vmaCalculateStatistics(allocator, &vma_stats);
			return vma_stats.total.statistics.allocationBytes;
		}
	}
}
void RenderingDeviceVulkan::_flush(bool p_current_frame) {
if (local_device.is_valid() && !p_current_frame) {
return; // Flushing previous frames has no effect with local device.
}
// Not doing this crashes RADV (undefined behavior).
if (p_current_frame) {
vkEndCommandBuffer(frames[frame].setup_command_buffer);
vkEndCommandBuffer(frames[frame].draw_command_buffer);
}
if (local_device.is_valid()) {
VkCommandBuffer command_buffers[2] = { frames[frame].setup_command_buffer, frames[frame].draw_command_buffer };
context->local_device_push_command_buffers(local_device, command_buffers, 2);
context->local_device_sync(local_device);
VkCommandBufferBeginInfo cmdbuf_begin;
cmdbuf_begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cmdbuf_begin.pNext = nullptr;
cmdbuf_begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
cmdbuf_begin.pInheritanceInfo = nullptr;
VkResult err = vkBeginCommandBuffer(frames[frame].setup_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
err = vkBeginCommandBuffer(frames[frame].draw_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
} else {
context->flush(p_current_frame, p_current_frame);
// Re-create the setup command.
if (p_current_frame) {
VkCommandBufferBeginInfo cmdbuf_begin;
cmdbuf_begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cmdbuf_begin.pNext = nullptr;
cmdbuf_begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
cmdbuf_begin.pInheritanceInfo = nullptr;
VkResult err = vkBeginCommandBuffer(frames[frame].setup_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
context->set_setup_buffer(frames[frame].setup_command_buffer); // Append now so it's added before everything else.
}
if (p_current_frame) {
VkCommandBufferBeginInfo cmdbuf_begin;
cmdbuf_begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cmdbuf_begin.pNext = nullptr;
cmdbuf_begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
cmdbuf_begin.pInheritanceInfo = nullptr;
VkResult err = vkBeginCommandBuffer(frames[frame].draw_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
context->append_command_buffer(frames[frame].draw_command_buffer);
}
}
}
// Sets this rendering device up on top of `p_context`.
// When `p_local_device` is true a local device is created from the context and
// a single frame is used; otherwise the frame count is derived from the
// swapchain image count.
// In order, this creates: the VMA allocator, per-frame command pools, command
// buffers and timestamp query pools, the staging buffer blocks, and finally the
// Vulkan pipeline cache (seeded from the on-disk cache when one was loaded).
void RenderingDeviceVulkan::initialize(VulkanContext *p_context, bool p_local_device) {
// Get our device capabilities.
{
device_capabilities.version_major = p_context->get_vulkan_major();
device_capabilities.version_minor = p_context->get_vulkan_minor();
}
context = p_context;
device = p_context->get_device();
if (p_local_device) {
// A local device replaces the context's device handle and uses a single frame.
frame_count = 1;
local_device = p_context->local_device_create();
device = p_context->local_device_get_vk_device(local_device);
} else {
frame_count = p_context->get_swapchain_image_count() + 1; // Always need one extra to ensure it's unused at any time, without having to use a fence for this.
}
limits = p_context->get_device_limits();
max_timestamp_query_elements = 256;
{ // Initialize allocator.
VmaAllocatorCreateInfo allocatorInfo;
memset(&allocatorInfo, 0, sizeof(VmaAllocatorCreateInfo));
allocatorInfo.physicalDevice = p_context->get_physical_device();
allocatorInfo.device = device;
allocatorInfo.instance = p_context->get_instance();
// NOTE(review): the result of vmaCreateAllocator is not checked here.
vmaCreateAllocator(&allocatorInfo, &allocator);
}
frames.resize(frame_count);
frame = 0;
// Create setup and frame buffers.
for (int i = 0; i < frame_count; i++) {
frames[i].index = 0;
{ // Create command pool, one per frame is recommended.
VkCommandPoolCreateInfo cmd_pool_info;
cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cmd_pool_info.pNext = nullptr;
cmd_pool_info.queueFamilyIndex = p_context->get_graphics_queue_family_index();
cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
VkResult res = vkCreateCommandPool(device, &cmd_pool_info, nullptr, &frames[i].command_pool);
ERR_FAIL_COND_MSG(res, "vkCreateCommandPool failed with error " + itos(res) + ".");
}
{ // Create command buffers.
VkCommandBufferAllocateInfo cmdbuf;
// No command buffer exists, create it.
cmdbuf.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
cmdbuf.pNext = nullptr;
cmdbuf.commandPool = frames[i].command_pool;
cmdbuf.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
cmdbuf.commandBufferCount = 1;
// The same allocate info is used twice: once for the setup buffer, once for the draw buffer.
// NOTE(review): on failure ERR_CONTINUE_MSG moves on to the next frame, which also skips the query pool creation below for this frame.
VkResult err = vkAllocateCommandBuffers(device, &cmdbuf, &frames[i].setup_command_buffer);
ERR_CONTINUE_MSG(err, "vkAllocateCommandBuffers failed with error " + itos(err) + ".");
err = vkAllocateCommandBuffers(device, &cmdbuf, &frames[i].draw_command_buffer);
ERR_CONTINUE_MSG(err, "vkAllocateCommandBuffers failed with error " + itos(err) + ".");
}
{
// Create query pool.
VkQueryPoolCreateInfo query_pool_create_info;
query_pool_create_info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
query_pool_create_info.flags = 0;
query_pool_create_info.pNext = nullptr;
query_pool_create_info.queryType = VK_QUERY_TYPE_TIMESTAMP;
query_pool_create_info.queryCount = max_timestamp_query_elements;
query_pool_create_info.pipelineStatistics = 0;
// NOTE(review): the result of vkCreateQueryPool is not checked here.
vkCreateQueryPool(device, &query_pool_create_info, nullptr, &frames[i].timestamp_pool);
// Size the per-frame timestamp storage to the query pool capacity.
frames[i].timestamp_names.resize(max_timestamp_query_elements);
frames[i].timestamp_cpu_values.resize(max_timestamp_query_elements);
frames[i].timestamp_count = 0;
frames[i].timestamp_result_names.resize(max_timestamp_query_elements);
frames[i].timestamp_cpu_result_values.resize(max_timestamp_query_elements);
frames[i].timestamp_result_values.resize(max_timestamp_query_elements);
frames[i].timestamp_result_count = 0;
}
}
{
// Begin the first command buffer for the first frame, so
// setting up things can be done in the meantime until swap_buffers(), which is called before advance.
VkCommandBufferBeginInfo cmdbuf_begin;
cmdbuf_begin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cmdbuf_begin.pNext = nullptr;
cmdbuf_begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
cmdbuf_begin.pInheritanceInfo = nullptr;
VkResult err = vkBeginCommandBuffer(frames[0].setup_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
err = vkBeginCommandBuffer(frames[0].draw_command_buffer, &cmdbuf_begin);
ERR_FAIL_COND_MSG(err, "vkBeginCommandBuffer failed with error " + itos(err) + ".");
// The buffers are only registered with the context when no local device is in use.
if (local_device.is_null()) {
context->set_setup_buffer(frames[0].setup_command_buffer); // Append now so it's added before everything else.
context->append_command_buffer(frames[0].draw_command_buffer);
}
}
for (int i = 0; i < frame_count; i++) {
// Reset all queries in a query pool before doing any operations with them.
// Every frame's pool is reset through the first frame's setup command buffer.
vkCmdResetQueryPool(frames[0].setup_command_buffer, frames[i].timestamp_pool, 0, max_timestamp_query_elements);
}
// Staging buffer block size: at least 4, converted from KiB to bytes.
staging_buffer_block_size = GLOBAL_GET("rendering/rendering_device/staging_buffer/block_size_kb");
staging_buffer_block_size = MAX(4u, staging_buffer_block_size);
staging_buffer_block_size *= 1024; // Kb -> bytes.
// Staging buffer maximum size: at least 1, converted from MiB to bytes.
staging_buffer_max_size = GLOBAL_GET("rendering/rendering_device/staging_buffer/max_size_mb");
staging_buffer_max_size = MAX(1u, staging_buffer_max_size);
staging_buffer_max_size *= 1024 * 1024;
if (staging_buffer_max_size < staging_buffer_block_size * 4) {
// Validate enough blocks.
staging_buffer_max_size = staging_buffer_block_size * 4;
}
texture_upload_region_size_px = GLOBAL_GET("rendering/rendering_device/staging_buffer/texture_upload_region_size_px");
texture_upload_region_size_px = nearest_power_of_2_templated(texture_upload_region_size_px);
frames_drawn = frame_count; // Start from frame count, so everything else is immediately old.
// Ensure current staging block is valid and at least one per frame exists.
staging_buffer_current = 0;
staging_buffer_used = false;
for (int i = 0; i < frame_count; i++) {
// Staging was never used, create a block.
Error err = _insert_staging_block();
ERR_CONTINUE(err != OK);
}
max_descriptors_per_pool = GLOBAL_GET("rendering/rendering_device/vulkan/max_descriptors_per_pool");
// Check to make sure DescriptorPoolKey is good.
static_assert(sizeof(uint64_t) * 3 >= UNIFORM_TYPE_MAX * sizeof(uint16_t));
draw_list = nullptr;
draw_list_count = 0;
draw_list_split = false;
compute_list = nullptr;
// Load any previously saved pipeline cache and use it as the initial data for the Vulkan pipeline cache object.
_load_pipeline_cache();
print_verbose(vformat("Startup PSO cache (%.1f MiB)", pipelines_cache.buffer.size() / (1024.0f * 1024.0f)));
VkPipelineCacheCreateInfo cache_info = {};
cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
cache_info.pNext = nullptr;
cache_info.flags = 0;
cache_info.initialDataSize = pipelines_cache.buffer.size();
cache_info.pInitialData = pipelines_cache.buffer.ptr();
VkResult err = vkCreatePipelineCache(device, &cache_info, nullptr, &pipelines_cache.cache_object);
// A failure here only produces a warning; initialization is not aborted.
if (err != VK_SUCCESS) {
WARN_PRINT("vkCreatePipelinecache failed with error " + itos(err) + ".");
}
}
void RenderingDeviceVulkan::_load_pipeline_cache() {
if (!DirAccess::exists("user://vulkan/")) {
Ref<DirAccess> da = DirAccess::create(DirAccess::ACCESS_USERDATA);
if (da.is_valid()) {
da->make_dir_recursive("user://vulkan/");
}
}
if (FileAccess::exists("user://vulkan/pipelines.cache")) {
Error file_error;
Vector<uint8_t> file_data = FileAccess::get_file_as_bytes("user://vulkan/pipelines.cache", &file_error);
if (file_error != OK || file_data.size() <= (int)sizeof(PipelineCacheHeader)) {
WARN_PRINT("Invalid/corrupt pipelines cache.");
return;
}
PipelineCacheHeader header = {};
memcpy((char *)&header, file_data.ptr(), sizeof(PipelineCacheHeader));
if (header.magic != 868 + VK_PIPELINE_CACHE_HEADER_VERSION_ONE) {
WARN_PRINT("Invalid pipelines cache magic number.");
return;
}
pipelines_cache.buffer.resize(file_data.size() - sizeof(PipelineCacheHeader));
memcpy(pipelines_cache.buffer.ptrw(), file_data.ptr() + sizeof(PipelineCacheHeader), pipelines_cache.buffer.size());
VkPhysicalDeviceProperties props;
vkGetPhysicalDeviceProperties(context->get_physical_device(), &props);
bool invalid_uuid = false;
for (size_t i = 0; i < VK_UUID_SIZE; i++) {
if (header.uuid[i] != props.pipelineCacheUUID[i]) {
invalid_uuid = true;
break;
}
}
if (header.data_hash != hash_murmur3_buffer(pipelines_cache.buffer.ptr(), pipelines_cache.buffer.size()) || header.data_size != (uint32_t)pipelines_cache.buffer.size() || header.vendor_id != props.vendorID || header.device_id != props.deviceID || header.driver_abi != sizeof(void *) || invalid_uuid) {
WARN_PRINT("Invalid pipelines cache header.");
pipelines_cache.current_size = 0;
pipelines_cache.buffer.clear();
} else {
pipelines_cache.current_size = pipelines_cache.buffer.size();
}
}
}
void RenderingDeviceVulkan::_update_pipeline_cache(bool p_closing) {
size_t pso_blob_size = 0;
float save_interval = GLOBAL_GET("rendering/rendering_device/pipeline_cache/save_chunk_size_mb");
VkResult vr = vkGetPipelineCacheData(device, pipelines_cache.cache_object, &pso_blob_size, nullptr);
ERR_FAIL_COND(vr);
size_t difference = (pso_blob_size - pipelines_cache.current_size) / (1024 * 1024);
if (p_closing && Engine::get_singleton()->is_editor_hint()) {
// This is mostly for the editor to check if after playing the game, game's pipeline cache size still matches with editor's cache.
_load_pipeline_cache();
if (pipelines_cache.current_size > pso_blob_size) {
pso_blob_size = pipelines_cache.current_size;
if (pipelines_cache_save_task != WorkerThreadPool::INVALID_TASK_ID || !WorkerThreadPool::get_singleton()->is_task_completed(pipelines_cache_save_task)) {
WorkerThreadPool::get_singleton()->wait_for_task_completion(pipelines_cache_save_task);
}
}
}
if (pso_blob_size == pipelines_cache.current_size) {
return;
} else if (difference < save_interval && !p_closing) {
return;
}
if (p_closing) {
if (pipelines_cache_save_task == WorkerThreadPool::INVALID_TASK_ID || WorkerThreadPool::get_singleton()->is_task_completed(pipelines_cache_save_task)) {
pipelines_cache_save_task = WorkerThreadPool::get_singleton()->add_template_task(this, &RenderingDeviceVulkan::_save_pipeline_cache_threaded, pso_blob_size, false, "PipelineCacheSave");
WorkerThreadPool::get_singleton()->wait_for_task_completion(pipelines_cache_save_task);
} else {
WorkerThreadPool::get_singleton()->wait_for_task_completion(pipelines_cache_save_task);
pipelines_cache_save_task = WorkerThreadPool::get_singleton()->add_template_task(this, &RenderingDeviceVulkan::_save_pipeline_cache_threaded, pso_blob_size, false, "PipelineCacheSave");
WorkerThreadPool::get_singleton()->wait_for_task_completion(pipelines_cache_save_task);
}
} else {
if (pipelines_cache_save_task == WorkerThreadPool::INVALID_TASK_ID || WorkerThreadPool::get_singleton()->is_task_completed(pipelines_cache_save_task)) {
pipelines_cache_save_task = WorkerThreadPool::get_singleton()->add_template_task(this, &RenderingDeviceVulkan::_save_pipeline_cache_threaded, pso_blob_size, false, "PipelineCacheSave");
}
}
}
// Fetches `p_pso_blob_size` bytes of pipeline cache data from the driver into
// `pipelines_cache.buffer` and writes it to "user://vulkan/pipelines.cache",
// preceded by a header describing the data and the current physical device.
void RenderingDeviceVulkan::_save_pipeline_cache_threaded(size_t p_pso_blob_size) {
	pipelines_cache.current_size = p_pso_blob_size;
	pipelines_cache.buffer.clear();
	pipelines_cache.buffer.resize(p_pso_blob_size);

	VkResult fetch_result = vkGetPipelineCacheData(device, pipelines_cache.cache_object, &p_pso_blob_size, pipelines_cache.buffer.ptrw());
	ERR_FAIL_COND(fetch_result);
	print_verbose(vformat("Updated PSO cache (%.1f MiB)", p_pso_blob_size / (1024.0f * 1024.0f)));

	VkPhysicalDeviceProperties device_props;
	vkGetPhysicalDeviceProperties(context->get_physical_device(), &device_props);

	// Fill in the header that the loader checks the file against.
	PipelineCacheHeader file_header = {};
	file_header.magic = 868 + VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	file_header.data_size = pipelines_cache.buffer.size();
	file_header.data_hash = hash_murmur3_buffer(pipelines_cache.buffer.ptr(), pipelines_cache.buffer.size());
	file_header.device_id = device_props.deviceID;
	file_header.vendor_id = device_props.vendorID;
	file_header.driver_version = device_props.driverVersion;
	for (size_t byte_idx = 0; byte_idx < VK_UUID_SIZE; byte_idx++) {
		file_header.uuid[byte_idx] = device_props.pipelineCacheUUID[byte_idx];
	}
	file_header.driver_abi = sizeof(void *);

	Ref<FileAccess> cache_file = FileAccess::open("user://vulkan/pipelines.cache", FileAccess::WRITE, nullptr);
	if (!cache_file.is_valid()) {
		return;
	}
	cache_file->store_buffer((const uint8_t *)&file_header, sizeof(PipelineCacheHeader));
	cache_file->store_buffer(pipelines_cache.buffer);
}
template <class T>
void RenderingDeviceVulkan::_free_rids(T &p_owner, const char *p_type) {
List<RID> owned;
p_owner.get_owned_list(&owned);
if (owned.size()) {
if (owned.size() == 1) {
WARN_PRINT(vformat("1 RID of type \"%s\" was leaked.", p_type));
} else {
WARN_PRINT(vformat("%d RIDs of type \"%s\" were leaked.", owned.size(), p_type));
}
for (const RID &E : owned) {
#ifdef DEV_ENABLED
if (resource_names.has(E)) {
print_line(String(" - ") + resource_names[E]);
}
#endif
free(E);
}
}
}
void RenderingDeviceVulkan::capture_timestamp(const String &p_name) {
ERR_FAIL_COND_MSG(draw_list != nullptr, "Capturing timestamps during draw list creation is not allowed. Offending timestamp was: " + p_name);
ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements);
// This should be optional for profiling, else it will slow things down.
{
VkMemoryBarrier memoryBarrier;
memoryBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
memoryBarrier.pNext = nullptr;
memoryBarrier.srcAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
VK_ACCESS_INDEX_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_HOST_READ_BIT |
VK_ACCESS_HOST_WRITE_BIT;
memoryBarrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
VK_ACCESS_INDEX_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT |
VK_ACCESS_HOST_READ_BIT |
VK_ACCESS_HOST_WRITE_BIT;
vkCmdPipelineBarrier(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr);
}
vkCmdWriteTimestamp(frames[frame].draw_command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frames[frame].timestamp_pool, frames[frame].timestamp_count);
frames[frame].timestamp_names[frames[frame].timestamp_count] = p_name;
frames[frame].timestamp_cpu_values[frames[frame].timestamp_count] = OS::get_singleton()->get_ticks_usec();
frames[frame].timestamp_count++;
}
// Returns the native Vulkan handle (or value) selected by `p_resource`, as a
// 64-bit integer. Context-level resources ignore `p_rid`; the others look up
// `p_rid` in the matching owner and return 0 when it is not found.
// Unsupported resource kinds return 0.
uint64_t RenderingDeviceVulkan::get_driver_resource(DriverResource p_resource, RID p_rid, uint64_t p_index) {
	_THREAD_SAFE_METHOD_

	switch (p_resource) {
		// Resources provided by the context.
		case DRIVER_RESOURCE_VULKAN_DEVICE: {
			return (uint64_t)context->get_device();
		}
		case DRIVER_RESOURCE_VULKAN_PHYSICAL_DEVICE: {
			return (uint64_t)context->get_physical_device();
		}
		case DRIVER_RESOURCE_VULKAN_INSTANCE: {
			return (uint64_t)context->get_instance();
		}
		case DRIVER_RESOURCE_VULKAN_QUEUE: {
			return (uint64_t)context->get_graphics_queue();
		}
		case DRIVER_RESOURCE_VULKAN_QUEUE_FAMILY_INDEX: {
			return context->get_graphics_queue_family_index();
		}

		// Texture-backed resources.
		case DRIVER_RESOURCE_VULKAN_IMAGE: {
			Texture *texture = texture_owner.get_or_null(p_rid);
			ERR_FAIL_NULL_V(texture, 0);
			return (uint64_t)texture->image;
		}
		case DRIVER_RESOURCE_VULKAN_IMAGE_VIEW: {
			Texture *texture = texture_owner.get_or_null(p_rid);
			ERR_FAIL_NULL_V(texture, 0);
			return (uint64_t)texture->view;
		}
		case DRIVER_RESOURCE_VULKAN_IMAGE_NATIVE_TEXTURE_FORMAT: {
			Texture *texture = texture_owner.get_or_null(p_rid);
			ERR_FAIL_NULL_V(texture, 0);
			return vulkan_formats[texture->format];
		}

		case DRIVER_RESOURCE_VULKAN_SAMPLER: {
			VkSampler *found_sampler = sampler_owner.get_or_null(p_rid);
			ERR_FAIL_NULL_V(found_sampler, 0);
			return uint64_t(*found_sampler);
		}
		case DRIVER_RESOURCE_VULKAN_DESCRIPTOR_SET: {
			UniformSet *found_set = uniform_set_owner.get_or_null(p_rid);
			ERR_FAIL_NULL_V(found_set, 0);
			return uint64_t(found_set->descriptor_set);
		}

		case DRIVER_RESOURCE_VULKAN_BUFFER: {
			// The RID may belong to any of the buffer owners; try each in turn.
			Buffer *found_buffer = nullptr;
			if (vertex_buffer_owner.owns(p_rid)) {
				found_buffer = vertex_buffer_owner.get_or_null(p_rid);
			} else if (index_buffer_owner.owns(p_rid)) {
				found_buffer = index_buffer_owner.get_or_null(p_rid);
			} else if (uniform_buffer_owner.owns(p_rid)) {
				found_buffer = uniform_buffer_owner.get_or_null(p_rid);
			} else if (texture_buffer_owner.owns(p_rid)) {
				found_buffer = &texture_buffer_owner.get_or_null(p_rid)->buffer;
			} else if (storage_buffer_owner.owns(p_rid)) {
				found_buffer = storage_buffer_owner.get_or_null(p_rid);
			}
			ERR_FAIL_NULL_V(found_buffer, 0);
			return uint64_t(found_buffer->buffer);
		}

		case DRIVER_RESOURCE_VULKAN_COMPUTE_PIPELINE: {
			ComputePipeline *found_pipeline = compute_pipeline_owner.get_or_null(p_rid);
			ERR_FAIL_NULL_V(found_pipeline, 0);
			return uint64_t(found_pipeline->pipeline);
		}
		case DRIVER_RESOURCE_VULKAN_RENDER_PIPELINE: {
			RenderPipeline *found_pipeline = render_pipeline_owner.get_or_null(p_rid);
			ERR_FAIL_NULL_V(found_pipeline, 0);
			return uint64_t(found_pipeline->pipeline);
		}

		default: {
			// Not supported for this driver.
			return 0;
		}
	}
}
// Number of timestamp results available on the current frame.
uint32_t RenderingDeviceVulkan::get_captured_timestamps_count() const {
	const auto &current = frames[frame];
	return current.timestamp_result_count;
}
// Returns the `index` value stored on the current frame.
uint64_t RenderingDeviceVulkan::get_captured_timestamps_frame() const {
	const auto &current = frames[frame];
	return current.index;
}
// Multiplies two unsigned 64-bit integers into a full 128-bit product.
// `h` receives the upper 64 bits and `l` the lower 64 bits.
// Works on 32-bit halves so that no intermediate value overflows 64 bits.
static void mult64to128(uint64_t u, uint64_t v, uint64_t &h, uint64_t &l) {
	const uint64_t mask32 = 0xffffffff;

	const uint64_t u_lo = u & mask32;
	const uint64_t u_hi = u >> 32;
	const uint64_t v_lo = v & mask32;
	const uint64_t v_hi = v >> 32;

	// Partial products, each one absorbing the carry of the previous step.
	const uint64_t lo_lo = u_lo * v_lo;
	const uint64_t hi_lo = (u_hi * v_lo) + (lo_lo >> 32);
	const uint64_t lo_hi = (u_lo * v_hi) + (hi_lo & mask32);

	h = (u_hi * v_hi) + (hi_lo >> 32) + (lo_hi >> 32);
	l = (lo_hi << 32) + (lo_lo & mask32);
}
// Returns the GPU timestamp result at `p_index` for the current frame, scaled
// by the device's `timestampPeriod`. Returns 0 when `p_index` is out of range.
uint64_t RenderingDeviceVulkan::get_captured_timestamp_gpu_time(uint32_t p_index) const {
	ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, 0);

	// This sucks because timestampPeriod multiplier is a float, while the timestamp is 64 bits nanosecs.
	// So, in cases like nvidia which give you enormous numbers and 1 as multiplier, multiplying is next to impossible.
	// Need to do 128 bits fixed point multiplication to get the right value.
	uint64_t fraction_bits = 16;

	// The period converted to fixed point with `fraction_bits` fractional bits.
	const uint64_t period_fixed = uint64_t(double(limits.timestampPeriod) * double(1 << fraction_bits));

	uint64_t product_hi, product_lo;
	mult64to128(frames[frame].timestamp_result_values[p_index], period_fixed, product_hi, product_lo);

	// Drop the fractional bits from the 128-bit product, keeping the low 64 bits of the result.
	return (product_lo >> fraction_bits) | (product_hi << (64 - fraction_bits));
}
// Returns the CPU time stored for the timestamp result at `p_index` on the
// current frame, or 0 when `p_index` is out of range.
uint64_t RenderingDeviceVulkan::get_captured_timestamp_cpu_time(uint32_t p_index) const {
	const auto &current = frames[frame];
	ERR_FAIL_UNSIGNED_INDEX_V(p_index, current.timestamp_result_count, 0);
	return current.timestamp_cpu_result_values[p_index];
}
// Returns the name stored for the timestamp result at `p_index` on the
// current frame, or an empty String when `p_index` is out of range.
String RenderingDeviceVulkan::get_captured_timestamp_name(uint32_t p_index) const {
	const auto &current = frames[frame];
	ERR_FAIL_UNSIGNED_INDEX_V(p_index, current.timestamp_result_count, String());
	return current.timestamp_result_names[p_index];
}
// Maps a RenderingDevice limit to the matching value from the Vulkan device
// limits, or from the context's subgroup / VRS capabilities.
// An unknown limit reports an error and returns 0.
uint64_t RenderingDeviceVulkan::limit_get(Limit p_limit) const {
	switch (p_limit) {
		// Descriptor set and framebuffer limits.
		case LIMIT_MAX_BOUND_UNIFORM_SETS:
			return limits.maxBoundDescriptorSets;
		case LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS:
			return limits.maxColorAttachments;
		case LIMIT_MAX_TEXTURES_PER_UNIFORM_SET:
			return limits.maxDescriptorSetSampledImages;
		case LIMIT_MAX_SAMPLERS_PER_UNIFORM_SET:
			return limits.maxDescriptorSetSamplers;
		case LIMIT_MAX_STORAGE_BUFFERS_PER_UNIFORM_SET:
			return limits.maxDescriptorSetStorageBuffers;
		case LIMIT_MAX_STORAGE_IMAGES_PER_UNIFORM_SET:
			return limits.maxDescriptorSetStorageImages;
		case LIMIT_MAX_UNIFORM_BUFFERS_PER_UNIFORM_SET:
			return limits.maxDescriptorSetUniformBuffers;
		case LIMIT_MAX_DRAW_INDEXED_INDEX:
			return limits.maxDrawIndexedIndexValue;
		case LIMIT_MAX_FRAMEBUFFER_HEIGHT:
			return limits.maxFramebufferHeight;
		case LIMIT_MAX_FRAMEBUFFER_WIDTH:
			return limits.maxFramebufferWidth;

		// Texture limits.
		case LIMIT_MAX_TEXTURE_ARRAY_LAYERS:
			return limits.maxImageArrayLayers;
		case LIMIT_MAX_TEXTURE_SIZE_1D:
			return limits.maxImageDimension1D;
		case LIMIT_MAX_TEXTURE_SIZE_2D:
			return limits.maxImageDimension2D;
		case LIMIT_MAX_TEXTURE_SIZE_3D:
			return limits.maxImageDimension3D;
		case LIMIT_MAX_TEXTURE_SIZE_CUBE:
			return limits.maxImageDimensionCube;

		// Per shader stage limits.
		case LIMIT_MAX_TEXTURES_PER_SHADER_STAGE:
			return limits.maxPerStageDescriptorSampledImages;
		case LIMIT_MAX_SAMPLERS_PER_SHADER_STAGE:
			return limits.maxPerStageDescriptorSamplers;
		case LIMIT_MAX_STORAGE_BUFFERS_PER_SHADER_STAGE:
			return limits.maxPerStageDescriptorStorageBuffers;
		case LIMIT_MAX_STORAGE_IMAGES_PER_SHADER_STAGE:
			return limits.maxPerStageDescriptorStorageImages;
		case LIMIT_MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE:
			return limits.maxPerStageDescriptorUniformBuffers;

		// Push constant, uniform buffer and vertex input limits.
		case LIMIT_MAX_PUSH_CONSTANT_SIZE:
			return limits.maxPushConstantsSize;
		case LIMIT_MAX_UNIFORM_BUFFER_SIZE:
			return limits.maxUniformBufferRange;
		case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTE_OFFSET:
			return limits.maxVertexInputAttributeOffset;
		case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTES:
			return limits.maxVertexInputAttributes;
		case LIMIT_MAX_VERTEX_INPUT_BINDINGS:
			return limits.maxVertexInputBindings;
		case LIMIT_MAX_VERTEX_INPUT_BINDING_STRIDE:
			return limits.maxVertexInputBindingStride;
		case LIMIT_MIN_UNIFORM_BUFFER_OFFSET_ALIGNMENT:
			return limits.minUniformBufferOffsetAlignment;

		// Compute limits.
		case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X:
			return limits.maxComputeWorkGroupCount[0];
		case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Y:
			return limits.maxComputeWorkGroupCount[1];
		case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Z:
			return limits.maxComputeWorkGroupCount[2];
		case LIMIT_MAX_COMPUTE_WORKGROUP_INVOCATIONS:
			return limits.maxComputeWorkGroupInvocations;
		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X:
			return limits.maxComputeWorkGroupSize[0];
		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y:
			return limits.maxComputeWorkGroupSize[1];
		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
			return limits.maxComputeWorkGroupSize[2];

		// Viewport limits.
		case LIMIT_MAX_VIEWPORT_DIMENSIONS_X:
			return limits.maxViewportDimensions[0];
		case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:
			return limits.maxViewportDimensions[1];

		// Subgroup capabilities, taken from the context.
		case LIMIT_SUBGROUP_SIZE:
			return context->get_subgroup_capabilities().size;
		case LIMIT_SUBGROUP_IN_SHADERS:
			return context->get_subgroup_capabilities().supported_stages_flags_rd();
		case LIMIT_SUBGROUP_OPERATIONS:
			return context->get_subgroup_capabilities().supported_operations_flags_rd();

		// VRS capabilities, taken from the context.
		case LIMIT_VRS_TEXEL_WIDTH:
			return context->get_vrs_capabilities().texel_size.x;
		case LIMIT_VRS_TEXEL_HEIGHT:
			return context->get_vrs_capabilities().texel_size.y;

		default:
			ERR_FAIL_V(0);
	}

	return 0;
}
// Tears the rendering device down.
// Order of operations: flush, free every RID still owned (warning about
// leaks), free the resources pending on each frame together with the per-frame
// command and query pools, save and destroy the pipeline cache, then destroy
// the remaining pools, staging buffers, VMA pools, the allocator, the cached
// vertex formats and the framebuffer format render passes.
// The function ends by checking that the descriptor pools and dependency maps
// are empty.
void RenderingDeviceVulkan::finalize() {
// Free all resources.
_flush(false);
_free_rids(render_pipeline_owner, "Pipeline");
_free_rids(compute_pipeline_owner, "Compute");
_free_rids(uniform_set_owner, "UniformSet");
_free_rids(texture_buffer_owner, "TextureBuffer");
_free_rids(storage_buffer_owner, "StorageBuffer");
_free_rids(uniform_buffer_owner, "UniformBuffer");
_free_rids(shader_owner, "Shader");
_free_rids(index_array_owner, "IndexArray");
_free_rids(index_buffer_owner, "IndexBuffer");
_free_rids(vertex_array_owner, "VertexArray");
_free_rids(vertex_buffer_owner, "VertexBuffer");
_free_rids(framebuffer_owner, "Framebuffer");
_free_rids(sampler_owner, "Sampler");
{
// For textures it's a bit more difficult because they may be shared.
List<RID> owned;
texture_owner.get_owned_list(&owned);
if (owned.size()) {
if (owned.size() == 1) {
WARN_PRINT("1 RID of type \"Texture\" was leaked.");
} else {
WARN_PRINT(vformat("%d RIDs of type \"Texture\" were leaked.", owned.size()));
}
// Free shared first.
// The next element is fetched before the current one may be erased from the list.
for (List<RID>::Element *E = owned.front(); E;) {
List<RID>::Element *N = E->next();
if (texture_is_shared(E->get())) {
#ifdef DEV_ENABLED
if (resource_names.has(E->get())) {
print_line(String(" - ") + resource_names[E->get()]);
}
#endif
free(E->get());
owned.erase(E);
}
E = N;
}
// Free non shared second, this will avoid an error trying to free unexisting textures due to dependencies.
for (const RID &E : owned) {
#ifdef DEV_ENABLED
if (resource_names.has(E)) {
print_line(String(" - ") + resource_names[E]);
}
#endif
free(E);
}
}
}
// Free everything pending.
// Pending resources are freed starting at the current frame (index `f`), while the
// pools are destroyed by loop index `i`; over the whole loop every frame is covered by both.
for (int i = 0; i < frame_count; i++) {
int f = (frame + i) % frame_count;
_free_pending_resources(f);
vkDestroyCommandPool(device, frames[i].command_pool, nullptr);
vkDestroyQueryPool(device, frames[i].timestamp_pool, nullptr);
}
// Save the pipeline cache (closing mode) before destroying the cache object.
_update_pipeline_cache(true);
vkDestroyPipelineCache(device, pipelines_cache.cache_object, nullptr);
for (int i = 0; i < split_draw_list_allocators.size(); i++) {
vkDestroyCommandPool(device, split_draw_list_allocators[i].command_pool, nullptr);
}
frames.clear();
for (int i = 0; i < staging_buffer_blocks.size(); i++) {
vmaDestroyBuffer(allocator, staging_buffer_blocks[i].buffer, staging_buffer_blocks[i].allocation);
}
// Destroy the small allocation pools one by one, removing each from the map.
while (small_allocs_pools.size()) {
HashMap<uint32_t, VmaPool>::Iterator E = small_allocs_pools.begin();
vmaDestroyPool(allocator, E->value);
small_allocs_pools.remove(E);
}
vmaDestroyAllocator(allocator);
// Release the binding and attribute arrays held by each cached vertex format.
while (vertex_formats.size()) {
HashMap<VertexFormatID, VertexDescriptionCache>::Iterator temp = vertex_formats.begin();
memdelete_arr(temp->value.bindings);
memdelete_arr(temp->value.attributes);
vertex_formats.remove(temp);
}
for (KeyValue<FramebufferFormatID, FramebufferFormat> &E : framebuffer_formats) {
vkDestroyRenderPass(device, E.value.render_pass, nullptr);
}
framebuffer_formats.clear();
// All these should be clear at this point.
ERR_FAIL_COND(descriptor_pools.size());
ERR_FAIL_COND(dependency_map.size());
ERR_FAIL_COND(reverse_dependency_map.size());
}
// Allocates a new RenderingDeviceVulkan and initializes it as a local device
// on the same context as this one. The new device is returned to the caller.
RenderingDevice *RenderingDeviceVulkan::create_local_device() {
	RenderingDeviceVulkan *local_rd = memnew(RenderingDeviceVulkan);
	local_rd->initialize(context, true);
	return local_rd;
}
bool RenderingDeviceVulkan::has_feature(const Features p_feature) const {
switch (p_feature) {
case SUPPORTS_MULTIVIEW: {
VulkanContext::MultiviewCapabilities multiview_capabilies = context->get_multiview_capabilities();
return multiview_capabilies.is_supported && multiview_capabilies.max_view_count > 1;
} break;
case SUPPORTS_FSR_HALF_FLOAT: {
return context->get_shader_capabilities().shader_float16_is_supported && context->get_physical_device_features().shaderInt16 && context->get_storage_buffer_capabilities().storage_buffer_16_bit_access_is_supported;
} break;
case SUPPORTS_ATTACHMENT_VRS: {
VulkanContext::VRSCapabilities vrs_capabilities = context->get_vrs_capabilities();
return vrs_capabilities.attachment_vrs_supported && context->get_physical_device_features().shaderStorageImageExtendedFormats;
} break;
case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: {
return true;
} break;
default: {
return false;
}
}
}
// Marks this device's capabilities as belonging to the Vulkan device family.
RenderingDeviceVulkan::RenderingDeviceVulkan() {
	this->device_capabilities.device_family = DEVICE_VULKAN;
}
// Only a device that holds a valid local device cleans up here: it finalizes
// itself and then releases the local device through the context.
RenderingDeviceVulkan::~RenderingDeviceVulkan() {
	if (!local_device.is_valid()) {
		return;
	}

	finalize();
	context->local_device_free(local_device);
}