From 0aa22b8f1309652353a1ca257f75ff6b3a0437c8 Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Tue, 19 Dec 2023 16:41:30 +0000 Subject: [PATCH] Vertex cache optimizer Optimizes indices to make good use of vertex cache on GPU. --- core/math/vertex_cache_optimizer.cpp | 304 ++++++++++++++++++++++ core/math/vertex_cache_optimizer.h | 120 +++++++++ doc/classes/Mesh.xml | 4 + editor/import/resource_importer_obj.cpp | 8 +- editor/import/resource_importer_scene.cpp | 6 +- scene/resources/mesh.cpp | 1 + scene/resources/mesh.h | 1 + servers/visual_server.cpp | 14 + servers/visual_server.h | 1 + 9 files changed, 456 insertions(+), 3 deletions(-) create mode 100644 core/math/vertex_cache_optimizer.cpp create mode 100644 core/math/vertex_cache_optimizer.h diff --git a/core/math/vertex_cache_optimizer.cpp b/core/math/vertex_cache_optimizer.cpp new file mode 100644 index 00000000000..bb0e9090abb --- /dev/null +++ b/core/math/vertex_cache_optimizer.cpp @@ -0,0 +1,304 @@ +/**************************************************************************/ +/* vertex_cache_optimizer.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "vertex_cache_optimizer.h" + +#include "core/math/math_funcs.h" + +// Precalculate the tables. +void VertexCacheOptimizer::init() { + for (int i = 0; i < Constants::CACHE_SCORE_TABLE_SIZE; i++) { + float score = 0; + if (i < 3) { + // This vertex was used in the last triangle, + // so it has a fixed score, whichever of the three + // it's in. Otherwise, you can get very different + // answers depending on whether you add + // the triangle 1,2,3 or 3,1,2 - which is silly. + score = Constants::LAST_TRI_SCORE; + } else { + // Points for being high in the cache. 
+ const float scaler = 1.0f / (Constants::CACHE_FUNCTION_LENGTH - 3); + score = 1.0f - (i - 3) * scaler; + score = Math::pow(score, Constants::CACHE_DECAY_POWER); + } + _cache_position_score[i] = (SCORE_TYPE)(Constants::SCORE_SCALING * score); + } + + for (int i = 1; i < Constants::VALENCE_SCORE_TABLE_SIZE; i++) { + // Bonus points for having a low number of tris still to + // use the vert, so we get rid of lone verts quickly. + float valence_boost = Math::pow(i, -Constants::VALENCE_BOOST_POWER); + float score = Constants::VALENCE_BOOST_SCALE * valence_boost; + _valence_score[i] = (SCORE_TYPE)(Constants::SCORE_SCALING * score); + } +} + +VertexCacheOptimizer::SCORE_TYPE VertexCacheOptimizer::find_vertex_score(int p_num_active_tris, int p_cache_position) { + if (p_num_active_tris == 0) { + // No triangles need this vertex! + return 0; + } + + SCORE_TYPE score = 0; + if (p_cache_position < 0) { + // Vertex is not in LRU cache - no score. + } else { + score = _cache_position_score[p_cache_position]; + } + + if (p_num_active_tris < Constants::VALENCE_SCORE_TABLE_SIZE) { + score += _valence_score[p_num_active_tris]; + } + return score; +} + +VertexCacheOptimizer::VERTEX_INDEX_TYPE *VertexCacheOptimizer::_reorder_indices(VERTEX_INDEX_TYPE *r_dest_indices, const VERTEX_INDEX_TYPE *p_source_indices, int p_num_triangles, int p_num_vertices) { + ADJACENCY_TYPE *num_active_tris = (ADJACENCY_TYPE *)memalloc(sizeof(ADJACENCY_TYPE) * p_num_vertices); + memset(num_active_tris, 0, sizeof(ADJACENCY_TYPE) * p_num_vertices); + + // First scan over the vertex data, count the total number of + // occurrences of each vertex. + for (int i = 0; i < 3 * p_num_triangles; i++) { + if (num_active_tris[p_source_indices[i]] == Constants::MAX_ADJACENCY) { + // Unsupported mesh, + // vertex shared by too many triangles. + memfree(num_active_tris); + return nullptr; + } + num_active_tris[p_source_indices[i]]++; + } + + // Allocate the rest of the arrays. 
+ ARRAY_INDEX_TYPE *offsets = (ARRAY_INDEX_TYPE *)memalloc(sizeof(ARRAY_INDEX_TYPE) * p_num_vertices); + SCORE_TYPE *last_score = (SCORE_TYPE *)memalloc(sizeof(SCORE_TYPE) * p_num_vertices); + CACHE_POS_TYPE *cache_tag = (CACHE_POS_TYPE *)memalloc(sizeof(CACHE_POS_TYPE) * p_num_vertices); + + uint8_t *triangle_added = (uint8_t *)memalloc((p_num_triangles + 7) / 8); + SCORE_TYPE *triangle_score = (SCORE_TYPE *)memalloc(sizeof(SCORE_TYPE) * p_num_triangles); + TRIANGLE_INDEX_TYPE *triangle_indices = (TRIANGLE_INDEX_TYPE *)memalloc(sizeof(TRIANGLE_INDEX_TYPE) * 3 * p_num_triangles); + memset(triangle_added, 0, sizeof(uint8_t) * ((p_num_triangles + 7) / 8)); + memset(triangle_score, 0, sizeof(SCORE_TYPE) * p_num_triangles); + memset(triangle_indices, 0, sizeof(TRIANGLE_INDEX_TYPE) * 3 * p_num_triangles); + + // Count the triangle array offset for each vertex, + // initialize the rest of the data. + int sum = 0; + for (int i = 0; i < p_num_vertices; i++) { + offsets[i] = sum; + sum += num_active_tris[i]; + num_active_tris[i] = 0; + cache_tag[i] = -1; + } + + // Fill the vertex data structures with indices to the triangles + // using each vertex. + for (int i = 0; i < p_num_triangles; i++) { + for (int j = 0; j < 3; j++) { + int v = p_source_indices[3 * i + j]; + triangle_indices[offsets[v] + num_active_tris[v]] = i; + num_active_tris[v]++; + } + } + + // Initialize the score for all vertices. + for (int i = 0; i < p_num_vertices; i++) { + last_score[i] = find_vertex_score(num_active_tris[i], cache_tag[i]); + for (int j = 0; j < num_active_tris[i]; j++) { + triangle_score[triangle_indices[offsets[i] + j]] += last_score[i]; + } + } + + // Find the best triangle. + int best_triangle = -1; + int best_score = -1; + + for (int i = 0; i < p_num_triangles; i++) { + if (triangle_score[i] > best_score) { + best_score = triangle_score[i]; + best_triangle = i; + } + } + + // Allocate the output array. 
+ TRIANGLE_INDEX_TYPE *out_triangles = (TRIANGLE_INDEX_TYPE *)memalloc(sizeof(TRIANGLE_INDEX_TYPE) * p_num_triangles); + int out_pos = 0; + + // Initialize the cache. + int cache[Constants::VERTEX_CACHE_SIZE + 3]; + for (int i = 0; i < Constants::VERTEX_CACHE_SIZE + 3; i++) { + cache[i] = -1; + } + + int scan_pos = 0; + + // Output the current best triangle, as long as there + // are triangles left to output. + while (best_triangle >= 0) { + // Mark the triangle as added. + set_added(triangle_added, best_triangle); + // Output this triangle. + out_triangles[out_pos++] = best_triangle; + for (int i = 0; i < 3; i++) { + // Update this vertex. + int v = p_source_indices[3 * best_triangle + i]; + + // Check the current cache position, if it + // is in the cache. + int endpos = cache_tag[v]; + if (endpos < 0) { + endpos = Constants::VERTEX_CACHE_SIZE + i; + } + if (endpos > i) { + // Move all cache entries from the previous position + // in the cache to the new target position (i) one + // step backwards. + for (int j = endpos; j > i; j--) { + cache[j] = cache[j - 1]; + // If this cache slot contains a real + // vertex, update its cache tag. + if (cache[j] >= 0) { + cache_tag[cache[j]]++; + } + } + // Insert the current vertex into its new target + // slot. + cache[i] = v; + cache_tag[v] = i; + } + + // Find the current triangle in the list of active + // triangles and remove it (moving the last + // triangle in the list to the slot of this triangle). + for (int j = 0; j < num_active_tris[v]; j++) { + if (triangle_indices[offsets[v] + j] == best_triangle) { + triangle_indices[offsets[v] + j] = triangle_indices[offsets[v] + num_active_tris[v] - 1]; + break; + } + } + // Shorten the list. + num_active_tris[v]--; + } + // Update the scores of all vertices in the cache and of their triangles. + for (int i = 0; i < Constants::VERTEX_CACHE_SIZE + 3; i++) { + int v = cache[i]; + if (v < 0) { + break; + } + // This vertex has been pushed outside of the + // actual cache. 
+ if (i >= Constants::VERTEX_CACHE_SIZE) { + cache_tag[v] = -1; + cache[i] = -1; + } + SCORE_TYPE newScore = find_vertex_score(num_active_tris[v], cache_tag[v]); + SCORE_TYPE diff = newScore - last_score[v]; + for (int j = 0; j < num_active_tris[v]; j++) { + triangle_score[triangle_indices[offsets[v] + j]] += diff; + } + last_score[v] = newScore; + } + // Find the best triangle referenced by vertices in the cache. + best_triangle = -1; + best_score = -1; + for (int i = 0; i < Constants::VERTEX_CACHE_SIZE; i++) { + if (cache[i] < 0) { + break; + } + int v = cache[i]; + for (int j = 0; j < num_active_tris[v]; j++) { + int t = triangle_indices[offsets[v] + j]; + if (triangle_score[t] > best_score) { + best_triangle = t; + best_score = triangle_score[t]; + } + } + } + // If no active triangle was found at all, continue + // scanning the whole list of triangles. + if (best_triangle < 0) { + for (; scan_pos < p_num_triangles; scan_pos++) { + if (!is_added(triangle_added, scan_pos)) { + best_triangle = scan_pos; + break; + } + } + } + } + + // Convert the triangle index array into a full triangle list. + out_pos = 0; + for (int i = 0; i < p_num_triangles; i++) { + int t = out_triangles[i]; + for (int j = 0; j < 3; j++) { + int v = p_source_indices[3 * t + j]; + r_dest_indices[out_pos++] = v; + } + } + + // Clean up. 
+ memfree(triangle_indices); + memfree(offsets); + memfree(last_score); + memfree(num_active_tris); + memfree(cache_tag); + memfree(triangle_added); + memfree(triangle_score); + memfree(out_triangles); + + return r_dest_indices; +} + +bool VertexCacheOptimizer::reorder_indices_pool(PoolVector &r_indices, uint32_t p_num_triangles, uint32_t p_num_verts) { + LocalVector temp; + temp = r_indices; + if (reorder_indices(temp, p_num_triangles, p_num_verts)) { + r_indices = temp; + return true; + } + return false; +} + +bool VertexCacheOptimizer::reorder_indices(LocalVector &r_indices, uint32_t p_num_triangles, uint32_t p_num_verts) { + LocalVector temp; + temp.resize(r_indices.size()); + if (_reorder_indices((VERTEX_INDEX_TYPE *)temp.ptr(), (VERTEX_INDEX_TYPE *)r_indices.ptr(), p_num_triangles, p_num_verts)) { +#if 0 + uint32_t show = MIN(r_indices.size(), 16); + for (uint32_t n = 0; n < show; n++) { + print_line(itos(n) + " : " + itos(r_indices[n]) + " to " + itos(temp[n])); + } +#endif + + r_indices = temp; + return true; + } + return false; +} diff --git a/core/math/vertex_cache_optimizer.h b/core/math/vertex_cache_optimizer.h new file mode 100644 index 00000000000..31f8c6f6bef --- /dev/null +++ b/core/math/vertex_cache_optimizer.h @@ -0,0 +1,120 @@ +/**************************************************************************/ +/* vertex_cache_optimizer.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef VERTEX_CACHE_OPTIMIZER_H +#define VERTEX_CACHE_OPTIMIZER_H + +// This class is derived from +// https://www.martin.st/thesis/ +// Based on Tom Forsyth's vertex cache optimizer + +/* + Copyright (C) 2008 Martin Storsjo + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. 
If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ + +#include "core/local_vector.h" +#include "core/math/math_defs.h" + +#include + +class VertexCacheOptimizer { + typedef uint32_t VERTEX_INDEX_TYPE; + + // The size of these data types affect the memory usage. + typedef uint16_t SCORE_TYPE; + typedef uint8_t ADJACENCY_TYPE; + typedef int8_t CACHE_POS_TYPE; + typedef int32_t TRIANGLE_INDEX_TYPE; + typedef int32_t ARRAY_INDEX_TYPE; + + struct Constants { + // The size of the precalculated tables. + static const int CACHE_SCORE_TABLE_SIZE = 32; + static const int VALENCE_SCORE_TABLE_SIZE = 32; + static const int MAX_ADJACENCY = UINT8_MAX; + static const int SCORE_SCALING = 7281; + + // Score function constants. + static constexpr float CACHE_DECAY_POWER = 1.5; + static constexpr float LAST_TRI_SCORE = 0.75; + static constexpr float VALENCE_BOOST_SCALE = 2.0; + static constexpr float VALENCE_BOOST_POWER = 0.5; + + // Set these to adjust the performance and result quality. + static const int VERTEX_CACHE_SIZE = 24; + static const int CACHE_FUNCTION_LENGTH = 32; + + static_assert(CACHE_SCORE_TABLE_SIZE >= VERTEX_CACHE_SIZE, "Vertex score table too small"); + }; + + // Precalculated tables. + SCORE_TYPE _cache_position_score[Constants::CACHE_SCORE_TABLE_SIZE]; + SCORE_TYPE _valence_score[Constants::VALENCE_SCORE_TABLE_SIZE]; + + int is_added(const uint8_t *p_triangle_added, int p_x) const { + return p_triangle_added[(p_x) >> 3] & (1 << (p_x & 7)); + } + + void set_added(uint8_t *p_triangle_added, int p_x) const { + p_triangle_added[(p_x) >> 3] |= (1 << (p_x & 7)); + } + + // Precalculate the tables. + void init(); + + // Calculate the score for a vertex. 
+ SCORE_TYPE find_vertex_score(int p_num_active_tris, int p_cache_position); + + // The main reordering function. + VERTEX_INDEX_TYPE *_reorder_indices(VERTEX_INDEX_TYPE *r_dest_indices, const VERTEX_INDEX_TYPE *p_source_indices, int p_num_triangles, int p_num_vertices); + +public: + VertexCacheOptimizer() { + init(); + } + + bool reorder_indices(LocalVector &r_indices, uint32_t p_num_triangles, uint32_t p_num_verts); + bool reorder_indices_pool(PoolVector &r_indices, uint32_t p_num_triangles, uint32_t p_num_verts); +}; + +#endif // VERTEX_CACHE_OPTIMIZER_H diff --git a/doc/classes/Mesh.xml b/doc/classes/Mesh.xml index 950de4e90e3..e93198dd184 100644 --- a/doc/classes/Mesh.xml +++ b/doc/classes/Mesh.xml @@ -192,6 +192,10 @@ Flag used to mark that the array uses an octahedral representation of normal and tangent vectors rather than cartesian. + + Flag used to request vertex cache optimization. + This re-orders indices in order to make best use of GPU vertex caches, which can improve rendering performance particularly with high poly models. + Used to set flags [constant ARRAY_COMPRESS_VERTEX], [constant ARRAY_COMPRESS_NORMAL], [constant ARRAY_COMPRESS_TANGENT], [constant ARRAY_COMPRESS_COLOR], [constant ARRAY_COMPRESS_TEX_UV], [constant ARRAY_COMPRESS_TEX_UV2], [constant ARRAY_COMPRESS_WEIGHTS], and [constant ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION] quickly. [b]Note:[/b] Since this flag enables [constant ARRAY_COMPRESS_COLOR], vertex colors will be stored as 8-bit unsigned integers. This will clamp overbright colors to [code]Color(1, 1, 1, 1)[/code] and reduce colors' precision. 
diff --git a/editor/import/resource_importer_obj.cpp b/editor/import/resource_importer_obj.cpp index fcd3793c706..bdcc45c0c91 100644 --- a/editor/import/resource_importer_obj.cpp +++ b/editor/import/resource_importer_obj.cpp @@ -495,11 +495,12 @@ String ResourceImporterOBJ::get_preset_name(int p_idx) const { } void ResourceImporterOBJ::get_import_options(List *r_options, int p_preset) const { - r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "generate_tangents"), true)); r_options->push_back(ImportOption(PropertyInfo(Variant::VECTOR3, "scale_mesh"), Vector3(1, 1, 1))); r_options->push_back(ImportOption(PropertyInfo(Variant::VECTOR3, "offset_mesh"), Vector3(0, 0, 0))); - r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "octahedral_compression"), true)); r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "optimize_mesh_flags", PROPERTY_HINT_FLAGS, "Vertex,Normal,Tangent,Color,TexUV,TexUV2,Bones,Weights,Index"), VS::ARRAY_COMPRESS_DEFAULT >> VS::ARRAY_COMPRESS_BASE)); + r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "generate_tangents"), true)); + r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "vertex_cache_optimization"), true)); + r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "octahedral_compression"), true)); } bool ResourceImporterOBJ::get_option_visibility(const String &p_option, const Map &p_options) const { return true; @@ -512,6 +513,9 @@ Error ResourceImporterOBJ::import(const String &p_source_file, const String &p_s if (bool(p_options["octahedral_compression"])) { compress_flags |= VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION; } + if (bool(p_options["vertex_cache_optimization"])) { + compress_flags |= VS::ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION; + } Error err = _parse_obj(p_source_file, meshes, true, p_options["generate_tangents"], compress_flags, p_options["scale_mesh"], p_options["offset_mesh"], nullptr); ERR_FAIL_COND_V(err != OK, err); diff --git 
a/editor/import/resource_importer_scene.cpp b/editor/import/resource_importer_scene.cpp index d8dd3bbc4c0..a5862664f21 100644 --- a/editor/import/resource_importer_scene.cpp +++ b/editor/import/resource_importer_scene.cpp @@ -1109,9 +1109,10 @@ void ResourceImporterScene::get_import_options(List *r_options, in r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "materials/location", PROPERTY_HINT_ENUM, "Node,Mesh"), (meshes_out || materials_out) ? 1 : 0)); r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "materials/storage", PROPERTY_HINT_ENUM, "Built-In,Files (.material),Files (.tres)", PROPERTY_USAGE_DEFAULT | PROPERTY_USAGE_UPDATE_ALL_IF_MODIFIED), materials_out ? 1 : 0)); r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "materials/keep_on_reimport"), materials_out)); - r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "meshes/octahedral_compression"), true)); r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "meshes/compress", PROPERTY_HINT_FLAGS, "Vertex,Normal,Tangent,Color,TexUV,TexUV2,Bones,Weights,Index"), VS::ARRAY_COMPRESS_DEFAULT >> VS::ARRAY_COMPRESS_BASE)); r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "meshes/ensure_tangents"), true)); + r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "meshes/octahedral_compression"), true)); + r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "meshes/vertex_cache_optimization"), true)); r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "meshes/storage", PROPERTY_HINT_ENUM, "Built-In,Files (.mesh),Files (.tres)"), meshes_out ? 
1 : 0)); r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "meshes/light_baking", PROPERTY_HINT_ENUM, "Disabled,Enable,Gen Lightmaps", PROPERTY_USAGE_DEFAULT | PROPERTY_USAGE_UPDATE_ALL_IF_MODIFIED), 0)); r_options->push_back(ImportOption(PropertyInfo(Variant::REAL, "meshes/lightmap_texel_size", PROPERTY_HINT_RANGE, "0.001,100,0.001"), 0.1)); @@ -1257,6 +1258,9 @@ Error ResourceImporterScene::import(const String &p_source_file, const String &p if (bool(p_options["meshes/octahedral_compression"])) { compress_flags |= VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION; } + if (bool(p_options["meshes/vertex_cache_optimization"])) { + compress_flags |= VS::ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION; + } if (bool(p_options["meshes/ensure_tangents"])) { import_flags |= EditorSceneImporter::IMPORT_GENERATE_TANGENT_ARRAYS; } diff --git a/scene/resources/mesh.cpp b/scene/resources/mesh.cpp index d83a3461c4d..f2f723f2143 100644 --- a/scene/resources/mesh.cpp +++ b/scene/resources/mesh.cpp @@ -603,6 +603,7 @@ void Mesh::_bind_methods() { BIND_ENUM_CONSTANT(ARRAY_FLAG_USE_2D_VERTICES); BIND_ENUM_CONSTANT(ARRAY_FLAG_USE_16_BIT_BONES); BIND_ENUM_CONSTANT(ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION); + BIND_ENUM_CONSTANT(ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION); BIND_ENUM_CONSTANT(ARRAY_COMPRESS_DEFAULT); diff --git a/scene/resources/mesh.h b/scene/resources/mesh.h index f40ad164ab7..dab429c538c 100644 --- a/scene/resources/mesh.h +++ b/scene/resources/mesh.h @@ -98,6 +98,7 @@ public: ARRAY_FLAG_USE_16_BIT_BONES = ARRAY_COMPRESS_INDEX << 2, ARRAY_FLAG_USE_DYNAMIC_UPDATE = ARRAY_COMPRESS_INDEX << 3, ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION = ARRAY_COMPRESS_INDEX << 4, + ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION = ARRAY_COMPRESS_INDEX << 5, ARRAY_COMPRESS_DEFAULT = ARRAY_COMPRESS_NORMAL | ARRAY_COMPRESS_TANGENT | ARRAY_COMPRESS_COLOR | ARRAY_COMPRESS_TEX_UV | ARRAY_COMPRESS_TEX_UV2 | ARRAY_COMPRESS_WEIGHTS | ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION diff --git a/servers/visual_server.cpp 
b/servers/visual_server.cpp index 2b53cb09a95..514cfef8b43 100644 --- a/servers/visual_server.cpp +++ b/servers/visual_server.cpp @@ -31,6 +31,7 @@ #include "visual_server.h" #include "core/engine.h" +#include "core/math/vertex_cache_optimizer.h" #include "core/method_bind_ext.gen.inc" #include "core/project_settings.h" @@ -768,6 +769,14 @@ Error VisualServer::_surface_set_data(Array p_arrays, uint32_t p_format, uint32_ ERR_FAIL_COND_V(indices.size() == 0, ERR_INVALID_PARAMETER); ERR_FAIL_COND_V(indices.size() != p_index_array_len, ERR_INVALID_PARAMETER); + // Vertex cache optimization? + if (p_format & ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION) { + // Expecting triangles. + ERR_FAIL_COND_V((indices.size() % 3) != 0, ERR_INVALID_PARAMETER); + VertexCacheOptimizer opt; + opt.reorder_indices_pool(indices, indices.size() / 3, p_vertex_array_len); + } + /* determine whether using 16 or 32 bits indices */ PoolVector::Read read = indices.read(); @@ -1276,6 +1285,11 @@ void VisualServer::mesh_add_surface_from_arrays(RID p_mesh, PrimitiveType p_prim int index_array_len = 0; int array_len = 0; + // Only implemented for triangles. 
+ if (p_primitive != PrimitiveType::PRIMITIVE_TRIANGLES) { + p_compress_format &= ~ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION; + } + bool res = _mesh_find_format(p_primitive, p_arrays, p_blend_shapes, p_compress_format, use_split_stream, offsets, attributes_base_offset, attributes_stride, positions_stride, format, index_array_len, array_len); ERR_FAIL_COND(!res); diff --git a/servers/visual_server.h b/servers/visual_server.h index fd34fe13938..0d8991fcf4a 100644 --- a/servers/visual_server.h +++ b/servers/visual_server.h @@ -273,6 +273,7 @@ public: ARRAY_FLAG_USE_16_BIT_BONES = ARRAY_COMPRESS_INDEX << 2, ARRAY_FLAG_USE_DYNAMIC_UPDATE = ARRAY_COMPRESS_INDEX << 3, ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION = ARRAY_COMPRESS_INDEX << 4, + ARRAY_FLAG_USE_VERTEX_CACHE_OPTIMIZATION = ARRAY_COMPRESS_INDEX << 5, ARRAY_COMPRESS_DEFAULT = ARRAY_COMPRESS_NORMAL | ARRAY_COMPRESS_TANGENT | ARRAY_COMPRESS_COLOR | ARRAY_COMPRESS_TEX_UV | ARRAY_COMPRESS_TEX_UV2 | ARRAY_COMPRESS_WEIGHTS | ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION