From cf9df3b5d49a80e8adad87dc362e479cc1d7d436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Thu, 22 Dec 2022 16:22:33 +0100 Subject: [PATCH] meshoptimizer: Sync with upstream commit 4a287848f https://github.com/zeux/meshoptimizer/commit/4a287848fd664ae1c3fc8e5e008560534ceeb526 --- COPYRIGHT.txt | 2 +- scene/3d/occluder_instance_3d.cpp | 5 +- scene/resources/importer_mesh.cpp | 2 + scene/resources/surface_tool.cpp | 3 +- scene/resources/surface_tool.h | 9 +- thirdparty/README.md | 2 +- thirdparty/meshoptimizer/LICENSE.md | 2 +- thirdparty/meshoptimizer/clusterizer.cpp | 152 +++++++++++------- thirdparty/meshoptimizer/indexgenerator.cpp | 6 +- thirdparty/meshoptimizer/meshoptimizer.h | 70 ++++---- thirdparty/meshoptimizer/overdrawanalyzer.cpp | 2 +- .../meshoptimizer/overdrawoptimizer.cpp | 2 +- ...-aware-simplify-distance-only-metric.patch | 30 ++-- .../patches/attribute-aware-simplify.patch | 49 +++--- thirdparty/meshoptimizer/simplifier.cpp | 24 +-- thirdparty/meshoptimizer/spatialorder.cpp | 4 +- thirdparty/meshoptimizer/vcacheoptimizer.cpp | 28 ++-- thirdparty/meshoptimizer/vertexcodec.cpp | 88 ++++++++-- thirdparty/meshoptimizer/vertexfilter.cpp | 2 +- 19 files changed, 295 insertions(+), 187 deletions(-) diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index 8a966dcb055..a3e06c545fc 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -288,7 +288,7 @@ License: Apache-2.0 Files: ./thirdparty/meshoptimizer/ Comment: meshoptimizer -Copyright: 2016-2021, Arseny Kapoulkine +Copyright: 2016-2022, Arseny Kapoulkine License: Expat Files: ./thirdparty/minimp3/ diff --git a/scene/3d/occluder_instance_3d.cpp b/scene/3d/occluder_instance_3d.cpp index 632b27953fb..594580a205b 100644 --- a/scene/3d/occluder_instance_3d.cpp +++ b/scene/3d/occluder_instance_3d.cpp @@ -542,13 +542,14 @@ void OccluderInstance3D::_bake_surface(const Transform3D &p_transform, Array p_s float error = -1.0f; int target_index_count = MIN(indices.size(), 36); + const int simplify_options = SurfaceTool::SIMPLIFY_LOCK_BORDER; + uint32_t index_count = SurfaceTool::simplify_func( (unsigned int *)indices.ptrw(), (unsigned int *)indices.ptr(), indices.size(), vertices_f32.ptr(), vertices.size(), sizeof(float) * 3, - target_index_count, target_error, &error); - + target_index_count, target_error, simplify_options, &error); indices.resize(index_count); } diff --git a/scene/resources/importer_mesh.cpp b/scene/resources/importer_mesh.cpp index 742ac2bbd91..672581bbe25 100644 --- a/scene/resources/importer_mesh.cpp +++ b/scene/resources/importer_mesh.cpp @@ -452,6 +452,7 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, float p_normal_spli new_indices.resize(index_count); Vector merged_normals_f32 = vector3_to_float32_array(merged_normals.ptr(), merged_normals.size()); + const int simplify_options = SurfaceTool::SIMPLIFY_LOCK_BORDER; size_t new_index_count = SurfaceTool::simplify_with_attrib_func( (unsigned int *)new_indices.ptrw(), @@ -460,6 +461,7 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, float p_normal_spli sizeof(float) * 3, // Vertex stride index_target, max_mesh_error, + simplify_options, &mesh_error, merged_normals_f32.ptr(), normal_weights.ptr(), 3); diff --git a/scene/resources/surface_tool.cpp b/scene/resources/surface_tool.cpp index 5a2b917b9a9..16cc1c33701 100644 --- a/scene/resources/surface_tool.cpp +++ b/scene/resources/surface_tool.cpp @@ -1307,7 +1307,8 @@ Vector SurfaceTool::generate_lod(float p_threshold, int p_target_index_coun } float error; - uint32_t index_count = simplify_func((unsigned int *)lod.ptrw(), (unsigned int *)index_array.ptr(), index_array.size(), vertices.ptr(), vertex_array.size(), sizeof(float) * 3, p_target_index_count, p_threshold, &error); + const int simplify_options = SIMPLIFY_LOCK_BORDER; + uint32_t index_count = simplify_func((unsigned int *)lod.ptrw(), (unsigned int *)index_array.ptr(), index_array.size(), vertices.ptr(), vertex_array.size(), sizeof(float) * 3, p_target_index_count, p_threshold, simplify_options, &error); ERR_FAIL_COND_V(index_count == 0, lod); lod.resize(index_count); diff --git a/scene/resources/surface_tool.h b/scene/resources/surface_tool.h index 00438c4a534..77318bb061f 100644 --- a/scene/resources/surface_tool.h +++ b/scene/resources/surface_tool.h @@ -74,11 +74,16 @@ public: SKIN_8_WEIGHTS }; + enum { + /* Do not move vertices that are located on the topological border (vertices on triangle edges that don't have a paired triangle). Useful for simplifying portions of the larger mesh. */ + SIMPLIFY_LOCK_BORDER = 1 << 0, // From meshopt_SimplifyLockBorder + }; + typedef void (*OptimizeVertexCacheFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, size_t vertex_count); static OptimizeVertexCacheFunc optimize_vertex_cache_func; - typedef size_t (*SimplifyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_error); + typedef size_t (*SimplifyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float *r_error); static SimplifyFunc simplify_func; - typedef size_t (*SimplifyWithAttribFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float *result_error, const float *attributes, const float *attribute_weights, size_t attribute_count); + typedef size_t (*SimplifyWithAttribFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, unsigned int options, float *result_error, const float *attributes, const float *attribute_weights, size_t attribute_count); static SimplifyWithAttribFunc simplify_with_attrib_func; typedef float (*SimplifyScaleFunc)(const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride); static SimplifyScaleFunc simplify_scale_func; diff --git a/thirdparty/README.md b/thirdparty/README.md index 38001b87822..38ace2c2e3d 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -376,7 +376,7 @@ File extracted from upstream release tarball: ## meshoptimizer - Upstream: https://github.com/zeux/meshoptimizer -- Version: git (ea4558d1c0f217f1d67ed7fe0b07896ece88ae18, 2022) +- Version: git (4a287848fd664ae1c3fc8e5e008560534ceeb526, 2022) - License: MIT Files extracted from upstream repository: diff --git a/thirdparty/meshoptimizer/LICENSE.md b/thirdparty/meshoptimizer/LICENSE.md index 3c52415f625..b673c248b22 100644 --- a/thirdparty/meshoptimizer/LICENSE.md +++ b/thirdparty/meshoptimizer/LICENSE.md @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2016-2021 Arseny Kapoulkine +Copyright (c) 2016-2022 Arseny Kapoulkine Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thirdparty/meshoptimizer/clusterizer.cpp b/thirdparty/meshoptimizer/clusterizer.cpp index b1f7b359c18..c4672ad6063 100644 --- a/thirdparty/meshoptimizer/clusterizer.cpp +++ b/thirdparty/meshoptimizer/clusterizer.cpp @@ -283,6 +283,79 @@ static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int return result; } +static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Cone* meshlet_cone, unsigned int* meshlet_vertices, const unsigned int* indices, const TriangleAdjacency2& adjacency, const Cone* triangles, const unsigned int* live_triangles, const unsigned char* used, float meshlet_expected_radius, float cone_weight, unsigned int* out_extra) +{ + unsigned int best_triangle = ~0u; + unsigned int best_extra = 5; + float best_score = FLT_MAX; + + for (size_t i = 0; i < meshlet.vertex_count; ++i) + { + unsigned int index = meshlet_vertices[meshlet.vertex_offset + i]; + + unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index]; + size_t neighbors_size = adjacency.counts[index]; + + for (size_t j = 0; j < neighbors_size; ++j) + { + unsigned int triangle = neighbors[j]; + unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; + + unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff); + + // triangles that don't add new vertices to meshlets are max. priority + if (extra != 0) + { + // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets + if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1) + extra = 0; + + extra++; + } + + // since topology-based priority is always more important than the score, we can skip scoring in some cases + if (extra > best_extra) + continue; + + float score = 0; + + // caller selects one of two scoring functions: geometrical (based on meshlet cone) or topological (based on remaining triangles) + if (meshlet_cone) + { + const Cone& tri_cone = triangles[triangle]; + + float distance2 = + (tri_cone.px - meshlet_cone->px) * (tri_cone.px - meshlet_cone->px) + + (tri_cone.py - meshlet_cone->py) * (tri_cone.py - meshlet_cone->py) + + (tri_cone.pz - meshlet_cone->pz) * (tri_cone.pz - meshlet_cone->pz); + + float spread = tri_cone.nx * meshlet_cone->nx + tri_cone.ny * meshlet_cone->ny + tri_cone.nz * meshlet_cone->nz; + + score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius); + } + else + { + // each live_triangles entry is >= 1 since it includes the current triangle we're processing + score = float(live_triangles[a] + live_triangles[b] + live_triangles[c] - 3); + } + + // note that topology-based priority is always more important than the score + // this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost + if (extra < best_extra || score < best_score) + { + best_triangle = triangle; + best_extra = extra; + best_score = score; + } + } + } + + if (out_extra) + *out_extra = best_extra; + + return best_triangle; +} + struct KDNode { union @@ -464,13 +537,15 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve using namespace meshopt; assert(index_count % 3 == 0); - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned + assert(cone_weight >= 0 && cone_weight <= 1); + meshopt_Allocator allocator; TriangleAdjacency2 adjacency = {}; @@ -511,65 +586,18 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve for (;;) { - unsigned int best_triangle = ~0u; - unsigned int best_extra = 5; - float best_score = FLT_MAX; - Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count); - for (size_t i = 0; i < meshlet.vertex_count; ++i) + unsigned int best_extra = 0; + unsigned int best_triangle = getNeighborTriangle(meshlet, &meshlet_cone, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, cone_weight, &best_extra); + + // if the best triangle doesn't fit into current meshlet, the spatial scoring we've used is not very meaningful, so we re-select using topological scoring + if (best_triangle != ~0u && (meshlet.vertex_count + best_extra > max_vertices || meshlet.triangle_count >= max_triangles)) { - unsigned int index = meshlet_vertices[meshlet.vertex_offset + i]; - - unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; - size_t neighbours_size = adjacency.counts[index]; - - for (size_t j = 0; j < neighbours_size; ++j) - { - unsigned int triangle = neighbours[j]; - assert(!emitted_flags[triangle]); - - unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; - assert(a < vertex_count && b < vertex_count && c < vertex_count); - - unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff); - - // triangles that don't add new vertices to meshlets are max. priority - if (extra != 0) - { - // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets - if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1) - extra = 0; - - extra++; - } - - // since topology-based priority is always more important than the score, we can skip scoring in some cases - if (extra > best_extra) - continue; - - const Cone& tri_cone = triangles[triangle]; - - float distance2 = - (tri_cone.px - meshlet_cone.px) * (tri_cone.px - meshlet_cone.px) + - (tri_cone.py - meshlet_cone.py) * (tri_cone.py - meshlet_cone.py) + - (tri_cone.pz - meshlet_cone.pz) * (tri_cone.pz - meshlet_cone.pz); - - float spread = tri_cone.nx * meshlet_cone.nx + tri_cone.ny * meshlet_cone.ny + tri_cone.nz * meshlet_cone.nz; - - float score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius); - - // note that topology-based priority is always more important than the score - // this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost - if (extra < best_extra || score < best_score) - { - best_triangle = triangle; - best_extra = extra; - best_score = score; - } - } + best_triangle = getNeighborTriangle(meshlet, NULL, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, 0.f, NULL); } + // when we run out of neighboring triangles we need to switch to spatial search; we currently just pick the closest triangle irrespective of connectivity if (best_triangle == ~0u) { float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz}; @@ -604,16 +632,16 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve { unsigned int index = indices[best_triangle * 3 + k]; - unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; - size_t neighbours_size = adjacency.counts[index]; + unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index]; + size_t neighbors_size = adjacency.counts[index]; - for (size_t i = 0; i < neighbours_size; ++i) + for (size_t i = 0; i < neighbors_size; ++i) { - unsigned int tri = neighbours[i]; + unsigned int tri = neighbors[i]; if (tri == best_triangle) { - neighbours[i] = neighbours[neighbours_size - 1]; + neighbors[i] = neighbors[neighbors_size - 1]; adjacency.counts[index]--; break; } @@ -687,7 +715,7 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t assert(index_count % 3 == 0); assert(index_count / 3 <= kMeshletMaxTriangles); - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); (void)vertex_count; @@ -839,7 +867,7 @@ meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices using namespace meshopt; assert(triangle_count <= kMeshletMaxTriangles); - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); unsigned int indices[kMeshletMaxTriangles * 3]; diff --git a/thirdparty/meshoptimizer/indexgenerator.cpp b/thirdparty/meshoptimizer/indexgenerator.cpp index f60db0dc4f8..cad808a2b18 100644 --- a/thirdparty/meshoptimizer/indexgenerator.cpp +++ b/thirdparty/meshoptimizer/indexgenerator.cpp @@ -187,7 +187,7 @@ size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int using namespace meshopt; assert(indices || index_count == vertex_count); - assert(index_count % 3 == 0); + assert(!indices || index_count % 3 == 0); assert(vertex_size > 0 && vertex_size <= 256); meshopt_Allocator allocator; @@ -412,7 +412,7 @@ void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsig using namespace meshopt; assert(index_count % 3 == 0); - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); meshopt_Allocator allocator; @@ -483,7 +483,7 @@ void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const un using namespace meshopt; assert(index_count % 3 == 0); - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); meshopt_Allocator allocator; diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h index 463fad29daa..46d28d3ea3b 100644 --- a/thirdparty/meshoptimizer/meshoptimizer.h +++ b/thirdparty/meshoptimizer/meshoptimizer.h @@ -1,7 +1,7 @@ /** - * meshoptimizer - version 0.17 + * meshoptimizer - version 0.18 * - * Copyright (C) 2016-2021, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2016-2022, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at https://github.com/zeux/meshoptimizer * * This library is distributed under the MIT License. See notice at the end of this file. @@ -12,7 +12,7 @@ #include /* Version macro; major * 1000 + minor * 10 + patch */ -#define MESHOPTIMIZER_VERSION 170 /* 0.17 */ +#define MESHOPTIMIZER_VERSION 180 /* 0.18 */ /* If no API is defined, assume default */ #ifndef MESHOPTIMIZER_API @@ -37,8 +37,8 @@ extern "C" { #endif /** - * Vertex attribute stream, similar to glVertexPointer - * Each element takes size bytes, with stride controlling the spacing between successive elements. + * Vertex attribute stream + * Each element takes size bytes, beginning at data, with stride controlling the spacing between successive elements (stride >= size). */ struct meshopt_Stream { @@ -115,7 +115,7 @@ MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* dest * This can be used to implement algorithms like silhouette detection/expansion and other forms of GS-driven rendering. * * destination must contain enough space for the resulting index buffer (index_count*2 elements) - * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * vertex_positions should have float3 position in the first 12 bytes of each vertex */ MESHOPTIMIZER_API void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); @@ -131,7 +131,7 @@ MESHOPTIMIZER_API void meshopt_generateAdjacencyIndexBuffer(unsigned int* destin * See "Tessellation on Any Budget" (John McDonald, GDC 2011) for implementation details. * * destination must contain enough space for the resulting index buffer (index_count*4 elements) - * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * vertex_positions should have float3 position in the first 12 bytes of each vertex */ MESHOPTIMIZER_API void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); @@ -171,7 +171,7 @@ MESHOPTIMIZER_API void meshopt_optimizeVertexCacheFifo(unsigned int* destination * * destination must contain enough space for the resulting index buffer (index_count elements) * indices must contain index data that is the result of meshopt_optimizeVertexCache (*not* the original mesh indices!) - * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * vertex_positions should have float3 position in the first 12 bytes of each vertex * threshold indicates how much the overdraw optimizer can degrade vertex cache efficiency (1.05 = up to 5%) to reduce overdraw more efficiently */ MESHOPTIMIZER_API void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold); @@ -313,7 +313,21 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterQuat(void* destination, size MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data); /** - * Experimental: Mesh simplifier + * Simplification options + */ +enum +{ + /* Do not move vertices that are located on the topological border (vertices on triangle edges that don't have a paired triangle). Useful for simplifying portions of the larger mesh. */ + meshopt_SimplifyLockBorder = 1 << 0, +}; + +/** + * Experimental: Mesh simplifier with attribute metric; attributes follow xyz position data atm (vertex data must contain 3 + attribute_count floats per vertex) + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error, const float* attributes, const float* attribute_weights, size_t attribute_count); + +/** + * Mesh simplifier * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible * The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error. * If not all attributes from the input mesh are required, it's recommended to reindex the mesh using meshopt_generateShadowIndexBuffer prior to simplification. @@ -322,16 +336,12 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_ * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. * * destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)! - * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer - * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation + * vertex_positions should have float3 position in the first 12 bytes of each vertex + * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation; value range [0..1] + * options must be a bitmask composed of meshopt_SimplifyX options; 0 is a safe default * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification */ -MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error); - -/** - * Experimental: Mesh simplifier with attribute metric; attributes follow xyz position data atm (vertex data must contain 3 + attribute_count floats per vertex) - */ -MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* result_error, const float* attributes, const float* attribute_weights, size_t attribute_count); +MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error); /** * Experimental: Mesh simplifier (sloppy) @@ -342,8 +352,8 @@ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* d * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. * * destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)! - * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer - * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation + * vertex_positions should have float3 position in the first 12 bytes of each vertex + * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation; value range [0..1] * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification */ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error); @@ -356,17 +366,17 @@ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destinati * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. * * destination must contain enough space for the target index buffer (target_vertex_count elements) - * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * vertex_positions should have float3 position in the first 12 bytes of each vertex */ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count); /** - * Experimental: Returns the error scaling factor used by the simplifier to convert between absolute and relative extents + * Returns the error scaling factor used by the simplifier to convert between absolute and relative extents * * Absolute error must be *divided* by the scaling factor before passing it to meshopt_simplify as target_error * Relative error returned by meshopt_simplify via result_error must be *multiplied* by the scaling factor to get absolute error. */ -MESHOPTIMIZER_EXPERIMENTAL float meshopt_simplifyScale(const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +MESHOPTIMIZER_API float meshopt_simplifyScale(const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); /** * Mesh stripifier @@ -418,7 +428,7 @@ struct meshopt_OverdrawStatistics * Returns overdraw statistics using a software rasterizer * Results may not match actual GPU performance * - * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * vertex_positions should have float3 position in the first 12 bytes of each vertex */ MESHOPTIMIZER_API struct meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); @@ -456,7 +466,7 @@ struct meshopt_Meshlet * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound * meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3 - * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * vertex_positions should have float3 position in the first 12 bytes of each vertex * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512) * cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency */ @@ -498,7 +508,7 @@ struct meshopt_Bounds * The formula that uses the apex is slightly more accurate but needs the apex; if you are already using bounding sphere * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable. * - * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * vertex_positions should have float3 position in the first 12 bytes of each vertex * index_count/3 should be less than or equal to 512 (the function assumes clusters of limited size) */ MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); @@ -518,7 +528,7 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortRemap(unsigned int* destinati * Reorders triangles for spatial locality, and generates a new index buffer. The resulting index buffer can be used with other functions like optimizeVertexCache. * * destination must contain enough space for the resulting index buffer (index_count elements) - * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * vertex_positions should have float3 position in the first 12 bytes of each vertex */ MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); @@ -610,7 +620,7 @@ inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_s template inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); template -inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = 0); +inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = 0); template inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = 0); template @@ -945,12 +955,12 @@ inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const } template -inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error) +inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error) { meshopt_IndexAdapter in(0, indices, index_count); meshopt_IndexAdapter out(destination, 0, index_count); - return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, result_error); + return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, options, result_error); } template @@ -1039,7 +1049,7 @@ inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_ #endif /** - * Copyright (c) 2016-2021 Arseny Kapoulkine + * Copyright (c) 2016-2022 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation diff --git a/thirdparty/meshoptimizer/overdrawanalyzer.cpp b/thirdparty/meshoptimizer/overdrawanalyzer.cpp index 8d5859ba396..8b6f254134b 100644 --- a/thirdparty/meshoptimizer/overdrawanalyzer.cpp +++ b/thirdparty/meshoptimizer/overdrawanalyzer.cpp @@ -147,7 +147,7 @@ meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, using namespace meshopt; assert(index_count % 3 == 0); - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); meshopt_Allocator allocator; diff --git a/thirdparty/meshoptimizer/overdrawoptimizer.cpp b/thirdparty/meshoptimizer/overdrawoptimizer.cpp index 143656ed762..cc22dbcffc1 100644 --- a/thirdparty/meshoptimizer/overdrawoptimizer.cpp +++ b/thirdparty/meshoptimizer/overdrawoptimizer.cpp @@ -272,7 +272,7 @@ void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* ind using namespace meshopt; assert(index_count % 3 == 0); - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); meshopt_Allocator allocator; diff --git a/thirdparty/meshoptimizer/patches/attribute-aware-simplify-distance-only-metric.patch b/thirdparty/meshoptimizer/patches/attribute-aware-simplify-distance-only-metric.patch index 21daac6eec8..5cac985dc51 100644 --- a/thirdparty/meshoptimizer/patches/attribute-aware-simplify-distance-only-metric.patch +++ b/thirdparty/meshoptimizer/patches/attribute-aware-simplify-distance-only-metric.patch @@ -1,5 +1,5 @@ diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp -index 5e92e2dc73..e40c141e76 100644 +index d8d4a67391..3847afc736 100644 --- a/thirdparty/meshoptimizer/simplifier.cpp +++ b/thirdparty/meshoptimizer/simplifier.cpp @@ -20,7 +20,7 @@ @@ -11,7 +11,7 @@ index 5e92e2dc73..e40c141e76 100644 // This work is based on: // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997 -@@ -453,6 +453,7 @@ struct Collapse +@@ -458,6 +458,7 @@ struct Collapse float error; unsigned int errorui; }; @@ -19,7 +19,7 @@ index 5e92e2dc73..e40c141e76 100644 }; static float normalize(Vector3& v) -@@ -533,6 +534,34 @@ static float quadricError(const Quadric& Q, const Vector3& v) +@@ -538,6 +539,34 @@ static float quadricError(const Quadric& Q, const Vector3& v) return fabsf(r) * s; } @@ -54,7 +54,7 @@ index 5e92e2dc73..e40c141e76 100644 static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, float w) { float aw = a * w; -@@ -688,7 +717,7 @@ static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3 +@@ -693,7 +722,7 @@ static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3 } #endif @@ -63,7 +63,7 @@ index 5e92e2dc73..e40c141e76 100644 { for (size_t i = 0; i < index_count; i += 3) { -@@ -698,6 +727,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic +@@ -703,6 +732,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic Quadric Q; quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f); @@ -73,7 +73,7 @@ index 5e92e2dc73..e40c141e76 100644 #if ATTRIBUTES quadricUpdateAttributes(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], Q.w); -@@ -708,7 +740,7 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic +@@ -713,7 +745,7 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic } } @@ -82,7 +82,7 @@ index 5e92e2dc73..e40c141e76 100644 { for (size_t i = 0; i < index_count; i += 3) { -@@ -752,6 +784,9 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indic +@@ -757,6 +789,9 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indic quadricAdd(vertex_quadrics[remap[i0]], Q); quadricAdd(vertex_quadrics[remap[i1]], Q); @@ -92,7 +92,7 @@ index 5e92e2dc73..e40c141e76 100644 } } } -@@ -856,7 +891,7 @@ static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices +@@ -861,7 +896,7 @@ static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices return collapse_count; } @@ -101,7 +101,7 @@ index 5e92e2dc73..e40c141e76 100644 { for (size_t i = 0; i < collapse_count; ++i) { -@@ -876,10 +911,14 @@ static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const +@@ -881,10 +916,14 @@ static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const float ei = quadricError(qi, vertex_positions[i1]); float ej = quadricError(qj, vertex_positions[j1]); @@ -116,7 +116,7 @@ index 5e92e2dc73..e40c141e76 100644 } } -@@ -976,7 +1015,7 @@ static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapse +@@ -981,7 +1020,7 @@ static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapse } } @@ -125,7 +125,7 @@ index 5e92e2dc73..e40c141e76 100644 { size_t edge_collapses = 0; size_t triangle_collapses = 0; -@@ -1038,6 +1077,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* +@@ -1043,6 +1082,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* assert(collapse_remap[r1] == r1); quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]); @@ -133,7 +133,7 @@ index 5e92e2dc73..e40c141e76 100644 if (vertex_kind[i0] == Kind_Complex) { -@@ -1075,7 +1115,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* +@@ -1080,7 +1120,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* triangle_collapses += (vertex_kind[i0] == Kind_Border) ? 1 : 2; edge_collapses++; @@ -142,7 +142,7 @@ index 5e92e2dc73..e40c141e76 100644 } #if TRACE -@@ -1463,9 +1503,11 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned +@@ -1469,9 +1509,11 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned Quadric* vertex_quadrics = allocator.allocate(vertex_count); memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric)); @@ -156,7 +156,7 @@ index 5e92e2dc73..e40c141e76 100644 if (result != indices) memcpy(result, indices, index_count * sizeof(unsigned int)); -@@ -1496,7 +1538,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned +@@ -1502,7 +1544,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned if (edge_collapse_count == 0) break; @@ -165,7 +165,7 @@ index 5e92e2dc73..e40c141e76 100644 #if TRACE > 1 dumpEdgeCollapses(edge_collapses, edge_collapse_count, vertex_kind); -@@ -1515,7 +1557,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned +@@ -1521,7 +1563,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned printf("pass %d: ", int(pass_count++)); #endif diff --git a/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch b/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch index 33a17fe9fa7..c065026a7d0 100644 --- a/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch +++ b/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch @@ -1,21 +1,21 @@ diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h -index be4b765d97..463fad29da 100644 +index d95725dd71..46d28d3ea3 100644 --- a/thirdparty/meshoptimizer/meshoptimizer.h +++ b/thirdparty/meshoptimizer/meshoptimizer.h -@@ -328,6 +328,11 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_ - */ - MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error); +@@ -321,6 +321,11 @@ enum + meshopt_SimplifyLockBorder = 1 << 0, + }; +/** + * Experimental: Mesh simplifier with attribute metric; attributes follow xyz position data atm (vertex data must contain 3 + attribute_count floats per vertex) + */ -+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* result_error, const float* attributes, const float* attribute_weights, size_t attribute_count); ++MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error, const float* attributes, const float* attribute_weights, size_t attribute_count); + /** - * Experimental: Mesh simplifier (sloppy) - * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance + * Mesh simplifier + * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp -index a74b08a97d..5e92e2dc73 100644 +index 5f0e9bac31..797329b010 100644 --- a/thirdparty/meshoptimizer/simplifier.cpp +++ b/thirdparty/meshoptimizer/simplifier.cpp @@ -20,6 +20,8 @@ @@ -27,7 +27,7 @@ index a74b08a97d..5e92e2dc73 100644 // This work is based on: // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997 // Michael Garland. Quadric-based polygonal surface simplification. 1999 -@@ -371,6 +373,10 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned +@@ -376,6 +378,10 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned struct Vector3 { float x, y, z; @@ -38,7 +38,7 @@ index a74b08a97d..5e92e2dc73 100644 }; static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) -@@ -427,6 +433,13 @@ struct Quadric +@@ -432,6 +438,13 @@ struct Quadric float a10, a20, a21; float b0, b1, b2, c; float w; @@ -52,7 +52,7 @@ index a74b08a97d..5e92e2dc73 100644 }; struct Collapse -@@ -469,6 +482,16 @@ static void quadricAdd(Quadric& Q, const Quadric& R) +@@ -474,6 +487,16 @@ static void quadricAdd(Quadric& Q, const Quadric& R) Q.b2 += R.b2; Q.c += R.c; Q.w += R.w; @@ -69,7 +69,7 @@ index a74b08a97d..5e92e2dc73 100644 } static float quadricError(const Quadric& Q, const Vector3& v) -@@ -494,6 +517,17 @@ static float quadricError(const Quadric& Q, const Vector3& v) +@@ -499,6 +522,17 @@ static float quadricError(const Quadric& Q, const Vector3& v) r += ry * v.y; r += rz * v.z; @@ -87,7 +87,7 @@ index a74b08a97d..5e92e2dc73 100644 float s = Q.w == 0.f ? 0.f : 1.f / Q.w; return fabsf(r) * s; -@@ -517,6 +551,13 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo +@@ -522,6 +556,13 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo Q.b2 = c * dw; Q.c = d * dw; Q.w = w; @@ -101,7 +101,7 @@ index a74b08a97d..5e92e2dc73 100644 } static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w) -@@ -569,6 +610,84 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3 +@@ -574,6 +615,84 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3 quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight); } @@ -186,7 +186,7 @@ index a74b08a97d..5e92e2dc73 100644 static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap) { for (size_t i = 0; i < index_count; i += 3) -@@ -580,6 +699,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic +@@ -585,6 +704,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic Quadric Q; quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f); @@ -196,29 +196,30 @@ index a74b08a97d..5e92e2dc73 100644 quadricAdd(vertex_quadrics[remap[i0]], Q); quadricAdd(vertex_quadrics[remap[i1]], Q); quadricAdd(vertex_quadrics[remap[i2]], Q); -@@ -1273,13 +1395,19 @@ MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = 0; +@@ -1278,14 +1400,20 @@ MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = 0; #endif - size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error) + size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) +{ -+ return meshopt_simplifyWithAttributes(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, target_index_count, target_error, out_result_error, 0, 0, 0); ++ return meshopt_simplifyWithAttributes(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, target_index_count, target_error, options, out_result_error, 0, 0, 0); +} + -+size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* out_result_error, const float* attributes, const float* attribute_weights, size_t attribute_count) ++size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error, const float* attributes, const float* attribute_weights, size_t attribute_count) { using namespace meshopt; assert(index_count % 3 == 0); -- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); - assert(vertex_positions_stride % sizeof(float) == 0); -+ assert(vertex_stride > 0 && vertex_stride <= 256); ++ assert(vertex_stride >= 12 && vertex_stride <= 256); + assert(vertex_stride % sizeof(float) == 0); assert(target_index_count <= index_count); + assert((options & ~(meshopt_SimplifyLockBorder)) == 0); + assert(attribute_count <= ATTRIBUTES); meshopt_Allocator allocator; -@@ -1293,7 +1421,7 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, +@@ -1299,7 +1427,7 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, // build position remap that maps each vertex to the one with identical position unsigned int* remap = allocator.allocate(vertex_count); unsigned int* wedge = allocator.allocate(vertex_count); @@ -227,7 +228,7 @@ index a74b08a97d..5e92e2dc73 100644 // classify vertices; vertex kind determines collapse rules, see kCanCollapse unsigned char* vertex_kind = allocator.allocate(vertex_count); -@@ -1317,7 +1445,21 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, +@@ -1323,7 +1451,21 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, #endif Vector3* vertex_positions = allocator.allocate(vertex_count); @@ -250,7 +251,7 @@ index a74b08a97d..5e92e2dc73 100644 Quadric* vertex_quadrics = allocator.allocate(vertex_count); memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric)); -@@ -1409,7 +1551,9 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, +@@ -1415,7 +1557,9 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, // result_error is quadratic; we need to remap it back to linear if (out_result_error) diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp index e40c141e762..391a77861ab 100644 --- a/thirdparty/meshoptimizer/simplifier.cpp +++ b/thirdparty/meshoptimizer/simplifier.cpp @@ -254,7 +254,7 @@ static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int return false; } -static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned int* loopback, size_t vertex_count, const EdgeAdjacency& adjacency, const unsigned int* remap, const unsigned int* wedge) +static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned int* loopback, size_t vertex_count, const EdgeAdjacency& adjacency, const unsigned int* remap, const unsigned int* wedge, unsigned int options) { memset(loop, -1, vertex_count * sizeof(unsigned int)); memset(loopback, -1, vertex_count * sizeof(unsigned int)); @@ -364,6 +364,11 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned } } + if (options & meshopt_SimplifyLockBorder) + for (size_t i = 0; i < vertex_count; ++i) + if (result[i] == Kind_Border) + result[i] = Kind_Locked; + #if TRACE printf("locked: many open edges %d, disconnected seam %d, many seam edges %d, many wedges %d\n", int(stats[0]), int(stats[1]), int(stats[2]), int(stats[3])); @@ -1434,19 +1439,20 @@ MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoop = 0; MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = 0; #endif -size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error) +size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) { - return meshopt_simplifyWithAttributes(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, target_index_count, target_error, out_result_error, 0, 0, 0); + return meshopt_simplifyWithAttributes(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, target_index_count, target_error, options, out_result_error, 0, 0, 0); } -size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* out_result_error, const float* attributes, const float* attribute_weights, size_t attribute_count) +size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error, const float* attributes, const float* attribute_weights, size_t attribute_count) { using namespace meshopt; assert(index_count % 3 == 0); - assert(vertex_stride > 0 && vertex_stride <= 256); + assert(vertex_stride >= 12 && vertex_stride <= 256); assert(vertex_stride % sizeof(float) == 0); assert(target_index_count <= index_count); + assert((options & ~(meshopt_SimplifyLockBorder)) == 0); assert(attribute_count <= ATTRIBUTES); meshopt_Allocator allocator; @@ -1467,7 +1473,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned unsigned char* vertex_kind = allocator.allocate(vertex_count); unsigned int* loop = allocator.allocate(vertex_count); unsigned int* loopback = allocator.allocate(vertex_count); - classifyVertices(vertex_kind, loop, loopback, vertex_count, adjacency, remap, wedge); + classifyVertices(vertex_kind, loop, loopback, vertex_count, adjacency, remap, wedge, options); #if TRACE size_t unique_positions = 0; @@ -1605,7 +1611,7 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind using namespace meshopt; assert(index_count % 3 == 0); - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); assert(target_index_count <= index_count); @@ -1736,7 +1742,7 @@ size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_pos { using namespace meshopt; - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); assert(target_vertex_count <= vertex_count); @@ -1848,7 +1854,7 @@ float meshopt_simplifyScale(const float* vertex_positions, size_t vertex_count, { using namespace meshopt; - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); float extent = rescalePositions(NULL, vertex_positions, vertex_count, vertex_positions_stride); diff --git a/thirdparty/meshoptimizer/spatialorder.cpp b/thirdparty/meshoptimizer/spatialorder.cpp index b09f80ac6f3..7b1a0694501 100644 --- a/thirdparty/meshoptimizer/spatialorder.cpp +++ b/thirdparty/meshoptimizer/spatialorder.cpp @@ -113,7 +113,7 @@ void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_pos { using namespace meshopt; - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); meshopt_Allocator allocator; @@ -144,7 +144,7 @@ void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* using namespace meshopt; assert(index_count % 3 == 0); - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); (void)vertex_count; diff --git a/thirdparty/meshoptimizer/vcacheoptimizer.cpp b/thirdparty/meshoptimizer/vcacheoptimizer.cpp index fb8ade4b771..ce8fd3a887b 100644 --- a/thirdparty/meshoptimizer/vcacheoptimizer.cpp +++ b/thirdparty/meshoptimizer/vcacheoptimizer.cpp @@ -110,7 +110,7 @@ static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned return ~0u; } -static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size) +static unsigned int getNextVertexNeighbor(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size) { unsigned int best_candidate = ~0u; int best_priority = -1; @@ -281,16 +281,16 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned { unsigned int index = indices[current_triangle * 3 + k]; - unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; - size_t neighbours_size = adjacency.counts[index]; + unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index]; + size_t neighbors_size = adjacency.counts[index]; - for (size_t i = 0; i < neighbours_size; ++i) + for (size_t i = 0; i < neighbors_size; ++i) { - unsigned int tri = neighbours[i]; + unsigned int tri = neighbors[i]; if (tri == current_triangle) { - neighbours[i] = neighbours[neighbours_size - 1]; + neighbors[i] = neighbors[neighbors_size - 1]; adjacency.counts[index]--; break; } @@ -314,10 +314,10 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned vertex_scores[index] = score; // update scores of vertex triangles - const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index]; - const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[index]; + const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[index]; + const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[index]; - for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it) + for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it) { unsigned int tri = *it; assert(!emitted_flags[tri]); @@ -412,11 +412,11 @@ void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned i { const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top; - // emit all vertex neighbours - const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex]; - const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[current_vertex]; + // emit all vertex neighbors + const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[current_vertex]; + const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[current_vertex]; - for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it) + for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it) { unsigned int triangle = *it; @@ -461,7 +461,7 @@ void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned i const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top; // get next vertex - current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size); + current_vertex = getNextVertexNeighbor(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size); if (current_vertex == ~0u) { diff --git a/thirdparty/meshoptimizer/vertexcodec.cpp b/thirdparty/meshoptimizer/vertexcodec.cpp index 7925ea862c4..4bd11121d2f 100644 --- a/thirdparty/meshoptimizer/vertexcodec.cpp +++ b/thirdparty/meshoptimizer/vertexcodec.cpp @@ -50,6 +50,12 @@ #define SIMD_TARGET #endif +// When targeting AArch64/x64, optimize for latency to allow decoding of individual 16-byte groups to overlap +// We don't do this for 32-bit systems because we need 64-bit math for this and this will hurt in-order CPUs +#if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) +#define SIMD_LATENCYOPT +#endif + #endif // !MESHOPTIMIZER_NO_SIMD #ifdef SIMD_SSE @@ -472,6 +478,18 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi typedef int unaligned_int; #endif +#ifdef SIMD_LATENCYOPT + unsigned int data32; + memcpy(&data32, data, 4); + data32 &= data32 >> 1; + + // arrange bits such that low bits of nibbles of data64 contain all 2-bit elements of data32 + unsigned long long data64 = ((unsigned long long)data32 << 30) | (data32 & 0x3fffffff); + + // adds all 1-bit nibbles together; the sum fits in 4 bits because datacnt=16 would have used mode 3 + int datacnt = int(((data64 & 0x1111111111111111ull) * 0x1111111111111111ull) >> 60); +#endif + __m128i sel2 = _mm_cvtsi32_si128(*reinterpret_cast(data)); __m128i rest = _mm_loadu_si128(reinterpret_cast(data + 4)); @@ -490,11 +508,25 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); +#ifdef SIMD_LATENCYOPT + return data + 4 + datacnt; +#else return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; +#endif } case 2: { +#ifdef SIMD_LATENCYOPT + unsigned long long data64; + memcpy(&data64, data, 8); + data64 &= data64 >> 1; + data64 &= data64 >> 2; + + // adds all 1-bit nibbles together; the sum fits in 4 bits because datacnt=16 would have used mode 3 + int datacnt = int(((data64 & 0x1111111111111111ull) * 0x1111111111111111ull) >> 60); +#endif + __m128i sel4 = _mm_loadl_epi64(reinterpret_cast(data)); __m128i rest = _mm_loadu_si128(reinterpret_cast(data + 8)); @@ -512,7 +544,11 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); +#ifdef SIMD_LATENCYOPT + return data + 8 + datacnt; +#else return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; +#endif } case 3: @@ -604,24 +640,13 @@ static uint8x16_t shuffleBytes(unsigned char mask0, unsigned char mask1, uint8x8 static void neonMoveMask(uint8x16_t mask, unsigned char& mask0, unsigned char& mask1) { - static const unsigned char byte_mask_data[16] = {1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128}; + // magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00 + const uint64_t magic = 0x000103070f1f3f80ull; - uint8x16_t byte_mask = vld1q_u8(byte_mask_data); - uint8x16_t masked = vandq_u8(mask, byte_mask); + uint64x2_t mask2 = vreinterpretq_u64_u8(mask); -#ifdef __aarch64__ - // aarch64 has horizontal sums; MSVC doesn't expose this via arm64_neon.h so this path is exclusive to clang/gcc - mask0 = vaddv_u8(vget_low_u8(masked)); - mask1 = vaddv_u8(vget_high_u8(masked)); -#else - // we need horizontal sums of each half of masked, which can be done in 3 steps (yielding sums of sizes 2, 4, 8) - uint8x8_t sum1 = vpadd_u8(vget_low_u8(masked), vget_high_u8(masked)); - uint8x8_t sum2 = vpadd_u8(sum1, sum1); - uint8x8_t sum3 = vpadd_u8(sum2, sum2); - - mask0 = vget_lane_u8(sum3, 0); - mask1 = vget_lane_u8(sum3, 1); -#endif + mask0 = uint8_t((vgetq_lane_u64(mask2, 0) * magic) >> 56); + mask1 = uint8_t((vgetq_lane_u64(mask2, 1) * magic) >> 56); } static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) @@ -639,6 +664,18 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi case 1: { +#ifdef SIMD_LATENCYOPT + unsigned int data32; + memcpy(&data32, data, 4); + data32 &= data32 >> 1; + + // arrange bits such that low bits of nibbles of data64 contain all 2-bit elements of data32 + unsigned long long data64 = ((unsigned long long)data32 << 30) | (data32 & 0x3fffffff); + + // adds all 1-bit nibbles together; the sum fits in 4 bits because datacnt=16 would have used mode 3 + int datacnt = int(((data64 & 0x1111111111111111ull) * 0x1111111111111111ull) >> 60); +#endif + uint8x8_t sel2 = vld1_u8(data); uint8x8_t sel22 = vzip_u8(vshr_n_u8(sel2, 4), sel2).val[0]; uint8x8x2_t sel2222 = vzip_u8(vshr_n_u8(sel22, 2), sel22); @@ -655,11 +692,25 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi vst1q_u8(buffer, result); +#ifdef SIMD_LATENCYOPT + return data + 4 + datacnt; +#else return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; +#endif } case 2: { +#ifdef SIMD_LATENCYOPT + unsigned long long data64; + memcpy(&data64, data, 8); + data64 &= data64 >> 1; + data64 &= data64 >> 2; + + // adds all 1-bit nibbles together; the sum fits in 4 bits because datacnt=16 would have used mode 3 + int datacnt = int(((data64 & 0x1111111111111111ull) * 0x1111111111111111ull) >> 60); +#endif + uint8x8_t sel4 = vld1_u8(data); uint8x8x2_t sel44 = vzip_u8(vshr_n_u8(sel4, 4), vand_u8(sel4, vdup_n_u8(15))); uint8x16_t sel = vcombine_u8(sel44.val[0], sel44.val[1]); @@ -675,7 +726,11 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi vst1q_u8(buffer, result); +#ifdef SIMD_LATENCYOPT + return data + 8 + datacnt; +#else return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; +#endif } case 3: @@ -715,7 +770,6 @@ static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1 // magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00 const uint64_t magic = 0x000103070f1f3f80ull; - // TODO: This can use v8x16_bitmask in the future mask0 = uint8_t((wasm_i64x2_extract_lane(mask, 0) * magic) >> 56); mask1 = uint8_t((wasm_i64x2_extract_lane(mask, 1) * magic) >> 56); } diff --git a/thirdparty/meshoptimizer/vertexfilter.cpp b/thirdparty/meshoptimizer/vertexfilter.cpp index 606a280aa9e..14a73b1dddc 100644 --- a/thirdparty/meshoptimizer/vertexfilter.cpp +++ b/thirdparty/meshoptimizer/vertexfilter.cpp @@ -931,7 +931,7 @@ void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, in const float* v = &data[i * stride_float]; unsigned int* d = &destination[i * stride_float]; - // use maximum exponent to encode values; this guarantess that mantissa is [-1, 1] + // use maximum exponent to encode values; this guarantees that mantissa is [-1, 1] int exp = -100; for (size_t j = 0; j < stride_float; ++j)