diff --git a/core/io/marshalls.cpp b/core/io/marshalls.cpp index 9ba653e1a90..9f89f5d8c97 100644 --- a/core/io/marshalls.cpp +++ b/core/io/marshalls.cpp @@ -1813,3 +1813,24 @@ Error encode_variant(const Variant &p_variant, uint8_t *r_buffer, int &r_len, bo return OK; } + +Vector vector3_to_float32_array(const Vector3 *vecs, size_t count) { + // We always allocate a new array, and we don't memcpy. + // We also don't consider returning a pointer to the passed vectors when sizeof(real_t) == 4. + // One reason is that we could decide to put a 4th component in Vector3 for SIMD/mobile performance, + // which would cause trouble with these optimizations. + Vector floats; + if (count == 0) { + return floats; + } + floats.resize(count * 3); + float *floats_w = floats.ptrw(); + for (size_t i = 0; i < count; ++i) { + const Vector3 v = vecs[i]; + floats_w[0] = v.x; + floats_w[1] = v.y; + floats_w[2] = v.z; + floats_w += 3; + } + return floats; +} diff --git a/core/io/marshalls.h b/core/io/marshalls.h index fef3a1c2c1b..66e25710663 100644 --- a/core/io/marshalls.h +++ b/core/io/marshalls.h @@ -215,4 +215,6 @@ public: Error decode_variant(Variant &r_variant, const uint8_t *p_buffer, int p_len, int *r_len = nullptr, bool p_allow_objects = false, int p_depth = 0); Error encode_variant(const Variant &p_variant, uint8_t *r_buffer, int &r_len, bool p_full_objects = false, int p_depth = 0); +Vector vector3_to_float32_array(const Vector3 *vecs, size_t count); + #endif // MARSHALLS_H diff --git a/scene/3d/occluder_instance_3d.cpp b/scene/3d/occluder_instance_3d.cpp index 4e1ed5654a8..f150a602b3b 100644 --- a/scene/3d/occluder_instance_3d.cpp +++ b/scene/3d/occluder_instance_3d.cpp @@ -32,6 +32,7 @@ #include "core/config/project_settings.h" #include "core/core_string_names.h" +#include "core/io/marshalls.h" #include "core/math/geometry_2d.h" #include "core/math/triangulate.h" #include "scene/3d/importer_mesh_instance_3d.h" @@ -533,11 +534,20 @@ void OccluderInstance3D::_bake_surface(const Transform3D &p_transform, Array p_s } if (!Math::is_zero_approx(p_simplification_dist) && SurfaceTool::simplify_func) { - float error_scale = SurfaceTool::simplify_scale_func((float *)vertices.ptr(), vertices.size(), sizeof(Vector3)); + Vector vertices_f32 = vector3_to_float32_array(vertices.ptr(), vertices.size()); + + float error_scale = SurfaceTool::simplify_scale_func(vertices_f32.ptr(), vertices.size(), sizeof(float) * 3); float target_error = p_simplification_dist / error_scale; float error = -1.0f; int target_index_count = MIN(indices.size(), 36); - uint32_t index_count = SurfaceTool::simplify_func((unsigned int *)indices.ptrw(), (unsigned int *)indices.ptr(), indices.size(), (float *)vertices.ptr(), vertices.size(), sizeof(Vector3), target_index_count, target_error, &error); + + uint32_t index_count = SurfaceTool::simplify_func( + (unsigned int *)indices.ptrw(), + (unsigned int *)indices.ptr(), + indices.size(), + vertices_f32.ptr(), vertices.size(), sizeof(float) * 3, + target_index_count, target_error, &error); + indices.resize(index_count); } diff --git a/scene/resources/importer_mesh.cpp b/scene/resources/importer_mesh.cpp index d1278f9340f..b5e02b2f76e 100644 --- a/scene/resources/importer_mesh.cpp +++ b/scene/resources/importer_mesh.cpp @@ -30,6 +30,7 @@ #include "importer_mesh.h" +#include "core/io/marshalls.h" #include "core/math/random_pcg.h" #include "core/math/static_raycaster.h" #include "scene/resources/surface_tool.h" @@ -424,9 +425,8 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, float p_normal_spli normal_weights[j] = 2.0; // Give some weight to normal preservation, may be worth exposing as an import setting } - const float max_mesh_error = FLT_MAX; // We don't want to limit by error, just by index target - float scale = SurfaceTool::simplify_scale_func((const float *)merged_vertices_ptr, merged_vertex_count, sizeof(Vector3)); - float mesh_error = 0.0f; + Vector merged_vertices_f32 = vector3_to_float32_array(merged_vertices_ptr, merged_vertex_count); + float scale = SurfaceTool::simplify_scale_func(merged_vertices_f32.ptr(), merged_vertex_count, sizeof(float) * 3); unsigned int index_target = 12; // Start with the smallest target, 4 triangles unsigned int last_index_count = 0; @@ -446,11 +446,25 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, float p_normal_spli raycaster->commit(); } + const float max_mesh_error = FLT_MAX; // We don't want to limit by error, just by index target + float mesh_error = 0.0f; + while (index_target < index_count) { PackedInt32Array new_indices; new_indices.resize(index_count); - size_t new_index_count = SurfaceTool::simplify_with_attrib_func((unsigned int *)new_indices.ptrw(), (const uint32_t *)merged_indices_ptr, index_count, (const float *)merged_vertices_ptr, merged_vertex_count, sizeof(Vector3), index_target, max_mesh_error, &mesh_error, (float *)merged_normals.ptr(), normal_weights.ptr(), 3); + Vector merged_normals_f32 = vector3_to_float32_array(merged_normals.ptr(), merged_normals.size()); + + size_t new_index_count = SurfaceTool::simplify_with_attrib_func( + (unsigned int *)new_indices.ptrw(), + (const uint32_t *)merged_indices_ptr, index_count, + merged_vertices_f32.ptr(), merged_vertex_count, + sizeof(float) * 3, // Vertex stride + index_target, + max_mesh_error, + &mesh_error, + merged_normals_f32.ptr(), + normal_weights.ptr(), 3); if (new_index_count < last_index_count * 1.5f) { index_target = index_target * 1.5f; diff --git a/scene/resources/surface_tool.cpp b/scene/resources/surface_tool.cpp index 94967352c84..185cbdc6bf8 100644 --- a/scene/resources/surface_tool.cpp +++ b/scene/resources/surface_tool.cpp @@ -46,7 +46,7 @@ void SurfaceTool::strip_mesh_arrays(PackedVector3Array &r_vertices, PackedInt32A Vector remap; remap.resize(r_vertices.size()); - uint32_t new_vertex_count = generate_remap_func(remap.ptrw(), (unsigned int *)r_indices.ptr(), r_indices.size(), (float *)r_vertices.ptr(), r_vertices.size(), sizeof(Vector3)); + uint32_t new_vertex_count = generate_remap_func(remap.ptrw(), (unsigned int *)r_indices.ptr(), r_indices.size(), r_vertices.ptr(), r_vertices.size(), sizeof(Vector3)); remap_vertex_func(r_vertices.ptrw(), r_vertices.ptr(), r_vertices.size(), sizeof(Vector3), remap.ptr()); r_vertices.resize(new_vertex_count); remap_index_func((unsigned int *)r_indices.ptrw(), (unsigned int *)r_indices.ptr(), r_indices.size(), remap.ptr());