Properly working instancing, and compatibility fixing for old meshes

This commit is contained in:
Juan Linietsky 2019-09-23 16:01:05 -03:00
parent 123ee5995c
commit bc3dbe8240
5 changed files with 207 additions and 85 deletions

View file

@ -593,19 +593,23 @@ Mesh::Mesh() {
static PoolVector<uint8_t> _fix_array_compatibility(const PoolVector<uint8_t> &p_src, uint32_t p_format, uint32_t p_elements) {
bool vertex_16bit = p_format & ((1 << (Mesh::ARRAY_VERTEX + Mesh::ARRAY_COMPRESS_BASE)));
bool bone_32_bits = (p_format & Mesh::ARRAY_FORMAT_BONES) && !(p_format & (Mesh::ARRAY_COMPRESS_INDEX << 2));
bool has_bones = (p_format & Mesh::ARRAY_FORMAT_BONES);
bool bone_8 = has_bones && !(p_format & (Mesh::ARRAY_COMPRESS_INDEX << 2));
bool weight_32 = has_bones && !(p_format & (Mesh::ARRAY_COMPRESS_TEX_UV2 << 2));
print_line("convert vertex16: " + itos(vertex_16bit) + " bone 32 " + itos(bone_32_bits));
if (!vertex_16bit && !bone_32_bits) {
print_line("convert vertex16: " + itos(vertex_16bit) + " convert bone 8 " + itos(bone_8) + " convert weight 32 " + itos(weight_32));
if (!vertex_16bit && !bone_8 && !weight_32) {
return p_src;
}
bool vertex_2d = (p_format & (Mesh::ARRAY_COMPRESS_INDEX << 1));
uint32_t src_stride = p_src.size() / p_elements;
uint32_t dst_stride = src_stride + (vertex_16bit ? 4 : 0) - (bone_32_bits ? 16 : 0);
uint32_t dst_stride = src_stride + (vertex_16bit ? 4 : 0) + (bone_8 ? 4 : 0) - (weight_32 ? 8 : 0);
PoolVector<uint8_t> ret = p_src;
PoolVector<uint8_t> ret;
ret.resize(dst_stride * p_elements);
{
PoolVector<uint8_t>::Write w = ret.write();
@ -646,27 +650,58 @@ static PoolVector<uint8_t> _fix_array_compatibility(const PoolVector<uint8_t> &p
dst += 8;
}
if (bone_32_bits) {
if (has_bones) {
const uint32_t *src_bones = (const uint32_t *)&src[remaining - 32];
const float *src_weights = (const float *)&src[remaining - 16];
uint16_t *dstw = (uint16_t *)&dst[remaining - 32];
dstw[0] = src_bones[0];
dstw[1] = src_bones[1];
dstw[2] = src_bones[2];
dstw[3] = src_bones[3];
dstw[4] = CLAMP(src_weights[0] * 65535, 0, 65535); //16bits unorm
dstw[5] = CLAMP(src_weights[1] * 65535, 0, 65535);
dstw[6] = CLAMP(src_weights[2] * 65535, 0, 65535);
dstw[7] = CLAMP(src_weights[3] * 65535, 0, 65535);
remaining -= 32;
remaining -= bone_8 ? 4 : 8;
remaining -= weight_32 ? 16 : 8;
}
for (uint32_t j = 0; j < remaining; j++) {
dst[j] = src[j];
}
if (has_bones) {
dst += remaining;
src += remaining;
if (bone_8) {
const uint8_t *src_bones = (const uint8_t *)src;
uint16_t *dst_bones = (uint16_t *)dst;
dst_bones[0] = src_bones[0];
dst_bones[1] = src_bones[1];
dst_bones[2] = src_bones[2];
dst_bones[3] = src_bones[3];
src += 4;
} else {
for (uint32_t j = 0; j < 8; j++) {
dst[j] = src[j];
}
src += 8;
}
dst += 8;
if (weight_32) {
const float *src_weights = (const float *)src;
uint16_t *dst_weights = (uint16_t *)dst;
dst_weights[0] = CLAMP(src_weights[0] * 65535, 0, 65535); //16bits unorm
dst_weights[1] = CLAMP(src_weights[1] * 65535, 0, 65535);
dst_weights[2] = CLAMP(src_weights[2] * 65535, 0, 65535);
dst_weights[3] = CLAMP(src_weights[3] * 65535, 0, 65535);
} else {
for (uint32_t j = 0; j < 8; j++) {
dst[j] = src[j];
}
}
}
}
}
@ -728,7 +763,7 @@ bool ArrayMesh::_set(const StringName &p_name, const Variant &p_value) {
add_surface_from_arrays(PrimitiveType(int(d["primitive"])), d["arrays"], d["morph_arrays"]);
} else if (d.has("array_data")) {
print_line("array data (old style");
//print_line("array data (old style");
//older format (3.x)
PoolVector<uint8_t> array_data = d["array_data"];
PoolVector<uint8_t> array_index_data;
@ -774,7 +809,9 @@ bool ArrayMesh::_set(const StringName &p_name, const Variant &p_value) {
}
//clear unused flags
format &= ~((1 << (ARRAY_VERTEX + ARRAY_COMPRESS_BASE)) | (ARRAY_COMPRESS_INDEX << 2));
print_line("format pre: " + itos(format));
format &= ~uint32_t((1 << (ARRAY_VERTEX + ARRAY_COMPRESS_BASE)) | (ARRAY_COMPRESS_INDEX << 2) | (ARRAY_COMPRESS_TEX_UV2 << 2));
print_line("format post: " + itos(format));
ERR_FAIL_COND_V(!d.has("aabb"), false);
AABB aabb = d["aabb"];
@ -873,6 +910,7 @@ Array ArrayMesh::_get_surfaces() const {
// Makes sure a server-side mesh exists for this resource.
// When `mesh` is already valid nothing happens; otherwise a new mesh is
// created through the VisualServer and the currently stored blend shape mode
// is applied to it right away.
void ArrayMesh::_create_if_empty() const {
	if (mesh.is_valid()) {
		// Already created, nothing to do.
		return;
	}

	mesh = VS::get_singleton()->mesh_create();
	// Push the cached mode so the freshly created mesh matches this resource's setting.
	VS::get_singleton()->mesh_set_blend_shape_mode(mesh, (VS::BlendShapeMode)blend_shape_mode);
}
@ -962,6 +1000,7 @@ void ArrayMesh::_set_surfaces(const Array &p_surfaces) {
// if mesh does not exist (first time this is loaded, most likely),
// we can create it with a single call, which is a lot more efficient and thread friendly
mesh = VS::get_singleton()->mesh_create_from_surfaces(surface_data);
VS::get_singleton()->mesh_set_blend_shape_mode(mesh, (VS::BlendShapeMode)blend_shape_mode);
}
surfaces.clear();
@ -1166,7 +1205,9 @@ void ArrayMesh::clear_blend_shapes() {
// Records the requested blend shape mode and, if the server-side mesh already
// exists, forwards the mode to the VisualServer.
//
// p_mode: the blend shape mode to store in `blend_shape_mode`.
//
// The server is only contacted when `mesh` is valid: calling
// mesh_set_blend_shape_mode() unconditionally would hand it an invalid RID
// whenever the mesh has not been created yet, and would issue the same call
// twice when it has.
void ArrayMesh::set_blend_shape_mode(BlendShapeMode p_mode) {
	blend_shape_mode = p_mode;

	if (mesh.is_valid()) {
		VS::get_singleton()->mesh_set_blend_shape_mode(mesh, (VS::BlendShapeMode)p_mode);
	}
}
ArrayMesh::BlendShapeMode ArrayMesh::get_blend_shape_mode() const {

View file

@ -855,7 +855,7 @@ void RasterizerSceneForwardRD::_render_list(RenderingDevice::DrawListID p_draw_l
}
if (xforms_uniform_set.is_valid() && prev_xforms_uniform_set != xforms_uniform_set) {
RD::get_singleton()->draw_list_bind_uniform_set(draw_list, material->uniform_set, 1);
RD::get_singleton()->draw_list_bind_uniform_set(draw_list, xforms_uniform_set, 1);
prev_xforms_uniform_set = xforms_uniform_set;
}
@ -1287,7 +1287,7 @@ void RasterizerSceneForwardRD::_fill_render_list(InstanceBase **p_cull_result, i
for (uint32_t j = 0; j < surface_count; j++) {
uint32_t surface_index = storage->mesh_surface_get_multimesh_render_pass_index(inst->base, j, render_pass, &geometry_index);
uint32_t surface_index = storage->mesh_surface_get_multimesh_render_pass_index(mesh, j, render_pass, &geometry_index);
_add_geometry(inst, j, materials[j], p_pass_mode, surface_index);
}

View file

@ -1758,6 +1758,90 @@ void RasterizerStorageRD::mesh_add_surface(RID p_mesh, const VS::SurfaceData &p_
ERR_FAIL_COND(mesh->blend_shape_count && p_surface.blend_shapes.size() != (int)mesh->blend_shape_count);
ERR_FAIL_COND(mesh->blend_shape_count && p_surface.bone_aabbs.size() != mesh->bone_aabbs.size());
#ifdef DEBUG_ENABLED
//do a validation, to catch errors first
{
uint32_t stride = 0;
for (int i = 0; i < VS::ARRAY_WEIGHTS; i++) {
if ((p_surface.format & (1 << i))) {
switch (i) {
case VS::ARRAY_VERTEX: {
if (p_surface.format & VS::ARRAY_FLAG_USE_2D_VERTICES) {
stride += sizeof(float) * 2;
} else {
stride += sizeof(float) * 3;
}
} break;
case VS::ARRAY_NORMAL: {
if (p_surface.format & VS::ARRAY_COMPRESS_NORMAL) {
stride += sizeof(int8_t) * 4;
} else {
stride += sizeof(float) * 4;
}
} break;
case VS::ARRAY_TANGENT: {
if (p_surface.format & VS::ARRAY_COMPRESS_TANGENT) {
stride += sizeof(int8_t) * 4;
} else {
stride += sizeof(float) * 4;
}
} break;
case VS::ARRAY_COLOR: {
if (p_surface.format & VS::ARRAY_COMPRESS_COLOR) {
stride += sizeof(int8_t) * 4;
} else {
stride += sizeof(float) * 4;
}
} break;
case VS::ARRAY_TEX_UV: {
if (p_surface.format & VS::ARRAY_COMPRESS_TEX_UV) {
stride += sizeof(int16_t) * 2;
} else {
stride += sizeof(float) * 2;
}
} break;
case VS::ARRAY_TEX_UV2: {
if (p_surface.format & VS::ARRAY_COMPRESS_TEX_UV2) {
stride += sizeof(int16_t) * 2;
} else {
stride += sizeof(float) * 2;
}
} break;
case VS::ARRAY_BONES: {
//assumed weights too
//unique format, internally 16 bits, exposed as single array for 32
stride += sizeof(int32_t) * 4;
} break;
}
}
}
int expected_size = stride * p_surface.vertex_count;
ERR_FAIL_COND_MSG(expected_size != p_surface.vertex_data.size(), "Size of data provided (" + itos(p_surface.vertex_data.size()) + ") does not match expected (" + itos(expected_size) + ")");
}
#endif
Mesh::Surface *s = memnew(Mesh::Surface);
s->format = p_surface.format;
@ -2174,7 +2258,7 @@ void RasterizerStorageRD::multimesh_allocate(RID p_multimesh, int p_instances, V
multimesh->stride_cache = multimesh->custom_data_offset_cache + (p_use_custom_data ? 4 : 0);
multimesh->buffer_set = false;
print_line("allocate, elements: " + itos(p_instances) + " 2D: " + itos(p_transform_format == VS::MULTIMESH_TRANSFORM_2D) + " colors " + itos(multimesh->uses_colors) + " data " + itos(multimesh->uses_custom_data) + " stride " + itos(multimesh->stride_cache) + " total size " + itos(multimesh->stride_cache * multimesh->instances));
//print_line("allocate, elements: " + itos(p_instances) + " 2D: " + itos(p_transform_format == VS::MULTIMESH_TRANSFORM_2D) + " colors " + itos(multimesh->uses_colors) + " data " + itos(multimesh->uses_custom_data) + " stride " + itos(multimesh->stride_cache) + " total size " + itos(multimesh->stride_cache * multimesh->instances));
multimesh->data_cache = PoolVector<float>();
multimesh->aabb = AABB();
multimesh->aabb_dirty = false;
@ -2182,7 +2266,7 @@ void RasterizerStorageRD::multimesh_allocate(RID p_multimesh, int p_instances, V
if (multimesh->instances) {
multimesh->buffer = RD::get_singleton()->storage_buffer_create(multimesh->instances * multimesh->stride_cache);
multimesh->buffer = RD::get_singleton()->storage_buffer_create(multimesh->instances * multimesh->stride_cache * 4);
}
}
@ -2209,7 +2293,7 @@ void RasterizerStorageRD::multimesh_set_mesh(RID p_multimesh, RID p_mesh) {
_multimesh_mark_all_dirty(multimesh, false, true);
} else if (multimesh->instances) {
//need to re-create AABB unfortunately, calling this has a penalty
{
if (multimesh->buffer_set) {
PoolVector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(multimesh->buffer);
PoolVector<uint8_t>::Read r = buffer.read();
const float *data = (const float *)r.ptr();
@ -2308,17 +2392,18 @@ void RasterizerStorageRD::_multimesh_re_create_aabb(MultiMesh *multimesh, const
Transform t;
if (multimesh->xform_format == VS::MULTIMESH_TRANSFORM_3D) {
t.basis[0].x = data[0];
t.basis[0].y = data[1];
t.basis[0].z = data[2];
t.basis[1].x = data[3];
t.basis[1].y = data[4];
t.basis[1].z = data[5];
t.basis[2].x = data[6];
t.basis[2].y = data[7];
t.basis[2].z = data[8];
t.origin.x = data[9];
t.origin.y = data[10];
t.basis.elements[0][0] = data[0];
t.basis.elements[0][1] = data[1];
t.basis.elements[0][2] = data[2];
t.origin.x = data[3];
t.basis.elements[1][0] = data[4];
t.basis.elements[1][1] = data[5];
t.basis.elements[1][2] = data[6];
t.origin.y = data[7];
t.basis.elements[2][0] = data[8];
t.basis.elements[2][1] = data[9];
t.basis.elements[2][2] = data[10];
t.origin.z = data[11];
} else {
@ -2356,17 +2441,17 @@ void RasterizerStorageRD::multimesh_instance_set_transform(RID p_multimesh, int
float *dataptr = w.ptr() + p_index * multimesh->stride_cache;
dataptr[0] = p_transform.basis[0].x;
dataptr[1] = p_transform.basis[0].y;
dataptr[2] = p_transform.basis[0].z;
dataptr[3] = p_transform.basis[1].x;
dataptr[4] = p_transform.basis[1].y;
dataptr[5] = p_transform.basis[1].z;
dataptr[6] = p_transform.basis[2].x;
dataptr[7] = p_transform.basis[2].y;
dataptr[8] = p_transform.basis[2].z;
dataptr[9] = p_transform.origin.x;
dataptr[10] = p_transform.origin.y;
dataptr[0] = p_transform.basis.elements[0][0];
dataptr[1] = p_transform.basis.elements[0][1];
dataptr[2] = p_transform.basis.elements[0][2];
dataptr[3] = p_transform.origin.x;
dataptr[4] = p_transform.basis.elements[1][0];
dataptr[5] = p_transform.basis.elements[1][1];
dataptr[6] = p_transform.basis.elements[1][2];
dataptr[7] = p_transform.origin.y;
dataptr[8] = p_transform.basis.elements[2][0];
dataptr[9] = p_transform.basis.elements[2][1];
dataptr[10] = p_transform.basis.elements[2][2];
dataptr[11] = p_transform.origin.z;
}
@ -2387,14 +2472,14 @@ void RasterizerStorageRD::multimesh_instance_set_transform_2d(RID p_multimesh, i
float *dataptr = w.ptr() + p_index * multimesh->stride_cache;
dataptr[0] = p_transform.elements[0].x;
dataptr[1] = p_transform.elements[1].x;
dataptr[0] = p_transform.elements[0][0];
dataptr[1] = p_transform.elements[1][0];
dataptr[2] = 0;
dataptr[3] = p_transform.elements[2].x;
dataptr[4] = p_transform.elements[0].y;
dataptr[5] = p_transform.elements[1].y;
dataptr[3] = p_transform.elements[2][0];
dataptr[4] = p_transform.elements[0][1];
dataptr[5] = p_transform.elements[1][1];
dataptr[6] = 0;
dataptr[7] = p_transform.elements[2].y;
dataptr[7] = p_transform.elements[2][1];
}
_multimesh_mark_dirty(multimesh, p_index, true);
@ -2466,17 +2551,17 @@ Transform RasterizerStorageRD::multimesh_instance_get_transform(RID p_multimesh,
const float *dataptr = r.ptr() + p_index * multimesh->stride_cache;
t.basis[0].x = dataptr[0];
t.basis[0].y = dataptr[1];
t.basis[0].z = dataptr[2];
t.basis[1].x = dataptr[3];
t.basis[1].y = dataptr[4];
t.basis[1].z = dataptr[5];
t.basis[2].x = dataptr[6];
t.basis[2].y = dataptr[7];
t.basis[2].z = dataptr[8];
t.origin.x = dataptr[9];
t.origin.y = dataptr[10];
t.basis.elements[0][0] = dataptr[0];
t.basis.elements[0][1] = dataptr[1];
t.basis.elements[0][2] = dataptr[2];
t.origin.x = dataptr[3];
t.basis.elements[1][0] = dataptr[4];
t.basis.elements[1][1] = dataptr[5];
t.basis.elements[1][2] = dataptr[6];
t.origin.y = dataptr[7];
t.basis.elements[2][0] = dataptr[8];
t.basis.elements[2][1] = dataptr[9];
t.basis.elements[2][2] = dataptr[10];
t.origin.z = dataptr[11];
}
@ -2497,13 +2582,12 @@ Transform2D RasterizerStorageRD::multimesh_instance_get_transform_2d(RID p_multi
const float *dataptr = r.ptr() + p_index * multimesh->stride_cache;
t.elements[0].x = dataptr[0];
t.elements[1].x = dataptr[1];
t.elements[2].x = dataptr[3];
t.elements[0].y = dataptr[4];
t.elements[1].y = dataptr[5];
t.elements[2].y = dataptr[7];
t.elements[0][0] = dataptr[0];
t.elements[1][0] = dataptr[1];
t.elements[2][0] = dataptr[3];
t.elements[0][1] = dataptr[4];
t.elements[1][1] = dataptr[5];
t.elements[2][1] = dataptr[7];
}
return t;
@ -2663,13 +2747,13 @@ void RasterizerStorageRD::_update_dirty_multimeshes() {
if (multimesh->data_cache_used_dirty_regions > 32 || multimesh->data_cache_used_dirty_regions > visible_region_count / 2) {
//if there are too many dirty regions, or they represent the majority of regions, just copy everything; otherwise the per-region transfer cost piles up too much
RD::get_singleton()->buffer_update(multimesh->buffer, 0, MIN(visible_region_count * region_size, multimesh->instances * multimesh->stride_cache), data, false);
RD::get_singleton()->buffer_update(multimesh->buffer, 0, MIN(visible_region_count * region_size, multimesh->instances * multimesh->stride_cache * sizeof(float)), data, false);
} else {
//not that many regions? update them all
for (uint32_t i = 0; i < visible_region_count; i++) {
if (multimesh->data_cache_dirty_regions[i]) {
uint64_t offset = i * region_size;
uint64_t size = multimesh->stride_cache * multimesh->instances;
uint64_t size = multimesh->stride_cache * multimesh->instances * sizeof(float);
RD::get_singleton()->buffer_update(multimesh->buffer, offset, MIN(region_size, size - offset), &data[i * region_size], false);
}
}

View file

@ -102,10 +102,10 @@ void main() {
mat4 matrix;
if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) {
mat4 matrix = mat4(transforms.data[offset+0],transforms.data[offset+1],vec4(0.0,0.0,1.0,0.0),vec4(0.0,0.0,0.0,1.0));
matrix = mat4(transforms.data[offset+0],transforms.data[offset+1],vec4(0.0,0.0,1.0,0.0),vec4(0.0,0.0,0.0,1.0));
offset+=2;
} else {
mat4 matrix = mat4(transforms.data[offset+0],transforms.data[offset+1],transforms.data[offset+2],vec4(0.0,0.0,0.0,1.0));
matrix = mat4(transforms.data[offset+0],transforms.data[offset+1],transforms.data[offset+2],vec4(0.0,0.0,0.0,1.0));
offset+=3;
}
@ -120,9 +120,10 @@ void main() {
instance_custom = transforms.data[offset];
}
//transposed, so multiply in opposite order
world_matrix = matrix * world_matrix;
world_normal_matrix = mat3(matrix) * world_normal_matrix;
//transpose
matrix = transpose(matrix);
world_matrix = world_matrix * matrix;
world_normal_matrix = world_normal_matrix * mat3(matrix);
} else {
//not a multimesh, instances are for multiple draw calls

View file

@ -1144,10 +1144,6 @@ Array VisualServer::_get_array_from_surface(uint32_t p_format, PoolVector<uint8_
elem_size *= sizeof(float);
}
if (elem_size == 6) {
elem_size = 8;
}
} break;
case VS::ARRAY_NORMAL: {