Fix buffer orphaning on desktop

Vertex / Index Buffer orphaning was previously turned off on desktop, which was causing performance problems on some platforms, especially Mac.
This commit is contained in:
lawnjelly 2020-10-12 10:06:30 +01:00
parent a392aa455f
commit 42bca1a4a9
4 changed files with 39 additions and 69 deletions

View file

@ -430,14 +430,10 @@ void RasterizerCanvasBaseGLES2::_copy_texscreen(const Rect2 &p_rect) {
void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor, const float *p_weights, const int *p_bones) {
glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer);
#ifndef GLES_OVER_GL
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW);
#endif
uint32_t buffer_ofs = 0;
storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices);
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vector2) * p_vertex_count, p_vertices);
glEnableVertexAttribArray(VS::ARRAY_VERTEX);
glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL);
buffer_ofs += sizeof(Vector2) * p_vertex_count;
@ -482,13 +478,9 @@ void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_
}
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer);
#ifndef GLES_OVER_GL
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
glBufferData(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer_size, NULL, GL_DYNAMIC_DRAW);
#endif
if (storage->config.support_32_bits_indices) { //should check for
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(int) * p_index_count, p_indices);
storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER);
glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_INT, 0);
storage->info.render._2d_draw_call_count++;
} else {
@ -496,7 +488,7 @@ void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_
for (int i = 0; i < p_index_count; i++) {
index16[i] = uint16_t(p_indices[i]);
}
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(uint16_t) * p_index_count, index16);
storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(uint16_t) * p_index_count, index16, GL_ELEMENT_ARRAY_BUFFER);
glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_SHORT, 0);
storage->info.render._2d_draw_call_count++;
}
@ -508,14 +500,10 @@ void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_
void RasterizerCanvasBaseGLES2::_draw_generic(GLuint p_primitive, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor) {
glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer);
#ifndef GLES_OVER_GL
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW);
#endif
uint32_t buffer_ofs = 0;
storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices);
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vector2) * p_vertex_count, p_vertices);
glEnableVertexAttribArray(VS::ARRAY_VERTEX);
glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL);
buffer_ofs += sizeof(Vector2) * p_vertex_count;
@ -551,14 +539,10 @@ void RasterizerCanvasBaseGLES2::_draw_generic(GLuint p_primitive, int p_vertex_c
void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor) {
glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer);
#ifndef GLES_OVER_GL
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW);
#endif
uint32_t buffer_ofs = 0;
storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices);
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vector2) * p_vertex_count, p_vertices);
glEnableVertexAttribArray(VS::ARRAY_VERTEX);
glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL);
buffer_ofs += sizeof(Vector2) * p_vertex_count;
@ -587,13 +571,9 @@ void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const
}
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer);
#ifndef GLES_OVER_GL
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
glBufferData(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer_size, NULL, GL_DYNAMIC_DRAW);
#endif
if (storage->config.support_32_bits_indices) { //should check for
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(int) * p_index_count, p_indices);
storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER);
glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_INT, 0);
storage->info.render._2d_draw_call_count++;
} else {
@ -601,7 +581,7 @@ void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const
for (int i = 0; i < p_index_count; i++) {
index16[i] = uint16_t(p_indices[i]);
}
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(uint16_t) * p_index_count, index16);
storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(uint16_t) * p_index_count, index16, GL_ELEMENT_ARRAY_BUFFER);
glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_SHORT, 0);
storage->info.render._2d_draw_call_count++;
}
@ -664,11 +644,7 @@ void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2
}
glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer);
#ifndef GLES_OVER_GL
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW);
#endif
glBufferSubData(GL_ARRAY_BUFFER, 0, p_points * stride * 4 * sizeof(float), buffer_data);
storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, p_points * stride * 4 * sizeof(float), buffer_data);
glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, stride * sizeof(float), NULL);

View file

@ -1333,7 +1333,18 @@ public:
virtual String get_video_adapter_name() const;
virtual String get_video_adapter_vendor() const;
void buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target = GL_ARRAY_BUFFER);
RasterizerStorageGLES2();
};
// standardize the orphan / upload in one place so it can be changed per platform as necessary, and avoid future
// bugs causing pipeline stalls
inline void RasterizerStorageGLES2::buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target) {
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
// Was previously #ifndef GLES_OVER_GL however this causes stalls on desktop mac also (and possibly other)
glBufferData(p_target, p_buffer_size, NULL, GL_DYNAMIC_DRAW);
glBufferSubData(p_target, p_offset, p_data_size, p_data);
}
#endif // RASTERIZERSTORAGEGLES2_H

View file

@ -331,15 +331,9 @@ void RasterizerCanvasGLES3::_draw_polygon(const int *p_indices, int p_index_coun
glBindVertexArray(data.polygon_buffer_pointer_array);
glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer);
#ifndef GLES_OVER_GL
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW);
#endif
uint32_t buffer_ofs = 0;
storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices);
//vertex
glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices);
glEnableVertexAttribArray(VS::ARRAY_VERTEX);
glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, false, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs));
buffer_ofs += sizeof(Vector2) * p_vertex_count;
@ -406,11 +400,7 @@ void RasterizerCanvasGLES3::_draw_polygon(const int *p_indices, int p_index_coun
//bind the indices buffer.
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer);
#ifndef GLES_OVER_GL
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
glBufferData(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer_size, NULL, GL_DYNAMIC_DRAW);
#endif
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(int) * p_index_count, p_indices);
storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER);
//draw the triangles.
glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_INT, 0);
@ -432,15 +422,9 @@ void RasterizerCanvasGLES3::_draw_generic(GLuint p_primitive, int p_vertex_count
glBindVertexArray(data.polygon_buffer_pointer_array);
glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer);
#ifndef GLES_OVER_GL
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW);
#endif
uint32_t buffer_ofs = 0;
storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices);
//vertex
glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices);
glEnableVertexAttribArray(VS::ARRAY_VERTEX);
glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, false, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs));
buffer_ofs += sizeof(Vector2) * p_vertex_count;
@ -485,15 +469,9 @@ void RasterizerCanvasGLES3::_draw_generic_indices(GLuint p_primitive, const int
glBindVertexArray(data.polygon_buffer_pointer_array);
glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer);
#ifndef GLES_OVER_GL
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW);
#endif
uint32_t buffer_ofs = 0;
storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices);
//vertex
glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices);
glEnableVertexAttribArray(VS::ARRAY_VERTEX);
glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, false, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs));
buffer_ofs += sizeof(Vector2) * p_vertex_count;
@ -538,11 +516,8 @@ void RasterizerCanvasGLES3::_draw_generic_indices(GLuint p_primitive, const int
//bind the indices buffer.
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer);
#ifndef GLES_OVER_GL
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
glBufferData(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer_size, NULL, GL_DYNAMIC_DRAW);
#endif
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(int) * p_index_count, p_indices);
storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER);
//draw the triangles.
glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_INT, 0);
@ -616,12 +591,9 @@ void RasterizerCanvasGLES3::_draw_gui_primitive(int p_points, const Vector2 *p_v
}
glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer);
#ifndef GLES_OVER_GL
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW);
#endif
//TODO the below call may need to be replaced with: glBufferSubData(GL_ARRAY_BUFFER, 0, p_points * stride * 4 * sizeof(float), &b[0]);
glBufferSubData(GL_ARRAY_BUFFER, 0, p_points * stride * 4, &b[0]);
//TODO the below call may need to be replaced with: (p_points * stride * 4 * sizeof(float), &b[0]);
storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, p_points * stride * 4, &b[0]);
glBindVertexArray(data.polygon_buffer_quad_arrays[version]);
glDrawArrays(prim[p_points], 0, p_points);
glBindVertexArray(0);

View file

@ -1481,7 +1481,18 @@ public:
virtual String get_video_adapter_name() const;
virtual String get_video_adapter_vendor() const;
void buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target = GL_ARRAY_BUFFER);
RasterizerStorageGLES3();
};
// standardize the orphan / upload in one place so it can be changed per platform as necessary, and avoid future
// bugs causing pipeline stalls
inline void RasterizerStorageGLES3::buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target) {
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
// Was previously #ifndef GLES_OVER_GL however this causes stalls on desktop mac also (and possibly other)
glBufferData(p_target, p_buffer_size, NULL, GL_DYNAMIC_DRAW);
glBufferSubData(p_target, p_offset, p_data_size, p_data);
}
#endif // RASTERIZERSTORAGEGLES3_H