Merge pull request #41323 from lawnjelly/kessel_lightangles

GLES2 2D fix normal mapping - batching and nvidia workaround
This commit is contained in:
Rémi Verschelde 2020-09-28 18:45:43 +02:00 committed by GitHub
commit 422c279fcb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 691 additions and 327 deletions

View file

@ -71,6 +71,75 @@
#include <string.h> #include <string.h>
// Very simple non-growable byte array that keeps track of the size of a 'unit'.
// A unit can be cast to whatever vertex format (FVF) is required. The storage is
// created once, with enough memory to hold the biggest FVF, which allows
// multiple FVFs to share the same array.
//
// Typical usage:
//   create(max_units, max_unit_size_bytes); // allocate once
//   prepare(unit_size_bytes);               // choose the unit size and reset the count
//   request(n);                             // hand out n consecutive units
//
// NOTE: no copy constructor / assignment operator is defined while the
// destructor frees _list, so instances must not be copied (a shallow copy
// would free the same memory twice).
class RasterizerUnitArrayGLES2 {
public:
	RasterizerUnitArrayGLES2() :
			_list(nullptr),
			_size(0),
			_max_size(0),
			_max_size_bytes(0),
			_unit_size_bytes(0),
			_max_unit_size_bytes(0) {
	}
	~RasterizerUnitArrayGLES2() { free(); }

	// Returns a pointer to the start of unit number ui, using the unit size
	// set by the last prepare(). No bounds checking is performed.
	uint8_t *get_unit(unsigned int ui) { return &_list[ui * _unit_size_bytes]; }
	const uint8_t *get_unit(unsigned int ui) const { return &_list[ui * _unit_size_bytes]; }

	// Number of units handed out since the last prepare().
	int size() const { return _size; }

	// Capacity in units, as passed to create().
	int max_size() const { return _max_size; }

	// Releases the storage (if any) and resets every counter, including the
	// maximum unit size, so no stale value survives a free().
	void free() {
		if (_list) {
			memdelete_arr(_list);
			_list = nullptr;
		}
		_size = 0;
		_max_size = 0;
		_max_size_bytes = 0;
		_unit_size_bytes = 0;
		_max_unit_size_bytes = 0;
	}

	// (Re)allocates storage for p_max_size_units units of at most
	// p_max_unit_size_bytes bytes each. Any previous contents are discarded.
	void create(int p_max_size_units, int p_max_unit_size_bytes) {
		free();

		_max_unit_size_bytes = p_max_unit_size_bytes;
		_max_size = p_max_size_units;
		_max_size_bytes = p_max_size_units * p_max_unit_size_bytes;

		// only allocate for a sane, positive byte count
		if (_max_size_bytes > 0) {
			_list = memnew_arr(uint8_t, _max_size_bytes);
		}
	}

	// Selects the unit size used by get_unit() / request() and empties the
	// array. The unit size is expected not to exceed the maximum unit size
	// passed to create(); request() refuses to hand out units that would
	// not fit in the allocation.
	void prepare(int p_unit_size_bytes) {
		_unit_size_bytes = p_unit_size_bytes;
		_size = 0;
	}

	// Reserves p_num_items consecutive units (several items at a time is
	// allowed) and returns a pointer to the first one, or nullptr when the
	// array is full. On failure the size is left unchanged.
	uint8_t *request(int p_num_items = 1) {
		const int new_size = _size + p_num_items;

		// out of units
		if (new_size > _max_size) {
			return nullptr;
		}

		// The unit count alone is not sufficient when the current unit size
		// is larger than the one the storage was created for, so check the
		// byte extent as well (in 64 bit to avoid overflow).
		if (static_cast<int64_t>(new_size) * _unit_size_bytes > _max_size_bytes) {
			return nullptr;
		}

		uint8_t *first_unit = get_unit(_size);
		_size = new_size;
		return first_unit;
	}

private:
	uint8_t *_list;
	int _size; // in units
	int _max_size; // in units
	int _max_size_bytes;
	int _unit_size_bytes; // size of the unit currently in use
	int _max_unit_size_bytes; // largest unit size the storage was created for
};
template <class T> template <class T>
class RasterizerArrayGLES2 { class RasterizerArrayGLES2 {
public: public:

View file

@ -52,8 +52,13 @@ void RasterizerCanvasBaseGLES2::light_internal_free(RID p_rid) {
void RasterizerCanvasBaseGLES2::canvas_begin() { void RasterizerCanvasBaseGLES2::canvas_begin() {
state.canvas_shader.bind();
state.using_transparent_rt = false; state.using_transparent_rt = false;
// always start with light_angle unset
state.using_light_angle = false;
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_LIGHT_ANGLE, false);
state.canvas_shader.bind();
int viewport_x, viewport_y, viewport_width, viewport_height; int viewport_x, viewport_y, viewport_width, viewport_height;
if (storage->frame.current_rt) { if (storage->frame.current_rt) {
@ -155,6 +160,16 @@ void RasterizerCanvasBaseGLES2::draw_generic_textured_rect(const Rect2 &p_rect,
glDrawArrays(GL_TRIANGLE_FAN, 0, 4); glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
} }
// Sets the USE_TEXTURE_RECT and USE_LIGHT_ANGLE conditionals on the canvas shader.
//
// p_texture_rect - value for USE_TEXTURE_RECT, written unconditionally on every call.
// p_light_angle  - value for USE_LIGHT_ANGLE, written only when it differs from the
//                  value cached in state.using_light_angle.
//
// This only sets conditionals; it does not bind the shader.
// NOTE(review): the comparison reads state.using_light_angle, so the first call
// relies on that flag already having a defined value - confirm it is initialised
// before the first call.
void RasterizerCanvasBaseGLES2::_set_texture_rect_mode(bool p_texture_rect, bool p_light_angle) {
	// The texture rect conditional is not state checked (it could be).
	state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, p_texture_rect);

	// Light angle is state checked: nothing more to do if it is unchanged.
	if (state.using_light_angle == p_light_angle) {
		return;
	}

	state.using_light_angle = p_light_angle;
	state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_LIGHT_ANGLE, p_light_angle);
}
RasterizerStorageGLES2::Texture *RasterizerCanvasBaseGLES2::_bind_canvas_texture(const RID &p_texture, const RID &p_normal_map) { RasterizerStorageGLES2::Texture *RasterizerCanvasBaseGLES2::_bind_canvas_texture(const RID &p_texture, const RID &p_normal_map) {
RasterizerStorageGLES2::Texture *tex_return = NULL; RasterizerStorageGLES2::Texture *tex_return = NULL;
@ -595,12 +610,13 @@ void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
} }
void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs) { void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs, const float *p_light_angles) {
static const GLenum prim[5] = { GL_POINTS, GL_POINTS, GL_LINES, GL_TRIANGLES, GL_TRIANGLE_FAN }; static const GLenum prim[5] = { GL_POINTS, GL_POINTS, GL_LINES, GL_TRIANGLES, GL_TRIANGLE_FAN };
int color_offset = 0; int color_offset = 0;
int uv_offset = 0; int uv_offset = 0;
int light_angle_offset = 0;
int stride = 2; int stride = 2;
if (p_colors) { if (p_colors) {
@ -613,7 +629,12 @@ void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2
stride += 2; stride += 2;
} }
float buffer_data[(2 + 2 + 4) * 4]; if (p_light_angles) { //light_angles
light_angle_offset = stride;
stride += 1;
}
float buffer_data[(2 + 2 + 4 + 1) * 4];
for (int i = 0; i < p_points; i++) { for (int i = 0; i < p_points; i++) {
buffer_data[stride * i + 0] = p_vertices[i].x; buffer_data[stride * i + 0] = p_vertices[i].x;
@ -636,6 +657,12 @@ void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2
} }
} }
if (p_light_angles) {
for (int i = 0; i < p_points; i++) {
buffer_data[stride * i + light_angle_offset + 0] = p_light_angles[i];
}
}
glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer);
#ifndef GLES_OVER_GL #ifndef GLES_OVER_GL
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
@ -655,9 +682,19 @@ void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2
glEnableVertexAttribArray(VS::ARRAY_TEX_UV); glEnableVertexAttribArray(VS::ARRAY_TEX_UV);
} }
if (p_light_angles) {
glVertexAttribPointer(VS::ARRAY_TANGENT, 1, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(light_angle_offset * sizeof(float)));
glEnableVertexAttribArray(VS::ARRAY_TANGENT);
}
glDrawArrays(prim[p_points], 0, p_points); glDrawArrays(prim[p_points], 0, p_points);
storage->info.render._2d_draw_call_count++; storage->info.render._2d_draw_call_count++;
if (p_light_angles) {
// may not be needed
glDisableVertexAttribArray(VS::ARRAY_TANGENT);
}
glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0);
} }
@ -993,7 +1030,7 @@ void RasterizerCanvasBaseGLES2::initialize() {
state.canvas_shader.init(); state.canvas_shader.init();
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, true); _set_texture_rect_mode(true);
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_RGBA_SHADOWS, storage->config.use_rgba_2d_shadows); state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_RGBA_SHADOWS, storage->config.use_rgba_2d_shadows);
state.canvas_shader.bind(); state.canvas_shader.bind();

View file

@ -77,6 +77,7 @@ public:
LensDistortedShaderGLES2 lens_shader; LensDistortedShaderGLES2 lens_shader;
bool using_texture_rect; bool using_texture_rect;
bool using_light_angle;
bool using_ninepatch; bool using_ninepatch;
bool using_skeleton; bool using_skeleton;
@ -112,7 +113,7 @@ public:
virtual void canvas_begin(); virtual void canvas_begin();
virtual void canvas_end(); virtual void canvas_end();
void _draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs); void _draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs, const float *p_light_angles = nullptr);
void _draw_polygon(const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor, const float *p_weights = NULL, const int *p_bones = NULL); void _draw_polygon(const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor, const float *p_weights = NULL, const int *p_bones = NULL);
void _draw_generic(GLuint p_primitive, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor); void _draw_generic(GLuint p_primitive, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor);
void _draw_generic_indices(GLuint p_primitive, const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor); void _draw_generic_indices(GLuint p_primitive, const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor);
@ -130,6 +131,7 @@ public:
virtual void canvas_debug_viewport_shadows(Light *p_lights_with_shadow); virtual void canvas_debug_viewport_shadows(Light *p_lights_with_shadow);
RasterizerStorageGLES2::Texture *_bind_canvas_texture(const RID &p_texture, const RID &p_normal_map); RasterizerStorageGLES2::Texture *_bind_canvas_texture(const RID &p_texture, const RID &p_normal_map);
void _set_texture_rect_mode(bool p_texture_rect, bool p_light_angle = false);
void initialize(); void initialize();
void finalize(); void finalize();

View file

@ -55,6 +55,7 @@ RasterizerCanvasGLES2::BatchData::BatchData() {
index_buffer_size_units = 0; index_buffer_size_units = 0;
index_buffer_size_bytes = 0; index_buffer_size_bytes = 0;
use_colored_vertices = false; use_colored_vertices = false;
use_light_angles = false;
settings_use_batching = false; settings_use_batching = false;
settings_max_join_item_commands = 0; settings_max_join_item_commands = 0;
settings_colored_vertex_format_threshold = 0.0f; settings_colored_vertex_format_threshold = 0.0f;
@ -212,10 +213,14 @@ void RasterizerCanvasGLES2::_batch_upload_buffers() {
// orphan the old (for now) // orphan the old (for now)
glBufferData(GL_ARRAY_BUFFER, 0, 0, GL_DYNAMIC_DRAW); glBufferData(GL_ARRAY_BUFFER, 0, 0, GL_DYNAMIC_DRAW);
if (!bdata.use_colored_vertices) { if (!bdata.use_light_angles) {
glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), GL_DYNAMIC_DRAW); if (!bdata.use_colored_vertices) {
glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), GL_DYNAMIC_DRAW);
} else {
glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW);
}
} else { } else {
glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.vertices_colored.size(), bdata.vertices_colored.get_data(), GL_DYNAMIC_DRAW); glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLightAngled) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW);
} }
// might not be necessary // might not be necessary
@ -251,10 +256,6 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_
int command_count = p_item->commands.size(); int command_count = p_item->commands.size();
Item::Command *const *commands = p_item->commands.ptr(); Item::Command *const *commands = p_item->commands.ptr();
// locals, might be more efficient in a register (check)
Vector2 texpixel_size = r_fill_state.texpixel_size;
const float uv_epsilon = bdata.settings_uv_contract_amount;
// checking the color for not being white makes it 92/90 times faster in the case where it is white // checking the color for not being white makes it 92/90 times faster in the case where it is white
bool multiply_final_modulate = false; bool multiply_final_modulate = false;
if (!r_fill_state.use_hardware_transform && (r_fill_state.final_modulate != Color(1, 1, 1, 1))) { if (!r_fill_state.use_hardware_transform && (r_fill_state.final_modulate != Color(1, 1, 1, 1))) {
@ -316,196 +317,21 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_
Item::CommandRect *rect = static_cast<Item::CommandRect *>(command); Item::CommandRect *rect = static_cast<Item::CommandRect *>(command);
bool change_batch = false; // unoptimized - could this be done once per batch / batch texture?
bool send_light_angles = rect->normal_map != RID();
// conditions for creating a new batch bool buffer_full = false;
if (r_fill_state.curr_batch->type != Batch::BT_RECT) {
change_batch = true;
// check for special case if there is only a single or small number of rects, // the template params must be explicit for compilation,
// in which case we will use the legacy default rect renderer // this forces building the multiple versions of the function.
// because it is faster for single rects if (send_light_angles) {
buffer_full = prefill_rect<true>(rect, r_fill_state, r_command_start, command_num, command_count, commands, p_item, multiply_final_modulate);
// we only want to do this if not a joined item with more than 1 item, } else {
// because joined items with more than 1, the command * will be incorrect buffer_full = prefill_rect<false>(rect, r_fill_state, r_command_start, command_num, command_count, commands, p_item, multiply_final_modulate);
// NOTE - this is assuming that use_hardware_transform means that it is a non-joined item!!
// If that assumption is incorrect this will go horribly wrong.
if (bdata.settings_use_single_rect_fallback && r_fill_state.use_hardware_transform) {
bool is_single_rect = false;
int command_num_next = command_num + 1;
if (command_num_next < command_count) {
Item::Command *command_next = commands[command_num_next];
if ((command_next->type != Item::Command::TYPE_RECT) && (command_next->type != Item::Command::TYPE_TRANSFORM)) {
is_single_rect = true;
}
} else {
is_single_rect = true;
}
// if it is a rect on its own, do exactly the same as the default routine
if (is_single_rect) {
_prefill_default_batch(r_fill_state, command_num, *p_item);
break;
}
} // if use hardware transform
} }
Color col = rect->modulate; if (buffer_full)
if (multiply_final_modulate) {
col *= r_fill_state.final_modulate;
}
// instead of doing all the texture preparation for EVERY rect,
// we build a list of texture combinations and do this once off.
// This means we have a potentially rather slow step to identify which texture combo
// using the RIDs.
int old_batch_tex_id = r_fill_state.batch_tex_id;
r_fill_state.batch_tex_id = _batch_find_or_create_tex(rect->texture, rect->normal_map, rect->flags & CANVAS_RECT_TILE, old_batch_tex_id);
// try to create vertices BEFORE creating a batch,
// because if the vertex buffer is full, we need to finish this
// function, draw what we have so far, and then start a new set of batches
// request FOUR vertices at a time, this is more efficient
BatchVertex *bvs = bdata.vertices.request(4);
if (!bvs) {
// run out of space in the vertex buffer .. finish this function and draw what we have so far
// return where we got to
r_command_start = command_num;
return true; return true;
}
// conditions for creating a new batch
if (old_batch_tex_id != r_fill_state.batch_tex_id) {
change_batch = true;
}
// we need to treat color change separately because we need to count these
// to decide whether to switch on the fly to colored vertices.
if (!r_fill_state.curr_batch->color.equals(col)) {
change_batch = true;
bdata.total_color_changes++;
}
if (change_batch) {
// put the tex pixel size in a local (less verbose and can be a register)
const BatchTex &batchtex = bdata.batch_textures[r_fill_state.batch_tex_id];
batchtex.tex_pixel_size.to(texpixel_size);
if (bdata.settings_uv_contract) {
r_fill_state.contract_uvs = (batchtex.flags & VS::TEXTURE_FLAG_FILTER) == 0;
}
// need to preserve texpixel_size between items
r_fill_state.texpixel_size = texpixel_size;
// open new batch (this should never fail, it dynamically grows)
r_fill_state.curr_batch = _batch_request_new(false);
r_fill_state.curr_batch->type = Batch::BT_RECT;
r_fill_state.curr_batch->color.set(col);
r_fill_state.curr_batch->batch_texture_id = r_fill_state.batch_tex_id;
r_fill_state.curr_batch->first_command = command_num;
r_fill_state.curr_batch->num_commands = 1;
r_fill_state.curr_batch->first_quad = bdata.total_quads;
} else {
// we could alternatively do the count when closing a batch .. perhaps more efficient
r_fill_state.curr_batch->num_commands++;
}
// fill the quad geometry
Vector2 mins = rect->rect.position;
if (r_fill_state.transform_mode == TM_TRANSLATE) {
_software_transform_vertex(mins, r_fill_state.transform_combined);
}
Vector2 maxs = mins + rect->rect.size;
// just aliases
BatchVertex *bA = &bvs[0];
BatchVertex *bB = &bvs[1];
BatchVertex *bC = &bvs[2];
BatchVertex *bD = &bvs[3];
bA->pos.x = mins.x;
bA->pos.y = mins.y;
bB->pos.x = maxs.x;
bB->pos.y = mins.y;
bC->pos.x = maxs.x;
bC->pos.y = maxs.y;
bD->pos.x = mins.x;
bD->pos.y = maxs.y;
if (rect->rect.size.x < 0) {
SWAP(bA->pos, bB->pos);
SWAP(bC->pos, bD->pos);
}
if (rect->rect.size.y < 0) {
SWAP(bA->pos, bD->pos);
SWAP(bB->pos, bC->pos);
}
if (r_fill_state.transform_mode == TM_ALL) {
_software_transform_vertex(bA->pos, r_fill_state.transform_combined);
_software_transform_vertex(bB->pos, r_fill_state.transform_combined);
_software_transform_vertex(bC->pos, r_fill_state.transform_combined);
_software_transform_vertex(bD->pos, r_fill_state.transform_combined);
}
// uvs
Vector2 src_min;
Vector2 src_max;
if (rect->flags & CANVAS_RECT_REGION) {
src_min = rect->source.position;
src_max = src_min + rect->source.size;
src_min *= texpixel_size;
src_max *= texpixel_size;
// nudge offset for the maximum to prevent precision error on GPU reading into line outside the source rect
// this is very difficult to get right.
if (r_fill_state.contract_uvs) {
src_min.x += uv_epsilon;
src_min.y += uv_epsilon;
src_max.x -= uv_epsilon;
src_max.y -= uv_epsilon;
}
} else {
src_min = Vector2(0, 0);
src_max = Vector2(1, 1);
}
// 10% faster calculating the max first
Vector2 uvs[4] = {
src_min,
Vector2(src_max.x, src_min.y),
src_max,
Vector2(src_min.x, src_max.y),
};
if (rect->flags & CANVAS_RECT_TRANSPOSE) {
SWAP(uvs[1], uvs[3]);
}
if (rect->flags & CANVAS_RECT_FLIP_H) {
SWAP(uvs[0], uvs[1]);
SWAP(uvs[2], uvs[3]);
}
if (rect->flags & CANVAS_RECT_FLIP_V) {
SWAP(uvs[0], uvs[3]);
SWAP(uvs[1], uvs[2]);
}
bA->uv.set(uvs[0]);
bB->uv.set(uvs[1]);
bC->uv.set(uvs[2]);
bD->uv.set(uvs[3]);
// increment quad count
bdata.total_quads++;
} break; } break;
} }
@ -519,119 +345,29 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_
return false; return false;
} }
// convert the stupidly high amount of batches (each with its own color)
// to larger batches where the color is stored in the verts instead...
// There is a trade off. Non colored verts are smaller so work faster, but
// there comes a point where it is better to just use colored verts to avoid lots of
// batches.
void RasterizerCanvasGLES2::_batch_translate_to_colored() {
bdata.vertices_colored.reset();
bdata.batches_temp.reset();
// As the vertices_colored and batches_temp are 'mirrors' of the non-colored version,
// the sizes should be equal, and allocations should never fail. Hence the use of debug
// asserts to check program flow, these should not occur at runtime unless the allocation
// code has been altered.
#ifdef DEBUG_ENABLED
CRASH_COND(bdata.vertices_colored.max_size() != bdata.vertices.max_size());
CRASH_COND(bdata.batches_temp.max_size() != bdata.batches.max_size());
#endif
Color curr_col(-1.0, -1.0, -1.0, -1.0);
Batch *dest_batch = 0;
// translate the batches into vertex colored batches
for (int n = 0; n < bdata.batches.size(); n++) {
const Batch &source_batch = bdata.batches[n];
bool needs_new_batch = true;
if (dest_batch) {
if (dest_batch->type == source_batch.type) {
if (source_batch.type == Batch::BT_RECT) {
if (dest_batch->batch_texture_id == source_batch.batch_texture_id) {
// add to previous batch
dest_batch->num_commands += source_batch.num_commands;
needs_new_batch = false;
// create the colored verts (only if not default)
int first_vert = source_batch.first_quad * 4;
int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands);
for (int v = first_vert; v < end_vert; v++) {
const BatchVertex &bv = bdata.vertices[v];
BatchVertexColored *cv = bdata.vertices_colored.request();
#ifdef DEBUG_ENABLED
CRASH_COND(!cv);
#endif
cv->pos = bv.pos;
cv->uv = bv.uv;
cv->col = source_batch.color;
}
} // textures match
} else {
// default
// we can still join, but only under special circumstances
// does this ever happen? not sure at this stage, but left for future expansion
uint32_t source_last_command = source_batch.first_command + source_batch.num_commands;
if (source_last_command == dest_batch->first_command) {
dest_batch->num_commands += source_batch.num_commands;
needs_new_batch = false;
} // if the commands line up exactly
}
} // if both batches are the same type
} // if dest batch is valid
if (needs_new_batch) {
dest_batch = bdata.batches_temp.request();
#ifdef DEBUG_ENABLED
CRASH_COND(!dest_batch);
#endif
*dest_batch = source_batch;
// create the colored verts (only if not default)
if (source_batch.type != Batch::BT_DEFAULT) {
int first_vert = source_batch.first_quad * 4;
int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands);
for (int v = first_vert; v < end_vert; v++) {
const BatchVertex &bv = bdata.vertices[v];
BatchVertexColored *cv = bdata.vertices_colored.request();
#ifdef DEBUG_ENABLED
CRASH_COND(!cv);
#endif
cv->pos = bv.pos;
cv->uv = bv.uv;
cv->col = source_batch.color;
}
}
}
}
// copy the temporary batches to the master batch list (this could be avoided but it makes the code cleaner)
bdata.batches.copy_from(bdata.batches_temp);
}
void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, RasterizerStorageGLES2::Material *p_material) { void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, RasterizerStorageGLES2::Material *p_material) {
ERR_FAIL_COND(p_batch.num_commands <= 0); ERR_FAIL_COND(p_batch.num_commands <= 0);
const bool &colored_verts = bdata.use_colored_vertices; const bool &colored_verts = bdata.use_colored_vertices;
const bool &use_light_angles = bdata.use_light_angles;
int sizeof_vert; int sizeof_vert;
if (!colored_verts) { if (!use_light_angles) {
sizeof_vert = sizeof(BatchVertex); if (!colored_verts) {
sizeof_vert = sizeof(BatchVertex);
} else {
sizeof_vert = sizeof(BatchVertexColored);
}
} else { } else {
sizeof_vert = sizeof(BatchVertexColored); sizeof_vert = sizeof(BatchVertexLightAngled);
} }
// batch tex // batch tex
const BatchTex &tex = bdata.batch_textures[p_batch.batch_texture_id]; const BatchTex &tex = bdata.batch_textures[p_batch.batch_texture_id];
// make sure to set all conditionals BEFORE binding the shader // make sure to set all conditionals BEFORE binding the shader
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); _set_texture_rect_mode(false, use_light_angles);
// force repeat is set if non power of 2 texture, and repeat is needed if hardware doesn't support npot // force repeat is set if non power of 2 texture, and repeat is needed if hardware doesn't support npot
if (tex.tile_mode == BatchTex::TILE_FORCE_REPEAT) { if (tex.tile_mode == BatchTex::TILE_FORCE_REPEAT) {
@ -665,6 +401,11 @@ void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, Rasterizer
glEnableVertexAttribArray(VS::ARRAY_COLOR); glEnableVertexAttribArray(VS::ARRAY_COLOR);
} }
if (use_light_angles) {
glVertexAttribPointer(VS::ARRAY_TANGENT, 1, GL_FLOAT, GL_FALSE, sizeof_vert, CAST_INT_TO_UCHAR_PTR(pointer + (8 * 4)));
glEnableVertexAttribArray(VS::ARRAY_TANGENT);
}
// We only want to set the GL wrapping mode if the texture is not already tiled (i.e. set in Import). // We only want to set the GL wrapping mode if the texture is not already tiled (i.e. set in Import).
// This is an optimization left over from the legacy renderer. // This is an optimization left over from the legacy renderer.
// If we DID set tiling in the API, and reverted to clamped, then the next draw using this texture // If we DID set tiling in the API, and reverted to clamped, then the next draw using this texture
@ -707,8 +448,10 @@ void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, Rasterizer
} break; } break;
} }
// could these have ifs?
glDisableVertexAttribArray(VS::ARRAY_TEX_UV); glDisableVertexAttribArray(VS::ARRAY_TEX_UV);
glDisableVertexAttribArray(VS::ARRAY_COLOR); glDisableVertexAttribArray(VS::ARRAY_COLOR);
glDisableVertexAttribArray(VS::ARRAY_TANGENT);
// may not be necessary .. state change optimization still TODO // may not be necessary .. state change optimization still TODO
glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0);
@ -848,7 +591,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
Item::CommandLine *line = static_cast<Item::CommandLine *>(command); Item::CommandLine *line = static_cast<Item::CommandLine *>(command);
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); _set_texture_rect_mode(false);
if (state.canvas_shader.bind()) { if (state.canvas_shader.bind()) {
_set_uniforms(); _set_uniforms();
state.canvas_shader.use_material((void *)p_material); state.canvas_shader.use_material((void *)p_material);
@ -934,7 +677,17 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
// To work it around, we use a simpler draw method which does not flicker, but gives // To work it around, we use a simpler draw method which does not flicker, but gives
// a non negligible performance hit, so it's opt-in (GH-24466). // a non negligible performance hit, so it's opt-in (GH-24466).
if (use_nvidia_rect_workaround) { if (use_nvidia_rect_workaround) {
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false);
// are we using normal maps, if so we want to use light angle
bool send_light_angles = false;
// only need to use light angles when normal mapping
// otherwise we can use the default shader
if (state.current_normal != RID()) {
send_light_angles = true;
}
_set_texture_rect_mode(false, send_light_angles);
if (state.canvas_shader.bind()) { if (state.canvas_shader.bind()) {
_set_uniforms(); _set_uniforms();
@ -971,6 +724,10 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
src_rect.position + Vector2(0.0, src_rect.size.y), src_rect.position + Vector2(0.0, src_rect.size.y),
}; };
// for encoding in light angle
bool flip_h = false;
bool flip_v = false;
if (r->flags & CANVAS_RECT_TRANSPOSE) { if (r->flags & CANVAS_RECT_TRANSPOSE) {
SWAP(uvs[1], uvs[3]); SWAP(uvs[1], uvs[3]);
} }
@ -978,10 +735,13 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
if (r->flags & CANVAS_RECT_FLIP_H) { if (r->flags & CANVAS_RECT_FLIP_H) {
SWAP(uvs[0], uvs[1]); SWAP(uvs[0], uvs[1]);
SWAP(uvs[2], uvs[3]); SWAP(uvs[2], uvs[3]);
flip_h = true;
flip_v = !flip_v;
} }
if (r->flags & CANVAS_RECT_FLIP_V) { if (r->flags & CANVAS_RECT_FLIP_V) {
SWAP(uvs[0], uvs[3]); SWAP(uvs[0], uvs[3]);
SWAP(uvs[1], uvs[2]); SWAP(uvs[1], uvs[2]);
flip_v = !flip_v;
} }
state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size);
@ -994,7 +754,33 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
untile = true; untile = true;
} }
_draw_gui_primitive(4, points, NULL, uvs); if (send_light_angles) {
// for single rects, there is no need to fully utilize the light angle,
// we only need it to encode flips (horz and vert). But the shader can be reused with
// batching in which case the angle encodes the transform as well as
// the flips.
// Note transpose is NYI. I don't think it worked either with the non-nvidia method.
// if horizontal flip, angle is 180
float angle = 0.0f;
if (flip_h)
angle = Math_PI;
// add 1 (to take care of zero floating point error with sign)
angle += 1.0f;
// flip if necessary
if (flip_v)
angle *= -1.0f;
// light angle must be sent for each vert, instead as a single uniform in the uniform draw method
// this has the benefit of enabling batching with light angles.
float light_angles[4] = { angle, angle, angle, angle };
_draw_gui_primitive(4, points, NULL, uvs, light_angles);
} else {
_draw_gui_primitive(4, points, NULL, uvs);
}
if (untile) { if (untile) {
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
@ -1016,7 +802,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
// This branch is better for performance, but can produce flicker on Nvidia, see above comment. // This branch is better for performance, but can produce flicker on Nvidia, see above comment.
_bind_quad_buffer(); _bind_quad_buffer();
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, true); _set_texture_rect_mode(true);
if (state.canvas_shader.bind()) { if (state.canvas_shader.bind()) {
_set_uniforms(); _set_uniforms();
@ -1104,7 +890,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
Item::CommandNinePatch *np = static_cast<Item::CommandNinePatch *>(command); Item::CommandNinePatch *np = static_cast<Item::CommandNinePatch *>(command);
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); _set_texture_rect_mode(false);
if (state.canvas_shader.bind()) { if (state.canvas_shader.bind()) {
_set_uniforms(); _set_uniforms();
state.canvas_shader.use_material((void *)p_material); state.canvas_shader.use_material((void *)p_material);
@ -1280,7 +1066,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
Item::CommandCircle *circle = static_cast<Item::CommandCircle *>(command); Item::CommandCircle *circle = static_cast<Item::CommandCircle *>(command);
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); _set_texture_rect_mode(false);
if (state.canvas_shader.bind()) { if (state.canvas_shader.bind()) {
_set_uniforms(); _set_uniforms();
@ -1310,7 +1096,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
Item::CommandPolygon *polygon = static_cast<Item::CommandPolygon *>(command); Item::CommandPolygon *polygon = static_cast<Item::CommandPolygon *>(command);
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); _set_texture_rect_mode(false);
if (state.canvas_shader.bind()) { if (state.canvas_shader.bind()) {
_set_uniforms(); _set_uniforms();
@ -1340,7 +1126,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
case Item::Command::TYPE_MESH: { case Item::Command::TYPE_MESH: {
Item::CommandMesh *mesh = static_cast<Item::CommandMesh *>(command); Item::CommandMesh *mesh = static_cast<Item::CommandMesh *>(command);
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); _set_texture_rect_mode(false);
if (state.canvas_shader.bind()) { if (state.canvas_shader.bind()) {
_set_uniforms(); _set_uniforms();
@ -1416,7 +1202,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCE_CUSTOM, multi_mesh->custom_data_format != VS::MULTIMESH_CUSTOM_DATA_NONE); state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCE_CUSTOM, multi_mesh->custom_data_format != VS::MULTIMESH_CUSTOM_DATA_NONE);
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCING, true); state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCING, true);
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); _set_texture_rect_mode(false);
if (state.canvas_shader.bind()) { if (state.canvas_shader.bind()) {
_set_uniforms(); _set_uniforms();
@ -1520,7 +1306,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
} }
} }
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCE_CUSTOM, false); _set_texture_rect_mode(false);
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCING, false); state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCING, false);
storage->info.render._2d_draw_call_count++; storage->info.render._2d_draw_call_count++;
@ -1580,7 +1366,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
case Item::Command::TYPE_PRIMITIVE: { case Item::Command::TYPE_PRIMITIVE: {
Item::CommandPrimitive *primitive = static_cast<Item::CommandPrimitive *>(command); Item::CommandPrimitive *primitive = static_cast<Item::CommandPrimitive *>(command);
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); _set_texture_rect_mode(false);
if (state.canvas_shader.bind()) { if (state.canvas_shader.bind()) {
_set_uniforms(); _set_uniforms();
@ -1732,23 +1518,28 @@ void RasterizerCanvasGLES2::flush_render_batches(Item *p_first_item, Item *p_cur
// .. however probably not necessary // .. however probably not necessary
bdata.use_colored_vertices = false; bdata.use_colored_vertices = false;
// only check whether to convert if there are quads (prevent divide by zero) if (bdata.use_light_angles) {
// and we haven't decided to prevent color baking (due to e.g. MODULATE _translate_batches_to_larger_FVF<BatchVertexLightAngled, true>();
// being used in a shader) } else {
if (bdata.total_quads && !(bdata.joined_item_batch_flags & RasterizerStorageGLES2::Shader::CanvasItem::PREVENT_COLOR_BAKING)) { // only check whether to convert if there are quads (prevent divide by zero)
// minus 1 to prevent single primitives (ratio 1.0) always being converted to colored.. // and we haven't decided to prevent color baking (due to e.g. MODULATE
// in that case it is slightly cheaper to just have the color as part of the batch // being used in a shader)
float ratio = (float)(bdata.total_color_changes - 1) / (float)bdata.total_quads; if (bdata.total_quads && !(bdata.joined_item_batch_flags & RasterizerStorageGLES2::Shader::CanvasItem::PREVENT_COLOR_BAKING)) {
// minus 1 to prevent single primitives (ratio 1.0) always being converted to colored..
// in that case it is slightly cheaper to just have the color as part of the batch
float ratio = (float)(bdata.total_color_changes - 1) / (float)bdata.total_quads;
// use bigger than or equal so that 0.0 threshold can force always using colored verts // use bigger than or equal so that 0.0 threshold can force always using colored verts
if (ratio >= bdata.settings_colored_vertex_format_threshold) { if (ratio >= bdata.settings_colored_vertex_format_threshold) {
bdata.use_colored_vertices = true; bdata.use_colored_vertices = true;
// small perf cost versus going straight to colored verts (maybe around 10%) // small perf cost versus going straight to colored verts (maybe around 10%)
// however more straightforward // however more straightforward
_batch_translate_to_colored(); _translate_batches_to_larger_FVF<BatchVertexColored, false>();
//_batch_translate_to_colored();
}
} }
} } // if not using light angles
// send buffers to opengl // send buffers to opengl
_batch_upload_buffers(); _batch_upload_buffers();
@ -3517,9 +3308,12 @@ void RasterizerCanvasGLES2::initialize() {
bdata.vertex_buffer_size_bytes = bdata.vertex_buffer_size_units * sizeof_batch_vert; bdata.vertex_buffer_size_bytes = bdata.vertex_buffer_size_units * sizeof_batch_vert;
bdata.index_buffer_size_bytes = bdata.index_buffer_size_units * 2; // 16 bit inds bdata.index_buffer_size_bytes = bdata.index_buffer_size_units * 2; // 16 bit inds
// create equal number of norma and colored verts (as the normal may need to be translated to colored) // create equal number of normal and (max) unit sized verts (as the normal may need to be translated to a larger FVF)
bdata.vertices.create(bdata.vertex_buffer_size_units); // 512k bdata.vertices.create(bdata.vertex_buffer_size_units); // 512k
bdata.vertices_colored.create(bdata.vertices.max_size()); // 1024k bdata.unit_vertices.create(bdata.vertices.max_size(), sizeof(BatchVertexLightAngled));
// extra data per vert needed for larger FVFs
bdata.light_angles.create(bdata.vertices.max_size());
// num batches will be auto increased dynamically if required // num batches will be auto increased dynamically if required
bdata.batches.create(1024); bdata.batches.create(1024);

View file

@ -89,6 +89,11 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 {
BatchColor col; BatchColor col;
}; };
// Largest batch vertex format: a colored batch vertex with one extra float
// holding the per-vertex light angle used for normal mapped rects.
// The shared unit vertex array is created with sizeof(BatchVertexLightAngled)
// as its maximum unit size, so this struct must stay the biggest vertex format.
struct BatchVertexLightAngled : public BatchVertexColored {
// must be pod
// Encoded light angle as produced by prefill_rect: the rotation moved into the
// 0 to TWO_PI range, plus 1.0, and negated when a vertical flip is required.
float light_angle;
};
struct Batch { struct Batch {
enum CommandType : uint32_t { enum CommandType : uint32_t {
BT_DEFAULT, BT_DEFAULT,
@ -167,10 +172,13 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 {
void reset_flush() { void reset_flush() {
batches.reset(); batches.reset();
batch_textures.reset(); batch_textures.reset();
vertices.reset(); vertices.reset();
light_angles.reset();
total_quads = 0; total_quads = 0;
total_color_changes = 0; total_color_changes = 0;
use_light_angles = false;
} }
GLuint gl_vertex_buffer; GLuint gl_vertex_buffer;
@ -182,13 +190,28 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 {
uint32_t index_buffer_size_units; uint32_t index_buffer_size_units;
uint32_t index_buffer_size_bytes; uint32_t index_buffer_size_bytes;
// small vertex FVF type - pos and UV.
// This will always be written to initially, but can be translated
// to larger FVFs if necessary.
RasterizerArrayGLES2<BatchVertex> vertices; RasterizerArrayGLES2<BatchVertex> vertices;
RasterizerArrayGLES2<BatchVertexColored> vertices_colored;
// extra data which can be stored during prefilling, for later translation to larger FVFs
RasterizerArrayGLES2<float> light_angles;
// instead of having a different buffer for each vertex FVF type
// we have a special array big enough for the biggest FVF
// which can have a changeable unit size, and reuse it.
RasterizerUnitArrayGLES2 unit_vertices;
RasterizerArrayGLES2<Batch> batches; RasterizerArrayGLES2<Batch> batches;
RasterizerArrayGLES2<Batch> batches_temp; // used for translating to colored vertex batches RasterizerArrayGLES2<Batch> batches_temp; // used for translating to colored vertex batches
RasterizerArray_non_pod_GLES2<BatchTex> batch_textures; // the only reason this is non-POD is because of RIDs RasterizerArray_non_pod_GLES2<BatchTex> batch_textures; // the only reason this is non-POD is because of RIDs
// flexible vertex format.
// all verts have pos and UV.
// some have color, some light angles etc.
bool use_colored_vertices; bool use_colored_vertices;
bool use_light_angles;
RasterizerArrayGLES2<BItemJoined> items_joined; RasterizerArrayGLES2<BItemJoined> items_joined;
RasterizerArrayGLES2<BItemRef> item_refs; RasterizerArrayGLES2<BItemRef> item_refs;
@ -321,11 +344,12 @@ private:
bool try_join_item(Item *p_ci, RenderItemState &r_ris, bool &r_batch_break); bool try_join_item(Item *p_ci, RenderItemState &r_ris, bool &r_batch_break);
void render_joined_item_commands(const BItemJoined &p_bij, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material, bool p_lit); void render_joined_item_commands(const BItemJoined &p_bij, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material, bool p_lit);
void render_batches(Item::Command *const *p_commands, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material); void render_batches(Item::Command *const *p_commands, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material);
bool prefill_joined_item(FillState &r_fill_state, int &r_command_start, Item *p_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material); bool prefill_joined_item(FillState &r_fill_state, int &r_command_start, Item *p_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material);
void flush_render_batches(Item *p_first_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material); void flush_render_batches(Item *p_first_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material);
// low level batch funcs // low level batch funcs
void _batch_translate_to_colored();
int _batch_find_or_create_tex(const RID &p_texture, const RID &p_normal, bool p_tile, int p_previous_match); int _batch_find_or_create_tex(const RID &p_texture, const RID &p_normal, bool p_tile, int p_previous_match);
RasterizerStorageGLES2::Texture *_get_canvas_texture(const RID &p_texture) const; RasterizerStorageGLES2::Texture *_get_canvas_texture(const RID &p_texture) const;
void _batch_upload_buffers(); void _batch_upload_buffers();
@ -358,6 +382,13 @@ private:
public: public:
void initialize(); void initialize();
RasterizerCanvasGLES2(); RasterizerCanvasGLES2();
private:
template <bool SEND_LIGHT_ANGLES>
bool prefill_rect(Item::CommandRect *rect, FillState &r_fill_state, int &r_command_start, int command_num, int command_count, Item::Command *const *commands, Item *p_item, bool multiply_final_modulate);
template <class BATCH_VERTEX_TYPE, bool INCLUDE_LIGHT_ANGLES>
void _translate_batches_to_larger_FVF();
}; };
////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////
@ -492,4 +523,407 @@ inline bool RasterizerCanvasGLES2::_sort_items_match(const BSortItem &p_a, const
return true; return true;
} }
//////////////////////////////////////////////////////////////
// TEMPLATE FUNCS
// Translation always involves adding color to the FVF, which enables
// joining of batches that have different colors.
// There is a trade off. Non colored verts are smaller so work faster, but
// there comes a point where it is better to just use colored verts to avoid lots of
// batches.
// In addition this can optionally add light angles to the FVF, necessary for normal mapping.
// Rebuilds the batch list and vertex data using a larger vertex format.
//
// BATCH_VERTEX_TYPE    - destination vertex type written into bdata.unit_vertices.
//                        It must provide pos, uv and col members.
// INCLUDE_LIGHT_ANGLES - when true the destination vertex is treated as a
//                        BatchVertexLightAngled and its light_angle is filled from
//                        bdata.light_angles (or 0.0 for batches without a normal map).
//
// Source batches are read from bdata.batches / bdata.vertices. Consecutive rect
// batches sharing the same batch texture are merged, because the batch color is now
// stored per vertex. The result is built in bdata.batches_temp and copied back over
// bdata.batches at the end.
template <class BATCH_VERTEX_TYPE, bool INCLUDE_LIGHT_ANGLES>
void RasterizerCanvasGLES2::_translate_batches_to_larger_FVF() {
	// zeros the size and sets up how big each unit is
	bdata.unit_vertices.prepare(sizeof(BATCH_VERTEX_TYPE));
	bdata.batches_temp.reset();

	// As unit_vertices and batches_temp are 'mirrors' of the small vertex format version,
	// the sizes should be equal, and allocations should never fail. Hence the use of debug
	// asserts to check program flow, these should not occur at runtime unless the allocation
	// code has been altered.
#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
	CRASH_COND(bdata.unit_vertices.max_size() != bdata.vertices.max_size());
	CRASH_COND(bdata.batches_temp.max_size() != bdata.batches.max_size());
#endif

	Batch *dest_batch = nullptr;

	// light angles are only stored for batches that use them, so they are
	// consumed sequentially and synced up with the vertices here
	const float *source_light_angles = &bdata.light_angles[0];

	// translate the batches into the larger vertex format
	for (int n = 0; n < bdata.batches.size(); n++) {
		const Batch &source_batch = bdata.batches[n];

		// does source batch use light angles?
		const BatchTex &btex = bdata.batch_textures[source_batch.batch_texture_id];
		const bool source_batch_uses_light_angles = btex.RID_normal != RID();

		bool needs_new_batch = true;
		bool translate_verts = false;

		if (dest_batch && (dest_batch->type == source_batch.type)) {
			if (source_batch.type == Batch::BT_RECT) {
				if (dest_batch->batch_texture_id == source_batch.batch_texture_id) {
					// add to previous batch
					dest_batch->num_commands += source_batch.num_commands;
					needs_new_batch = false;
					translate_verts = true;
				} // textures match
			} else {
				// default
				// we can still join, but only under special circumstances
				// does this ever happen? not sure at this stage, but left for future expansion
				uint32_t source_last_command = source_batch.first_command + source_batch.num_commands;
				if (source_last_command == dest_batch->first_command) {
					dest_batch->num_commands += source_batch.num_commands;
					needs_new_batch = false;
				} // if the commands line up exactly
			}
		} // if dest batch is valid and both batches are the same type

		if (needs_new_batch) {
			dest_batch = bdata.batches_temp.request();
#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
			CRASH_COND(!dest_batch);
#endif

			*dest_batch = source_batch;

			// create the translated verts (only if not default)
			translate_verts = source_batch.type != Batch::BT_DEFAULT;
		}

		if (!translate_verts) {
			continue;
		}

		// each command of the source batch is a quad of 4 vertices
		int first_vert = source_batch.first_quad * 4;
		int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands);

		for (int v = first_vert; v < end_vert; v++) {
			const BatchVertex &bv = bdata.vertices[v];

			// the unit array was prepared with sizeof(BATCH_VERTEX_TYPE),
			// so each unit is exactly one destination vertex
			BATCH_VERTEX_TYPE *cv = (BATCH_VERTEX_TYPE *)bdata.unit_vertices.request();
#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
			CRASH_COND(!cv);
#endif
			cv->pos = bv.pos;
			cv->uv = bv.uv;
			cv->col = source_batch.color;

			if (INCLUDE_LIGHT_ANGLES) {
				// this cast is required to allow compilation with non light angle vertex.
				// it should be compiled out.
				BatchVertexLightAngled *lv = (BatchVertexLightAngled *)cv;
				if (source_batch_uses_light_angles) {
					lv->light_angle = *source_light_angles++;
				} else {
					lv->light_angle = 0.0f; // dummy, unused in vertex shader (could possibly be left uninitialized, but probably bad idea)
				}
			} // if using light angles
		}
	}

	// copy the temporary batches to the master batch list (this could be avoided but it makes the code cleaner)
	bdata.batches.copy_from(bdata.batches_temp);
}
// Adds a single rect command to the batching data as a quad (4 vertices),
// opening a new batch when the batch type, texture combination or color changes.
//
// SEND_LIGHT_ANGLES - when true, an encoded light angle is also stored for each of
//                     the 4 vertices (in bdata.light_angles) and bdata.use_light_angles is set.
//
// Returns true if the vertex buffer is full. In that case r_command_start is set to
// command_num so the caller can draw what has been batched so far and resume from
// this command. Returns false otherwise, including when the rect is handed to the
// default (non-batched) path by the single rect fallback.
template <bool SEND_LIGHT_ANGLES>
bool RasterizerCanvasGLES2::prefill_rect(Item::CommandRect *rect, FillState &r_fill_state, int &r_command_start, int command_num, int command_count, Item::Command *const *commands, Item *p_item, bool multiply_final_modulate) {
	bool change_batch = false;

	// conditions for creating a new batch
	if (r_fill_state.curr_batch->type != Batch::BT_RECT) {
		change_batch = true;

		// check for special case if there is only a single or small number of rects,
		// in which case we will use the legacy default rect renderer
		// because it is faster for single rects

		// we only want to do this if not a joined item with more than 1 item,
		// because joined items with more than 1, the command * will be incorrect
		// NOTE - this is assuming that use_hardware_transform means that it is a non-joined item!!
		// If that assumption is incorrect this will go horribly wrong.
		if (bdata.settings_use_single_rect_fallback && r_fill_state.use_hardware_transform) {
			bool is_single_rect = false;
			int command_num_next = command_num + 1;
			if (command_num_next < command_count) {
				Item::Command *command_next = commands[command_num_next];
				if ((command_next->type != Item::Command::TYPE_RECT) && (command_next->type != Item::Command::TYPE_TRANSFORM)) {
					is_single_rect = true;
				}
			} else {
				is_single_rect = true;
			}
			// if it is a rect on its own, do exactly the same as the default routine
			if (is_single_rect) {
				_prefill_default_batch(r_fill_state, command_num, *p_item);
				return false;
			}
		} // if use hardware transform
	}

	Color col = rect->modulate;
	if (multiply_final_modulate) {
		col *= r_fill_state.final_modulate;
	}

	// instead of doing all the texture preparation for EVERY rect,
	// we build a list of texture combinations and do this once off.
	// This means we have a potentially rather slow step to identify which texture combo
	// using the RIDs.
	int old_batch_tex_id = r_fill_state.batch_tex_id;
	r_fill_state.batch_tex_id = _batch_find_or_create_tex(rect->texture, rect->normal_map, rect->flags & CANVAS_RECT_TILE, old_batch_tex_id);

	if (SEND_LIGHT_ANGLES) {
		bdata.use_light_angles = true;
	}

	// try to create vertices BEFORE creating a batch,
	// because if the vertex buffer is full, we need to finish this
	// function, draw what we have so far, and then start a new set of batches

	// request FOUR vertices at a time, this is more efficient
	BatchVertex *bvs = bdata.vertices.request(4);
	if (!bvs) {
		// run out of space in the vertex buffer .. finish this function and draw what we have so far
		// return where we got to
		r_command_start = command_num;
		return true;
	}

	// conditions for creating a new batch
	if (old_batch_tex_id != r_fill_state.batch_tex_id) {
		change_batch = true;
	}

	// we need to treat color change separately because we need to count these
	// to decide whether to switch on the fly to colored vertices.
	if (!r_fill_state.curr_batch->color.equals(col)) {
		change_batch = true;
		bdata.total_color_changes++;
	}

	if (change_batch) {
		// put the tex pixel size in the fill state (less verbose and can be a register).
		// It stays in r_fill_state, so it is preserved between items that share a batch.
		const BatchTex &batchtex = bdata.batch_textures[r_fill_state.batch_tex_id];
		batchtex.tex_pixel_size.to(r_fill_state.texpixel_size);

		if (bdata.settings_uv_contract) {
			r_fill_state.contract_uvs = (batchtex.flags & VS::TEXTURE_FLAG_FILTER) == 0;
		}

		// open new batch (this should never fail, it dynamically grows)
		r_fill_state.curr_batch = _batch_request_new(false);

		r_fill_state.curr_batch->type = Batch::BT_RECT;
		r_fill_state.curr_batch->color.set(col);
		r_fill_state.curr_batch->batch_texture_id = r_fill_state.batch_tex_id;
		r_fill_state.curr_batch->first_command = command_num;
		r_fill_state.curr_batch->num_commands = 1;
		r_fill_state.curr_batch->first_quad = bdata.total_quads;
	} else {
		// we could alternatively do the count when closing a batch .. perhaps more efficient
		r_fill_state.curr_batch->num_commands++;
	}

	// fill the quad geometry
	Vector2 mins = rect->rect.position;

	if (r_fill_state.transform_mode == TM_TRANSLATE) {
		_software_transform_vertex(mins, r_fill_state.transform_combined);
	}

	Vector2 maxs = mins + rect->rect.size;

	// just aliases
	BatchVertex *bA = &bvs[0];
	BatchVertex *bB = &bvs[1];
	BatchVertex *bC = &bvs[2];
	BatchVertex *bD = &bvs[3];

	bA->pos.x = mins.x;
	bA->pos.y = mins.y;

	bB->pos.x = maxs.x;
	bB->pos.y = mins.y;

	bC->pos.x = maxs.x;
	bC->pos.y = maxs.y;

	bD->pos.x = mins.x;
	bD->pos.y = maxs.y;

	// possibility of applying flips here for normal mapping .. but they don't seem to be used
	if (rect->rect.size.x < 0) {
		SWAP(bA->pos, bB->pos);
		SWAP(bC->pos, bD->pos);
	}
	if (rect->rect.size.y < 0) {
		SWAP(bA->pos, bD->pos);
		SWAP(bB->pos, bC->pos);
	}

	if (r_fill_state.transform_mode == TM_ALL) {
		_software_transform_vertex(bA->pos, r_fill_state.transform_combined);
		_software_transform_vertex(bB->pos, r_fill_state.transform_combined);
		_software_transform_vertex(bC->pos, r_fill_state.transform_combined);
		_software_transform_vertex(bD->pos, r_fill_state.transform_combined);
	}

	// uvs
	Vector2 src_min;
	Vector2 src_max;
	if (rect->flags & CANVAS_RECT_REGION) {
		src_min = rect->source.position;
		src_max = src_min + rect->source.size;

		src_min *= r_fill_state.texpixel_size;
		src_max *= r_fill_state.texpixel_size;

		const float uv_epsilon = bdata.settings_uv_contract_amount;

		// nudge offset for the maximum to prevent precision error on GPU reading into line outside the source rect
		// this is very difficult to get right.
		if (r_fill_state.contract_uvs) {
			src_min.x += uv_epsilon;
			src_min.y += uv_epsilon;
			src_max.x -= uv_epsilon;
			src_max.y -= uv_epsilon;
		}
	} else {
		src_min = Vector2(0, 0);
		src_max = Vector2(1, 1);
	}

	// 10% faster calculating the max first
	Vector2 uvs[4] = {
		src_min,
		Vector2(src_max.x, src_min.y),
		src_max,
		Vector2(src_min.x, src_max.y),
	};

	// for encoding in light angle
	// flips should be optimized out when not being used for light angle.
	bool flip_h = false;
	bool flip_v = false;

	if (rect->flags & CANVAS_RECT_TRANSPOSE) {
		SWAP(uvs[1], uvs[3]);
	}

	if (rect->flags & CANVAS_RECT_FLIP_H) {
		SWAP(uvs[0], uvs[1]);
		SWAP(uvs[2], uvs[3]);
		// a horizontal flip is encoded as a 180 degree shift of the angle (flip_h)
		// combined with a vertical flip
		flip_h = !flip_h;
		flip_v = !flip_v;
	}
	if (rect->flags & CANVAS_RECT_FLIP_V) {
		SWAP(uvs[0], uvs[3]);
		SWAP(uvs[1], uvs[2]);
		flip_v = !flip_v;
	}

	bA->uv.set(uvs[0]);
	bB->uv.set(uvs[1]);
	bC->uv.set(uvs[2]);
	bD->uv.set(uvs[3]);

	if (SEND_LIGHT_ANGLES) {
		// we can either keep the light angles in sync with the verts when writing,
		// or sync them up during translation. We are syncing in translation.
		// N.B. There may be batches that don't require light_angles between batches that do.
		float *angles = bdata.light_angles.request(4);
#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
		CRASH_COND(angles == nullptr);
#endif

		float angle = 0.0f;
		const float TWO_PI = Math_PI * 2;

		if (r_fill_state.transform_mode != TM_NONE) {
			const Transform2D &tr = r_fill_state.transform_combined;

			// apply to an x axis
			// the x axis and y axis can be taken directly from the transform (no need to xform identity vectors).
			// Transform2D stores its basis vectors as elements[0] (x axis) and elements[1] (y axis),
			// so the second index selects the component of that axis.
			Vector2 x_axis(tr.elements[0][0], tr.elements[0][1]);

			// have to do a y axis to check for scaling flips
			// this is hassle and extra slowness. We could only allow flips via the flags.
			Vector2 y_axis(tr.elements[1][0], tr.elements[1][1]);

			// has the x / y axis flipped due to scaling?
			float cross = x_axis.cross(y_axis);
			if (cross < 0.0f) {
				flip_v = !flip_v;
			}

			// passing an angle is smaller than a vector, it can be reconstructed in the shader
			angle = x_axis.angle();

			// we don't want negative angles, as negative is used to encode flips.
			// This moves range from -PI to PI to 0 to TWO_PI
			if (angle < 0.0f) {
				angle += TWO_PI;
			}
		} // if transform needed

		// if horizontal flip, angle is shifted by 180 degrees
		if (flip_h) {
			angle += Math_PI;

			// mod to get back to 0 to TWO_PI range
			angle = fmodf(angle, TWO_PI);
		}

		// add 1 (to take care of zero floating point error with sign)
		angle += 1.0f;

		// flip if necessary to indicate a vertical flip in the shader
		if (flip_v) {
			angle *= -1.0f;
		}

		// light angle must be sent for each vert, instead as a single uniform in the uniform draw method
		// this has the benefit of enabling batching with light angles.
		for (int n = 0; n < 4; n++) {
			angles[n] = angle;
		}
	}

	// increment quad count
	bdata.total_quads++;

	return false;
}
#endif // RASTERIZERCANVASGLES2_H #endif // RASTERIZERCANVASGLES2_H

View file

@ -407,7 +407,7 @@ void RasterizerGLES2::blit_render_target_to_screen(RID p_render_target, const Re
RasterizerStorageGLES2::RenderTarget *rt = storage->render_target_owner.getornull(p_render_target); RasterizerStorageGLES2::RenderTarget *rt = storage->render_target_owner.getornull(p_render_target);
ERR_FAIL_COND(!rt); ERR_FAIL_COND(!rt);
canvas->state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, true); canvas->_set_texture_rect_mode(true);
canvas->state.canvas_shader.set_custom_shader(0); canvas->state.canvas_shader.set_custom_shader(0);
canvas->state.canvas_shader.bind(); canvas->state.canvas_shader.bind();

View file

@ -18,6 +18,12 @@ uniform highp mat4 projection_matrix;
uniform highp mat4 modelview_matrix; uniform highp mat4 modelview_matrix;
uniform highp mat4 extra_matrix; uniform highp mat4 extra_matrix;
attribute highp vec2 vertex; // attrib:0 attribute highp vec2 vertex; // attrib:0
#ifdef USE_LIGHT_ANGLE
// shared with tangent, not used in canvas shader
attribute highp float light_angle; // attrib:2
#endif
attribute vec4 color_attrib; // attrib:3 attribute vec4 color_attrib; // attrib:3
attribute vec2 uv_attrib; // attrib:4 attribute vec2 uv_attrib; // attrib:4
@ -219,12 +225,34 @@ VERTEX_SHADER_CODE
pos = outvec.xy; pos = outvec.xy;
#endif #endif
#ifdef USE_LIGHT_ANGLE
// we add a fixed offset because we are using the sign later,
// and don't want floating point error around 0.0
float la = abs(light_angle) - 1.0;
// vector light angle
vec4 vla;
vla.xy = vec2(cos(la), sin(la));
vla.zw = vec2(-vla.y, vla.x);
// vertical flip encoded in the sign
vla.zw *= sign(light_angle);
// apply the transform matrix.
// The rotate will be encoded in the transform matrix for single rects,
// and just the flips in the light angle.
// For batching we will encode the rotation and the flips
// in the light angle, and can use the same shader.
local_rot.xy = normalize((modelview_matrix * (extra_matrix_instance * vec4(vla.xy, 0.0, 0.0))).xy);
local_rot.zw = normalize((modelview_matrix * (extra_matrix_instance * vec4(vla.zw, 0.0, 0.0))).xy);
#else
local_rot.xy = normalize((modelview_matrix * (extra_matrix_instance * vec4(1.0, 0.0, 0.0, 0.0))).xy); local_rot.xy = normalize((modelview_matrix * (extra_matrix_instance * vec4(1.0, 0.0, 0.0, 0.0))).xy);
local_rot.zw = normalize((modelview_matrix * (extra_matrix_instance * vec4(0.0, 1.0, 0.0, 0.0))).xy); local_rot.zw = normalize((modelview_matrix * (extra_matrix_instance * vec4(0.0, 1.0, 0.0, 0.0))).xy);
#ifdef USE_TEXTURE_RECT #ifdef USE_TEXTURE_RECT
local_rot.xy *= sign(src_rect.z); local_rot.xy *= sign(src_rect.z);
local_rot.zw *= sign(src_rect.w); local_rot.zw *= sign(src_rect.w);
#endif #endif
#endif // not using light angle
#endif #endif
} }