GLES2 2D fix normal mapping - batching and nvidia workaround
Normal mapping previously took no account of rotation or flips in any path except the TEXTURE_RECT (uniform draw) method, which passed the flips to the shader in uniforms. In order to pass flips and rotations to the shader in the batching and nvidia workaround paths, a per-vertex attribute is required rather than a uniform. This commit introduces LIGHT_ANGLE, which encodes both the rotation of a quad (vertex) and the horizontal and vertical flips. In order to optionally store light angles in batching, we switch to using a 'unit' sized array which can be reused for different FVF types; a separate array for each FVF is unnecessary and would be a waste of memory.
This commit is contained in:
parent
212744e7a5
commit
ecd39094ed
7 changed files with 691 additions and 327 deletions
|
@ -71,6 +71,75 @@
|
|||
|
||||
#include <string.h>
|
||||
|
||||
// very simple non-growable array, that keeps track of the size of a 'unit'
|
||||
// which can be cast to whatever vertex format FVF required, and is initially
|
||||
// created with enough memory to hold the biggest FVF.
|
||||
// This allows multiple FVFs to use the same array.
|
||||
class RasterizerUnitArrayGLES2 {
|
||||
public:
|
||||
RasterizerUnitArrayGLES2() {
|
||||
_list = nullptr;
|
||||
free();
|
||||
}
|
||||
~RasterizerUnitArrayGLES2() { free(); }
|
||||
|
||||
uint8_t *get_unit(unsigned int ui) { return &_list[ui * _unit_size_bytes]; }
|
||||
const uint8_t *get_unit(unsigned int ui) const { return &_list[ui * _unit_size_bytes]; }
|
||||
|
||||
int size() const { return _size; }
|
||||
int max_size() const { return _max_size; }
|
||||
|
||||
void free() {
|
||||
if (_list) {
|
||||
memdelete_arr(_list);
|
||||
_list = 0;
|
||||
}
|
||||
_size = 0;
|
||||
_max_size = 0;
|
||||
_max_size_bytes = 0;
|
||||
_unit_size_bytes = 0;
|
||||
}
|
||||
|
||||
void create(int p_max_size_units, int p_max_unit_size_bytes) {
|
||||
free();
|
||||
|
||||
_max_unit_size_bytes = p_max_unit_size_bytes;
|
||||
_max_size = p_max_size_units;
|
||||
_max_size_bytes = p_max_size_units * p_max_unit_size_bytes;
|
||||
|
||||
if (_max_size_bytes) {
|
||||
_list = memnew_arr(uint8_t, _max_size_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
void prepare(int p_unit_size_bytes) {
|
||||
_unit_size_bytes = p_unit_size_bytes;
|
||||
_size = 0;
|
||||
}
|
||||
|
||||
// several items at a time
|
||||
uint8_t *request(int p_num_items = 1) {
|
||||
int old_size = _size;
|
||||
_size += p_num_items;
|
||||
|
||||
if (_size <= _max_size) {
|
||||
return get_unit(old_size);
|
||||
}
|
||||
|
||||
// revert
|
||||
_size = old_size;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
uint8_t *_list;
|
||||
int _size; // in units
|
||||
int _max_size; // in units
|
||||
int _max_size_bytes;
|
||||
int _unit_size_bytes;
|
||||
int _max_unit_size_bytes;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
class RasterizerArrayGLES2 {
|
||||
public:
|
||||
|
|
|
@ -52,8 +52,13 @@ void RasterizerCanvasBaseGLES2::light_internal_free(RID p_rid) {
|
|||
|
||||
void RasterizerCanvasBaseGLES2::canvas_begin() {
|
||||
|
||||
state.canvas_shader.bind();
|
||||
state.using_transparent_rt = false;
|
||||
|
||||
// always start with light_angle unset
|
||||
state.using_light_angle = false;
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_LIGHT_ANGLE, false);
|
||||
state.canvas_shader.bind();
|
||||
|
||||
int viewport_x, viewport_y, viewport_width, viewport_height;
|
||||
|
||||
if (storage->frame.current_rt) {
|
||||
|
@ -155,6 +160,16 @@ void RasterizerCanvasBaseGLES2::draw_generic_textured_rect(const Rect2 &p_rect,
|
|||
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
|
||||
}
|
||||
|
||||
// Selects the shader variant for the upcoming draw: texture-rect (uniform
// draw) mode on or off, and whether the per vertex light angle is used.
void RasterizerCanvasBaseGLES2::_set_texture_rect_mode(bool p_texture_rect, bool p_light_angle) {
	// the texture rect conditional is written every time
	// (this could be state checked)
	state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, p_texture_rect);

	// light angle is state checked, nothing more to do if it is already as requested
	if (state.using_light_angle == p_light_angle) {
		return;
	}

	state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_LIGHT_ANGLE, p_light_angle);
	state.using_light_angle = p_light_angle;
}
|
||||
|
||||
RasterizerStorageGLES2::Texture *RasterizerCanvasBaseGLES2::_bind_canvas_texture(const RID &p_texture, const RID &p_normal_map) {
|
||||
|
||||
RasterizerStorageGLES2::Texture *tex_return = NULL;
|
||||
|
@ -595,12 +610,13 @@ void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const
|
|||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
|
||||
}
|
||||
|
||||
void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs) {
|
||||
void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs, const float *p_light_angles) {
|
||||
|
||||
static const GLenum prim[5] = { GL_POINTS, GL_POINTS, GL_LINES, GL_TRIANGLES, GL_TRIANGLE_FAN };
|
||||
|
||||
int color_offset = 0;
|
||||
int uv_offset = 0;
|
||||
int light_angle_offset = 0;
|
||||
int stride = 2;
|
||||
|
||||
if (p_colors) {
|
||||
|
@ -613,7 +629,12 @@ void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2
|
|||
stride += 2;
|
||||
}
|
||||
|
||||
float buffer_data[(2 + 2 + 4) * 4];
|
||||
if (p_light_angles) { //light_angles
|
||||
light_angle_offset = stride;
|
||||
stride += 1;
|
||||
}
|
||||
|
||||
float buffer_data[(2 + 2 + 4 + 1) * 4];
|
||||
|
||||
for (int i = 0; i < p_points; i++) {
|
||||
buffer_data[stride * i + 0] = p_vertices[i].x;
|
||||
|
@ -636,6 +657,12 @@ void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2
|
|||
}
|
||||
}
|
||||
|
||||
if (p_light_angles) {
|
||||
for (int i = 0; i < p_points; i++) {
|
||||
buffer_data[stride * i + light_angle_offset + 0] = p_light_angles[i];
|
||||
}
|
||||
}
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer);
|
||||
#ifndef GLES_OVER_GL
|
||||
// Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData
|
||||
|
@ -655,9 +682,19 @@ void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2
|
|||
glEnableVertexAttribArray(VS::ARRAY_TEX_UV);
|
||||
}
|
||||
|
||||
if (p_light_angles) {
|
||||
glVertexAttribPointer(VS::ARRAY_TANGENT, 1, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(light_angle_offset * sizeof(float)));
|
||||
glEnableVertexAttribArray(VS::ARRAY_TANGENT);
|
||||
}
|
||||
|
||||
glDrawArrays(prim[p_points], 0, p_points);
|
||||
storage->info.render._2d_draw_call_count++;
|
||||
|
||||
if (p_light_angles) {
|
||||
// may not be needed
|
||||
glDisableVertexAttribArray(VS::ARRAY_TANGENT);
|
||||
}
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
}
|
||||
|
||||
|
@ -993,7 +1030,7 @@ void RasterizerCanvasBaseGLES2::initialize() {
|
|||
|
||||
state.canvas_shader.init();
|
||||
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, true);
|
||||
_set_texture_rect_mode(true);
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_RGBA_SHADOWS, storage->config.use_rgba_2d_shadows);
|
||||
|
||||
state.canvas_shader.bind();
|
||||
|
|
|
@ -77,6 +77,7 @@ public:
|
|||
LensDistortedShaderGLES2 lens_shader;
|
||||
|
||||
bool using_texture_rect;
|
||||
bool using_light_angle;
|
||||
bool using_ninepatch;
|
||||
bool using_skeleton;
|
||||
|
||||
|
@ -112,7 +113,7 @@ public:
|
|||
virtual void canvas_begin();
|
||||
virtual void canvas_end();
|
||||
|
||||
void _draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs);
|
||||
void _draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs, const float *p_light_angles = nullptr);
|
||||
void _draw_polygon(const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor, const float *p_weights = NULL, const int *p_bones = NULL);
|
||||
void _draw_generic(GLuint p_primitive, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor);
|
||||
void _draw_generic_indices(GLuint p_primitive, const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor);
|
||||
|
@ -130,6 +131,7 @@ public:
|
|||
virtual void canvas_debug_viewport_shadows(Light *p_lights_with_shadow);
|
||||
|
||||
RasterizerStorageGLES2::Texture *_bind_canvas_texture(const RID &p_texture, const RID &p_normal_map);
|
||||
void _set_texture_rect_mode(bool p_texture_rect, bool p_light_angle = false);
|
||||
|
||||
void initialize();
|
||||
void finalize();
|
||||
|
|
|
@ -55,6 +55,7 @@ RasterizerCanvasGLES2::BatchData::BatchData() {
|
|||
index_buffer_size_units = 0;
|
||||
index_buffer_size_bytes = 0;
|
||||
use_colored_vertices = false;
|
||||
use_light_angles = false;
|
||||
settings_use_batching = false;
|
||||
settings_max_join_item_commands = 0;
|
||||
settings_colored_vertex_format_threshold = 0.0f;
|
||||
|
@ -212,10 +213,14 @@ void RasterizerCanvasGLES2::_batch_upload_buffers() {
|
|||
// orphan the old (for now)
|
||||
glBufferData(GL_ARRAY_BUFFER, 0, 0, GL_DYNAMIC_DRAW);
|
||||
|
||||
if (!bdata.use_colored_vertices) {
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), GL_DYNAMIC_DRAW);
|
||||
if (!bdata.use_light_angles) {
|
||||
if (!bdata.use_colored_vertices) {
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), GL_DYNAMIC_DRAW);
|
||||
} else {
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW);
|
||||
}
|
||||
} else {
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.vertices_colored.size(), bdata.vertices_colored.get_data(), GL_DYNAMIC_DRAW);
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLightAngled) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW);
|
||||
}
|
||||
|
||||
// might not be necessary
|
||||
|
@ -251,10 +256,6 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_
|
|||
int command_count = p_item->commands.size();
|
||||
Item::Command *const *commands = p_item->commands.ptr();
|
||||
|
||||
// locals, might be more efficient in a register (check)
|
||||
Vector2 texpixel_size = r_fill_state.texpixel_size;
|
||||
const float uv_epsilon = bdata.settings_uv_contract_amount;
|
||||
|
||||
// checking the color for not being white makes it 92/90 times faster in the case where it is white
|
||||
bool multiply_final_modulate = false;
|
||||
if (!r_fill_state.use_hardware_transform && (r_fill_state.final_modulate != Color(1, 1, 1, 1))) {
|
||||
|
@ -316,196 +317,21 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_
|
|||
|
||||
Item::CommandRect *rect = static_cast<Item::CommandRect *>(command);
|
||||
|
||||
bool change_batch = false;
|
||||
// unoptimized - could this be done once per batch / batch texture?
|
||||
bool send_light_angles = rect->normal_map != RID();
|
||||
|
||||
// conditions for creating a new batch
|
||||
if (r_fill_state.curr_batch->type != Batch::BT_RECT) {
|
||||
change_batch = true;
|
||||
bool buffer_full = false;
|
||||
|
||||
// check for special case if there is only a single or small number of rects,
|
||||
// in which case we will use the legacy default rect renderer
|
||||
// because it is faster for single rects
|
||||
|
||||
// we only want to do this if not a joined item with more than 1 item,
|
||||
// because joined items with more than 1, the command * will be incorrect
|
||||
// NOTE - this is assuming that use_hardware_transform means that it is a non-joined item!!
|
||||
// If that assumption is incorrect this will go horribly wrong.
|
||||
if (bdata.settings_use_single_rect_fallback && r_fill_state.use_hardware_transform) {
|
||||
bool is_single_rect = false;
|
||||
int command_num_next = command_num + 1;
|
||||
if (command_num_next < command_count) {
|
||||
Item::Command *command_next = commands[command_num_next];
|
||||
if ((command_next->type != Item::Command::TYPE_RECT) && (command_next->type != Item::Command::TYPE_TRANSFORM)) {
|
||||
is_single_rect = true;
|
||||
}
|
||||
} else {
|
||||
is_single_rect = true;
|
||||
}
|
||||
// if it is a rect on its own, do exactly the same as the default routine
|
||||
if (is_single_rect) {
|
||||
_prefill_default_batch(r_fill_state, command_num, *p_item);
|
||||
break;
|
||||
}
|
||||
} // if use hardware transform
|
||||
// the template params must be explicit for compilation,
|
||||
// this forces building the multiple versions of the function.
|
||||
if (send_light_angles) {
|
||||
buffer_full = prefill_rect<true>(rect, r_fill_state, r_command_start, command_num, command_count, commands, p_item, multiply_final_modulate);
|
||||
} else {
|
||||
buffer_full = prefill_rect<false>(rect, r_fill_state, r_command_start, command_num, command_count, commands, p_item, multiply_final_modulate);
|
||||
}
|
||||
|
||||
Color col = rect->modulate;
|
||||
if (multiply_final_modulate) {
|
||||
col *= r_fill_state.final_modulate;
|
||||
}
|
||||
|
||||
// instead of doing all the texture preparation for EVERY rect,
|
||||
// we build a list of texture combinations and do this once off.
|
||||
// This means we have a potentially rather slow step to identify which texture combo
|
||||
// using the RIDs.
|
||||
int old_batch_tex_id = r_fill_state.batch_tex_id;
|
||||
r_fill_state.batch_tex_id = _batch_find_or_create_tex(rect->texture, rect->normal_map, rect->flags & CANVAS_RECT_TILE, old_batch_tex_id);
|
||||
|
||||
// try to create vertices BEFORE creating a batch,
|
||||
// because if the vertex buffer is full, we need to finish this
|
||||
// function, draw what we have so far, and then start a new set of batches
|
||||
|
||||
// request FOUR vertices at a time, this is more efficient
|
||||
BatchVertex *bvs = bdata.vertices.request(4);
|
||||
if (!bvs) {
|
||||
// run out of space in the vertex buffer .. finish this function and draw what we have so far
|
||||
// return where we got to
|
||||
r_command_start = command_num;
|
||||
if (buffer_full)
|
||||
return true;
|
||||
}
|
||||
|
||||
// conditions for creating a new batch
|
||||
if (old_batch_tex_id != r_fill_state.batch_tex_id) {
|
||||
change_batch = true;
|
||||
}
|
||||
|
||||
// we need to treat color change separately because we need to count these
|
||||
// to decide whether to switch on the fly to colored vertices.
|
||||
if (!r_fill_state.curr_batch->color.equals(col)) {
|
||||
change_batch = true;
|
||||
bdata.total_color_changes++;
|
||||
}
|
||||
|
||||
if (change_batch) {
|
||||
// put the tex pixel size in a local (less verbose and can be a register)
|
||||
const BatchTex &batchtex = bdata.batch_textures[r_fill_state.batch_tex_id];
|
||||
batchtex.tex_pixel_size.to(texpixel_size);
|
||||
|
||||
if (bdata.settings_uv_contract) {
|
||||
r_fill_state.contract_uvs = (batchtex.flags & VS::TEXTURE_FLAG_FILTER) == 0;
|
||||
}
|
||||
|
||||
// need to preserve texpixel_size between items
|
||||
r_fill_state.texpixel_size = texpixel_size;
|
||||
|
||||
// open new batch (this should never fail, it dynamically grows)
|
||||
r_fill_state.curr_batch = _batch_request_new(false);
|
||||
|
||||
r_fill_state.curr_batch->type = Batch::BT_RECT;
|
||||
r_fill_state.curr_batch->color.set(col);
|
||||
r_fill_state.curr_batch->batch_texture_id = r_fill_state.batch_tex_id;
|
||||
r_fill_state.curr_batch->first_command = command_num;
|
||||
r_fill_state.curr_batch->num_commands = 1;
|
||||
r_fill_state.curr_batch->first_quad = bdata.total_quads;
|
||||
} else {
|
||||
// we could alternatively do the count when closing a batch .. perhaps more efficient
|
||||
r_fill_state.curr_batch->num_commands++;
|
||||
}
|
||||
|
||||
// fill the quad geometry
|
||||
Vector2 mins = rect->rect.position;
|
||||
|
||||
if (r_fill_state.transform_mode == TM_TRANSLATE) {
|
||||
_software_transform_vertex(mins, r_fill_state.transform_combined);
|
||||
}
|
||||
|
||||
Vector2 maxs = mins + rect->rect.size;
|
||||
|
||||
// just aliases
|
||||
BatchVertex *bA = &bvs[0];
|
||||
BatchVertex *bB = &bvs[1];
|
||||
BatchVertex *bC = &bvs[2];
|
||||
BatchVertex *bD = &bvs[3];
|
||||
|
||||
bA->pos.x = mins.x;
|
||||
bA->pos.y = mins.y;
|
||||
|
||||
bB->pos.x = maxs.x;
|
||||
bB->pos.y = mins.y;
|
||||
|
||||
bC->pos.x = maxs.x;
|
||||
bC->pos.y = maxs.y;
|
||||
|
||||
bD->pos.x = mins.x;
|
||||
bD->pos.y = maxs.y;
|
||||
|
||||
if (rect->rect.size.x < 0) {
|
||||
SWAP(bA->pos, bB->pos);
|
||||
SWAP(bC->pos, bD->pos);
|
||||
}
|
||||
if (rect->rect.size.y < 0) {
|
||||
SWAP(bA->pos, bD->pos);
|
||||
SWAP(bB->pos, bC->pos);
|
||||
}
|
||||
|
||||
if (r_fill_state.transform_mode == TM_ALL) {
|
||||
_software_transform_vertex(bA->pos, r_fill_state.transform_combined);
|
||||
_software_transform_vertex(bB->pos, r_fill_state.transform_combined);
|
||||
_software_transform_vertex(bC->pos, r_fill_state.transform_combined);
|
||||
_software_transform_vertex(bD->pos, r_fill_state.transform_combined);
|
||||
}
|
||||
|
||||
// uvs
|
||||
Vector2 src_min;
|
||||
Vector2 src_max;
|
||||
if (rect->flags & CANVAS_RECT_REGION) {
|
||||
src_min = rect->source.position;
|
||||
src_max = src_min + rect->source.size;
|
||||
|
||||
src_min *= texpixel_size;
|
||||
src_max *= texpixel_size;
|
||||
|
||||
// nudge offset for the maximum to prevent precision error on GPU reading into line outside the source rect
|
||||
// this is very difficult to get right.
|
||||
if (r_fill_state.contract_uvs) {
|
||||
src_min.x += uv_epsilon;
|
||||
src_min.y += uv_epsilon;
|
||||
src_max.x -= uv_epsilon;
|
||||
src_max.y -= uv_epsilon;
|
||||
}
|
||||
} else {
|
||||
src_min = Vector2(0, 0);
|
||||
src_max = Vector2(1, 1);
|
||||
}
|
||||
|
||||
// 10% faster calculating the max first
|
||||
Vector2 uvs[4] = {
|
||||
src_min,
|
||||
Vector2(src_max.x, src_min.y),
|
||||
src_max,
|
||||
Vector2(src_min.x, src_max.y),
|
||||
};
|
||||
|
||||
if (rect->flags & CANVAS_RECT_TRANSPOSE) {
|
||||
SWAP(uvs[1], uvs[3]);
|
||||
}
|
||||
|
||||
if (rect->flags & CANVAS_RECT_FLIP_H) {
|
||||
SWAP(uvs[0], uvs[1]);
|
||||
SWAP(uvs[2], uvs[3]);
|
||||
}
|
||||
if (rect->flags & CANVAS_RECT_FLIP_V) {
|
||||
SWAP(uvs[0], uvs[3]);
|
||||
SWAP(uvs[1], uvs[2]);
|
||||
}
|
||||
|
||||
bA->uv.set(uvs[0]);
|
||||
bB->uv.set(uvs[1]);
|
||||
bC->uv.set(uvs[2]);
|
||||
bD->uv.set(uvs[3]);
|
||||
|
||||
// increment quad count
|
||||
bdata.total_quads++;
|
||||
|
||||
} break;
|
||||
}
|
||||
|
@ -519,119 +345,29 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_
|
|||
return false;
|
||||
}
|
||||
|
||||
// convert the stupidly high amount of batches (each with its own color)
|
||||
// to larger batches where the color is stored in the verts instead...
|
||||
// There is a trade off. Non colored verts are smaller so work faster, but
|
||||
// there comes a point where it is better to just use colored verts to avoid lots of
|
||||
// batches.
|
||||
void RasterizerCanvasGLES2::_batch_translate_to_colored() {
|
||||
bdata.vertices_colored.reset();
|
||||
bdata.batches_temp.reset();
|
||||
|
||||
// As the vertices_colored and batches_temp are 'mirrors' of the non-colored version,
|
||||
// the sizes should be equal, and allocations should never fail. Hence the use of debug
|
||||
// asserts to check program flow, these should not occur at runtime unless the allocation
|
||||
// code has been altered.
|
||||
#ifdef DEBUG_ENABLED
|
||||
CRASH_COND(bdata.vertices_colored.max_size() != bdata.vertices.max_size());
|
||||
CRASH_COND(bdata.batches_temp.max_size() != bdata.batches.max_size());
|
||||
#endif
|
||||
|
||||
Color curr_col(-1.0, -1.0, -1.0, -1.0);
|
||||
|
||||
Batch *dest_batch = 0;
|
||||
|
||||
// translate the batches into vertex colored batches
|
||||
for (int n = 0; n < bdata.batches.size(); n++) {
|
||||
const Batch &source_batch = bdata.batches[n];
|
||||
|
||||
bool needs_new_batch = true;
|
||||
|
||||
if (dest_batch) {
|
||||
if (dest_batch->type == source_batch.type) {
|
||||
if (source_batch.type == Batch::BT_RECT) {
|
||||
if (dest_batch->batch_texture_id == source_batch.batch_texture_id) {
|
||||
// add to previous batch
|
||||
dest_batch->num_commands += source_batch.num_commands;
|
||||
needs_new_batch = false;
|
||||
|
||||
// create the colored verts (only if not default)
|
||||
int first_vert = source_batch.first_quad * 4;
|
||||
int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands);
|
||||
|
||||
for (int v = first_vert; v < end_vert; v++) {
|
||||
const BatchVertex &bv = bdata.vertices[v];
|
||||
BatchVertexColored *cv = bdata.vertices_colored.request();
|
||||
#ifdef DEBUG_ENABLED
|
||||
CRASH_COND(!cv);
|
||||
#endif
|
||||
cv->pos = bv.pos;
|
||||
cv->uv = bv.uv;
|
||||
cv->col = source_batch.color;
|
||||
}
|
||||
} // textures match
|
||||
} else {
|
||||
// default
|
||||
// we can still join, but only under special circumstances
|
||||
// does this ever happen? not sure at this stage, but left for future expansion
|
||||
uint32_t source_last_command = source_batch.first_command + source_batch.num_commands;
|
||||
if (source_last_command == dest_batch->first_command) {
|
||||
dest_batch->num_commands += source_batch.num_commands;
|
||||
needs_new_batch = false;
|
||||
} // if the commands line up exactly
|
||||
}
|
||||
} // if both batches are the same type
|
||||
|
||||
} // if dest batch is valid
|
||||
|
||||
if (needs_new_batch) {
|
||||
dest_batch = bdata.batches_temp.request();
|
||||
#ifdef DEBUG_ENABLED
|
||||
CRASH_COND(!dest_batch);
|
||||
#endif
|
||||
|
||||
*dest_batch = source_batch;
|
||||
|
||||
// create the colored verts (only if not default)
|
||||
if (source_batch.type != Batch::BT_DEFAULT) {
|
||||
int first_vert = source_batch.first_quad * 4;
|
||||
int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands);
|
||||
|
||||
for (int v = first_vert; v < end_vert; v++) {
|
||||
const BatchVertex &bv = bdata.vertices[v];
|
||||
BatchVertexColored *cv = bdata.vertices_colored.request();
|
||||
#ifdef DEBUG_ENABLED
|
||||
CRASH_COND(!cv);
|
||||
#endif
|
||||
cv->pos = bv.pos;
|
||||
cv->uv = bv.uv;
|
||||
cv->col = source_batch.color;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// copy the temporary batches to the master batch list (this could be avoided but it makes the code cleaner)
|
||||
bdata.batches.copy_from(bdata.batches_temp);
|
||||
}
|
||||
|
||||
void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, RasterizerStorageGLES2::Material *p_material) {
|
||||
|
||||
ERR_FAIL_COND(p_batch.num_commands <= 0);
|
||||
|
||||
const bool &colored_verts = bdata.use_colored_vertices;
|
||||
const bool &use_light_angles = bdata.use_light_angles;
|
||||
|
||||
int sizeof_vert;
|
||||
if (!colored_verts) {
|
||||
sizeof_vert = sizeof(BatchVertex);
|
||||
if (!use_light_angles) {
|
||||
if (!colored_verts) {
|
||||
sizeof_vert = sizeof(BatchVertex);
|
||||
} else {
|
||||
sizeof_vert = sizeof(BatchVertexColored);
|
||||
}
|
||||
} else {
|
||||
sizeof_vert = sizeof(BatchVertexColored);
|
||||
sizeof_vert = sizeof(BatchVertexLightAngled);
|
||||
}
|
||||
|
||||
// batch tex
|
||||
const BatchTex &tex = bdata.batch_textures[p_batch.batch_texture_id];
|
||||
|
||||
// make sure to set all conditionals BEFORE binding the shader
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false);
|
||||
_set_texture_rect_mode(false, use_light_angles);
|
||||
|
||||
// force repeat is set if non power of 2 texture, and repeat is needed if hardware doesn't support npot
|
||||
if (tex.tile_mode == BatchTex::TILE_FORCE_REPEAT) {
|
||||
|
@ -665,6 +401,11 @@ void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, Rasterizer
|
|||
glEnableVertexAttribArray(VS::ARRAY_COLOR);
|
||||
}
|
||||
|
||||
if (use_light_angles) {
|
||||
glVertexAttribPointer(VS::ARRAY_TANGENT, 1, GL_FLOAT, GL_FALSE, sizeof_vert, CAST_INT_TO_UCHAR_PTR(pointer + (8 * 4)));
|
||||
glEnableVertexAttribArray(VS::ARRAY_TANGENT);
|
||||
}
|
||||
|
||||
// We only want to set the GL wrapping mode if the texture is not already tiled (i.e. set in Import).
|
||||
// This is an optimization left over from the legacy renderer.
|
||||
// If we DID set tiling in the API, and reverted to clamped, then the next draw using this texture
|
||||
|
@ -707,8 +448,10 @@ void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, Rasterizer
|
|||
} break;
|
||||
}
|
||||
|
||||
// could these have ifs?
|
||||
glDisableVertexAttribArray(VS::ARRAY_TEX_UV);
|
||||
glDisableVertexAttribArray(VS::ARRAY_COLOR);
|
||||
glDisableVertexAttribArray(VS::ARRAY_TANGENT);
|
||||
|
||||
// may not be necessary .. state change optimization still TODO
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
|
@ -848,7 +591,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
|
|||
|
||||
Item::CommandLine *line = static_cast<Item::CommandLine *>(command);
|
||||
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false);
|
||||
_set_texture_rect_mode(false);
|
||||
if (state.canvas_shader.bind()) {
|
||||
_set_uniforms();
|
||||
state.canvas_shader.use_material((void *)p_material);
|
||||
|
@ -934,7 +677,17 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
|
|||
// To work it around, we use a simpler draw method which does not flicker, but gives
|
||||
// a non negligible performance hit, so it's opt-in (GH-24466).
|
||||
if (use_nvidia_rect_workaround) {
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false);
|
||||
|
||||
// are we using normal maps, if so we want to use light angle
|
||||
bool send_light_angles = false;
|
||||
|
||||
// only need to use light angles when normal mapping
|
||||
// otherwise we can use the default shader
|
||||
if (state.current_normal != RID()) {
|
||||
send_light_angles = true;
|
||||
}
|
||||
|
||||
_set_texture_rect_mode(false, send_light_angles);
|
||||
|
||||
if (state.canvas_shader.bind()) {
|
||||
_set_uniforms();
|
||||
|
@ -971,6 +724,10 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
|
|||
src_rect.position + Vector2(0.0, src_rect.size.y),
|
||||
};
|
||||
|
||||
// for encoding in light angle
|
||||
bool flip_h = false;
|
||||
bool flip_v = false;
|
||||
|
||||
if (r->flags & CANVAS_RECT_TRANSPOSE) {
|
||||
SWAP(uvs[1], uvs[3]);
|
||||
}
|
||||
|
@ -978,10 +735,13 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
|
|||
if (r->flags & CANVAS_RECT_FLIP_H) {
|
||||
SWAP(uvs[0], uvs[1]);
|
||||
SWAP(uvs[2], uvs[3]);
|
||||
flip_h = true;
|
||||
flip_v = !flip_v;
|
||||
}
|
||||
if (r->flags & CANVAS_RECT_FLIP_V) {
|
||||
SWAP(uvs[0], uvs[3]);
|
||||
SWAP(uvs[1], uvs[2]);
|
||||
flip_v = !flip_v;
|
||||
}
|
||||
|
||||
state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size);
|
||||
|
@ -994,7 +754,33 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
|
|||
untile = true;
|
||||
}
|
||||
|
||||
_draw_gui_primitive(4, points, NULL, uvs);
|
||||
if (send_light_angles) {
|
||||
// for single rects, there is no need to fully utilize the light angle,
|
||||
// we only need it to encode flips (horz and vert). But the shader can be reused with
|
||||
// batching in which case the angle encodes the transform as well as
|
||||
// the flips.
|
||||
// Note transpose is NYI. I don't think it worked either with the non-nvidia method.
|
||||
|
||||
// if horizontal flip, angle is 180
|
||||
float angle = 0.0f;
|
||||
if (flip_h)
|
||||
angle = Math_PI;
|
||||
|
||||
// add 1 (to take care of zero floating point error with sign)
|
||||
angle += 1.0f;
|
||||
|
||||
// flip if necessary
|
||||
if (flip_v)
|
||||
angle *= -1.0f;
|
||||
|
||||
// light angle must be sent for each vert, instead as a single uniform in the uniform draw method
|
||||
// this has the benefit of enabling batching with light angles.
|
||||
float light_angles[4] = { angle, angle, angle, angle };
|
||||
|
||||
_draw_gui_primitive(4, points, NULL, uvs, light_angles);
|
||||
} else {
|
||||
_draw_gui_primitive(4, points, NULL, uvs);
|
||||
}
|
||||
|
||||
if (untile) {
|
||||
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
|
@ -1016,7 +802,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
|
|||
// This branch is better for performance, but can produce flicker on Nvidia, see above comment.
|
||||
_bind_quad_buffer();
|
||||
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, true);
|
||||
_set_texture_rect_mode(true);
|
||||
|
||||
if (state.canvas_shader.bind()) {
|
||||
_set_uniforms();
|
||||
|
@ -1104,7 +890,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
|
|||
|
||||
Item::CommandNinePatch *np = static_cast<Item::CommandNinePatch *>(command);
|
||||
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false);
|
||||
_set_texture_rect_mode(false);
|
||||
if (state.canvas_shader.bind()) {
|
||||
_set_uniforms();
|
||||
state.canvas_shader.use_material((void *)p_material);
|
||||
|
@ -1280,7 +1066,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
|
|||
|
||||
Item::CommandCircle *circle = static_cast<Item::CommandCircle *>(command);
|
||||
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false);
|
||||
_set_texture_rect_mode(false);
|
||||
|
||||
if (state.canvas_shader.bind()) {
|
||||
_set_uniforms();
|
||||
|
@ -1310,7 +1096,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
|
|||
|
||||
Item::CommandPolygon *polygon = static_cast<Item::CommandPolygon *>(command);
|
||||
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false);
|
||||
_set_texture_rect_mode(false);
|
||||
|
||||
if (state.canvas_shader.bind()) {
|
||||
_set_uniforms();
|
||||
|
@ -1340,7 +1126,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
|
|||
case Item::Command::TYPE_MESH: {
|
||||
|
||||
Item::CommandMesh *mesh = static_cast<Item::CommandMesh *>(command);
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false);
|
||||
_set_texture_rect_mode(false);
|
||||
|
||||
if (state.canvas_shader.bind()) {
|
||||
_set_uniforms();
|
||||
|
@ -1416,7 +1202,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
|
|||
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCE_CUSTOM, multi_mesh->custom_data_format != VS::MULTIMESH_CUSTOM_DATA_NONE);
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCING, true);
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false);
|
||||
_set_texture_rect_mode(false);
|
||||
|
||||
if (state.canvas_shader.bind()) {
|
||||
_set_uniforms();
|
||||
|
@ -1520,7 +1306,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
|
|||
}
|
||||
}
|
||||
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCE_CUSTOM, false);
|
||||
_set_texture_rect_mode(false);
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCING, false);
|
||||
|
||||
storage->info.render._2d_draw_call_count++;
|
||||
|
@ -1580,7 +1366,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite
|
|||
case Item::Command::TYPE_PRIMITIVE: {
|
||||
|
||||
Item::CommandPrimitive *primitive = static_cast<Item::CommandPrimitive *>(command);
|
||||
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false);
|
||||
_set_texture_rect_mode(false);
|
||||
|
||||
if (state.canvas_shader.bind()) {
|
||||
_set_uniforms();
|
||||
|
@ -1732,23 +1518,28 @@ void RasterizerCanvasGLES2::flush_render_batches(Item *p_first_item, Item *p_cur
|
|||
// .. however probably not necessary
|
||||
bdata.use_colored_vertices = false;
|
||||
|
||||
// only check whether to convert if there are quads (prevent divide by zero)
|
||||
// and we haven't decided to prevent color baking (due to e.g. MODULATE
|
||||
// being used in a shader)
|
||||
if (bdata.total_quads && !(bdata.joined_item_batch_flags & RasterizerStorageGLES2::Shader::CanvasItem::PREVENT_COLOR_BAKING)) {
|
||||
// minus 1 to prevent single primitives (ratio 1.0) always being converted to colored..
|
||||
// in that case it is slightly cheaper to just have the color as part of the batch
|
||||
float ratio = (float)(bdata.total_color_changes - 1) / (float)bdata.total_quads;
|
||||
if (bdata.use_light_angles) {
|
||||
_translate_batches_to_larger_FVF<BatchVertexLightAngled, true>();
|
||||
} else {
|
||||
// only check whether to convert if there are quads (prevent divide by zero)
|
||||
// and we haven't decided to prevent color baking (due to e.g. MODULATE
|
||||
// being used in a shader)
|
||||
if (bdata.total_quads && !(bdata.joined_item_batch_flags & RasterizerStorageGLES2::Shader::CanvasItem::PREVENT_COLOR_BAKING)) {
|
||||
// minus 1 to prevent single primitives (ratio 1.0) always being converted to colored..
|
||||
// in that case it is slightly cheaper to just have the color as part of the batch
|
||||
float ratio = (float)(bdata.total_color_changes - 1) / (float)bdata.total_quads;
|
||||
|
||||
// use bigger than or equal so that 0.0 threshold can force always using colored verts
|
||||
if (ratio >= bdata.settings_colored_vertex_format_threshold) {
|
||||
bdata.use_colored_vertices = true;
|
||||
// use bigger than or equal so that 0.0 threshold can force always using colored verts
|
||||
if (ratio >= bdata.settings_colored_vertex_format_threshold) {
|
||||
bdata.use_colored_vertices = true;
|
||||
|
||||
// small perf cost versus going straight to colored verts (maybe around 10%)
|
||||
// however more straightforward
|
||||
_batch_translate_to_colored();
|
||||
// small perf cost versus going straight to colored verts (maybe around 10%)
|
||||
// however more straightforward
|
||||
_translate_batches_to_larger_FVF<BatchVertexColored, false>();
|
||||
//_batch_translate_to_colored();
|
||||
}
|
||||
}
|
||||
}
|
||||
} // if not using light angles
|
||||
|
||||
// send buffers to opengl
|
||||
_batch_upload_buffers();
|
||||
|
@ -3517,9 +3308,12 @@ void RasterizerCanvasGLES2::initialize() {
|
|||
bdata.vertex_buffer_size_bytes = bdata.vertex_buffer_size_units * sizeof_batch_vert;
|
||||
bdata.index_buffer_size_bytes = bdata.index_buffer_size_units * 2; // 16 bit inds
|
||||
|
||||
// create equal number of norma and colored verts (as the normal may need to be translated to colored)
|
||||
// create equal number of normal and (max) unit sized verts (as the normal may need to be translated to a larger FVF)
|
||||
bdata.vertices.create(bdata.vertex_buffer_size_units); // 512k
|
||||
bdata.vertices_colored.create(bdata.vertices.max_size()); // 1024k
|
||||
bdata.unit_vertices.create(bdata.vertices.max_size(), sizeof(BatchVertexLightAngled));
|
||||
|
||||
// extra data per vert needed for larger FVFs
|
||||
bdata.light_angles.create(bdata.vertices.max_size());
|
||||
|
||||
// num batches will be auto increased dynamically if required
|
||||
bdata.batches.create(1024);
|
||||
|
|
|
@ -89,6 +89,11 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 {
|
|||
BatchColor col;
|
||||
};
|
||||
|
||||
// Vertex format that carries a per-vertex light angle in addition to
// everything already present in BatchVertexColored.
// This has to remain a POD type (the batch translation code writes it
// into raw unit storage through a pointer cast).
struct BatchVertexLightAngled : BatchVertexColored {
	float light_angle;
};
|
||||
|
||||
struct Batch {
|
||||
enum CommandType : uint32_t {
|
||||
BT_DEFAULT,
|
||||
|
@ -167,10 +172,13 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 {
|
|||
// Clears the per-flush batching state: the batch lists, the prefilled
// geometry (including the optional light angles), the counters used to
// choose a vertex format, and the light angle flag.
void reset_flush() {
	// counters and vertex format flag
	use_light_angles = false;
	total_color_changes = 0;
	total_quads = 0;

	// geometry written during prefilling
	light_angles.reset();
	vertices.reset();

	// batch lists
	batch_textures.reset();
	batches.reset();
}
|
||||
|
||||
GLuint gl_vertex_buffer;
|
||||
|
@ -182,13 +190,28 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 {
|
|||
uint32_t index_buffer_size_units;
|
||||
uint32_t index_buffer_size_bytes;
|
||||
|
||||
// small vertex FVF type - pos and UV.
|
||||
// This will always be written to initially, but can be translated
|
||||
// to larger FVFs if necessary.
|
||||
RasterizerArrayGLES2<BatchVertex> vertices;
|
||||
RasterizerArrayGLES2<BatchVertexColored> vertices_colored;
|
||||
|
||||
// extra data which can be stored during prefilling, for later translation to larger FVFs
|
||||
RasterizerArrayGLES2<float> light_angles;
|
||||
|
||||
// instead of having a different buffer for each vertex FVF type
|
||||
// we have a special array big enough for the biggest FVF
|
||||
// which can have a changeable unit size, and reuse it.
|
||||
RasterizerUnitArrayGLES2 unit_vertices;
|
||||
|
||||
RasterizerArrayGLES2<Batch> batches;
|
||||
RasterizerArrayGLES2<Batch> batches_temp; // used for translating to colored vertex batches
|
||||
RasterizerArray_non_pod_GLES2<BatchTex> batch_textures; // the only reason this is non-POD is because of RIDs
|
||||
|
||||
// flexible vertex format.
|
||||
// all verts have pos and UV.
|
||||
// some have color, some light angles etc.
|
||||
bool use_colored_vertices;
|
||||
bool use_light_angles;
|
||||
|
||||
RasterizerArrayGLES2<BItemJoined> items_joined;
|
||||
RasterizerArrayGLES2<BItemRef> item_refs;
|
||||
|
@ -321,11 +344,12 @@ private:
|
|||
bool try_join_item(Item *p_ci, RenderItemState &r_ris, bool &r_batch_break);
|
||||
void render_joined_item_commands(const BItemJoined &p_bij, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material, bool p_lit);
|
||||
void render_batches(Item::Command *const *p_commands, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material);
|
||||
|
||||
bool prefill_joined_item(FillState &r_fill_state, int &r_command_start, Item *p_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material);
|
||||
|
||||
void flush_render_batches(Item *p_first_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material);
|
||||
|
||||
// low level batch funcs
|
||||
void _batch_translate_to_colored();
|
||||
int _batch_find_or_create_tex(const RID &p_texture, const RID &p_normal, bool p_tile, int p_previous_match);
|
||||
RasterizerStorageGLES2::Texture *_get_canvas_texture(const RID &p_texture) const;
|
||||
void _batch_upload_buffers();
|
||||
|
@ -358,6 +382,13 @@ private:
|
|||
public:
|
||||
void initialize();
|
||||
RasterizerCanvasGLES2();
|
||||
|
||||
private:
|
||||
template <bool SEND_LIGHT_ANGLES>
|
||||
bool prefill_rect(Item::CommandRect *rect, FillState &r_fill_state, int &r_command_start, int command_num, int command_count, Item::Command *const *commands, Item *p_item, bool multiply_final_modulate);
|
||||
|
||||
template <class BATCH_VERTEX_TYPE, bool INCLUDE_LIGHT_ANGLES>
|
||||
void _translate_batches_to_larger_FVF();
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
|
@ -485,4 +516,407 @@ inline bool RasterizerCanvasGLES2::_sort_items_match(const BSortItem &p_a, const
|
|||
return true;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// TEMPLATE FUNCS
|
||||
|
||||
// Translation always involved adding color to the FVF, which enables
|
||||
// joining of batches that have different colors.
|
||||
// There is a trade off. Non colored verts are smaller so work faster, but
|
||||
// there comes a point where it is better to just use colored verts to avoid lots of
|
||||
// batches.
|
||||
// In addition this can optionally add light angles to the FVF, necessary for normal mapping.
|
||||
template <class BATCH_VERTEX_TYPE, bool INCLUDE_LIGHT_ANGLES>
|
||||
void RasterizerCanvasGLES2::_translate_batches_to_larger_FVF() {
|
||||
|
||||
// zeros the size and sets up how big each unit is
|
||||
bdata.unit_vertices.prepare(sizeof(BATCH_VERTEX_TYPE));
|
||||
bdata.batches_temp.reset();
|
||||
|
||||
// As the vertices_colored and batches_temp are 'mirrors' of the non-colored version,
|
||||
// the sizes should be equal, and allocations should never fail. Hence the use of debug
|
||||
// asserts to check program flow, these should not occur at runtime unless the allocation
|
||||
// code has been altered.
|
||||
#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
|
||||
CRASH_COND(bdata.unit_vertices.max_size() != bdata.vertices.max_size());
|
||||
CRASH_COND(bdata.batches_temp.max_size() != bdata.batches.max_size());
|
||||
#endif
|
||||
|
||||
Color curr_col(-1.0, -1.0, -1.0, -1.0);
|
||||
|
||||
Batch *dest_batch = 0;
|
||||
|
||||
const float *source_light_angles = &bdata.light_angles[0];
|
||||
|
||||
// translate the batches into vertex colored batches
|
||||
for (int n = 0; n < bdata.batches.size(); n++) {
|
||||
const Batch &source_batch = bdata.batches[n];
|
||||
|
||||
// does source batch use light angles?
|
||||
const BatchTex &btex = bdata.batch_textures[source_batch.batch_texture_id];
|
||||
bool source_batch_uses_light_angles = btex.RID_normal != RID();
|
||||
|
||||
bool needs_new_batch = true;
|
||||
|
||||
if (dest_batch) {
|
||||
if (dest_batch->type == source_batch.type) {
|
||||
if (source_batch.type == Batch::BT_RECT) {
|
||||
if (dest_batch->batch_texture_id == source_batch.batch_texture_id) {
|
||||
// add to previous batch
|
||||
dest_batch->num_commands += source_batch.num_commands;
|
||||
needs_new_batch = false;
|
||||
|
||||
// create the colored verts (only if not default)
|
||||
int first_vert = source_batch.first_quad * 4;
|
||||
int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands);
|
||||
|
||||
for (int v = first_vert; v < end_vert; v++) {
|
||||
const BatchVertex &bv = bdata.vertices[v];
|
||||
BATCH_VERTEX_TYPE *cv = (BatchVertexLightAngled *)bdata.unit_vertices.request();
|
||||
#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
|
||||
CRASH_COND(!cv);
|
||||
#endif
|
||||
cv->pos = bv.pos;
|
||||
cv->uv = bv.uv;
|
||||
cv->col = source_batch.color;
|
||||
|
||||
if (INCLUDE_LIGHT_ANGLES) {
|
||||
// this is required to allow compilation with non light angle vertex.
|
||||
// it should be compiled out.
|
||||
BatchVertexLightAngled *lv = (BatchVertexLightAngled *)cv;
|
||||
if (source_batch_uses_light_angles)
|
||||
lv->light_angle = *source_light_angles++;
|
||||
else
|
||||
lv->light_angle = 0.0f; // dummy, unused in vertex shader (could possibly be left uninitialized, but probably bad idea)
|
||||
}
|
||||
}
|
||||
} // textures match
|
||||
} else {
|
||||
// default
|
||||
// we can still join, but only under special circumstances
|
||||
// does this ever happen? not sure at this stage, but left for future expansion
|
||||
uint32_t source_last_command = source_batch.first_command + source_batch.num_commands;
|
||||
if (source_last_command == dest_batch->first_command) {
|
||||
dest_batch->num_commands += source_batch.num_commands;
|
||||
needs_new_batch = false;
|
||||
} // if the commands line up exactly
|
||||
}
|
||||
} // if both batches are the same type
|
||||
|
||||
} // if dest batch is valid
|
||||
|
||||
if (needs_new_batch) {
|
||||
dest_batch = bdata.batches_temp.request();
|
||||
#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
|
||||
CRASH_COND(!dest_batch);
|
||||
#endif
|
||||
|
||||
*dest_batch = source_batch;
|
||||
|
||||
// create the colored verts (only if not default)
|
||||
if (source_batch.type != Batch::BT_DEFAULT) {
|
||||
int first_vert = source_batch.first_quad * 4;
|
||||
int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands);
|
||||
|
||||
for (int v = first_vert; v < end_vert; v++) {
|
||||
const BatchVertex &bv = bdata.vertices[v];
|
||||
BATCH_VERTEX_TYPE *cv = (BatchVertexLightAngled *)bdata.unit_vertices.request();
|
||||
#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
|
||||
CRASH_COND(!cv);
|
||||
#endif
|
||||
cv->pos = bv.pos;
|
||||
cv->uv = bv.uv;
|
||||
cv->col = source_batch.color;
|
||||
|
||||
if (INCLUDE_LIGHT_ANGLES) {
|
||||
// this is required to allow compilation with non light angle vertex.
|
||||
// it should be compiled out.
|
||||
BatchVertexLightAngled *lv = (BatchVertexLightAngled *)cv;
|
||||
if (source_batch_uses_light_angles)
|
||||
lv->light_angle = *source_light_angles++;
|
||||
else
|
||||
lv->light_angle = 0.0f; // dummy, unused in vertex shader (could possibly be left uninitialized, but probably bad idea)
|
||||
} // if using light angles
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// copy the temporary batches to the master batch list (this could be avoided but it makes the code cleaner)
|
||||
bdata.batches.copy_from(bdata.batches_temp);
|
||||
}
|
||||
|
||||
// return true if buffer full up, else return false
|
||||
template <bool SEND_LIGHT_ANGLES>
|
||||
bool RasterizerCanvasGLES2::prefill_rect(Item::CommandRect *rect, FillState &r_fill_state, int &r_command_start, int command_num, int command_count, Item::Command *const *commands, Item *p_item, bool multiply_final_modulate) {
|
||||
bool change_batch = false;
|
||||
|
||||
// conditions for creating a new batch
|
||||
if (r_fill_state.curr_batch->type != Batch::BT_RECT) {
|
||||
change_batch = true;
|
||||
|
||||
// check for special case if there is only a single or small number of rects,
|
||||
// in which case we will use the legacy default rect renderer
|
||||
// because it is faster for single rects
|
||||
|
||||
// we only want to do this if not a joined item with more than 1 item,
|
||||
// because joined items with more than 1, the command * will be incorrect
|
||||
// NOTE - this is assuming that use_hardware_transform means that it is a non-joined item!!
|
||||
// If that assumption is incorrect this will go horribly wrong.
|
||||
if (bdata.settings_use_single_rect_fallback && r_fill_state.use_hardware_transform) {
|
||||
bool is_single_rect = false;
|
||||
int command_num_next = command_num + 1;
|
||||
if (command_num_next < command_count) {
|
||||
Item::Command *command_next = commands[command_num_next];
|
||||
if ((command_next->type != Item::Command::TYPE_RECT) && (command_next->type != Item::Command::TYPE_TRANSFORM)) {
|
||||
is_single_rect = true;
|
||||
}
|
||||
} else {
|
||||
is_single_rect = true;
|
||||
}
|
||||
// if it is a rect on its own, do exactly the same as the default routine
|
||||
if (is_single_rect) {
|
||||
_prefill_default_batch(r_fill_state, command_num, *p_item);
|
||||
return false;
|
||||
}
|
||||
} // if use hardware transform
|
||||
}
|
||||
|
||||
Color col = rect->modulate;
|
||||
if (multiply_final_modulate) {
|
||||
col *= r_fill_state.final_modulate;
|
||||
}
|
||||
|
||||
// instead of doing all the texture preparation for EVERY rect,
|
||||
// we build a list of texture combinations and do this once off.
|
||||
// This means we have a potentially rather slow step to identify which texture combo
|
||||
// using the RIDs.
|
||||
int old_batch_tex_id = r_fill_state.batch_tex_id;
|
||||
r_fill_state.batch_tex_id = _batch_find_or_create_tex(rect->texture, rect->normal_map, rect->flags & CANVAS_RECT_TILE, old_batch_tex_id);
|
||||
|
||||
//r_fill_state.use_light_angles = send_light_angles;
|
||||
if (SEND_LIGHT_ANGLES)
|
||||
bdata.use_light_angles = true;
|
||||
|
||||
// try to create vertices BEFORE creating a batch,
|
||||
// because if the vertex buffer is full, we need to finish this
|
||||
// function, draw what we have so far, and then start a new set of batches
|
||||
|
||||
// request FOUR vertices at a time, this is more efficient
|
||||
BatchVertex *bvs = bdata.vertices.request(4);
|
||||
if (!bvs) {
|
||||
// run out of space in the vertex buffer .. finish this function and draw what we have so far
|
||||
// return where we got to
|
||||
r_command_start = command_num;
|
||||
return true;
|
||||
}
|
||||
|
||||
// conditions for creating a new batch
|
||||
if (old_batch_tex_id != r_fill_state.batch_tex_id) {
|
||||
change_batch = true;
|
||||
}
|
||||
|
||||
// we need to treat color change separately because we need to count these
|
||||
// to decide whether to switch on the fly to colored vertices.
|
||||
if (!r_fill_state.curr_batch->color.equals(col)) {
|
||||
change_batch = true;
|
||||
bdata.total_color_changes++;
|
||||
}
|
||||
|
||||
if (change_batch) {
|
||||
// put the tex pixel size in a local (less verbose and can be a register)
|
||||
const BatchTex &batchtex = bdata.batch_textures[r_fill_state.batch_tex_id];
|
||||
batchtex.tex_pixel_size.to(r_fill_state.texpixel_size);
|
||||
|
||||
if (bdata.settings_uv_contract) {
|
||||
r_fill_state.contract_uvs = (batchtex.flags & VS::TEXTURE_FLAG_FILTER) == 0;
|
||||
}
|
||||
|
||||
// need to preserve texpixel_size between items
|
||||
r_fill_state.texpixel_size = r_fill_state.texpixel_size;
|
||||
|
||||
// open new batch (this should never fail, it dynamically grows)
|
||||
r_fill_state.curr_batch = _batch_request_new(false);
|
||||
|
||||
r_fill_state.curr_batch->type = Batch::BT_RECT;
|
||||
r_fill_state.curr_batch->color.set(col);
|
||||
r_fill_state.curr_batch->batch_texture_id = r_fill_state.batch_tex_id;
|
||||
r_fill_state.curr_batch->first_command = command_num;
|
||||
r_fill_state.curr_batch->num_commands = 1;
|
||||
r_fill_state.curr_batch->first_quad = bdata.total_quads;
|
||||
} else {
|
||||
// we could alternatively do the count when closing a batch .. perhaps more efficient
|
||||
r_fill_state.curr_batch->num_commands++;
|
||||
}
|
||||
|
||||
// fill the quad geometry
|
||||
Vector2 mins = rect->rect.position;
|
||||
|
||||
if (r_fill_state.transform_mode == TM_TRANSLATE) {
|
||||
_software_transform_vertex(mins, r_fill_state.transform_combined);
|
||||
}
|
||||
|
||||
Vector2 maxs = mins + rect->rect.size;
|
||||
|
||||
// just aliases
|
||||
BatchVertex *bA = &bvs[0];
|
||||
BatchVertex *bB = &bvs[1];
|
||||
BatchVertex *bC = &bvs[2];
|
||||
BatchVertex *bD = &bvs[3];
|
||||
|
||||
bA->pos.x = mins.x;
|
||||
bA->pos.y = mins.y;
|
||||
|
||||
bB->pos.x = maxs.x;
|
||||
bB->pos.y = mins.y;
|
||||
|
||||
bC->pos.x = maxs.x;
|
||||
bC->pos.y = maxs.y;
|
||||
|
||||
bD->pos.x = mins.x;
|
||||
bD->pos.y = maxs.y;
|
||||
|
||||
// possibility of applying flips here for normal mapping .. but they don't seem to be used
|
||||
if (rect->rect.size.x < 0) {
|
||||
SWAP(bA->pos, bB->pos);
|
||||
SWAP(bC->pos, bD->pos);
|
||||
}
|
||||
if (rect->rect.size.y < 0) {
|
||||
SWAP(bA->pos, bD->pos);
|
||||
SWAP(bB->pos, bC->pos);
|
||||
}
|
||||
|
||||
if (r_fill_state.transform_mode == TM_ALL) {
|
||||
_software_transform_vertex(bA->pos, r_fill_state.transform_combined);
|
||||
_software_transform_vertex(bB->pos, r_fill_state.transform_combined);
|
||||
_software_transform_vertex(bC->pos, r_fill_state.transform_combined);
|
||||
_software_transform_vertex(bD->pos, r_fill_state.transform_combined);
|
||||
}
|
||||
|
||||
// uvs
|
||||
Vector2 src_min;
|
||||
Vector2 src_max;
|
||||
if (rect->flags & CANVAS_RECT_REGION) {
|
||||
src_min = rect->source.position;
|
||||
src_max = src_min + rect->source.size;
|
||||
|
||||
src_min *= r_fill_state.texpixel_size;
|
||||
src_max *= r_fill_state.texpixel_size;
|
||||
|
||||
const float uv_epsilon = bdata.settings_uv_contract_amount;
|
||||
|
||||
// nudge offset for the maximum to prevent precision error on GPU reading into line outside the source rect
|
||||
// this is very difficult to get right.
|
||||
if (r_fill_state.contract_uvs) {
|
||||
src_min.x += uv_epsilon;
|
||||
src_min.y += uv_epsilon;
|
||||
src_max.x -= uv_epsilon;
|
||||
src_max.y -= uv_epsilon;
|
||||
}
|
||||
} else {
|
||||
src_min = Vector2(0, 0);
|
||||
src_max = Vector2(1, 1);
|
||||
}
|
||||
|
||||
// 10% faster calculating the max first
|
||||
Vector2 uvs[4] = {
|
||||
src_min,
|
||||
Vector2(src_max.x, src_min.y),
|
||||
src_max,
|
||||
Vector2(src_min.x, src_max.y),
|
||||
};
|
||||
|
||||
// for encoding in light angle
|
||||
// flips should be optimized out when not being used for light angle.
|
||||
bool flip_h = false;
|
||||
bool flip_v = false;
|
||||
|
||||
if (rect->flags & CANVAS_RECT_TRANSPOSE) {
|
||||
SWAP(uvs[1], uvs[3]);
|
||||
}
|
||||
|
||||
if (rect->flags & CANVAS_RECT_FLIP_H) {
|
||||
SWAP(uvs[0], uvs[1]);
|
||||
SWAP(uvs[2], uvs[3]);
|
||||
flip_h = !flip_h;
|
||||
flip_v = !flip_v;
|
||||
}
|
||||
if (rect->flags & CANVAS_RECT_FLIP_V) {
|
||||
SWAP(uvs[0], uvs[3]);
|
||||
SWAP(uvs[1], uvs[2]);
|
||||
flip_v = !flip_v;
|
||||
}
|
||||
|
||||
bA->uv.set(uvs[0]);
|
||||
bB->uv.set(uvs[1]);
|
||||
bC->uv.set(uvs[2]);
|
||||
bD->uv.set(uvs[3]);
|
||||
|
||||
if (SEND_LIGHT_ANGLES) {
|
||||
// we can either keep the light angles in sync with the verts when writing,
|
||||
// or sync them up during translation. We are syncing in translation.
|
||||
// N.B. There may be batches that don't require light_angles between batches that do.
|
||||
float *angles = bdata.light_angles.request(4);
|
||||
#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED)
|
||||
CRASH_COND(angles == nullptr);
|
||||
#endif
|
||||
|
||||
float angle = 0.0f;
|
||||
const float TWO_PI = Math_PI * 2;
|
||||
|
||||
if (r_fill_state.transform_mode != TM_NONE) {
|
||||
|
||||
const Transform2D &tr = r_fill_state.transform_combined;
|
||||
|
||||
// apply to an x axis
|
||||
// the x axis and y axis can be taken directly from the transform (no need to xform identity vectors)
|
||||
Vector2 x_axis(tr.elements[0][0], tr.elements[1][0]);
|
||||
|
||||
// have to do a y axis to check for scaling flips
|
||||
// this is hassle and extra slowness. We could only allow flips via the flags.
|
||||
Vector2 y_axis(tr.elements[0][1], tr.elements[1][1]);
|
||||
|
||||
// has the x / y axis flipped due to scaling?
|
||||
float cross = x_axis.cross(y_axis);
|
||||
if (cross < 0.0f) {
|
||||
flip_v = !flip_v;
|
||||
}
|
||||
|
||||
// passing an angle is smaller than a vector, it can be reconstructed in the shader
|
||||
angle = x_axis.angle();
|
||||
|
||||
// we don't want negative angles, as negative is used to encode flips.
|
||||
// This moves range from -PI to PI to 0 to TWO_PI
|
||||
if (angle < 0.0f)
|
||||
angle += TWO_PI;
|
||||
|
||||
} // if transform needed
|
||||
|
||||
// if horizontal flip, angle is shifted by 180 degrees
|
||||
if (flip_h) {
|
||||
angle += Math_PI;
|
||||
|
||||
// mod to get back to 0 to TWO_PI range
|
||||
angle = fmodf(angle, TWO_PI);
|
||||
}
|
||||
|
||||
// add 1 (to take care of zero floating point error with sign)
|
||||
angle += 1.0f;
|
||||
|
||||
// flip if necessary to indicate a vertical flip in the shader
|
||||
if (flip_v)
|
||||
angle *= -1.0f;
|
||||
|
||||
// light angle must be sent for each vert, instead as a single uniform in the uniform draw method
|
||||
// this has the benefit of enabling batching with light angles.
|
||||
for (int n = 0; n < 4; n++) {
|
||||
angles[n] = angle;
|
||||
}
|
||||
}
|
||||
|
||||
// increment quad count
|
||||
bdata.total_quads++;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif // RASTERIZERCANVASGLES2_H
|
||||
|
|
|
@ -407,7 +407,7 @@ void RasterizerGLES2::blit_render_target_to_screen(RID p_render_target, const Re
|
|||
RasterizerStorageGLES2::RenderTarget *rt = storage->render_target_owner.getornull(p_render_target);
|
||||
ERR_FAIL_COND(!rt);
|
||||
|
||||
canvas->state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, true);
|
||||
canvas->_set_texture_rect_mode(true);
|
||||
|
||||
canvas->state.canvas_shader.set_custom_shader(0);
|
||||
canvas->state.canvas_shader.bind();
|
||||
|
|
|
@ -18,6 +18,12 @@ uniform highp mat4 projection_matrix;
|
|||
uniform highp mat4 modelview_matrix;
|
||||
uniform highp mat4 extra_matrix;
|
||||
attribute highp vec2 vertex; // attrib:0
|
||||
|
||||
#ifdef USE_LIGHT_ANGLE
|
||||
// shared with tangent, not used in canvas shader
|
||||
attribute highp float light_angle; // attrib:2
|
||||
#endif
|
||||
|
||||
attribute vec4 color_attrib; // attrib:3
|
||||
attribute vec2 uv_attrib; // attrib:4
|
||||
|
||||
|
@ -219,12 +225,34 @@ VERTEX_SHADER_CODE
|
|||
pos = outvec.xy;
|
||||
#endif
|
||||
|
||||
#ifdef USE_LIGHT_ANGLE
|
||||
// we add a fixed offset because we are using the sign later,
|
||||
// and don't want floating point error around 0.0
|
||||
float la = abs(light_angle) - 1.0;
|
||||
|
||||
// vector light angle
|
||||
vec4 vla;
|
||||
vla.xy = vec2(cos(la), sin(la));
|
||||
vla.zw = vec2(-vla.y, vla.x);
|
||||
|
||||
// vertical flip encoded in the sign
|
||||
vla.zw *= sign(light_angle);
|
||||
|
||||
// apply the transform matrix.
|
||||
// The rotate will be encoded in the transform matrix for single rects,
|
||||
// and just the flips in the light angle.
|
||||
// For batching we will encode the rotation and the flips
|
||||
// in the light angle, and can use the same shader.
|
||||
local_rot.xy = normalize((modelview_matrix * (extra_matrix_instance * vec4(vla.xy, 0.0, 0.0))).xy);
|
||||
local_rot.zw = normalize((modelview_matrix * (extra_matrix_instance * vec4(vla.zw, 0.0, 0.0))).xy);
|
||||
#else
|
||||
local_rot.xy = normalize((modelview_matrix * (extra_matrix_instance * vec4(1.0, 0.0, 0.0, 0.0))).xy);
|
||||
local_rot.zw = normalize((modelview_matrix * (extra_matrix_instance * vec4(0.0, 1.0, 0.0, 0.0))).xy);
|
||||
#ifdef USE_TEXTURE_RECT
|
||||
local_rot.xy *= sign(src_rect.z);
|
||||
local_rot.zw *= sign(src_rect.w);
|
||||
#endif
|
||||
#endif // not using light angle
|
||||
|
||||
#endif
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue