Merge pull request #62364 from clayjohn/GLES3-sky-optimization

This commit is contained in:
Rémi Verschelde 2022-07-27 12:39:58 +02:00 committed by GitHub
commit d7a1acd229
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 105 additions and 120 deletions

View file

@ -497,8 +497,7 @@ void RasterizerSceneGLES3::_update_dirty_skys() {
while (sky) {
if (sky->radiance == 0) {
sky->mipmap_count = Image::get_image_required_mipmaps(sky->radiance_size, sky->radiance_size, Image::FORMAT_RGBA8) + 1;
sky->mipmap_count = Image::get_image_required_mipmaps(sky->radiance_size, sky->radiance_size, Image::FORMAT_RGBA8) - 2;
// Left uninitialized, will attach a texture at render time
glGenFramebuffers(1, &sky->radiance_framebuffer);
@ -837,7 +836,7 @@ void RasterizerSceneGLES3::_update_sky_radiance(Environment *p_env, const Projec
int max_processing_layer = sky->mipmap_count;
// Update radiance cubemap
if (sky->reflection_dirty && (sky->processing_layer >= max_processing_layer || update_single_frame)) {
if (sky->reflection_dirty && (sky->processing_layer > max_processing_layer || update_single_frame)) {
static const Vector3 view_normals[6] = {
Vector3(+1, 0, 0),
Vector3(-1, 0, 0),
@ -881,7 +880,7 @@ void RasterizerSceneGLES3::_update_sky_radiance(Environment *p_env, const Projec
}
if (update_single_frame) {
for (int i = 0; i < max_processing_layer; i++) {
for (int i = 0; i <= max_processing_layer; i++) {
_filter_sky_radiance(sky, i);
}
} else {
@ -891,13 +890,52 @@ void RasterizerSceneGLES3::_update_sky_radiance(Environment *p_env, const Projec
sky->reflection_dirty = false;
} else {
if (sky_mode == RS::SKY_MODE_INCREMENTAL && sky->processing_layer < max_processing_layer) {
if (sky_mode == RS::SKY_MODE_INCREMENTAL && sky->processing_layer <= max_processing_layer) {
_filter_sky_radiance(sky, sky->processing_layer);
sky->processing_layer++;
}
}
}
// Helper functions for IBL filtering
Vector3 importance_sample_GGX(Vector2 xi, float roughness4) {
// Compute distribution direction
float phi = 2.0 * Math_PI * xi.x;
float cos_theta = sqrt((1.0 - xi.y) / (1.0 + (roughness4 - 1.0) * xi.y));
float sin_theta = sqrt(1.0 - cos_theta * cos_theta);
// Convert to spherical direction
Vector3 half_vector;
half_vector.x = sin_theta * cos(phi);
half_vector.y = sin_theta * sin(phi);
half_vector.z = cos_theta;
return half_vector;
}
float distribution_GGX(float NdotH, float roughness4) {
float NdotH2 = NdotH * NdotH;
float denom = (NdotH2 * (roughness4 - 1.0) + 1.0);
denom = Math_PI * denom * denom;
return roughness4 / denom;
}
float radical_inverse_vdC(uint32_t bits) {
bits = (bits << 16) | (bits >> 16);
bits = ((bits & 0x55555555) << 1) | ((bits & 0xAAAAAAAA) >> 1);
bits = ((bits & 0x33333333) << 2) | ((bits & 0xCCCCCCCC) >> 2);
bits = ((bits & 0x0F0F0F0F) << 4) | ((bits & 0xF0F0F0F0) >> 4);
bits = ((bits & 0x00FF00FF) << 8) | ((bits & 0xFF00FF00) >> 8);
return float(bits) * 2.3283064365386963e-10;
}
Vector2 hammersley(uint32_t i, uint32_t N) {
return Vector2(float(i) / float(N), radical_inverse_vdC(i));
}
void RasterizerSceneGLES3::_filter_sky_radiance(Sky *p_sky, int p_base_layer) {
GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton();
@ -909,21 +947,60 @@ void RasterizerSceneGLES3::_filter_sky_radiance(Sky *p_sky, int p_base_layer) {
if (p_base_layer == 0) {
glGenerateMipmap(GL_TEXTURE_CUBE_MAP);
// Copy over base layer without filtering.
mode = CubemapFilterShaderGLES3::MODE_COPY;
//Copy over base layer
}
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, sky_globals.radical_inverse_vdc_cache_tex);
int size = p_sky->radiance_size >> p_base_layer;
glViewport(0, 0, size, size);
glBindVertexArray(sky_globals.screen_triangle_array);
material_storage->shaders.cubemap_filter_shader.version_bind_shader(scene_globals.cubemap_filter_shader_version, mode);
material_storage->shaders.cubemap_filter_shader.version_set_uniform(CubemapFilterShaderGLES3::SAMPLE_COUNT, sky_globals.ggx_samples, scene_globals.cubemap_filter_shader_version, mode);
material_storage->shaders.cubemap_filter_shader.version_set_uniform(CubemapFilterShaderGLES3::ROUGHNESS, float(p_base_layer) / (p_sky->mipmap_count - 1.0), scene_globals.cubemap_filter_shader_version, mode);
material_storage->shaders.cubemap_filter_shader.version_set_uniform(CubemapFilterShaderGLES3::FACE_SIZE, float(size), scene_globals.cubemap_filter_shader_version, mode);
if (p_base_layer > 0) {
const uint32_t sample_counts[4] = { 1, sky_globals.ggx_samples / 4, sky_globals.ggx_samples / 2, sky_globals.ggx_samples };
uint32_t sample_count = sample_counts[MIN(3, p_base_layer)];
float roughness = float(p_base_layer) / (p_sky->mipmap_count);
float roughness4 = roughness * roughness;
roughness4 *= roughness4;
float solid_angle_texel = 4.0 * Math_PI / float(6 * size * size);
LocalVector<float> sample_directions;
sample_directions.resize(4 * sample_count);
uint32_t index = 0;
float weight = 0.0;
for (uint32_t i = 0; i < sample_count; i++) {
Vector2 xi = hammersley(i, sample_count);
Vector3 dir = importance_sample_GGX(xi, roughness4);
Vector3 light_vec = (2.0 * dir.z * dir - Vector3(0.0, 0.0, 1.0));
if (light_vec.z < 0.0) {
continue;
}
sample_directions[index * 4] = light_vec.x;
sample_directions[index * 4 + 1] = light_vec.y;
sample_directions[index * 4 + 2] = light_vec.z;
float D = distribution_GGX(dir.z, roughness4);
float pdf = D * dir.z / (4.0 * dir.z) + 0.0001;
float solid_angle_sample = 1.0 / (float(sample_count) * pdf + 0.0001);
float mip_level = MAX(0.5 * log2(solid_angle_sample / solid_angle_texel) + float(MAX(1, p_base_layer - 3)), 1.0);
sample_directions[index * 4 + 3] = mip_level;
weight += light_vec.z;
index++;
}
glUniform4fv(material_storage->shaders.cubemap_filter_shader.version_get_uniform(CubemapFilterShaderGLES3::SAMPLE_DIRECTIONS_MIP, scene_globals.cubemap_filter_shader_version, mode), sample_count, sample_directions.ptr());
material_storage->shaders.cubemap_filter_shader.version_set_uniform(CubemapFilterShaderGLES3::WEIGHT, weight, scene_globals.cubemap_filter_shader_version, mode);
material_storage->shaders.cubemap_filter_shader.version_set_uniform(CubemapFilterShaderGLES3::SAMPLE_COUNT, index, scene_globals.cubemap_filter_shader_version, mode);
}
for (int i = 0; i < 6; i++) {
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_CUBE_MAP_POSITIVE_X + i, p_sky->radiance, p_base_layer);
@ -2596,8 +2673,12 @@ void fragment() {
material_storage->shaders.sky_shader.initialize(global_defines);
sky_globals.shader_default_version = material_storage->shaders.sky_shader.version_create();
material_storage->shaders.sky_shader.version_bind_shader(sky_globals.shader_default_version, SkyShaderGLES3::MODE_BACKGROUND);
}
material_storage->shaders.cubemap_filter_shader.initialize();
{
String global_defines;
global_defines += "\n#define MAX_SAMPLE_COUNT " + itos(sky_globals.ggx_samples) + "\n";
material_storage->shaders.cubemap_filter_shader.initialize(global_defines);
scene_globals.cubemap_filter_shader_version = material_storage->shaders.cubemap_filter_shader.version_create();
material_storage->shaders.cubemap_filter_shader.version_bind_shader(scene_globals.cubemap_filter_shader_version, CubemapFilterShaderGLES3::MODE_DEFAULT);
}
@ -2667,36 +2748,6 @@ void sky() {
glBindBuffer(GL_ARRAY_BUFFER, 0); //unbind
}
// Radical inverse vdc cache texture used for cubemap filtering.
{
glGenTextures(1, &sky_globals.radical_inverse_vdc_cache_tex);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, sky_globals.radical_inverse_vdc_cache_tex);
uint8_t radical_inverse[512];
for (uint32_t i = 0; i < 512; i++) {
uint32_t bits = i;
bits = (bits << 16) | (bits >> 16);
bits = ((bits & 0x55555555) << 1) | ((bits & 0xAAAAAAAA) >> 1);
bits = ((bits & 0x33333333) << 2) | ((bits & 0xCCCCCCCC) >> 2);
bits = ((bits & 0x0F0F0F0F) << 4) | ((bits & 0xF0F0F0F0) >> 4);
bits = ((bits & 0x00FF00FF) << 8) | ((bits & 0xFF00FF00) >> 8);
float value = float(bits) * 2.3283064365386963e-10;
radical_inverse[i] = uint8_t(CLAMP(value * 255.0, 0, 255));
}
glTexImage2D(GL_TEXTURE_2D, 0, GL_RED, 512, 1, 0, GL_RED, GL_UNSIGNED_BYTE, radical_inverse);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); //need this for proper sampling
glBindTexture(GL_TEXTURE_2D, 0);
}
#ifdef GLES_OVER_GL
glEnable(_EXT_TEXTURE_CUBE_MAP_SEAMLESS);
#endif

View file

@ -29,19 +29,15 @@ uniform samplerCube source_cube; //texunit:0
/* clang-format on */
uniform int face_id;
uniform float roughness;
uniform float face_size;
uniform int sample_count;
//Todo, profile on low end hardware to see if fixed loop is faster
#ifdef USE_FIXED_SAMPLES
#define FIXED_SAMPLE_COUNT 32
#ifndef MODE_DIRECT_WRITE
uniform int sample_count;
uniform vec4 sample_directions_mip[MAX_SAMPLE_COUNT];
uniform float weight;
#endif
in highp vec2 uv_interp;
uniform sampler2D radical_inverse_vdc_cache; // texunit:1
layout(location = 0) out vec4 frag_color;
#define M_PI 3.14159265359
@ -93,48 +89,6 @@ vec3 texelCoordToVec(vec2 uv, int faceID) {
return normalize(result);
}
vec3 ImportanceSampleGGX(vec2 xi, float roughness4) {
// Compute distribution direction
float Phi = 2.0 * M_PI * xi.x;
float CosTheta = sqrt((1.0 - xi.y) / (1.0 + (roughness4 - 1.0) * xi.y));
float SinTheta = sqrt(1.0 - CosTheta * CosTheta);
// Convert to spherical direction
vec3 H;
H.x = SinTheta * cos(Phi);
H.y = SinTheta * sin(Phi);
H.z = CosTheta;
return H;
}
float DistributionGGX(float NdotH, float roughness4) {
float NdotH2 = NdotH * NdotH;
float denom = (NdotH2 * (roughness4 - 1.0) + 1.0);
denom = M_PI * denom * denom;
return roughness4 / denom;
}
// https://graphicrants.blogspot.com.au/2013/08/specular-brdf-reference.html
float GGX(float NdotV, float a) {
float k = a / 2.0;
return NdotV / (NdotV * (1.0 - k) + k);
}
// https://graphicrants.blogspot.com.au/2013/08/specular-brdf-reference.html
float G_Smith(float a, float nDotV, float nDotL) {
return GGX(nDotL, a * a) * GGX(nDotV, a * a);
}
float radical_inverse_VdC(int i) {
return texture(radical_inverse_vdc_cache, vec2(float(i) / 512.0, 0.0)).x;
}
vec2 Hammersley(int i, int N) {
return vec2(float(i) / float(N), radical_inverse_VdC(i));
}
void main() {
vec3 color = vec3(0.0);
vec2 uv = uv_interp;
@ -145,9 +99,6 @@ void main() {
#else
vec4 sum = vec4(0.0);
float solid_angle_texel = 4.0 * M_PI / (6.0 * face_size * face_size);
float roughness2 = roughness * roughness;
float roughness4 = roughness2 * roughness2;
vec3 UpVector = abs(N.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0);
mat3 T;
T[0] = normalize(cross(UpVector, N));
@ -155,32 +106,15 @@ void main() {
T[2] = N;
for (int sample_num = 0; sample_num < sample_count; sample_num++) {
vec2 xi = Hammersley(sample_num, sample_count);
vec3 H = T * ImportanceSampleGGX(xi, roughness4);
float NdotH = dot(N, H);
vec3 L = (2.0 * NdotH * H - N);
float NdotL = clamp(dot(N, L), 0.0, 1.0);
if (NdotL > 0.0) {
float D = DistributionGGX(NdotH, roughness4);
float pdf = D * NdotH / (4.0 * NdotH) + 0.0001;
float solid_angle_sample = 1.0 / (float(sample_count) * pdf + 0.0001);
float mipLevel = roughness == 0.0 ? 0.0 : 0.5 * log2(solid_angle_sample / solid_angle_texel);
vec3 val = textureLod(source_cube, L, mipLevel).rgb;
// Mix using linear
val = srgb_to_linear(val);
sum.rgb += val * NdotL;
sum.a += NdotL;
}
vec4 sample = sample_directions_mip[sample_num];
vec3 L = T * sample.xyz;
vec3 val = textureLod(source_cube, L, sample.w).rgb;
// Mix using linear
val = srgb_to_linear(val);
sum.rgb += val * sample.z;
}
sum /= sum.a;
sum /= weight;
sum.rgb = linear_to_srgb(sum.rgb);
frag_color = vec4(sum.rgb, 1.0);